fffiloni committed · Commit c5fe591 · verified · 1 Parent(s): 8608d24

Update app.py

Files changed (1):
  1. app.py: +76 −64
app.py CHANGED
@@ -59,80 +59,92 @@ def gradio_answer(speech, text_input, num_beams, temperature, top_p):

     return llm_message[0]

-title = """<h1 align="center">SALMONN: Speech Audio Language Music Open Neural Network</h1>"""
+title = """<h1 style="text-align: center;">SALMONN: Speech Audio Language Music Open Neural Network</h1>"""
 image_src = """<h1 align="center"><a href="https://github.com/bytedance/SALMONN"><img src="https://raw.githubusercontent.com/bytedance/SALMONN/main/resource/salmon.png", alt="SALMONN" border="0" style="margin: 0 auto; height: 200px;" /></a> </h1>"""
-description = """<h3>This is the demo of SALMONN-7B. To experience SALMONN-13B, you can go to <a href="https://bytedance.github.io/SALMONN">https://bytedance.github.io/SALMONN</a>.\n Upload your audio and start chatting!</h3>"""
+description = """<h3 style="text-align: center;">This is the simplified demo for SALMONN-7B. To experience SALMONN-13B, you can go to <a href="https://bytedance.github.io/SALMONN">https://bytedance.github.io/SALMONN</a>.\n Upload your audio and ask a question!</h3>"""

+css = """
+div#col-container {
+    margin: 0 auto;
+    max-width: 840px;
+}
+"""

-with gr.Blocks() as demo:
-    gr.Markdown(title)
-    gr.Markdown(image_src)
-    gr.Markdown(description)
-
-    with gr.Row():
-        with gr.Column():
-            speech = gr.Audio(label="Audio", type='filepath')
-
-            num_beams = gr.Slider(
-                minimum=1,
-                maximum=10,
-                value=4,
-                step=1,
-                interactive=True,
-                label="beam search numbers",
-            )
-
-            top_p = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.9,
-                step=0.1,
-                interactive=True,
-                label="top p",
-            )
-
-            temperature = gr.Slider(
-                minimum=0.8,
-                maximum=2.0,
-                value=1.0,
-                step=0.1,
-                interactive=False,
-                label="temperature",
-            )
-
-        with gr.Column():
-
-            text_input = gr.Textbox(label='User', placeholder='Please upload your audio first', interactive=True)
-            answer = gr.Textbox(label="Salmonn answer")
-
-    with gr.Row():
-        examples = gr.Examples(
-            examples = [
-                ["resource/audio_demo/gunshots.wav", "Recognize the speech and give me the transcription."],
-                ["resource/audio_demo/gunshots.wav", "Listen to the speech and translate it into German."],
-                ["resource/audio_demo/gunshots.wav", "Provide the phonetic transcription for the speech."],
-                ["resource/audio_demo/gunshots.wav", "Please describe the audio."],
-                ["resource/audio_demo/gunshots.wav", "Recognize what the speaker says and describe the background audio at the same time."],
-                ["resource/audio_demo/gunshots.wav", "Use your strong reasoning skills to answer the speaker's question in detail based on the background sound."],
-                ["resource/audio_demo/duck.wav", "Please list each event in the audio in order."],
-                ["resource/audio_demo/duck.wav", "Based on the audio, write a story in detail. Your story should be highly related to the audio."],
-                ["resource/audio_demo/duck.wav", "How many speakers did you hear in this audio? Who are they?"],
-                ["resource/audio_demo/excitement.wav", "Describe the emotion of the speaker."],
-                ["resource/audio_demo/mountain.wav", "Please answer the question in detail."],
-                ["resource/audio_demo/jobs.wav", "Give me only three keywords of the text. Explain your reason."],
-                ["resource/audio_demo/2_30.wav", "What is the time mentioned in the speech?"],
-                ["resource/audio_demo/music.wav", "Please describe the music in detail."],
-                ["resource/audio_demo/music.wav", "What is the emotion of the music? Explain the reason in detail."],
-                ["resource/audio_demo/music.wav", "Can you write some lyrics of the song?"],
-                ["resource/audio_demo/music.wav", "Give me a title of the music based on its rhythm and emotion."]
-            ],
-            inputs=[speech, text_input]
-        )
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown(title)
+        #gr.Markdown(image_src)
+        gr.Markdown(description)
+
+        with gr.Row():
+            with gr.Column():
+                speech = gr.Audio(label="Audio", type='filepath')
+
+                with gr.Accordion("Advanced Settings", open=False):
+                    num_beams = gr.Slider(
+                        minimum=1,
+                        maximum=10,
+                        value=4,
+                        step=1,
+                        interactive=True,
+                        label="beam search numbers",
+                    )
+
+                    top_p = gr.Slider(
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=0.9,
+                        step=0.1,
+                        interactive=True,
+                        label="top p",
+                    )
+
+                    temperature = gr.Slider(
+                        minimum=0.8,
+                        maximum=2.0,
+                        value=1.0,
+                        step=0.1,
+                        interactive=False,
+                        label="temperature",
+                    )
+
+            with gr.Column():
+                with gr.Row():
+                    text_input = gr.Textbox(label='User question', placeholder='Please upload your audio first', interactive=True)
+                    submit_btn = gr.Button("Submit")
+                answer = gr.Textbox(label="Salmonn answer")
+
+        with gr.Row():
+            examples = gr.Examples(
+                examples = [
+                    ["resource/audio_demo/gunshots.wav", "Recognize the speech and give me the transcription."],
+                    ["resource/audio_demo/gunshots.wav", "Listen to the speech and translate it into German."],
+                    ["resource/audio_demo/gunshots.wav", "Provide the phonetic transcription for the speech."],
+                    ["resource/audio_demo/gunshots.wav", "Please describe the audio."],
+                    ["resource/audio_demo/gunshots.wav", "Recognize what the speaker says and describe the background audio at the same time."],
+                    ["resource/audio_demo/gunshots.wav", "Use your strong reasoning skills to answer the speaker's question in detail based on the background sound."],
+                    ["resource/audio_demo/duck.wav", "Please list each event in the audio in order."],
+                    ["resource/audio_demo/duck.wav", "Based on the audio, write a story in detail. Your story should be highly related to the audio."],
+                    ["resource/audio_demo/duck.wav", "How many speakers did you hear in this audio? Who are they?"],
+                    ["resource/audio_demo/excitement.wav", "Describe the emotion of the speaker."],
+                    ["resource/audio_demo/mountain.wav", "Please answer the question in detail."],
+                    ["resource/audio_demo/jobs.wav", "Give me only three keywords of the text. Explain your reason."],
+                    ["resource/audio_demo/2_30.wav", "What is the time mentioned in the speech?"],
+                    ["resource/audio_demo/music.wav", "Please describe the music in detail."],
+                    ["resource/audio_demo/music.wav", "What is the emotion of the music? Explain the reason in detail."],
+                    ["resource/audio_demo/music.wav", "Can you write some lyrics of the song?"],
+                    ["resource/audio_demo/music.wav", "Give me a title of the music based on its rhythm and emotion."]
+                ],
+                inputs=[speech, text_input]
+            )


     text_input.submit(
         gradio_answer, [speech, text_input, num_beams, temperature, top_p], [answer]
     )
+    submit_btn.click(
+        gradio_answer, [speech, text_input, num_beams, temperature, top_p], [answer]
+    )


 # demo.launch(share=True, enable_queue=True, server_port=int(args.port))
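
For context, a minimal, self-contained sketch of the UI pattern this commit introduces: a width-constrained #col-container column, an "Advanced Settings" accordion for the decoding sliders, and a Submit button wired to the same handler as the textbox. The gradio_answer stub below is a placeholder for illustration only; the real handler in app.py runs SALMONN inference.

import gradio as gr

def gradio_answer(speech, text_input, num_beams, temperature, top_p):
    # Placeholder for illustration: the real app.py handler feeds the audio
    # and prompt to the SALMONN model and returns its reply.
    return f"(stub) audio={speech}, prompt={text_input!r}, beams={num_beams}"

css = """
div#col-container {
    margin: 0 auto;
    max-width: 840px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown('<h1 style="text-align: center;">SALMONN demo (layout sketch)</h1>')
        with gr.Row():
            with gr.Column():
                speech = gr.Audio(label="Audio", type="filepath")
                # Decoding controls are tucked into a collapsed accordion, as in the commit.
                with gr.Accordion("Advanced Settings", open=False):
                    num_beams = gr.Slider(1, 10, value=4, step=1, label="beam search numbers")
                    top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="top p")
                    temperature = gr.Slider(0.8, 2.0, value=1.0, step=0.1, interactive=False, label="temperature")
            with gr.Column():
                with gr.Row():
                    text_input = gr.Textbox(label="User question", placeholder="Please upload your audio first")
                    submit_btn = gr.Button("Submit")
                answer = gr.Textbox(label="Salmonn answer")

    # Pressing Enter in the textbox and clicking the button trigger the same handler.
    text_input.submit(gradio_answer, [speech, text_input, num_beams, temperature, top_p], [answer])
    submit_btn.click(gradio_answer, [speech, text_input, num_beams, temperature, top_p], [answer])

demo.launch()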