Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,6 @@ import numpy as np
|
|
7 |
import IPython.display as ipd
|
8 |
import os
|
9 |
|
10 |
-
# Load models outside the function (at startup)
|
11 |
ASR_MODEL_NAME = "openai/whisper-large-v2"
|
12 |
asr_pipe = pipeline(
|
13 |
task="automatic-speech-recognition",
|
@@ -65,4 +64,20 @@ def process_audio(microphone, state, task="transcribe"):
|
|
65 |
|
66 |
return updated_state, updated_state, audio_output
|
67 |
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
import IPython.display as ipd
|
8 |
import os
|
9 |
|
|
|
10 |
ASR_MODEL_NAME = "openai/whisper-large-v2"
|
11 |
asr_pipe = pipeline(
|
12 |
task="automatic-speech-recognition",
|
|
|
64 |
|
65 |
return updated_state, updated_state, audio_output
|
66 |
|
67 |
# Gradio UI wiring: microphone audio goes through process_audio (Whisper
# transcription -> Vicuna response -> Bark synthesis) and the results are
# shown as text plus playable audio.
with gr.Blocks(title="Whisper, Vicuna, & Bark Demo") as demo:
    gr.Markdown("# Speech-to-Text-to-Speech Demo with Vicuna and Bark")
    gr.Markdown("Speak into your microphone, get a transcription, Vicuna will process it, and then you'll hear the result!")

    with gr.Tab("Transcribe & Synthesize"):
        # Input: recorded speech, passed to process_audio as a file path.
        mic_input = gr.Audio(sources="microphone", type="filepath", label="Speak Here")
        # Outputs: combined transcription/LLM text and the synthesized waveform.
        transcription_output = gr.Textbox(lines=5, label="Transcription and Vicuna Response")
        audio_output = gr.Audio(label="Synthesized Speech", type="numpy")
        # Conversation state threaded through process_audio between turns.
        transcription_state = gr.State(value="")

        # Fire the combined pipeline whenever the recording changes.
        mic_input.change(
            fn=process_audio,
            inputs=[mic_input, transcription_state],
            outputs=[transcription_output, transcription_state, audio_output],
        )

demo.launch(share=False)