ford442 committed on
Commit
f055d9c
·
verified ·
1 Parent(s): f4d388e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -7,7 +7,6 @@ import numpy as np
7
  import IPython.display as ipd
8
  import os
9
 
10
- # Load models outside the function (at startup)
11
  ASR_MODEL_NAME = "openai/whisper-large-v2"
12
  asr_pipe = pipeline(
13
  task="automatic-speech-recognition",
@@ -65,4 +64,20 @@ def process_audio(microphone, state, task="transcribe"):
65
 
66
  return updated_state, updated_state, audio_output
67
 
68
- # ... (rest of the Gradio code remains the same)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import IPython.display as ipd
8
  import os
9
 
 
10
  ASR_MODEL_NAME = "openai/whisper-large-v2"
11
  asr_pipe = pipeline(
12
  task="automatic-speech-recognition",
 
64
 
65
  return updated_state, updated_state, audio_output
66
 
67
# Gradio front-end for the demo: a single tab that records microphone audio,
# runs it through process_audio (ASR -> Vicuna -> Bark, defined above), and
# shows the resulting text alongside the synthesized speech.
with gr.Blocks(title="Whisper, Vicuna, & Bark Demo") as demo:
    gr.Markdown("# Speech-to-Text-to-Speech Demo with Vicuna and Bark")
    gr.Markdown("Speak into your microphone, get a transcription, Vicuna will process it, and then you'll hear the result!")

    with gr.Tab("Transcribe & Synthesize"):
        # Widgets: microphone recorder in, text transcript + numpy audio out.
        microphone = gr.Audio(sources="microphone", type="filepath", label="Speak Here")
        response_box = gr.Textbox(lines=5, label="Transcription and Vicuna Response")
        speech_out = gr.Audio(label="Synthesized Speech", type="numpy")

        # Running transcript state, fed back into process_audio on each call
        # (process_audio returns the updated state twice: once for display,
        # once to persist it).
        convo_state = gr.State(value="")

        # A new recording fires the whole pipeline.
        microphone.change(
            fn=process_audio,
            inputs=[microphone, convo_state],
            outputs=[response_box, convo_state, speech_out],
        )

# share=False: serve locally only (HF Spaces provides its own public URL).
demo.launch(share=False)