ford442 committed on
Commit
f055d9c
·
verified ·
1 Parent(s): f4d388e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -7,7 +7,6 @@ import numpy as np
7
  import IPython.display as ipd
8
  import os
9
 
10
- # Load models outside the function (at startup)
11
  ASR_MODEL_NAME = "openai/whisper-large-v2"
12
  asr_pipe = pipeline(
13
  task="automatic-speech-recognition",
@@ -65,4 +64,20 @@ def process_audio(microphone, state, task="transcribe"):
65
 
66
  return updated_state, updated_state, audio_output
67
 
68
- # ... (rest of the Gradio code remains the same)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import IPython.display as ipd
8
  import os
9
 
 
10
  ASR_MODEL_NAME = "openai/whisper-large-v2"
11
  asr_pipe = pipeline(
12
  task="automatic-speech-recognition",
 
64
 
65
  return updated_state, updated_state, audio_output
66
 
67
# Gradio front-end for the demo: a single tab that records microphone audio,
# runs it through process_audio (ASR -> Vicuna -> Bark, defined above), and
# shows the resulting text alongside the synthesized speech.
with gr.Blocks(title="Whisper, Vicuna, & Bark Demo") as demo:
    gr.Markdown("# Speech-to-Text-to-Speech Demo with Vicuna and Bark")
    gr.Markdown("Speak into your microphone, get a transcription, Vicuna will process it, and then you'll hear the result!")

    with gr.Tab("Transcribe & Synthesize"):
        # Widgets: microphone recorder in, text transcript + numpy audio out.
        microphone = gr.Audio(sources="microphone", type="filepath", label="Speak Here")
        response_box = gr.Textbox(lines=5, label="Transcription and Vicuna Response")
        speech_out = gr.Audio(label="Synthesized Speech", type="numpy")

        # Running transcript state, fed back into process_audio on each call
        # (process_audio returns the updated state twice: once for display,
        # once to persist it).
        convo_state = gr.State(value="")

        # A new recording fires the whole pipeline.
        microphone.change(
            fn=process_audio,
            inputs=[microphone, convo_state],
            outputs=[response_box, convo_state, speech_out],
        )

# share=False: serve locally only (HF Spaces provides its own public URL).
demo.launch(share=False)