vicuna-clip

Sleeping

ford442 committed on Feb 10

Commit

d7978a0

verified ·

1 Parent(s): 6fe541d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import torch
 import gradio as gr
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoProcessor
-# Note: No AutoModelForTextToSpeech needed
 import soundfile as sf
 import numpy as np
-from espnet2.bin.tts_inference import Text2Speech  # Import Text2Speech from espnet2
 # --- Whisper (ASR) Setup ---
 ASR_MODEL_NAME = "openai/whisper-large-v2"
@@ -20,12 +21,24 @@ all_special_ids = asr_pipe.tokenizer.all_special_ids
 transcribe_token_id = all_special_ids[-5]
 translate_token_id = all_special_ids[-6]
-# --- VITS (TTS) Setup - Using espnet2 ---
 TTS_MODEL_NAME = "espnet/kan_bayashi_ljspeech_vits"
 tts_device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load the Text2Speech model from espnet2
-tts_model = Text2Speech.from_pretrained(TTS_MODEL_NAME).to(tts_device)
 # --- Vicuna (LLM) Setup ---

 import torch
 import gradio as gr
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import soundfile as sf
 import numpy as np
+# No more fairseq imports
+# No espnet2 imports here
+import IPython.display as ipd  # For notebook use (optional)
+import os, pathlib
 # --- Whisper (ASR) Setup ---
 ASR_MODEL_NAME = "openai/whisper-large-v2"
 transcribe_token_id = all_special_ids[-5]
 translate_token_id = all_special_ids[-6]
+# --- VITS (TTS) Setup ---
 TTS_MODEL_NAME = "espnet/kan_bayashi_ljspeech_vits"
 tts_device = "cuda" if torch.cuda.is_available() else "cpu"
+# Download the ESPnet model (if it hasn't been downloaded yet)
+# The try-except block prints an installation hint when the download fails, then re-raises.
+try:
+  from espnet_model_zoo.downloader import ModelDownloader
+  d = ModelDownloader()
+  tts_model_path = d.download_and_unpack(TTS_MODEL_NAME)
+except Exception as e:
+  print(f"Error downloading ESPnet model: {e}")
+  print("Make sure you have espnet_model_zoo installed: `pip install espnet_model_zoo`")
+  raise  # Re-raise the exception to stop execution
+# Now import and set up the text-to-speech model.
+from espnet2.bin.tts_inference import Text2Speech
+tts_model = Text2Speech(tts_model_path, device=tts_device)
 # --- Vicuna (LLM) Setup ---