vicuna-clip

Running on Zero

App Files Files Community

ford442 committed on Feb 10

Commit

913ceff

verified ·

1 Parent(s): 9b1d5ab

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -22

app.py CHANGED Viewed

@@ -1,43 +1,29 @@
 import torch
 import gradio as gr
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import soundfile as sf
 import numpy as np
 from espnet2.bin.tts_inference import Text2Speech
-import IPython.display as ipd  # For notebook use (optional)
 import os
 from huggingface_hub import snapshot_download
-# --- Whisper (ASR) Setup ---
-ASR_MODEL_NAME = "openai/whisper-large-v2"
-asr_device = "cuda" if torch.cuda.is_available() else "cpu"
-asr_pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=ASR_MODEL_NAME,
-    chunk_length_s=30,
-    device=asr_device,
-)
-all_special_ids = asr_pipe.tokenizer.all_special_ids
-transcribe_token_id = all_special_ids[-5]
-translate_token_id = all_special_ids[-6]
 # --- VITS (TTS) Setup ---
 TTS_MODEL_NAME = "espnet/kan_bayashi_ljspeech_vits"
 tts_device = "cuda" if torch.cuda.is_available() else "cpu"
-# Download the ESPnet model files (using huggingface_hub)
-model_dir = "vits_model"  # Choose a directory name
 if not os.path.exists(model_dir):
     os.makedirs(model_dir)
-    try:
-        download_path = snapshot_download(repo_id=TTS_MODEL_NAME, local_dir=model_dir, local_dir_use_symlinks=False)
-        print(f"Downloaded ESPnet model to: {download_path}")
-    except Exception as e:
-        print(f"Error downloading model: {e}")
-        raise
 # Construct *absolute* paths to the config and model files.
-#   This is the KEY change.
 config_path = os.path.join(download_path, "exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/config.yaml")
 model_path = os.path.join(download_path, "exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth")

+import spaces
 import torch
 import gradio as gr
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import soundfile as sf
 import numpy as np
 from espnet2.bin.tts_inference import Text2Speech
+import IPython.display as ipd
 import os
 from huggingface_hub import snapshot_download
+# ... (Whisper and Vicuna setup remain the same)
 # --- VITS (TTS) Setup ---
 TTS_MODEL_NAME = "espnet/kan_bayashi_ljspeech_vits"
 tts_device = "cuda" if torch.cuda.is_available() else "cpu"
+# Download the ESPnet model files and get the download path
+model_dir = "vits_model"
 if not os.path.exists(model_dir):
     os.makedirs(model_dir)
+download_path = snapshot_download(repo_id=TTS_MODEL_NAME, local_dir=model_dir, local_dir_use_symlinks=False)
+print(f"Downloaded ESPnet model to: {download_path}") # Print the path!
 # Construct *absolute* paths to the config and model files.
 config_path = os.path.join(download_path, "exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/config.yaml")
 model_path = os.path.join(download_path, "exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth")