ford442 committed on
Commit
913ceff
·
verified ·
1 Parent(s): 9b1d5ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -22
app.py CHANGED
@@ -1,43 +1,29 @@
 
1
  import torch
2
  import gradio as gr
3
  from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
4
  import soundfile as sf
5
  import numpy as np
6
  from espnet2.bin.tts_inference import Text2Speech
7
- import IPython.display as ipd # For notebook use (optional)
8
  import os
9
  from huggingface_hub import snapshot_download
10
 
11
- # --- Whisper (ASR) Setup ---
12
- ASR_MODEL_NAME = "openai/whisper-large-v2"
13
- asr_device = "cuda" if torch.cuda.is_available() else "cpu"
14
- asr_pipe = pipeline(
15
- task="automatic-speech-recognition",
16
- model=ASR_MODEL_NAME,
17
- chunk_length_s=30,
18
- device=asr_device,
19
- )
20
- all_special_ids = asr_pipe.tokenizer.all_special_ids
21
- transcribe_token_id = all_special_ids[-5]
22
- translate_token_id = all_special_ids[-6]
23
 
24
  # --- VITS (TTS) Setup ---
25
  TTS_MODEL_NAME = "espnet/kan_bayashi_ljspeech_vits"
26
  tts_device = "cuda" if torch.cuda.is_available() else "cpu"
27
 
28
- # Download the ESPnet model files (using huggingface_hub)
29
- model_dir = "vits_model" # Choose a directory name
30
  if not os.path.exists(model_dir):
31
  os.makedirs(model_dir)
32
- try:
33
- download_path = snapshot_download(repo_id=TTS_MODEL_NAME, local_dir=model_dir, local_dir_use_symlinks=False)
34
- print(f"Downloaded ESPnet model to: {download_path}")
35
- except Exception as e:
36
- print(f"Error downloading model: {e}")
37
- raise
38
 
39
  # Construct *absolute* paths to the config and model files.
40
- # This is the KEY change.
41
  config_path = os.path.join(download_path, "exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/config.yaml")
42
  model_path = os.path.join(download_path, "exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth")
43
 
 
1
+ import spaces
2
  import torch
3
  import gradio as gr
4
  from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
5
  import soundfile as sf
6
  import numpy as np
7
  from espnet2.bin.tts_inference import Text2Speech
8
+ import IPython.display as ipd
9
  import os
10
  from huggingface_hub import snapshot_download
11
 
12
+ # ... (Whisper and Vicuna setup remain the same)
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # --- VITS (TTS) Setup ---
15
  TTS_MODEL_NAME = "espnet/kan_bayashi_ljspeech_vits"
16
  tts_device = "cuda" if torch.cuda.is_available() else "cpu"
17
 
18
+ # Download the ESPnet model files and get the download path
19
+ model_dir = "vits_model"
20
  if not os.path.exists(model_dir):
21
  os.makedirs(model_dir)
22
+
23
+ download_path = snapshot_download(repo_id=TTS_MODEL_NAME, local_dir=model_dir, local_dir_use_symlinks=False)
24
+ print(f"Downloaded ESPnet model to: {download_path}") # Print the path!
 
 
 
25
 
26
  # Construct *absolute* paths to the config and model files.
 
27
  config_path = os.path.join(download_path, "exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/config.yaml")
28
  model_path = os.path.join(download_path, "exp/tts_train_vits_raw_phn_tacotron_g2p_en_no_space/train.total_count.ave_10best.pth")
29