ford442 committed
Commit d70f358 · verified · 1 Parent(s): 4b1b54b

Update app.py

Files changed (1)
  1. app.py +13 -4
app.py CHANGED
@@ -2,12 +2,22 @@ import spaces
 import torch
 import gradio as gr
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoModel
-#import soundfile as sf # Removed: Not directly used for outputting audio to Gradio
 import numpy as np
 from espnet2.bin.tts_inference import Text2Speech
 import yaml # Import yaml for config loading (though not used in the current code, kept for potential future use)
 import os # Kept for potential future use (e.g., if loading config from files)
 import requests # Corrected: Import the 'requests' library
+import nltk # Import nltk
+
+# Download required NLTK resources
+try:
+    nltk.data.find('taggers/averaged_perceptron_tagger_eng')
+except LookupError:
+    nltk.download('averaged_perceptron_tagger_eng')
+try:
+    nltk.data.find('corpora/cmudict') # Check for cmudict
+except LookupError:
+    nltk.download('cmudict')
 
 
 # Load Whisper model
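The added guard probes for each resource with nltk.data.find() and downloads only on LookupError, so the Space does not re-fetch NLTK data on every restart. The tagger and CMUdict are plausibly what the espnet2 English text front end (e.g. g2p_en) needs for grapheme-to-phoneme conversion, though the commit itself does not say so. A minimal sketch of the same pattern factored into a helper; the helper name is hypothetical:

import nltk

# Hypothetical helper generalizing the guard used in the commit: probe for a
# resource with nltk.data.find() and download it only if the probe fails.
def ensure_nltk_resource(find_path: str, package: str) -> None:
    try:
        nltk.data.find(find_path)
    except LookupError:
        nltk.download(package)

ensure_nltk_resource('taggers/averaged_perceptron_tagger_eng',
                     'averaged_perceptron_tagger_eng')
ensure_nltk_resource('corpora/cmudict', 'cmudict')

Factoring the guard out keeps each additional resource to a one-line call.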
@@ -25,8 +35,7 @@ translate_token_id = all_special_ids[-6]
 
 def _preload_and_load_models():
     global vicuna_tokenizer, vicuna_model
-    #VICUNA_MODEL_NAME = "EleutherAI/gpt-neo-2.7B" # Or another model
-    VICUNA_MODEL_NAME = "lmsys/vicuna-13b-v1.5" # Or another model
+    VICUNA_MODEL_NAME = "EleutherAI/gpt-neo-2.7B" # Or another model
     vicuna_tokenizer = AutoTokenizer.from_pretrained(VICUNA_MODEL_NAME)
     vicuna_model = AutoModelForCausalLM.from_pretrained(
         VICUNA_MODEL_NAME,
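This hunk swaps the lmsys/vicuna-13b-v1.5 checkpoint for the far smaller EleutherAI/gpt-neo-2.7B while keeping the vicuna_* variable names. The diff truncates the from_pretrained call after the model name, so everything past that point below is an assumption; a minimal sketch of how such a loader commonly continues:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "EleutherAI/gpt-neo-2.7B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # assumption: the truncated call may differ
)
model.to('cuda')  # assumption: matches the .to('cuda') usage later in app.py
model.eval()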
@@ -52,7 +61,7 @@ def process_audio(microphone, state, task="transcribe"):
     prompt = f"{system_prompt}\nUser: {text}"
     with torch.no_grad():
         vicuna_input = vicuna_tokenizer(prompt, return_tensors="pt").to('cuda')
-        vicuna_output = vicuna_model.generate(**vicuna_input, max_new_tokens=96)
+        vicuna_output = vicuna_model.generate(**vicuna_input, max_new_tokens=192)
         vicuna_response = vicuna_tokenizer.decode(vicuna_output[0], skip_special_tokens=True) # Access the first sequence [0]
         vicuna_response = vicuna_response.replace(prompt, "").strip()
         updated_state = state + "\nUser: " + text + "\n" + "Tutor: " + vicuna_response # Include user input in state
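The only change here doubles the generation budget from 96 to 192 new tokens, allowing roughly twice as long tutor replies. Note that the surrounding code strips the echoed prompt with a string replace, which silently fails whenever decoding does not reproduce the prompt byte-for-byte; a sketch of a length-based alternative (generate_reply is hypothetical, not part of the commit):

import torch

# Hypothetical helper, not part of the commit: return only the newly
# generated text by slicing off the prompt tokens by length.
def generate_reply(model, tokenizer, prompt: str, max_new_tokens: int = 192) -> str:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Decoder-only models echo the prompt before the new tokens.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()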
 