ford442 committed on
Commit
eb936fd
·
verified ·
1 Parent(s): a736521

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -27,7 +27,7 @@ VICUNA_MODEL_NAME = "lmsys/vicuna-7b-v1.5"
27
  vicuna_tokenizer = AutoTokenizer.from_pretrained(VICUNA_MODEL_NAME)
28
  vicuna_model = AutoModelForCausalLM.from_pretrained(
29
  VICUNA_MODEL_NAME,
30
- torch_dtype=torch.bfloat16,
31
  device_map="auto",
32
  )
33
 
@@ -45,14 +45,14 @@ def process_audio(microphone, state, task="transcribe"):
45
  prompt = f"{system_prompt}\nUser: {text}"
46
  with torch.no_grad():
47
  vicuna_input = vicuna_tokenizer(prompt, return_tensors="pt").to('cuda')
48
- vicuna_output = vicuna_model.generate(**vicuna_input, max_new_tokens=256)
49
  vicuna_response = vicuna_tokenizer.decode(vicuna_output[0], skip_special_tokens=True)
50
  vicuna_response = vicuna_response.replace(prompt, "").strip()
51
  updated_state = state + "\n" + vicuna_response
52
  try:
53
  with torch.no_grad():
54
  inputs = tts_processor(vicuna_response, return_tensors="pt").to('cuda')
55
- output = tts_model.generate(**inputs, do_sample=True)
56
  waveform_np = output[0].cpu().numpy()
57
  audio_output = (tts_model.generation_config.sample_rate, waveform_np)
58
  except Exception as e:
 
27
  vicuna_tokenizer = AutoTokenizer.from_pretrained(VICUNA_MODEL_NAME)
28
  vicuna_model = AutoModelForCausalLM.from_pretrained(
29
  VICUNA_MODEL_NAME,
30
+ torch_dtype=torch.float16,
31
  device_map="auto",
32
  )
33
 
 
45
  prompt = f"{system_prompt}\nUser: {text}"
46
  with torch.no_grad():
47
  vicuna_input = vicuna_tokenizer(prompt, return_tensors="pt").to('cuda')
48
+ vicuna_output = vicuna_model.generate(**vicuna_input, max_new_tokens=192)
49
  vicuna_response = vicuna_tokenizer.decode(vicuna_output[0], skip_special_tokens=True)
50
  vicuna_response = vicuna_response.replace(prompt, "").strip()
51
  updated_state = state + "\n" + vicuna_response
52
  try:
53
  with torch.no_grad():
54
  inputs = tts_processor(vicuna_response, return_tensors="pt").to('cuda')
55
+ output = tts_model.generate(**inputs, do_sample=False)
56
  waveform_np = output[0].cpu().numpy()
57
  audio_output = (tts_model.generation_config.sample_rate, waveform_np)
58
  except Exception as e: