Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,17 +1,7 @@
-# Loading the ST Model (Whisper)
 import os
 import torch
 from transformers import pipeline

-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
-
-# Take audio and return translated text
-def transcribe(audio):
-    outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
-    return outputs["text"]
-
-
 # The LLM Model
 from huggingface_hub import HfFolder
 from openai import OpenAI
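The lines elided between these two hunks construct the client = OpenAI( object referenced in the next hunk header. For context, a minimal sketch of how such a client is typically pointed at a TGI-backed endpoint; the base URL below is a placeholder, and the use of HfFolder.get_token() is an assumption based on the imports above, not something shown in this diff:

from huggingface_hub import HfFolder
from openai import OpenAI

# Sketch only -- the Space's real endpoint URL lives in the elided lines and is not shown here.
client = OpenAI(
    base_url="https://<your-tgi-endpoint>/v1/",  # placeholder URL for a TGI server's OpenAI-compatible API
    api_key=HfFolder.get_token(),                # assumes the locally saved Hugging Face token is used
)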
@@ -29,35 +19,77 @@ client = OpenAI(
 )


+# def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
+#     """Generates LLM response for given text with streaming support"""
+#     full_response = []
+
+#     # Create streaming response
+#     chat_completion = client.chat.completions.create(
+#         model="tgi",
+#         messages=[
+#             {"role": "system", "content": "You are a BRIEF AND DIRECT assistant. A part of a speech pipeline so keep your responces short, fluent, and straight to the point. Avoid markdown in responses"},
+#             {"role": "user", "content": text}
+#         ],
+#         top_p=None,
+#         temperature=None,
+#         max_tokens=75,
+#         stream=True,
+#         seed=None,
+#         stop=None,
+#         frequency_penalty=None,
+#         presence_penalty=None
+#     )
+#     # Collect streamed response chunks
+#     for chunk in chat_completion:
+#         if chunk.choices[0].delta.content:
+#             full_response.append(chunk.choices[0].delta.content)
+
+#     return "".join(full_response)
+
+import openai
+
 def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
-    """Generates LLM response for given text with streaming support"""
+    """Generates LLM response for given text with streaming support, handling GPU cold-start errors."""
     full_response = []
+    try:
+        chat_completion = client.chat.completions.create(
+            model="tgi",
+            messages=[
+                {"role": "system", "content": "You are a BRIEF AND DIRECT assistant. A part of a speech pipeline so keep your responses short, fluent, and straight to the point. Avoid markdown in responses."},
+                {"role": "user", "content": text}
+            ],
+            top_p=None,
+            temperature=None,
+            max_tokens=75,
+            stream=True,
+            seed=None,
+            stop=None,
+            frequency_penalty=None,
+            presence_penalty=None
+        )
+        # Collect streamed response chunks
+        for chunk in chat_completion:
+            if chunk.choices[0].delta.content:
+                full_response.append(chunk.choices[0].delta.content)
+        return "".join(full_response)
+    except openai.InternalServerError:
+        # In openai>=1.0 this exception lives at the top level; openai.error no longer exists.
+        # If the error is due to the GPU scaling down, inform the user accordingly.
+        return "The GPU is currently booting up. Please wait about 10 minutes and try again."

-    # Create streaming response
-    chat_completion = client.chat.completions.create(
-        model="tgi",
-        messages=[
-            {"role": "system", "content": "You are a BRIEF AND DIRECT assistant. A part of a speech pipeline so keep your responces short, fluent, and straight to the point. Avoid markdown in responses"},
-            {"role": "user", "content": text}
-        ],
-        top_p=None,
-        temperature=None,
-        max_tokens=75,
-        stream=True,
-        seed=None,
-        stop=None,
-        frequency_penalty=None,
-        presence_penalty=None
-    )
-    # Collect streamed response chunks
-    for chunk in chat_completion:
-        if chunk.choices[0].delta.content:
-            full_response.append(chunk.choices[0].delta.content)
-
-    return "".join(full_response)

 generate_llm_response("Explain Deep Learning in Igbo")

+
+# Loading the ST Model (Whisper)
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
+
+# Take audio and return translated text
+def transcribe(audio):
+    outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
+    return outputs["text"]
+
+
 # Helper Functions to Cleanup LLM Texts
 # Replacement rules
 import re
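With this change, app.py defines the LLM helper first and loads the Whisper pipeline afterwards, so a minimal end-to-end check of the speech pipeline could look like the sketch below. It assumes the transcribe, generate_llm_response, and client objects defined above; the audio path is hypothetical.

# Hypothetical smoke test of the two stages (sample.wav is a placeholder file).
text = transcribe("sample.wav")        # Whisper ASR: audio -> transcribed text
reply = generate_llm_response(text)    # TGI endpoint: text -> short LLM answer
print(text)
print(reply)

Since the except branch returns a fixed message while the endpoint's GPU scales up from zero, a caller could also choose to wait and retry the request instead of surfacing that message directly.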