ccibeekeoc42 committed
Commit 23976f4 · verified
1 Parent(s): 59c6107

Update app.py

Files changed (1)
  1. app.py +65 -33
app.py CHANGED
@@ -1,17 +1,7 @@
-# Loading the ST Model (Whisper)
 import os
 import torch
 from transformers import pipeline
 
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
-
-# Take audio and return translated text
-def transcribe(audio):
-    outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
-    return outputs["text"]
-
-
 # The LLM Model
 from huggingface_hub import HfFolder
 from openai import OpenAI
@@ -29,35 +19,77 @@ client = OpenAI(
 )
 
 
+# def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
+#     """Generates LLM response for given text with streaming support"""
+#     full_response = []
+
+#     # Create streaming response
+#     chat_completion = client.chat.completions.create(
+#         model="tgi",
+#         messages=[
+#             {"role": "system", "content": "You are a BRIEF AND DIRECT assistant. A part of a speech pipeline so keep your responces short, fluent, and straight to the point. Avoid markdown in responses"},
+#             {"role": "user", "content": text}
+#         ],
+#         top_p=None,
+#         temperature=None,
+#         max_tokens=75,
+#         stream=True,
+#         seed=None,
+#         stop=None,
+#         frequency_penalty=None,
+#         presence_penalty=None
+#     )
+#     # Collect streamed response chunks
+#     for chunk in chat_completion:
+#         if chunk.choices[0].delta.content:
+#             full_response.append(chunk.choices[0].delta.content)
+
+#     return "".join(full_response)
+
+import openai
+
 def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
-    """Generates LLM response for given text with streaming support"""
+    """Generates LLM response for given text with streaming support, handling GPU cold-start errors."""
     full_response = []
-
-    # Create streaming response
-    chat_completion = client.chat.completions.create(
-        model="tgi",
-        messages=[
-            {"role": "system", "content": "You are a BRIEF AND DIRECT assistant. A part of a speech pipeline so keep your responces short, fluent, and straight to the point. Avoid markdown in responses"},
-            {"role": "user", "content": text}
-        ],
-        top_p=None,
-        temperature=None,
-        max_tokens=75,
-        stream=True,
-        seed=None,
-        stop=None,
-        frequency_penalty=None,
-        presence_penalty=None
-    )
-    # Collect streamed response chunks
-    for chunk in chat_completion:
-        if chunk.choices[0].delta.content:
-            full_response.append(chunk.choices[0].delta.content)
-
-    return "".join(full_response)
+    try:
+        chat_completion = client.chat.completions.create(
+            model="tgi",
+            messages=[
+                {"role": "system", "content": "You are a BRIEF AND DIRECT assistant. A part of a speech pipeline so keep your responses short, fluent, and straight to the point. Avoid markdown in responses."},
+                {"role": "user", "content": text}
+            ],
+            top_p=None,
+            temperature=None,
+            max_tokens=75,
+            stream=True,
+            seed=None,
+            stop=None,
+            frequency_penalty=None,
+            presence_penalty=None
+        )
+        # Collect streamed response chunks
+        for chunk in chat_completion:
+            if chunk.choices[0].delta.content:
+                full_response.append(chunk.choices[0].delta.content)
+        return "".join(full_response)
+    except openai.error.InternalServerError as e:
+        # If the error is due to the GPU scaling down, inform the user accordingly.
+        return "The GPU is currently booting up. Please wait about 10 minutes and try again."
+
 
 generate_llm_response("Explain Deep Learning in Igbo")
 
+
+# Loading the ST Model (Whisper)
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+pipe = pipeline("automatic-speech-recognition", model="okezieowen/whisper-small-multilingual-naija-11-03-2024", device=device)
+
+# Take audio and return translated text
+def transcribe(audio):
+    outputs = pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe"})
+    return outputs["text"]
+
+
 # Helper Functions to Cleanup LLM Texts
 # Replacement rules
 import re
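
Note on the new error handling: openai.error.InternalServerError is the exception path from the pre-1.0 openai SDK, while the "from openai import OpenAI" client and client.chat.completions.create(...) calls in this file are the 1.x API, where the openai.error module no longer exists. As written, the except clause would itself raise AttributeError the moment a cold-start error propagates. A minimal sketch of the same handler against the 1.x exception classes, assuming the Space runs openai>=1.0 and reusing the client configured earlier in app.py (generate_llm_response_v1 is a hypothetical name for illustration):

import openai  # 1.x SDK: exception classes live at the top level, not under openai.error

def generate_llm_response_v1(text):
    """Sketch: same streaming call as above, catching the openai>=1.0 exception classes."""
    full_response = []
    try:
        chat_completion = client.chat.completions.create(
            model="tgi",
            messages=[{"role": "user", "content": text}],
            max_tokens=75,
            stream=True,
        )
        # Collect streamed response chunks
        for chunk in chat_completion:
            if chunk.choices[0].delta.content:
                full_response.append(chunk.choices[0].delta.content)
        return "".join(full_response)
    except (openai.InternalServerError, openai.APIConnectionError):
        # A scale-to-zero endpoint returns 5xx (or refuses connections) while the GPU boots.
        return "The GPU is currently booting up. Please wait about 10 minutes and try again."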
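
Since the system prompt frames the model as part of a speech pipeline, the two halves of this file are presumably meant to be chained: Whisper turns audio into text (despite the "return translated text" comment, the pipeline is invoked with task="transcribe", so it transcribes rather than translates), and that text is fed to the LLM. A sketch of the glue, where speech_to_response and the audio filename are hypothetical illustrations, not part of this commit:

def speech_to_response(audio_path):
    """Hypothetical glue: Whisper ASR -> short streamed LLM reply."""
    text = transcribe(audio_path)        # uses the pipe defined above
    return generate_llm_response(text)   # uses the client defined above

# Example with a local recording (hypothetical file):
# print(speech_to_response("igbo_question.wav"))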