freeCS-dot-org committed
Commit d95f796 · verified · 1 parent: 436bf67

Update app.py

Files changed (1): app.py (+23, -56)
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import time
 import spaces
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 
@@ -31,17 +31,12 @@ h3 {
 
 class ConversationManager:
     def __init__(self):
-        self.user_history = []  # For displaying to user (with markdown)
+        self.user_history = []  # For displaying to user (with markdown formatting)
         self.model_history = []  # For feeding back to model (with original tags)
 
     def add_exchange(self, user_message, assistant_response, formatted_response):
         self.model_history.append((user_message, assistant_response))
         self.user_history.append((user_message, formatted_response))
-        # Log the exchange
-        print(f"\nModel History Exchange:")
-        print(f"User: {user_message}")
-        print(f"Assistant (Original): {assistant_response}")
-        print(f"Assistant (Formatted): {formatted_response}")
 
     def get_model_history(self):
         return self.model_history
@@ -49,11 +44,9 @@ class ConversationManager:
     def get_user_history(self):
         return self.user_history
 
-    def clear(self):
-        self.user_history = []
-        self.model_history = []
+conversation_manager = ConversationManager()
 
-device = "cuda" # for GPU usage or "cpu" for CPU usage
+device = "cuda" # Use "cpu" if no GPU available
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModelForCausalLM.from_pretrained(
@@ -64,7 +57,7 @@ model = AutoModelForCausalLM.from_pretrained(
 end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
 
 def format_response(response):
-    """Format the response for user display"""
+    """Format the response for user display."""
     if "<|end_reasoning|>" in response:
         parts = response.split("<|end_reasoning|>")
         reasoning = parts[0]
@@ -75,7 +68,7 @@ def format_response(response):
 @spaces.GPU()
 def stream_chat(
     message: str,
-    history_state: gr.State, # Access the internal history state
+    history: list,
     system_prompt: str,
     temperature: float = 0.2,
     max_new_tokens: int = 4096,
@@ -83,16 +76,14 @@
     top_k: int = 1,
     penalty: float = 1.1,
 ):
-    conversation_manager = history_state
-
     print(f'\nNew Chat Request:')
     print(f'Message: {message}')
-    print(f'History from UI: {conversation_manager.get_user_history()}')
+    print(f'History from UI: {history}')
     print(f'System Prompt: {system_prompt}')
     print(f'Parameters: temp={temperature}, max_tokens={max_new_tokens}, top_p={top_p}, top_k={top_k}, penalty={penalty}')
 
     model_history = conversation_manager.get_model_history()
-    print(f'Model History: {model_history}')
+    print(f'Model History Before: {model_history}')
 
     conversation = []
     for prompt, answer in model_history:
@@ -106,15 +97,15 @@
     print(conversation)
 
     input_ids = tokenizer.apply_chat_template(
-        conversation,
-        add_generation_prompt=True,
+        conversation,
+        add_generation_prompt=True,
         return_tensors="pt"
     ).to(model.device)
 
     streamer = TextIteratorStreamer(
-        tokenizer,
-        timeout=60.0,
-        skip_prompt=True,
+        tokenizer,
+        timeout=60.0,
+        skip_prompt=True,
         skip_special_tokens=True
     )
 
@@ -140,28 +131,16 @@ def stream_chat(
     for new_text in streamer:
         buffer += new_text
         original_response += new_text
-
         formatted_buffer = format_response(buffer)
+        yield formatted_buffer
 
-        if thread.is_alive() is False:
-            print(f'\nGeneration Complete:')
-            print(f'Original Response: {original_response}')
-            print(f'Formatted Response: {formatted_buffer}')
-
-            conversation_manager.add_exchange(
-                message,
-                original_response, # Original for model
-                formatted_buffer # Formatted for user
-            )
-
-        yield formatted_buffer, conversation_manager
-
-def clear_chat(history_state: gr.State):
-    history_state.clear()
-    return None, history_state
+    conversation_manager.add_exchange(
+        message,
+        original_response, # Store original for model
+        format_response(original_response) # Store formatted for user
+    )
 
-# Initialize the conversation manager outside of the function
-conversation_manager = ConversationManager()
+    print(f'Model History After: {conversation_manager.get_model_history()}')
 
 chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
 
@@ -171,17 +150,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
         value="Duplicate Space for private use",
         elem_classes="duplicate-button"
     )
-
-    # Pass the initial state to the ChatInterface
-    history_state = gr.State(conversation_manager)
-
-    clear_inputs_button = gr.ClearButton(
-        value="Clear Chat",
-        components=[chatbot],
-    )
-    clear_inputs_button.click(fn=clear_chat, inputs=[history_state], outputs=[chatbot, history_state])
-
-    chat_interface = gr.ChatInterface(
+    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
@@ -191,7 +160,6 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
            render=False
        ),
        additional_inputs=[
-           history_state, # Pass the state to the ChatInterface
           gr.Textbox(
               value="",
               label="System Prompt",
@@ -240,12 +208,11 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
       ],
       examples=[
           ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
-          ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
+          ["What are 5 creative things I could do with my kids' art?"],
           ["Tell me a random fun fact about the Roman Empire."],
           ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
       ],
       cache_examples=False,
   )
 
-if __name__ == "__main__":
-    demo.launch()
+demo.launch()
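
The core of the rewritten stream_chat is unchanged: model.generate blocks, so it runs on a background Thread while the generator drains a TextIteratorStreamer and yields the growing buffer to Gradio. A minimal sketch of that pattern, using "gpt2" as a stand-in checkpoint (the Space loads its own MODEL):

# Sketch of the thread-plus-streamer pattern stream_chat is built on.
# "gpt2" is a placeholder checkpoint, not the Space's actual MODEL.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer("A fun fact about the Roman Empire:", return_tensors="pt").input_ids

# skip_prompt=True keeps the prompt text out of the streamed output,
# matching the TextIteratorStreamer arguments in app.py.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until completion, so it runs on a worker thread
# while the caller iterates over the streamer as tokens arrive.
thread = Thread(target=model.generate, kwargs=dict(
    input_ids=input_ids, max_new_tokens=64, streamer=streamer,
))
thread.start()

buffer = ""
for new_text in streamer:
    buffer += new_text  # app.py yields format_response(buffer) at this point
print(buffer)
thread.join()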
 
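The signature change from history_state: gr.State to history: list leans on the contract gr.ChatInterface already provides: it calls fn(message, history, *additional_inputs), and when fn is a generator, each yielded string replaces the in-progress assistant reply. That is why stream_chat now yields only formatted_buffer rather than a (text, state) tuple. A hedged sketch of that contract; echo_stream and its labels are invented for illustration:

import gradio as gr

def echo_stream(message: str, history: list, system_prompt: str):
    # history is the chat log that ChatInterface itself maintains;
    # no gr.State plumbing is needed for it.
    partial = ""
    for ch in f"{system_prompt} {message}":
        partial += ch
        yield partial  # each yield overwrites the displayed reply

demo = gr.ChatInterface(
    fn=echo_stream,
    additional_inputs=[gr.Textbox(value="You said:", label="System Prompt")],
)

demo.launch()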