freeCS-dot-org committed
Commit 42f0a1a · verified · 1 Parent(s): befe84a

Update app.py

Files changed (1):
  1. app.py +56 -109
app.py CHANGED
@@ -10,37 +10,52 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
  MODEL = "AGI-0/Art-v0-3B"
  
  TITLE = """<h2>Link to the model: <a href="https://huggingface.co/AGI-0/Art-v0-3B">click here</a></h2>"""
- 
- PLACEHOLDER = """
- <center>
- <p>Hi! How can I help you today?</p>
- </center>
- """
- 
- CSS = """
- .duplicate-button {
-     margin: auto !important;
-     color: white !important;
-     background: black !important;
-     border-radius: 100vh !important;
- }
- h3 {
-     text-align: center;
- }
- """
+ PLACEHOLDER = """<center><p>Hi! How can I help you today?</p></center>"""
+ CSS = """.duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important; } h3 { text-align: center; }"""
  
+ def model_to_user_format(response):
+     """Convert model format (with reasoning tags) to user format (with markdown)"""
+     if "<|end_reasoning|>" in response:
+         # Split at the end reasoning tag
+         reasoning, content = response.split("<|end_reasoning|>")
+         # Remove start reasoning tag if present
+         reasoning = reasoning.replace("<|start_reasoning|>", "").strip()
+         # Format in markdown
+         return f"<details><summary>Click to see reasoning</summary>\n\n{reasoning}\n\n</details>\n\n{content.strip()}"
+     return response
+ 
+ def user_to_model_format(formatted_response):
+     """Convert user format (with markdown) to model format (with reasoning tags)"""
+     if "<details>" in formatted_response:
+         # Split into parts
+         parts = formatted_response.split("<details>")
+         if len(parts) > 1:
+             # Get the content between summary tags and details closing tag
+             details_content = parts[1].split("</details>")
+             if len(details_content) > 1:
+                 reasoning = details_content[0].split("</summary>")[1].strip()
+                 main_content = details_content[1].strip()
+                 # Reconstruct with proper tags
+                 return f"<|start_reasoning|>{reasoning}<|end_reasoning|>{main_content}"
+     return formatted_response
  
  class ConversationManager:
      def __init__(self):
          self.user_history = []  # For displaying to user (with markdown)
-         self.model_history = []  # For feeding back to model (with original tags)
+         self.model_history = []  # For feeding back to model (with tags)
  
-     def add_exchange(self, user_message, assistant_response, formatted_response):
-         self.model_history.append((user_message, assistant_response))
-         self.user_history.append((user_message, formatted_response))
+     def add_exchange(self, user_message, model_response):
+         """Add a new exchange using model format and convert as needed"""
+         # Store original model format for model history
+         self.model_history.append((user_message, model_response))
+         # Convert to user format for display
+         user_format = model_to_user_format(model_response)
+         self.user_history.append((user_message, user_format))
+         # Log the exchange
          print(f"\nModel History Exchange:")
          print(f"User: {user_message}")
-         print(f"Assistant (Original): {assistant_response}")
-         print(f"Assistant (Formatted): {formatted_response}")
+         print(f"Assistant (Model Format): {model_response}")
+         print(f"Assistant (User Format): {user_format}")
  
      def get_model_history(self):
          return self.model_history
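The two new helpers are inverses of each other: a response in the model's tag format renders as a collapsible markdown block, and that block can be parsed back into the tag format. A minimal round-trip sketch, assuming both helpers are in scope (the sample strings are made up for illustration):

    # Hypothetical sample, not taken from the app:
    model_text = "<|start_reasoning|>Check the units first.<|end_reasoning|>The answer is 42."
    user_text = model_to_user_format(model_text)
    # user_text == "<details><summary>Click to see reasoning</summary>\n\nCheck the units first.\n\n</details>\n\nThe answer is 42."
    restored = user_to_model_format(user_text)
    assert restored == model_text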
@@ -50,8 +65,8 @@ class ConversationManager:
  
  conversation_manager = ConversationManager()
  
- device = "cuda" # for GPU usage or "cpu" for CPU usage
- 
+ # Model initialization
+ device = "cuda"
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
  model = AutoModelForCausalLM.from_pretrained(
      MODEL,
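For context, a rough sketch of the initialization this hunk belongs to; the dtype and device-placement arguments are assumptions, since they sit in unchanged lines the diff does not show:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL = "AGI-0/Art-v0-3B"
    device = "cuda"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL,
        torch_dtype=torch.bfloat16,  # assumed dtype, not confirmed by the diff
    ).to(device)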
@@ -60,15 +75,6 @@ model = AutoModelForCausalLM.from_pretrained(
  )
  end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
  
- def format_response(response):
-     """Format the response for user display"""
-     if "<|end_reasoning|>" in response:
-         parts = response.split("<|end_reasoning|>")
-         reasoning = parts[0]
-         rest = parts[1] if len(parts) > 1 else ""
-         return f"<details><summary>Click to see reasoning</summary>\n\n{reasoning}\n\n</details>\n\n{rest}"
-     return response
- 
  @spaces.GPU()
  def stream_chat(
      message: str,
@@ -83,29 +89,15 @@ def stream_chat(
      print(f'\nNew Chat Request:')
      print(f'Message: {message}')
      print(f'History from UI: {history}')
-     print(f'System Prompt: {system_prompt}')
-     print(f'Parameters: temp={temperature}, max_tokens={max_new_tokens}, top_p={top_p}, top_k={top_k}, penalty={penalty}')
  
-     # Build conversation from UI history instead of model_history
+     # Build conversation from UI history
      conversation = []
-     for prompt, answer in (history or []):
-         # Extract original response if it's in the details format
-         if "<details>" in answer:
-             # Extract content between <details> tags and after </details>
-             parts = answer.split("</details>")
-             if len(parts) > 1:
-                 # Get the content after the </details> tag
-                 answer_content = parts[1].strip()
-                 # Get the reasoning part
-                 reasoning = answer.split("<summary>")[1].split("</summary>")[1].strip()
-                 # Reconstruct the original format
-                 answer = f"{reasoning}<|end_reasoning|>{answer_content}"
-             else:
-                 # If no </details> tag found, use the answer as is
-                 answer = answer
+     for prompt, formatted_answer in (history or []):
+         # Convert the UI formatted answer back to model format
+         model_format = user_to_model_format(formatted_answer)
          conversation.extend([
              {"role": "user", "content": prompt},
-             {"role": "assistant", "content": answer},
+             {"role": "assistant", "content": model_format},
          ])
  
      conversation.append({"role": "user", "content": message})
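The loop now produces a plain list of role/content messages, with each displayed answer converted back to the tag format before it is fed to the model. A small sketch with a hypothetical history entry:

    # Hypothetical Gradio history: list of (user, assistant) tuples in display format.
    history = [
        ("What is 2+2?",
         "<details><summary>Click to see reasoning</summary>\n\n2+2 is simple addition.\n\n</details>\n\n4"),
    ]
    conversation = []
    for prompt, formatted_answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": user_to_model_format(formatted_answer)},
        ])
    conversation.append({"role": "user", "content": "And 3+3?"})
    # conversation[1]["content"] == "<|start_reasoning|>2+2 is simple addition.<|end_reasoning|>4"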
@@ -138,7 +130,7 @@
      )
  
      buffer = ""
-     original_response = ""
+     model_response = ""
  
      with torch.no_grad():
          thread = Thread(target=model.generate, kwargs=generate_kwargs)
@@ -146,20 +138,14 @@
  
          for new_text in streamer:
              buffer += new_text
-             original_response += new_text
+             model_response += new_text
  
-             formatted_buffer = format_response(buffer)
+             # Convert to user format for display
+             formatted_buffer = model_to_user_format(buffer)
  
              if thread.is_alive() is False:
-                 print(f'\nGeneration Complete:')
-                 print(f'Original Response: {original_response}')
-                 print(f'Formatted Response: {formatted_buffer}')
- 
-                 conversation_manager.add_exchange(
-                     message,
-                     original_response,  # Original for model
-                     formatted_buffer  # Formatted for user
-                 )
+                 # Store both formats
+                 conversation_manager.add_exchange(message, model_response)
  
              yield formatted_buffer
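The loop above follows the usual transformers streaming pattern: generation runs in a background thread while the foreground iterates over a TextIteratorStreamer and re-renders the buffer on every chunk. A compact sketch of that pattern, reusing model_to_user_format from above; argument names and generation settings are placeholders:

    from threading import Thread
    from transformers import TextIteratorStreamer

    def stream_tokens(model, tokenizer, inputs, max_new_tokens=4096):
        """Sketch only; inputs is the tokenized prompt as returned by the tokenizer."""
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        generate_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
        Thread(target=model.generate, kwargs=generate_kwargs).start()
        buffer = ""
        for new_text in streamer:
            buffer += new_text
            # Re-render the whole buffer so the reasoning stays inside the collapsible block.
            yield model_to_user_format(buffer)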
 
@@ -181,51 +167,12 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
              render=False
          ),
          additional_inputs=[
-             gr.Textbox(
-                 value="",
-                 label="System Prompt",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=0,
-                 maximum=1,
-                 step=0.1,
-                 value=0.2,
-                 label="Temperature",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=128,
-                 maximum=8192,
-                 step=1,
-                 value=4096,
-                 label="Max new tokens",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=0.0,
-                 maximum=1.0,
-                 step=0.1,
-                 value=1.0,
-                 label="top_p",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=1,
-                 maximum=50,
-                 step=1,
-                 value=1,
-                 label="top_k",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=0.0,
-                 maximum=2.0,
-                 step=0.1,
-                 value=1.1,
-                 label="Repetition penalty",
-                 render=False,
-             ),
+             gr.Textbox(value="", label="System Prompt", render=False),
+             gr.Slider(minimum=0, maximum=1, step=0.1, value=0.2, label="Temperature", render=False),
+             gr.Slider(minimum=128, maximum=8192, step=1, value=4096, label="Max new tokens", render=False),
+             gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="top_p", render=False),
+             gr.Slider(minimum=1, maximum=50, step=1, value=1, label="top_k", render=False),
+             gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.1, label="Repetition penalty", render=False),
          ],
          examples=[
              ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
 