freeCS-dot-org committed · verified
Commit 559ab3f · 1 Parent(s): d95f796

Update app.py

Files changed (1):
  1. app.py +43 -130
app.py CHANGED
@@ -9,61 +9,39 @@ from threading import Thread
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL = "AGI-0/Art-v0-3B"
 
-TITLE = """<h2>Link to the model: <a href="https://huggingface.co/AGI-0/Art-v0-3B">click here</a></h2>"""
-
-PLACEHOLDER = """
-<center>
-<p>Hi! How can I help you today?</p>
-</center>
-"""
-
-CSS = """
-.duplicate-button {
-    margin: auto !important;
-    color: white !important;
-    background: black !important;
-    border-radius: 100vh !important;
-}
-h3 {
-    text-align: center;
-}
-"""
+device = "cuda"  # Use "cpu" if no GPU available
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL, torch_dtype=torch.bfloat16, device_map="auto"
+)
+end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
 
 class ConversationManager:
     def __init__(self):
-        self.user_history = []  # For displaying to user (with markdown formatting)
-        self.model_history = []  # For feeding back to model (with original tags)
+        self.user_history = []   # User-facing history with formatting
+        self.model_history = []  # Model-facing history without formatting
 
-    def add_exchange(self, user_message, assistant_response, formatted_response):
-        self.model_history.append((user_message, assistant_response))
+    def add_exchange(self, user_message, model_response):
+        formatted_response = self.format_response(model_response)
+        self.model_history.append((user_message, model_response))
         self.user_history.append((user_message, formatted_response))
 
-    def get_model_history(self):
-        return self.model_history
+    def format_response(self, response):
+        """Format response for UI while keeping raw text for model."""
+        if "<|end_reasoning|>" in response:
+            parts = response.split("<|end_reasoning|>")
+            reasoning, rest = parts[0], parts[1] if len(parts) > 1 else ""
+            return f"<details><summary>Click to see reasoning</summary>\n\n{reasoning}\n\n</details>\n\n{rest}"
+        return response
 
     def get_user_history(self):
         return self.user_history
 
-conversation_manager = ConversationManager()
-
-device = "cuda"  # Use "cpu" if no GPU available
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL,
-    torch_dtype=torch.bfloat16,
-    device_map="auto"
-)
-end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
+    def get_model_history(self):
+        return self.model_history
 
-def format_response(response):
-    """Format the response for user display."""
-    if "<|end_reasoning|>" in response:
-        parts = response.split("<|end_reasoning|>")
-        reasoning = parts[0]
-        rest = parts[1] if len(parts) > 1 else ""
-        return f"<details><summary>Click to see reasoning</summary>\n\n{reasoning}\n\n</details>\n\n{rest}"
-    return response
+conversation_manager = ConversationManager()
 
 @spaces.GPU()
 def stream_chat(
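This hunk folds the old module-level `format_response` helper into `ConversationManager`, so a single `add_exchange(message, model_response)` call keeps the raw (model-facing) and formatted (user-facing) histories in sync. A minimal sketch of the intended behavior, assuming the class as defined above; the reasoning string is made up for illustration:

    manager = ConversationManager()

    raw = "Recall European capitals.<|end_reasoning|>The capital of France is Paris."
    manager.add_exchange("What is the capital of France?", raw)

    # The model-facing history keeps the raw tag for the next chat-template call...
    assert manager.get_model_history()[0][1] == raw
    # ...while the user-facing copy folds the reasoning into a <details> block.
    print(manager.get_user_history()[0][1])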
@@ -76,37 +54,23 @@ def stream_chat(
     top_k: int = 1,
     penalty: float = 1.1,
 ):
-    print(f'\nNew Chat Request:')
-    print(f'Message: {message}')
-    print(f'History from UI: {history}')
-    print(f'System Prompt: {system_prompt}')
-    print(f'Parameters: temp={temperature}, max_tokens={max_new_tokens}, top_p={top_p}, top_k={top_k}, penalty={penalty}')
-
+    print(f'User Message: {message}')
+
     model_history = conversation_manager.get_model_history()
-    print(f'Model History Before: {model_history}')
-
     conversation = []
     for prompt, answer in model_history:
         conversation.extend([
             {"role": "user", "content": prompt},
             {"role": "assistant", "content": answer},
         ])
-
     conversation.append({"role": "user", "content": message})
-    print(f'\nFormatted Conversation for Model:')
-    print(conversation)
 
     input_ids = tokenizer.apply_chat_template(
-        conversation,
-        add_generation_prompt=True,
-        return_tensors="pt"
+        conversation, add_generation_prompt=True, return_tensors="pt"
     ).to(model.device)
 
     streamer = TextIteratorStreamer(
-        tokenizer,
-        timeout=60.0,
-        skip_prompt=True,
-        skip_special_tokens=True
+        tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True
     )
 
     generate_kwargs = dict(
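Each request rebuilds the whole conversation from the model-facing history and hands it to the tokenizer's chat template. A sketch of what that call produces, assuming a ChatML-style template (consistent with the `<|im_end|>` token registered as `end_of_sentence` in the first hunk):

    # Two prior turns plus the new user message, as role/content dicts.
    conversation = [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello! How can I help?"},
        {"role": "user", "content": "Tell me a joke."},
    ]
    # Serializes each turn in the template's chat markup and appends the
    # generation prompt so decoding starts a fresh assistant turn.
    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )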
@@ -131,88 +95,37 @@ def stream_chat
     for new_text in streamer:
         buffer += new_text
         original_response += new_text
-        formatted_buffer = format_response(buffer)
-        yield formatted_buffer
+        yield conversation_manager.format_response(buffer)
 
-    conversation_manager.add_exchange(
-        message,
-        original_response,  # Store original for model
-        format_response(original_response)  # Store formatted for user
-    )
-
-    print(f'Model History After: {conversation_manager.get_model_history()}')
+    conversation_manager.add_exchange(message, original_response)
 
-chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
+chatbot = gr.Chatbot(height=600, placeholder="<center><p>Hi! How can I help you today?</p></center>")
 
-with gr.Blocks(css=CSS, theme="soft") as demo:
-    gr.HTML(TITLE)
-    gr.DuplicateButton(
-        value="Duplicate Space for private use",
-        elem_classes="duplicate-button"
-    )
+demo = gr.Blocks()
+with demo:
+    gr.HTML("<h2>Link to the model: <a href='https://huggingface.co/AGI-0/Art-v0-3B'>click here</a></h2>")
+    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
         fn=stream_chat,
         chatbot=chatbot,
         fill_height=True,
-        additional_inputs_accordion=gr.Accordion(
-            label="⚙️ Parameters",
-            open=False,
-            render=False
-        ),
+        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
         additional_inputs=[
-            gr.Textbox(
-                value="",
-                label="System Prompt",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0,
-                maximum=1,
-                step=0.1,
-                value=0.2,
-                label="Temperature",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=128,
-                maximum=8192,
-                step=1,
-                value=4096,
-                label="Max new tokens",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=1.0,
-                label="top_p",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=1,
-                maximum=50,
-                step=1,
-                value=1,
-                label="top_k",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=2.0,
-                step=0.1,
-                value=1.1,
-                label="Repetition penalty",
-                render=False,
-            ),
+            gr.Textbox(value="", label="System Prompt", render=False),
+            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.2, label="Temperature", render=False),
+            gr.Slider(minimum=128, maximum=8192, step=1, value=4096, label="Max new tokens", render=False),
+            gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="top_p", render=False),
+            gr.Slider(minimum=1, maximum=50, step=1, value=1, label="top_k", render=False),
+            gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.1, label="Repetition penalty", render=False),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
-            ["What are 5 creative things I could do with my kids' art?"],
+            ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
             ["Tell me a random fun fact about the Roman Empire."],
             ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
         ],
         cache_examples=False,
     )
 
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
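The unchanged lines between the second and third hunks (not shown in the diff) set up `generate_kwargs` and launch generation in the background; the loop above then drains the streamer. A sketch of the standard transformers streaming pattern this relies on, consistent with the `from threading import Thread` context in the first hunk header; the exact kwargs in the elided lines may differ:

    from threading import Thread

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        eos_token_id=end_of_sentence,  # stop at <|im_end|>
    )
    # generate() blocks, so it runs in a thread while the streamer yields
    # decoded text chunks to the loop in the hunk above.
    Thread(target=model.generate, kwargs=generate_kwargs).start()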
 
 