Art3B-chat

Running on Zero

App Files Files Community

freeCS-dot-org committed on Jan 21

Commit

42f0a1a

verified ·

1 Parent(s): befe84a

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -109

app.py CHANGED Viewed

@@ -10,37 +10,52 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL = "AGI-0/Art-v0-3B"
 TITLE = """<h2>Link to the model: <a href="https://huggingface.co/AGI-0/Art-v0-3B">click here</a></h2>"""
-PLACEHOLDER = """
-<center>
-<p>Hi! How can I help you today?</p>
-</center>
-"""
-CSS = """
-.duplicate-button {
-    margin: auto !important;
-    color: white !important;
-    background: black !important;
-    border-radius: 100vh !important;
-}
-h3 {
-    text-align: center;
-}
-"""
 class ConversationManager:
     def __init__(self):
         self.user_history = []  # For displaying to user (with markdown)
-        self.model_history = []  # For feeding back to model (with original tags)
-    def add_exchange(self, user_message, assistant_response, formatted_response):
-        self.model_history.append((user_message, assistant_response))
-        self.user_history.append((user_message, formatted_response))
         print(f"\nModel History Exchange:")
         print(f"User: {user_message}")
-        print(f"Assistant (Original): {assistant_response}")
-        print(f"Assistant (Formatted): {formatted_response}")
     def get_model_history(self):
         return self.model_history
@@ -50,8 +65,8 @@ class ConversationManager:
 conversation_manager = ConversationManager()
-device = "cuda"  # for GPU usage or "cpu" for CPU usage
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL,
@@ -60,15 +75,6 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
-def format_response(response):
-    """Format the response for user display"""
-    if "<|end_reasoning|>" in response:
-        parts = response.split("<|end_reasoning|>")
-        reasoning = parts[0]
-        rest = parts[1] if len(parts) > 1 else ""
-        return f"<details><summary>Click to see reasoning</summary>\n\n{reasoning}\n\n</details>\n\n{rest}"
-    return response
 @spaces.GPU()
 def stream_chat(
     message: str,
@@ -83,29 +89,15 @@ def stream_chat(
     print(f'\nNew Chat Request:')
     print(f'Message: {message}')
     print(f'History from UI: {history}')
-    print(f'System Prompt: {system_prompt}')
-    print(f'Parameters: temp={temperature}, max_tokens={max_new_tokens}, top_p={top_p}, top_k={top_k}, penalty={penalty}')
-    # Build conversation from UI history instead of model_history
     conversation = []
-    for prompt, answer in (history or []):
-        # Extract original response if it's in the details format
-        if "<details>" in answer:
-            # Extract content between <details> tags and after </details>
-            parts = answer.split("</details>")
-            if len(parts) > 1:
-                # Get the content after the </details> tag
-                answer_content = parts[1].strip()
-                # Get the reasoning part
-                reasoning = answer.split("<summary>")[1].split("</summary>")[1].strip()
-                # Reconstruct the original format
-                answer = f"{reasoning}<|end_reasoning|>{answer_content}"
-            else:
-                # If no </details> tag found, use the answer as is
-                answer = answer
         conversation.extend([
             {"role": "user", "content": prompt},
-            {"role": "assistant", "content": answer},
         ])
     conversation.append({"role": "user", "content": message})
@@ -138,7 +130,7 @@ def stream_chat(
     )
     buffer = ""
-    original_response = ""
     with torch.no_grad():
         thread = Thread(target=model.generate, kwargs=generate_kwargs)
@@ -146,20 +138,14 @@ def stream_chat(
         for new_text in streamer:
             buffer += new_text
-            original_response += new_text
-            formatted_buffer = format_response(buffer)
             if thread.is_alive() is False:
-                print(f'\nGeneration Complete:')
-                print(f'Original Response: {original_response}')
-                print(f'Formatted Response: {formatted_buffer}')
-                conversation_manager.add_exchange(
-                    message,
-                    original_response,  # Original for model
-                    formatted_buffer    # Formatted for user
-                )
             yield formatted_buffer
@@ -181,51 +167,12 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
             render=False
         ),
         additional_inputs=[
-            gr.Textbox(
-                value="",
-                label="System Prompt",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0,
-                maximum=1,
-                step=0.1,
-                value=0.2,
-                label="Temperature",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=128,
-                maximum=8192,
-                step=1,
-                value=4096,
-                label="Max new tokens",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=1.0,
-                label="top_p",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=1,
-                maximum=50,
-                step=1,
-                value=1,
-                label="top_k",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=2.0,
-                step=0.1,
-                value=1.1,
-                label="Repetition penalty",
-                render=False,
-            ),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],

 MODEL = "AGI-0/Art-v0-3B"
 TITLE = """<h2>Link to the model: <a href="https://huggingface.co/AGI-0/Art-v0-3B">click here</a></h2>"""
+PLACEHOLDER = """<center><p>Hi! How can I help you today?</p></center>"""
+CSS = """.duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important; } h3 { text-align: center; }"""
def model_to_user_format(response):
    """Convert model format (with reasoning tags) to user format (with markdown).

    Everything before the first ``<|end_reasoning|>`` tag is treated as the
    reasoning and wrapped in a collapsible ``<details>`` element; everything
    after it is the visible answer.

    Args:
        response: Raw text produced by the model, possibly still streaming.

    Returns:
        The markdown/HTML string for display, or ``response`` unchanged when
        it contains no ``<|end_reasoning|>`` tag.
    """
    # partition() splits at the FIRST tag only. A two-target unpack of
    # split() raises ValueError as soon as the tag shows up a second time,
    # which would abort a streamed response midway.
    reasoning, end_tag, content = response.partition("<|end_reasoning|>")
    if not end_tag:
        return response
    # Remove start reasoning tag if present
    reasoning = reasoning.replace("<|start_reasoning|>", "").strip()
    # Format in markdown
    return f"<details><summary>Click to see reasoning</summary>\n\n{reasoning}\n\n</details>\n\n{content.strip()}"
def user_to_model_format(formatted_response):
    """Convert user format (with markdown) to model format (with reasoning tags).

    Expects the layout
    ``<details><summary>...</summary>REASONING</details>ANSWER`` and rebuilds
    ``<|start_reasoning|>REASONING<|end_reasoning|>ANSWER`` from it. Any text
    in front of the opening ``<details>`` tag is not carried over.

    Args:
        formatted_response: Assistant text as shown in the chat UI.

    Returns:
        The tagged model-format string, or ``formatted_response`` unchanged
        when it does not follow the expected layout (no ``<details>``, no
        closing ``</details>``, or no ``</summary>`` inside the block).
    """
    _, open_tag, after_open = formatted_response.partition("<details>")
    if not open_tag:
        return formatted_response

    # Split at the first closing tag only, so an answer that itself contains
    # <details>/</details> markup is kept whole instead of being truncated.
    details_body, close_tag, main_content = after_open.partition("</details>")
    if not close_tag:
        return formatted_response

    # The reasoning is whatever follows the summary line inside the block.
    # Without a </summary> there is nothing reliable to extract, so pass the
    # text through rather than failing on a missing list index.
    _, summary_end, reasoning = details_body.partition("</summary>")
    if not summary_end:
        return formatted_response

    # Reconstruct with proper tags
    return f"<|start_reasoning|>{reasoning.strip()}<|end_reasoning|>{main_content.strip()}"
 class ConversationManager:
     def __init__(self):
         self.user_history = []  # For displaying to user (with markdown)
+        self.model_history = []  # For feeding back to model (with tags)
+    def add_exchange(self, user_message, model_response):
+        """Add a new exchange using model format and convert as needed"""
+        # Store original model format for model history
+        self.model_history.append((user_message, model_response))
+        # Convert to user format for display
+        user_format = model_to_user_format(model_response)
+        self.user_history.append((user_message, user_format))
+        # Log the exchange
         print(f"\nModel History Exchange:")
         print(f"User: {user_message}")
+        print(f"Assistant (Model Format): {model_response}")
+        print(f"Assistant (User Format): {user_format}")
     def get_model_history(self):
         return self.model_history
 conversation_manager = ConversationManager()
+# Model initialization
+device = "cuda"
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL,
 )
 end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
 @spaces.GPU()
 def stream_chat(
     message: str,
     print(f'\nNew Chat Request:')
     print(f'Message: {message}')
     print(f'History from UI: {history}')
+    # Build conversation from UI history
     conversation = []
+    for prompt, formatted_answer in (history or []):
+        # Convert the UI formatted answer back to model format
+        model_format = user_to_model_format(formatted_answer)
         conversation.extend([
             {"role": "user", "content": prompt},
+            {"role": "assistant", "content": model_format},
         ])
     conversation.append({"role": "user", "content": message})
     )
     buffer = ""
+    model_response = ""
     with torch.no_grad():
         thread = Thread(target=model.generate, kwargs=generate_kwargs)
         for new_text in streamer:
             buffer += new_text
+            model_response += new_text
+            # Convert to user format for display
+            formatted_buffer = model_to_user_format(buffer)
             if thread.is_alive() is False:
+                # Store both formats
+                conversation_manager.add_exchange(message, model_response)
             yield formatted_buffer
             render=False
         ),
         additional_inputs=[
+            gr.Textbox(value="", label="System Prompt", render=False),
+            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.2, label="Temperature", render=False),
+            gr.Slider(minimum=128, maximum=8192, step=1, value=4096, label="Max new tokens", render=False),
+            gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="top_p", render=False),
+            gr.Slider(minimum=1, maximum=50, step=1, value=1, label="top_k", render=False),
+            gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.1, label="Repetition penalty", render=False),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],