Art3B-chat

Running on Zero

App Files Files Community

freeCS-dot-org committed on Jan 21

Commit

d95f796

verified ·

1 Parent(s): 436bf67

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -56

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import time
 import spaces
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
 import gradio as gr
 from threading import Thread
@@ -31,17 +31,12 @@ h3 {
 class ConversationManager:
     def __init__(self):
-        self.user_history = []  # For displaying to user (with markdown)
         self.model_history = []  # For feeding back to model (with original tags)
     def add_exchange(self, user_message, assistant_response, formatted_response):
         self.model_history.append((user_message, assistant_response))
         self.user_history.append((user_message, formatted_response))
-        # Log the exchange
-        print(f"\nModel History Exchange:")
-        print(f"User: {user_message}")
-        print(f"Assistant (Original): {assistant_response}")
-        print(f"Assistant (Formatted): {formatted_response}")
     def get_model_history(self):
         return self.model_history
@@ -49,11 +44,9 @@ class ConversationManager:
     def get_user_history(self):
         return self.user_history
-    def clear(self):
-        self.user_history = []
-        self.model_history = []
-device = "cuda"  # for GPU usage or "cpu" for CPU usage
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModelForCausalLM.from_pretrained(
@@ -64,7 +57,7 @@ model = AutoModelForCausalLM.from_pretrained(
 end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
 def format_response(response):
-    """Format the response for user display"""
     if "<|end_reasoning|>" in response:
         parts = response.split("<|end_reasoning|>")
         reasoning = parts[0]
@@ -75,7 +68,7 @@ def format_response(response):
 @spaces.GPU()
 def stream_chat(
     message: str,
-    history_state: gr.State,  # Access the internal history state
     system_prompt: str,
     temperature: float = 0.2,
     max_new_tokens: int = 4096,
@@ -83,16 +76,14 @@ def stream_chat(
     top_k: int = 1,
     penalty: float = 1.1,
 ):
-    conversation_manager = history_state
     print(f'\nNew Chat Request:')
     print(f'Message: {message}')
-    print(f'History from UI: {conversation_manager.get_user_history()}')
     print(f'System Prompt: {system_prompt}')
     print(f'Parameters: temp={temperature}, max_tokens={max_new_tokens}, top_p={top_p}, top_k={top_k}, penalty={penalty}')
     model_history = conversation_manager.get_model_history()
-    print(f'Model History: {model_history}')
     conversation = []
     for prompt, answer in model_history:
@@ -106,15 +97,15 @@ def stream_chat(
     print(conversation)
     input_ids = tokenizer.apply_chat_template(
-        conversation,
-        add_generation_prompt=True,
         return_tensors="pt"
     ).to(model.device)
     streamer = TextIteratorStreamer(
-        tokenizer,
-        timeout=60.0,
-        skip_prompt=True,
         skip_special_tokens=True
     )
@@ -140,28 +131,16 @@ def stream_chat(
         for new_text in streamer:
             buffer += new_text
             original_response += new_text
             formatted_buffer = format_response(buffer)
-            if thread.is_alive() is False:
-                print(f'\nGeneration Complete:')
-                print(f'Original Response: {original_response}')
-                print(f'Formatted Response: {formatted_buffer}')
-                conversation_manager.add_exchange(
-                    message,
-                    original_response,  # Original for model
-                    formatted_buffer  # Formatted for user
-                )
-            yield formatted_buffer, conversation_manager
-def clear_chat(history_state: gr.State):
-    history_state.clear()
-    return None, history_state
-# Initialize the conversation manager outside of the function
-conversation_manager = ConversationManager()
 chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
@@ -171,17 +150,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
         value="Duplicate Space for private use",
         elem_classes="duplicate-button"
     )
-    # Pass the initial state to the ChatInterface
-    history_state = gr.State(conversation_manager)
-    clear_inputs_button = gr.ClearButton(
-        value="Clear Chat",
-        components=[chatbot],
-    )
-    clear_inputs_button.click(fn=clear_chat, inputs=[history_state], outputs=[chatbot, history_state])
-    chat_interface = gr.ChatInterface(
         fn=stream_chat,
         chatbot=chatbot,
         fill_height=True,
@@ -191,7 +160,6 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
             render=False
         ),
         additional_inputs=[
-            history_state,  # Pass the state to the ChatInterface
             gr.Textbox(
                 value="",
                 label="System Prompt",
@@ -240,12 +208,11 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
-            ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
             ["Tell me a random fun fact about the Roman Empire."],
             ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
         ],
         cache_examples=False,
     )
-if __name__ == "__main__":
-    demo.launch()

 import time
 import spaces
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 class ConversationManager:
     def __init__(self):
+        self.user_history = []  # For displaying to user (with markdown formatting)
         self.model_history = []  # For feeding back to model (with original tags)
     def add_exchange(self, user_message, assistant_response, formatted_response):
         self.model_history.append((user_message, assistant_response))
         self.user_history.append((user_message, formatted_response))
     def get_model_history(self):
         return self.model_history
     def get_user_history(self):
         return self.user_history
+conversation_manager = ConversationManager()
+device = "cuda"  # Use "cpu" if no GPU available
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModelForCausalLM.from_pretrained(
 end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
 def format_response(response):
+    """Format the response for user display."""
     if "<|end_reasoning|>" in response:
         parts = response.split("<|end_reasoning|>")
         reasoning = parts[0]
 @spaces.GPU()
 def stream_chat(
     message: str,
+    history: list,
     system_prompt: str,
     temperature: float = 0.2,
     max_new_tokens: int = 4096,
     top_k: int = 1,
     penalty: float = 1.1,
 ):
     print(f'\nNew Chat Request:')
     print(f'Message: {message}')
+    print(f'History from UI: {history}')
     print(f'System Prompt: {system_prompt}')
     print(f'Parameters: temp={temperature}, max_tokens={max_new_tokens}, top_p={top_p}, top_k={top_k}, penalty={penalty}')
     model_history = conversation_manager.get_model_history()
+    print(f'Model History Before: {model_history}')
     conversation = []
     for prompt, answer in model_history:
     print(conversation)
     input_ids = tokenizer.apply_chat_template(
+        conversation,
+        add_generation_prompt=True,
         return_tensors="pt"
     ).to(model.device)
     streamer = TextIteratorStreamer(
+        tokenizer,
+        timeout=60.0,
+        skip_prompt=True,
         skip_special_tokens=True
     )
         for new_text in streamer:
             buffer += new_text
             original_response += new_text
             formatted_buffer = format_response(buffer)
+            yield formatted_buffer
+    conversation_manager.add_exchange(
+        message,
+        original_response,  # Store original for model
+        format_response(original_response)  # Store formatted for user
+    )
+    print(f'Model History After: {conversation_manager.get_model_history()}')
 chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
         value="Duplicate Space for private use",
         elem_classes="duplicate-button"
     )
+    gr.ChatInterface(
         fn=stream_chat,
         chatbot=chatbot,
         fill_height=True,
             render=False
         ),
         additional_inputs=[
             gr.Textbox(
                 value="",
                 label="System Prompt",
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
+            ["What are 5 creative things I could do with my kids' art?"],
             ["Tell me a random fun fact about the Roman Empire."],
             ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
         ],
         cache_examples=False,
     )
+demo.launch()