Art3B-chat

Running on Zero

App Files Files Community

freeCS-dot-org committed on Jan 19

Commit

f80f6ce

verified ·

1 Parent(s): 3bce535

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -30

app.py CHANGED Viewed

@@ -37,7 +37,6 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16,
     device_map="auto")
 end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
-end_reasoning_token = "<|end_reasoning|>"
 @spaces.GPU()
 def stream_chat(
@@ -83,41 +82,23 @@ def stream_chat(
         thread.start()
     buffer = ""
-    reasoning_text = ""
-    final_text = ""
-    in_reasoning = True
     for new_text in streamer:
         buffer += new_text
-        if end_reasoning_token in buffer and in_reasoning:
-            # Split the buffer at the end_reasoning_token
-            parts = buffer.split(end_reasoning_token)
-            reasoning_text = parts[0]
-            final_text = parts[1] if len(parts) > 1 else ""
-            # Format the output with the details tag
-            formatted_output = (
-                "<details><summary>Click to see reasoning</summary>\n\n"
-                f"{reasoning_text}\n\n"
-                "</details>\n\n"
-                f"{final_text}"
-            )
-            in_reasoning = False
-            yield formatted_output
-        elif in_reasoning:
-            # Still collecting reasoning text
-            yield "<details><summary>Click to see reasoning</summary>\n\n" + buffer + "\n\n</details>"
-        else:
-            # After end_reasoning_token, just append to the existing formatted output
-            formatted_output = (
-                "<details><summary>Click to see reasoning</summary>\n\n"
-                f"{reasoning_text}\n\n"
-                "</details>\n\n"
-                f"{buffer}"
-            )
-            yield formatted_output
 chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
 with gr.Blocks(css=CSS, theme="soft") as demo:

     torch_dtype=torch.bfloat16,
     device_map="auto")
 end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
 @spaces.GPU()
 def stream_chat(
         thread.start()
     buffer = ""
+    found_token = False
     for new_text in streamer:
         buffer += new_text
+        if "<|end_reasoning|>" in buffer and not found_token:
+            # Split at the token
+            parts = buffer.split("<|end_reasoning|>")
+            reasoning = parts[0]
+            rest = parts[1] if len(parts) > 1 else ""
+            # Format with markdown and continue
+            buffer = f"<details><summary>Click to see reasoning</summary>\n\n{reasoning}\n\n</details>\n\n{rest}"
+            found_token = True
+        yield buffer
 chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
 with gr.Blocks(css=CSS, theme="soft") as demo: