Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -37,7 +37,6 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
37 |
torch_dtype=torch.bfloat16,
|
38 |
device_map="auto")
|
39 |
end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
|
40 |
-
end_reasoning_token = "<|end_reasoning|>"
|
41 |
|
42 |
@spaces.GPU()
|
43 |
def stream_chat(
|
@@ -83,41 +82,23 @@ def stream_chat(
|
|
83 |
thread.start()
|
84 |
|
85 |
buffer = ""
|
86 |
-
|
87 |
-
final_text = ""
|
88 |
-
in_reasoning = True
|
89 |
|
90 |
for new_text in streamer:
|
91 |
buffer += new_text
|
92 |
|
93 |
-
if
|
94 |
-
# Split
|
95 |
-
parts = buffer.split(
|
96 |
-
|
97 |
-
|
98 |
|
99 |
-
# Format
|
100 |
-
|
101 |
-
|
102 |
-
f"{reasoning_text}\n\n"
|
103 |
-
"</details>\n\n"
|
104 |
-
f"{final_text}"
|
105 |
-
)
|
106 |
-
in_reasoning = False
|
107 |
-
yield formatted_output
|
108 |
-
elif in_reasoning:
|
109 |
-
# Still collecting reasoning text
|
110 |
-
yield "<details><summary>Click to see reasoning</summary>\n\n" + buffer + "\n\n</details>"
|
111 |
-
else:
|
112 |
-
# After end_reasoning_token, just append to the existing formatted output
|
113 |
-
formatted_output = (
|
114 |
-
"<details><summary>Click to see reasoning</summary>\n\n"
|
115 |
-
f"{reasoning_text}\n\n"
|
116 |
-
"</details>\n\n"
|
117 |
-
f"{buffer}"
|
118 |
-
)
|
119 |
-
yield formatted_output
|
120 |
|
|
|
|
|
121 |
chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
|
122 |
|
123 |
with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
|
37 |
torch_dtype=torch.bfloat16,
|
38 |
device_map="auto")
|
39 |
end_of_sentence = tokenizer.convert_tokens_to_ids("<|im_end|>")
|
|
|
40 |
|
41 |
@spaces.GPU()
|
42 |
def stream_chat(
|
|
|
82 |
thread.start()
|
83 |
|
84 |
buffer = ""
|
85 |
+
found_token = False
|
|
|
|
|
86 |
|
87 |
for new_text in streamer:
|
88 |
buffer += new_text
|
89 |
|
90 |
+
if "<|end_reasoning|>" in buffer and not found_token:
|
91 |
+
# Split at the token
|
92 |
+
parts = buffer.split("<|end_reasoning|>")
|
93 |
+
reasoning = parts[0]
|
94 |
+
rest = parts[1] if len(parts) > 1 else ""
|
95 |
|
96 |
+
# Format with markdown and continue
|
97 |
+
buffer = f"<details><summary>Click to see reasoning</summary>\n\n{reasoning}\n\n</details>\n\n{rest}"
|
98 |
+
found_token = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
+
yield buffer
|
101 |
+
|
102 |
chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
|
103 |
|
104 |
with gr.Blocks(css=CSS, theme="soft") as demo:
|