Ali2206 committed
Commit c47b2de · verified · 1 Parent(s): 41dec39

Upload 11 files

.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ img/q1.gif filter=lfs diff=lfs merge=lfs -text
+ img/q2.gif filter=lfs diff=lfs merge=lfs -text
+ img/q3.gif filter=lfs diff=lfs merge=lfs -text
img/q1.gif ADDED

Git LFS Details

  • SHA256: f0cbda2e1ec46defdae51233c03aee0ddea1ad1f28ad9ed79e4ea72a8f13edf9
  • Pointer size: 132 Bytes
  • Size of remote file: 7.65 MB
img/q2.gif ADDED

Git LFS Details

  • SHA256: a453c339ddcc333e28bc9626b287d9d6fa1554edec7b127611617bcb27b90591
  • Pointer size: 132 Bytes
  • Size of remote file: 6.31 MB
img/q3.gif ADDED

Git LFS Details

  • SHA256: ded44920ea272367247ac4f1a222c0a55932ad6b1173c2b14009a3ec4a79f524
  • Pointer size: 132 Bytes
  • Size of remote file: 8.83 MB
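
For context, each GIF above is stored with Git LFS, so the repository itself only carries a small text pointer (hence the ~132-byte pointer size). A pointer file follows the LFS v1 spec; for img/q1.gif it would look like the sketch below, except that the size line records the exact byte count, which the page only reports rounded to 7.65 MB (the number here is a placeholder):

version https://git-lfs.github.com/spec/v1
oid sha256:f0cbda2e1ec46defdae51233c03aee0ddea1ad1f28ad9ed79e4ea72a8f13edf9
size 7650000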
pyproject.toml CHANGED
@@ -1,3 +1,3 @@
- [build-system]
- requires = ["setuptools", "wheel"]
+ [build-system]
+ requires = ["setuptools", "wheel"]
  build-backend = "setuptools.build_meta"
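
(The removed and re-added lines are textually identical, so this was likely a whitespace or line-ending change.) This [build-system] table is all a PEP 517/518 frontend needs: it pins the build requirements and names setuptools as the backend. With it in place, an editable install from the repository root is simply:

pip install -e .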
run_example.py ADDED
@@ -0,0 +1,28 @@
+ from txagent import TxAgent
+ import os
+ os.environ["MKL_THREADING_LAYER"] = "GNU"
+
+
+ model_name = 'mims-harvard/TxAgent-T1-Llama-3.1-8B'
+ rag_model_name = 'mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B'
+ multiagent = False
+ max_round = 20
+ init_rag_num = 0
+ step_rag_num = 10
+
+ agent = TxAgent(model_name,
+                 rag_model_name,
+                 enable_summary=False)
+ agent.init_model()
+
+ question = "Given a 50-year-old patient experiencing severe acute pain and considering the use of the newly approved medication, Journavx, how should the dosage be adjusted considering the presence of moderate hepatic impairment?"
+
+ response = agent.run_multistep_agent(
+     question,
+     temperature=0.3,
+     max_new_tokens=1024,
+     max_token=90240,
+     call_agent=multiagent,
+     max_round=max_round)
+
+ print(f"\033[94m{response}\033[0m")
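
A note on running this example: it assumes the txagent package from this repository is installed (for instance via an editable install from the repo root) and that a GPU with enough memory for vLLM to serve the 8B model is available. Also note that init_rag_num and step_rag_num are assigned above but never passed to the TxAgent constructor, so the agent falls back to its own defaults for those two settings:

pip install -e .
python run_example.py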
run_txagent_app.py ADDED
@@ -0,0 +1,293 @@
+ import random
+ import datetime
+ import sys
+ from txagent import TxAgent
+ import spaces
+ import gradio as gr
+ import os
+
+ # Determine the directory where the current file is located
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ os.environ["MKL_THREADING_LAYER"] = "GNU"
+
+ # Set an environment variable
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
+
+
+ DESCRIPTION = '''
+ <div>
+ <h1 style="text-align: center;">TxAgent: An AI Agent for Therapeutic Reasoning Across a Universe of Tools</h1>
+ </div>
+ '''
+ INTRO = """
+ Precision therapeutics require multimodal adaptive models that provide personalized treatment recommendations. We introduce TxAgent, an AI agent that leverages multi-step reasoning and real-time biomedical knowledge retrieval across a toolbox of 211 expert-curated tools to navigate complex drug interactions, contraindications, and patient-specific treatment strategies, delivering evidence-grounded therapeutic decisions. TxAgent executes goal-oriented tool selection and iterative function calls to solve therapeutic tasks that require deep clinical understanding and cross-source validation. The ToolUniverse consolidates 211 tools linked to trusted sources, including all US FDA-approved drugs since 1939 and validated clinical insights from Open Targets.
+ """
+
+ LICENSE = """
+ We welcome your feedback and suggestions to enhance your experience with TxAgent. If you are interested in collaboration, please email Marinka Zitnik and Shanghua Gao.
+
+ ### Medical Advice Disclaimer
+ DISCLAIMER: THIS WEBSITE DOES NOT PROVIDE MEDICAL ADVICE
+ The information, including but not limited to, text, graphics, images and other material contained on this website is for informational purposes only. No material on this site is intended to be a substitute for professional medical advice, diagnosis or treatment. Always seek the advice of your physician or other qualified health care provider with any questions you may have regarding a medical condition or treatment and before undertaking a new health care regimen, and never disregard professional medical advice or delay in seeking it because of something you have read on this website.
+ """
+
+ PLACEHOLDER = """
+ <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+    <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">TxAgent</h1>
+    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Tips before using TxAgent:</p>
+    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.55;">Please click clear🗑️
+ (top-right) to remove previous context before submitting a new question.</p>
+    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.55;">Click retry🔄 (below message) to get multiple versions of the answer.</p>
+ </div>
+ """
+
+ css = """
+ h1 {
+   text-align: center;
+   display: block;
+ }
+
+ #duplicate-button {
+   margin: auto;
+   color: white;
+   background: #1565c0;
+   border-radius: 100vh;
+ }
+ .small-button button {
+   font-size: 12px !important;
+   padding: 4px 8px !important;
+   height: 6px !important;
+   width: 4px !important;
+ }
+ .gradio-accordion {
+   margin-top: 0px !important;
+   margin-bottom: 0px !important;
+ }
+ """
+
+ chat_css = """
+ .gr-button { font-size: 20px !important; }  /* Enlarges button icons */
+ .gr-button svg { width: 32px !important; height: 32px !important; }  /* Enlarges SVG icons */
+ """
+
+ # model_name = '/n/holylfs06/LABS/mzitnik_lab/Lab/shgao/bioagent/bio/alignment-handbook/data_new/L8-qlora-biov49v9v7v16_32k_chat01_merged'
+ model_name = 'mims-harvard/TxAgent-T1-Llama-3.1-8B'
+ rag_model_name = 'mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B'
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+
+ question_examples = [
+     ['Given a 50-year-old patient experiencing severe acute pain and considering the use of the newly approved medication, Journavx, how should the dosage be adjusted considering the presence of moderate hepatic impairment?'],
+     ['Given a 50-year-old patient experiencing severe acute pain and considering the use of the newly approved medication, Journavx, how should the dosage be adjusted considering the presence of severe hepatic impairment?'],
+     ['A 30-year-old patient is taking Prozac to treat their depression. They were recently diagnosed with WHIM syndrome and require a treatment for that condition as well. Is Xolremdi suitable for this patient, considering contraindications?'],
+ ]
+
+ new_tool_files = {
+     'new_tool': os.path.join(current_dir, 'data', 'new_tool.json'),
+ }
+
+ agent = TxAgent(model_name,
+                 rag_model_name,
+                 tool_files_dict=new_tool_files,
+                 force_finish=True,
+                 enable_checker=True,
+                 step_rag_num=10,
+                 seed=100,
+                 additional_default_tools=['DirectResponse', 'RequireClarification'])
+ agent.init_model()
+
+
+ def update_model_parameters(enable_finish, enable_rag, enable_summary,
+                             init_rag_num, step_rag_num, skip_last_k,
+                             summary_mode, summary_skip_last_k, summary_context_length, force_finish, seed):
+     # Update model instance parameters dynamically
+     updated_params = agent.update_parameters(
+         enable_finish=enable_finish,
+         enable_rag=enable_rag,
+         enable_summary=enable_summary,
+         init_rag_num=init_rag_num,
+         step_rag_num=step_rag_num,
+         skip_last_k=skip_last_k,
+         summary_mode=summary_mode,
+         summary_skip_last_k=summary_skip_last_k,
+         summary_context_length=summary_context_length,
+         force_finish=force_finish,
+         seed=seed,
+     )
+
+     return updated_params
+
+
+ def update_seed():
+     # Update the model's sampling seed dynamically
+     seed = random.randint(0, 10000)
+     updated_params = agent.update_parameters(
+         seed=seed,
+     )
+     return updated_params
+
+
+ def handle_retry(history, retry_data: gr.RetryData, temperature, max_new_tokens, max_tokens, multi_agent, conversation, max_round):
+     print("Updated seed:", update_seed())
+     new_history = history[:retry_data.index]
+     previous_prompt = history[retry_data.index]['content']
+
+     print("previous_prompt", previous_prompt)
+
+     yield from agent.run_gradio_chat(new_history + [{"role": "user", "content": previous_prompt}], temperature, max_new_tokens, max_tokens, multi_agent, conversation, max_round)
+
+
+ PASSWORD = "mypassword"
+
+
+ # Function to check if the password is correct
+ def check_password(input_password):
+     if input_password == PASSWORD:
+         return gr.update(visible=True), ""
+     else:
+         return gr.update(visible=False), "Incorrect password, try again!"
+
+
+ conversation_state = gr.State([])
+
+ # Gradio block
+ chatbot = gr.Chatbot(height=800, placeholder=PLACEHOLDER,
+                      label='TxAgent', type="messages", show_copy_button=True)
+
+ with gr.Blocks(css=css) as demo:
+     gr.Markdown(DESCRIPTION)
+     gr.Markdown(INTRO)
+     default_temperature = 0.3
+     default_max_new_tokens = 1024
+     default_max_tokens = 81920
+     default_max_round = 30
+     temperature_state = gr.State(value=default_temperature)
+     max_new_tokens_state = gr.State(value=default_max_new_tokens)
+     max_tokens_state = gr.State(value=default_max_tokens)
+     max_round_state = gr.State(value=default_max_round)
+     chatbot.retry(handle_retry, chatbot, chatbot, temperature_state, max_new_tokens_state,
+                   max_tokens_state, gr.Checkbox(value=False, render=False), conversation_state, max_round_state)
+
+     gr.ChatInterface(
+         fn=agent.run_gradio_chat,
+         chatbot=chatbot,
+         fill_height=True, fill_width=True, stop_btn=True,
+         additional_inputs_accordion=gr.Accordion(
+             label="⚙️ Inference Parameters", open=False, render=False),
+         additional_inputs=[
+             temperature_state, max_new_tokens_state, max_tokens_state,
+             gr.Checkbox(
+                 label="Activate multi-agent reasoning mode (it requires additional time but offers a more comprehensive analysis).", value=False, render=False),
+             conversation_state,
+             max_round_state,
+             gr.Number(label="Seed", value=100, render=False)
+         ],
+         examples=question_examples,
+         cache_examples=False,
+         css=chat_css,
+     )
+
+     with gr.Accordion("Settings", open=False):
+
+         # Define the sliders
+         temperature_slider = gr.Slider(
+             minimum=0,
+             maximum=1,
+             step=0.1,
+             value=default_temperature,
+             label="Temperature"
+         )
+         max_new_tokens_slider = gr.Slider(
+             minimum=128,
+             maximum=4096,
+             step=1,
+             value=default_max_new_tokens,
+             label="Max new tokens"
+         )
+         max_tokens_slider = gr.Slider(
+             minimum=128,
+             maximum=32000,
+             step=1,
+             value=default_max_tokens,
+             label="Max tokens"
+         )
+         max_round_slider = gr.Slider(
+             minimum=0,
+             maximum=50,
+             step=1,
+             value=default_max_round,
+             label="Max round")
+
+         # Automatically update states when slider values change
+         temperature_slider.change(
+             lambda x: x, inputs=temperature_slider, outputs=temperature_state)
+         max_new_tokens_slider.change(
+             lambda x: x, inputs=max_new_tokens_slider, outputs=max_new_tokens_state)
+         max_tokens_slider.change(
+             lambda x: x, inputs=max_tokens_slider, outputs=max_tokens_state)
+         max_round_slider.change(
+             lambda x: x, inputs=max_round_slider, outputs=max_round_state)
+
+     password_input = gr.Textbox(
+         label="Enter Password for More Settings", type="password")
+     incorrect_message = gr.Textbox(visible=False, interactive=False)
+     with gr.Accordion("⚙️ Settings", open=False, visible=False) as protected_accordion:
+         with gr.Row():
+             with gr.Column(scale=1):
+                 with gr.Accordion("⚙️ Model Loading", open=False):
+                     model_name_input = gr.Textbox(
+                         label="Enter model path", value=model_name)
+                     load_model_btn = gr.Button(value="Load Model")
+                     load_model_btn.click(
+                         agent.load_models, inputs=model_name_input, outputs=gr.Textbox(label="Status"))
+             with gr.Column(scale=1):
+                 with gr.Accordion("⚙️ Functional Parameters", open=False):
+                     # Create Gradio components for parameter inputs
+                     enable_finish = gr.Checkbox(
+                         label="Enable Finish", value=True)
+                     enable_rag = gr.Checkbox(
+                         label="Enable RAG", value=True)
+                     enable_summary = gr.Checkbox(
+                         label="Enable Summary", value=False)
+                     init_rag_num = gr.Number(
+                         label="Initial RAG Num", value=0)
+                     step_rag_num = gr.Number(
+                         label="Step RAG Num", value=10)
+                     skip_last_k = gr.Number(label="Skip Last K", value=0)
+                     summary_mode = gr.Textbox(
+                         label="Summary Mode", value='step')
+                     summary_skip_last_k = gr.Number(
+                         label="Summary Skip Last K", value=0)
+                     summary_context_length = gr.Number(
+                         label="Summary Context Length", value=None)
+                     force_finish = gr.Checkbox(
+                         label="Force FinalAnswer", value=True)
+                     seed = gr.Number(label="Seed", value=100)
+                     # Button to submit and update parameters
+                     submit_btn = gr.Button("Update Parameters")
+
+                     # Display the updated parameters
+                     updated_parameters_output = gr.JSON()
+
+                     # When the button is clicked, update parameters
+                     submit_btn.click(fn=update_model_parameters,
+                                      inputs=[enable_finish, enable_rag, enable_summary, init_rag_num, step_rag_num, skip_last_k,
+                                              summary_mode, summary_skip_last_k, summary_context_length, force_finish, seed],
+                                      outputs=updated_parameters_output)
+     # Button to submit the password
+     submit_button = gr.Button("Submit")
+
+     # When the button is clicked, check if the password is correct
+     submit_button.click(
+         check_password,
+         inputs=password_input,
+         outputs=[protected_accordion, incorrect_message]
+     )
+     gr.Markdown(LICENSE)
+
+
+ if __name__ == "__main__":
+     demo.launch(share=True)
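
Launching the app is a plain script invocation; note that demo.launch(share=True) asks Gradio to additionally create a temporary public *.gradio.live tunnel, which you may want to drop for private deployments:

python run_txagent_app.py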
src/txagent/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from .txagent import TxAgent
+ from .toolrag import ToolRAGModel
+ __all__ = [
+     "TxAgent",
+     "ToolRAGModel",
+ ]
src/txagent/toolrag.py ADDED
@@ -0,0 +1,60 @@
+ from sentence_transformers import SentenceTransformer
+ import torch
+ import json
+ from .utils import get_md5
+
+
+ class ToolRAGModel:
+     def __init__(self, rag_model_name):
+         self.rag_model_name = rag_model_name
+         self.rag_model = None
+         self.tool_desc_embedding = None
+         self.tool_name = None
+         self.tool_embedding_path = None
+         self.load_rag_model()
+
+     def load_rag_model(self):
+         self.rag_model = SentenceTransformer(self.rag_model_name)
+         self.rag_model.max_seq_length = 4096
+         self.rag_model.tokenizer.padding_side = "right"
+
+     def load_tool_desc_embedding(self, toolbox):
+         self.tool_name, _ = toolbox.refresh_tool_name_desc(
+             enable_full_desc=True)
+         all_tools_str = [json.dumps(
+             each) for each in toolbox.prepare_tool_prompts(toolbox.all_tools)]
+         # Cache the embeddings under a filename keyed by the MD5 of the tool
+         # set, so the cache is invalidated whenever the tools change.
+         md5_value = get_md5(str(all_tools_str))
+         print("md5 value of tools:", md5_value)
+         self.tool_embedding_path = self.rag_model_name.split(
+             '/')[-1] + "tool_embedding_" + md5_value + ".pt"
+         try:
+             self.tool_desc_embedding = torch.load(
+                 self.tool_embedding_path, weights_only=False)
+             assert len(self.tool_desc_embedding) == len(
+                 toolbox.all_tools), "The number of tools in the toolbox is not equal to the number of tool_desc_embedding."
+         except Exception:
+             self.tool_desc_embedding = None
+             print("\033[92mInferring the tool_desc_embedding.\033[0m")
+             self.tool_desc_embedding = self.rag_model.encode(
+                 all_tools_str, prompt="", normalize_embeddings=True
+             )
+             # Save the cache and exit; rerunning loads the cached embeddings
+             # instead of keeping two models resident (avoids the OOM issue).
+             torch.save(self.tool_desc_embedding, self.tool_embedding_path)
+             print("\033[92mFinished inferring the tool_desc_embedding.\033[0m")
+             print("\033[91mExiting. Please rerun the code to avoid the OOM issue.\033[0m")
+             exit()
+
+     def rag_infer(self, query, top_k=5):
+         torch.cuda.empty_cache()
+         queries = [query]
+         query_embeddings = self.rag_model.encode(
+             queries, prompt="", normalize_embeddings=True
+         )
+         if self.tool_desc_embedding is None:
+             print("No tool_desc_embedding")
+             exit()
+         scores = self.rag_model.similarity(
+             query_embeddings, self.tool_desc_embedding)
+         top_k = min(top_k, len(self.tool_name))
+         top_k_indices = torch.topk(scores, top_k).indices.tolist()[0]
+         top_k_tool_names = [self.tool_name[i] for i in top_k_indices]
+         return top_k_tool_names
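
The retrieval step in rag_infer reduces to a dot product followed by top-k selection, because both the query and tool-description embeddings are L2-normalized. A toy, self-contained sketch of that math (random vectors and invented tool names stand in for the ToolRAG-T1-GTE-Qwen2-1.5B embeddings):

import torch

tool_names = ["drug_interaction_lookup", "dose_adjustment", "contraindication_check"]  # toy names
tool_emb = torch.nn.functional.normalize(torch.randn(3, 8), dim=-1)   # stand-in tool embeddings
query_emb = torch.nn.functional.normalize(torch.randn(1, 8), dim=-1)  # stand-in query embedding
scores = query_emb @ tool_emb.T                        # cosine similarity, shape (1, 3)
top_k_indices = torch.topk(scores, k=2).indices.tolist()[0]
print([tool_names[i] for i in top_k_indices])          # the two closest tools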
src/txagent/txagent.py ADDED
@@ -0,0 +1,937 @@
+ import gradio as gr
+ import os
+ import sys
+ import json
+ import gc
+ import logging
+ import numpy as np
+ import torch
+ from vllm import LLM, SamplingParams
+ from jinja2 import Template
+ from typing import List
+ import types
+ from tooluniverse import ToolUniverse
+ from gradio import ChatMessage
+ from .toolrag import ToolRAGModel
+
+ from .utils import NoRepeatSentenceProcessor, ReasoningTraceChecker, tool_result_format
+
+ # torch and a module-level logger are used in llm_infer below but were
+ # missing from the original imports.
+ logger = logging.getLogger(__name__)
+
+
+ class TxAgent:
+     def __init__(self, model_name,
+                  rag_model_name,
+                  tool_files_dict=None,  # None leads to the default tool files in ToolUniverse
+                  enable_finish=True,
+                  enable_rag=True,
+                  enable_summary=False,
+                  init_rag_num=0,
+                  step_rag_num=10,
+                  summary_mode='step',
+                  summary_skip_last_k=0,
+                  summary_context_length=None,
+                  force_finish=True,
+                  avoid_repeat=True,
+                  seed=None,
+                  enable_checker=False,
+                  enable_chat=False,
+                  additional_default_tools=None,
+                  ):
+         self.model_name = model_name
+         self.tokenizer = None
+         self.terminators = None
+         self.rag_model_name = rag_model_name
+         self.tool_files_dict = tool_files_dict
+         self.model = None
+         self.rag_model = ToolRAGModel(rag_model_name)
+         self.tooluniverse = None
+         # self.tool_desc = None
+         self.prompt_multi_step = "You are a helpful assistant that will solve problems through detailed, step-by-step reasoning and actions based on your reasoning. Typically, your actions will use the provided functions. You have access to the following functions."
+         self.self_prompt = "Strictly follow the instruction."
+         self.chat_prompt = "You are a helpful assistant for chatting with the user."
+         self.enable_finish = enable_finish
+         self.enable_rag = enable_rag
+         self.enable_summary = enable_summary
+         self.summary_mode = summary_mode
+         self.summary_skip_last_k = summary_skip_last_k
+         self.summary_context_length = summary_context_length
+         self.init_rag_num = init_rag_num
+         self.step_rag_num = step_rag_num
+         self.force_finish = force_finish
+         self.avoid_repeat = avoid_repeat
+         self.seed = seed
+         self.enable_checker = enable_checker
+         self.additional_default_tools = additional_default_tools
+         self.print_self_values()
+
+     def init_model(self):
+         self.load_models()
+         self.load_tooluniverse()
+         self.load_tool_desc_embedding()
+
+     def print_self_values(self):
+         for attr, value in self.__dict__.items():
+             print(f"{attr}: {value}")
+
+     def load_models(self, model_name=None):
+         if model_name is not None:
+             if model_name == self.model_name:
+                 return f"The model {model_name} is already loaded."
+             self.model_name = model_name
+
+         self.model = LLM(model=self.model_name)
+         self.chat_template = Template(self.model.get_tokenizer().chat_template)
+         self.tokenizer = self.model.get_tokenizer()
+
+         return f"Model {self.model_name} loaded successfully."
+
+     def load_tooluniverse(self):
+         self.tooluniverse = ToolUniverse(tool_files=self.tool_files_dict)
+         self.tooluniverse.load_tools()
+         special_tools = self.tooluniverse.prepare_tool_prompts(
+             self.tooluniverse.tool_category_dicts["special_tools"])
+         self.special_tools_name = [tool['name'] for tool in special_tools]
+
+     def load_tool_desc_embedding(self):
+         self.rag_model.load_tool_desc_embedding(self.tooluniverse)
+
+     def rag_infer(self, query, top_k=5):
+         return self.rag_model.rag_infer(query, top_k)
+
+     def initialize_tools_prompt(self, call_agent, call_agent_level, message):
+         picked_tools_prompt = []
+         picked_tools_prompt = self.add_special_tools(
+             picked_tools_prompt, call_agent=call_agent)
+         if call_agent:
+             call_agent_level += 1
+             if call_agent_level >= 2:
+                 call_agent = False
+
+         if not call_agent:
+             picked_tools_prompt += self.tool_RAG(
+                 message=message, rag_num=self.init_rag_num)
+         return picked_tools_prompt, call_agent_level
+
+     def initialize_conversation(self, message, conversation=None, history=None):
+         if conversation is None:
+             conversation = []
+
+         conversation = self.set_system_prompt(
+             conversation, self.prompt_multi_step)
+         if history is not None:
+             if len(history) == 0:
+                 conversation = []
+                 print("clear conversation successfully")
+             else:
+                 for i in range(len(history)):
+                     if history[i]['role'] == 'user':
+                         if i - 1 >= 0 and history[i - 1]['role'] == 'assistant':
+                             conversation.append(
+                                 {"role": "assistant", "content": history[i - 1]['content']})
+                         conversation.append(
+                             {"role": "user", "content": history[i]['content']})
+                     if i == len(history) - 1 and history[i]['role'] == 'assistant':
+                         conversation.append(
+                             {"role": "assistant", "content": history[i]['content']})
+
+         conversation.append({"role": "user", "content": message})
+
+         return conversation
+
+     def tool_RAG(self, message=None,
+                  picked_tool_names=None,
+                  existing_tools_prompt=None,  # was a mutable default ([]) in the original
+                  rag_num=5,
+                  return_call_result=False):
+         extra_factor = 30  # Factor to retrieve more than rag_num
+         if picked_tool_names is None:
+             assert picked_tool_names is not None or message is not None
+             picked_tool_names = self.rag_infer(
+                 message, top_k=rag_num * extra_factor)
+
+         picked_tool_names_no_special = []
+         for tool in picked_tool_names:
+             if tool not in self.special_tools_name:
+                 picked_tool_names_no_special.append(tool)
+         picked_tool_names = picked_tool_names_no_special[:rag_num]
+
+         picked_tools = self.tooluniverse.get_tool_by_name(picked_tool_names)
+         picked_tools_prompt = self.tooluniverse.prepare_tool_prompts(
+             picked_tools)
+         if return_call_result:
+             return picked_tools_prompt, picked_tool_names
+         return picked_tools_prompt
+
+     def add_special_tools(self, tools, call_agent=False):
+         if self.enable_finish:
+             tools.append(self.tooluniverse.get_one_tool_by_one_name(
+                 'Finish', return_prompt=True))
+             print("Finish tool is added")
+         if call_agent:
+             tools.append(self.tooluniverse.get_one_tool_by_one_name(
+                 'CallAgent', return_prompt=True))
+             print("CallAgent tool is added")
+         else:
+             if self.enable_rag:
+                 tools.append(self.tooluniverse.get_one_tool_by_one_name(
+                     'Tool_RAG', return_prompt=True))
+                 print("Tool_RAG tool is added")
+
+             if self.additional_default_tools is not None:
+                 for each_tool_name in self.additional_default_tools:
+                     tool_prompt = self.tooluniverse.get_one_tool_by_one_name(
+                         each_tool_name, return_prompt=True)
+                     if tool_prompt is not None:
+                         print(f"{each_tool_name} tool is added")
+                         tools.append(tool_prompt)
+         return tools
+
+     def add_finish_tools(self, tools):
+         tools.append(self.tooluniverse.get_one_tool_by_one_name(
+             'Finish', return_prompt=True))
+         print("Finish tool is added")
+         return tools
+
+     def set_system_prompt(self, conversation, sys_prompt):
+         if len(conversation) == 0:
+             conversation.append(
+                 {"role": "system", "content": sys_prompt})
+         else:
+             conversation[0] = {"role": "system", "content": sys_prompt}
+         return conversation
+
+     def run_function_call(self, fcall_str,
+                           return_message=False,
+                           existing_tools_prompt=None,
+                           message_for_call_agent=None,
+                           call_agent=False,
+                           call_agent_level=None,
+                           temperature=None):
+
+         function_call_json, message = self.tooluniverse.extract_function_call_json(
+             fcall_str, return_message=return_message, verbose=False)
+         call_results = []
+         special_tool_call = ''
+         if function_call_json is not None:
+             if isinstance(function_call_json, list):
+                 for i in range(len(function_call_json)):
+                     print("\033[94mTool Call:\033[0m", function_call_json[i])
+                     if function_call_json[i]["name"] == 'Finish':
+                         special_tool_call = 'Finish'
+                         break
+                     elif function_call_json[i]["name"] == 'Tool_RAG':
+                         new_tools_prompt, call_result = self.tool_RAG(
+                             message=message,
+                             existing_tools_prompt=existing_tools_prompt,
+                             rag_num=self.step_rag_num,
+                             return_call_result=True)
+                         existing_tools_prompt += new_tools_prompt
+                     elif function_call_json[i]["name"] == 'CallAgent':
+                         if call_agent_level < 2 and call_agent:
+                             solution_plan = function_call_json[i]['arguments']['solution']
+                             full_message = (
+                                 message_for_call_agent +
+                                 "\nYou must follow the following plan to answer the question: " +
+                                 str(solution_plan)
+                             )
+                             call_result = self.run_multistep_agent(
+                                 full_message, temperature=temperature,
+                                 max_new_tokens=1024, max_token=99999,
+                                 call_agent=False, call_agent_level=call_agent_level)
+                             call_result = call_result.split(
+                                 '[FinalAnswer]')[-1].strip()
+                         else:
+                             call_result = "Error: The CallAgent has been disabled. Please proceed with your reasoning process to solve this question."
+                     else:
+                         call_result = self.tooluniverse.run_one_function(
+                             function_call_json[i])
+
+                     call_id = self.tooluniverse.call_id_gen()
+                     function_call_json[i]["call_id"] = call_id
+                     print("\033[94mTool Call Result:\033[0m", call_result)
+                     call_results.append({
+                         "role": "tool",
+                         "content": json.dumps({"content": call_result, "call_id": call_id})
+                     })
+         else:
+             call_results.append({
+                 "role": "tool",
+                 "content": json.dumps({"content": "Not a valid function call, please check the function call format."})
+             })
+
+         revised_messages = [{
+             "role": "assistant",
+             "content": message.strip(),
+             "tool_calls": json.dumps(function_call_json)
+         }] + call_results
+
+         # Return the revised messages plus the (possibly expanded) tool prompt.
+         return revised_messages, existing_tools_prompt, special_tool_call
+
+     def run_function_call_stream(self, fcall_str,
+                                  return_message=False,
+                                  existing_tools_prompt=None,
+                                  message_for_call_agent=None,
+                                  call_agent=False,
+                                  call_agent_level=None,
+                                  temperature=None,
+                                  return_gradio_history=True):
+
+         function_call_json, message = self.tooluniverse.extract_function_call_json(
+             fcall_str, return_message=return_message, verbose=False)
+         call_results = []
+         special_tool_call = ''
+         if return_gradio_history:
+             gradio_history = []
+         if function_call_json is not None:
+             if isinstance(function_call_json, list):
+                 for i in range(len(function_call_json)):
+                     if function_call_json[i]["name"] == 'Finish':
+                         special_tool_call = 'Finish'
+                         break
+                     elif function_call_json[i]["name"] == 'Tool_RAG':
+                         new_tools_prompt, call_result = self.tool_RAG(
+                             message=message,
+                             existing_tools_prompt=existing_tools_prompt,
+                             rag_num=self.step_rag_num,
+                             return_call_result=True)
+                         existing_tools_prompt += new_tools_prompt
+                     elif function_call_json[i]["name"] == 'DirectResponse':
+                         # 'respose' is left as-is: it matches the (misspelled)
+                         # argument name defined in the DirectResponse tool schema.
+                         call_result = function_call_json[i]['arguments']['respose']
+                         special_tool_call = 'DirectResponse'
+                     elif function_call_json[i]["name"] == 'RequireClarification':
+                         call_result = function_call_json[i]['arguments']['unclear_question']
+                         special_tool_call = 'RequireClarification'
+                     elif function_call_json[i]["name"] == 'CallAgent':
+                         if call_agent_level < 2 and call_agent:
+                             solution_plan = function_call_json[i]['arguments']['solution']
+                             full_message = (
+                                 message_for_call_agent +
+                                 "\nYou must follow the following plan to answer the question: " +
+                                 str(solution_plan)
+                             )
+                             sub_agent_task = "Sub TxAgent plan: " + \
+                                 str(solution_plan)
+                             # When streaming, yield responses as they arrive.
+                             call_result = yield from self.run_gradio_chat(
+                                 full_message, history=[], temperature=temperature,
+                                 max_new_tokens=1024, max_token=99999,
+                                 call_agent=False, call_agent_level=call_agent_level,
+                                 conversation=None,
+                                 sub_agent_task=sub_agent_task)
+
+                             call_result = call_result.split(
+                                 '[FinalAnswer]')[-1]
+                         else:
+                             call_result = "Error: The CallAgent has been disabled. Please proceed with your reasoning process to solve this question."
+                     else:
+                         call_result = self.tooluniverse.run_one_function(
+                             function_call_json[i])
+
+                     call_id = self.tooluniverse.call_id_gen()
+                     function_call_json[i]["call_id"] = call_id
+                     call_results.append({
+                         "role": "tool",
+                         "content": json.dumps({"content": call_result, "call_id": call_id})
+                     })
+                     if return_gradio_history and function_call_json[i]["name"] != 'Finish':
+                         if function_call_json[i]["name"] == 'Tool_RAG':
+                             gradio_history.append(ChatMessage(role="assistant", content=str(call_result), metadata={
+                                 "title": "🧰 " + function_call_json[i]['name'], "log": str(function_call_json[i]['arguments'])}))
+                         else:
+                             gradio_history.append(ChatMessage(role="assistant", content=str(call_result), metadata={
+                                 "title": "⚒️ " + function_call_json[i]['name'], "log": str(function_call_json[i]['arguments'])}))
+         else:
+             call_results.append({
+                 "role": "tool",
+                 "content": json.dumps({"content": "Not a valid function call, please check the function call format."})
+             })
+
+         revised_messages = [{
+             "role": "assistant",
+             "content": message.strip(),
+             "tool_calls": json.dumps(function_call_json)
+         }] + call_results
+
+         # Return the final result (and the Gradio history when requested).
+         if return_gradio_history:
+             return revised_messages, existing_tools_prompt, special_tool_call, gradio_history
+         else:
+             return revised_messages, existing_tools_prompt, special_tool_call
+
+     def get_answer_based_on_unfinished_reasoning(self, conversation, temperature, max_new_tokens, max_token, outputs=None):
+         if conversation[-1]['role'] == 'assistant':  # fixed: was misspelled 'assisant'
+             conversation.append(
+                 {'role': 'tool', 'content': 'Errors happen during the function call, please come up with the final answer with the current information.'})
+         finish_tools_prompt = self.add_finish_tools([])
+
+         last_outputs_str = self.llm_infer(messages=conversation,
+                                           temperature=temperature,
+                                           tools=finish_tools_prompt,
+                                           output_begin_string='Since I cannot continue reasoning, I will provide the final answer based on the current information and general knowledge.\n\n[FinalAnswer]',
+                                           skip_special_tokens=True,
+                                           max_new_tokens=max_new_tokens, max_token=max_token)
+         print(last_outputs_str)
+         return last_outputs_str
+
+     def run_multistep_agent(self, message: str,
+                             temperature: float,
+                             max_new_tokens: int,
+                             max_token: int,
+                             max_round: int = 20,
+                             call_agent=False,
+                             call_agent_level=0) -> str:
+         """
+         Run the multi-step reasoning loop and return the final answer.
+         Args:
+             message (str): The input message.
+             temperature (float): The temperature for generating the response.
+             max_new_tokens (int): The maximum number of new tokens to generate per round.
+             max_token (int): The context-size limit before a finish is forced.
+             max_round (int): The maximum number of reasoning rounds.
+         Returns:
+             str: The generated response.
+         """
+         print("\033[1;32;40mstart\033[0m")
+         picked_tools_prompt, call_agent_level = self.initialize_tools_prompt(
+             call_agent, call_agent_level, message)
+         conversation = self.initialize_conversation(message)
+
+         outputs = []
+         last_outputs = []
+         next_round = True
+         function_call_messages = []
+         current_round = 0
+         token_overflow = False
+         enable_summary = False
+         last_status = {}
+
+         if self.enable_checker:
+             checker = ReasoningTraceChecker(message, conversation)
+         try:
+             while next_round and current_round < max_round:
+                 current_round += 1
+                 if len(outputs) > 0:
+                     function_call_messages, picked_tools_prompt, special_tool_call = self.run_function_call(
+                         last_outputs, return_message=True,
+                         existing_tools_prompt=picked_tools_prompt,
+                         message_for_call_agent=message,
+                         call_agent=call_agent,
+                         call_agent_level=call_agent_level,
+                         temperature=temperature)
+
+                     if special_tool_call == 'Finish':
+                         next_round = False
+                         conversation.extend(function_call_messages)
+                         if isinstance(function_call_messages[0]['content'], types.GeneratorType):
+                             function_call_messages[0]['content'] = next(
+                                 function_call_messages[0]['content'])
+                         return function_call_messages[0]['content'].split('[FinalAnswer]')[-1]
+
+                     if (self.enable_summary or token_overflow) and not call_agent:
+                         if token_overflow:
+                             print("token_overflow, using summary")
+                         enable_summary = True
+                         last_status = self.function_result_summary(
+                             conversation, status=last_status, enable_summary=enable_summary)
+
+                     if function_call_messages is not None:
+                         conversation.extend(function_call_messages)
+                         outputs.append(tool_result_format(
+                             function_call_messages))
+                     else:
+                         next_round = False
+                         conversation.extend(
+                             [{"role": "assistant", "content": ''.join(last_outputs)}])
+                         return ''.join(last_outputs).replace("</s>", "")
+                 if self.enable_checker:
+                     good_status, wrong_info = checker.check_conversation()
+                     if not good_status:
+                         next_round = False
+                         print("Internal error in reasoning: " + wrong_info)
+                         break
+                 last_outputs = []
+                 outputs.append("### TxAgent:\n")
+                 last_outputs_str, token_overflow = self.llm_infer(messages=conversation,
+                                                                   temperature=temperature,
+                                                                   tools=picked_tools_prompt,
+                                                                   skip_special_tokens=False,
+                                                                   max_new_tokens=max_new_tokens, max_token=max_token,
+                                                                   check_token_status=True)
+                 if last_outputs_str is None:
+                     next_round = False
+                     print("The number of tokens exceeds the maximum limit.")
+                 else:
+                     last_outputs.append(last_outputs_str)
+             if max_round == current_round:
+                 print("The number of rounds exceeds the maximum limit!")
+                 if self.force_finish:
+                     return self.get_answer_based_on_unfinished_reasoning(conversation, temperature, max_new_tokens, max_token)
+                 else:
+                     return None
+
+         except Exception as e:
+             print(f"Error: {e}")
+             if self.force_finish:
+                 return self.get_answer_based_on_unfinished_reasoning(conversation, temperature, max_new_tokens, max_token)
+             else:
+                 return None
+
+     def build_logits_processor(self, messages, llm):
+         # Use the tokenizer from the LLM instance.
+         tokenizer = llm.get_tokenizer()
+         if self.avoid_repeat and len(messages) > 2:
+             assistant_messages = []
+             for i in range(1, len(messages) + 1):
+                 if messages[-i]['role'] == 'assistant':
+                     assistant_messages.append(messages[-i]['content'])
+                     if len(assistant_messages) == 2:
+                         break
+             forbidden_ids = [tokenizer.encode(
+                 msg, add_special_tokens=False) for msg in assistant_messages]
+             return [NoRepeatSentenceProcessor(forbidden_ids, 5)]
+         else:
+             return None
+
+     def llm_infer(self, messages, temperature=0.1, tools=None,
+                   output_begin_string=None, max_new_tokens=2048,
+                   max_token=None, skip_special_tokens=True,
+                   model=None, tokenizer=None, terminators=None, seed=None, check_token_status=False):
+
+         if model is None:
+             model = self.model
+
+         logits_processor = self.build_logits_processor(messages, model)
+         sampling_params = SamplingParams(
+             temperature=temperature,
+             max_tokens=max_new_tokens,
+             logits_processors=logits_processor,
+             seed=seed if seed is not None else self.seed,
+         )
+
+         prompt = self.chat_template.render(
+             messages=messages, tools=tools, add_generation_prompt=True)
+         if output_begin_string is not None:
+             prompt += output_begin_string
+
+         if check_token_status and max_token is not None:
+             token_overflow = False
+             num_input_tokens = len(self.tokenizer.encode(
+                 prompt, return_tensors="pt")[0])
+             if num_input_tokens > max_token:
+                 torch.cuda.empty_cache()
+                 gc.collect()
+                 print("Number of input tokens before inference:",
+                       num_input_tokens)
+                 logger.info(
+                     "The number of tokens exceeds the maximum limit!!!!")
+                 token_overflow = True
+                 return None, token_overflow
+         output = model.generate(
+             prompt,
+             sampling_params=sampling_params,
+         )
+         output = output[0].outputs[0].text
+         print("\033[92m" + output + "\033[0m")
+         if check_token_status and max_token is not None:
+             return output, token_overflow
+
+         return output
+
+     def run_self_agent(self, message: str,
+                        temperature: float,
+                        max_new_tokens: int,
+                        max_token: int) -> str:
+
+         print("\033[1;32;40mstart self agent\033[0m")
+         conversation = []
+         conversation = self.set_system_prompt(conversation, self.self_prompt)
+         conversation.append({"role": "user", "content": message})
+         return self.llm_infer(messages=conversation,
+                               temperature=temperature,
+                               tools=None,
+                               max_new_tokens=max_new_tokens, max_token=max_token)
+
+     def run_chat_agent(self, message: str,
+                        temperature: float,
+                        max_new_tokens: int,
+                        max_token: int) -> str:
+
+         print("\033[1;32;40mstart chat agent\033[0m")
+         conversation = []
+         conversation = self.set_system_prompt(conversation, self.chat_prompt)
+         conversation.append({"role": "user", "content": message})
+         return self.llm_infer(messages=conversation,
+                               temperature=temperature,
+                               tools=None,
+                               max_new_tokens=max_new_tokens, max_token=max_token)
+
+     def run_format_agent(self, message: str,
+                          answer: str,
+                          temperature: float,
+                          max_new_tokens: int,
+                          max_token: int) -> str:
+
+         print("\033[1;32;40mstart format agent\033[0m")
+         if '[FinalAnswer]' in answer:
+             possible_final_answer = answer.split("[FinalAnswer]")[-1]
+         elif "\n\n" in answer:
+             possible_final_answer = answer.split("\n\n")[-1]
+         else:
+             possible_final_answer = answer.strip()
+         if len(possible_final_answer) == 1:
+             choice = possible_final_answer[0]
+             if choice in ['A', 'B', 'C', 'D', 'E']:
+                 return choice
+         elif len(possible_final_answer) > 1:
+             if possible_final_answer[1] == ':':
+                 choice = possible_final_answer[0]
+                 if choice in ['A', 'B', 'C', 'D', 'E']:
+                     print("choice", choice)
+                     return choice
+
+         conversation = []
+         format_prompt = "You are a helpful assistant that transforms the agent's answer into a final answer of 'A', 'B', 'C', or 'D'."
+         conversation = self.set_system_prompt(conversation, format_prompt)
+         conversation.append({"role": "user", "content": message +
+                              "\nThe final answer of agent:" + answer + "\n The answer is (must be a letter):"})
+         return self.llm_infer(messages=conversation,
+                               temperature=temperature,
+                               tools=None,
+                               max_new_tokens=max_new_tokens, max_token=max_token)
+
+     def run_summary_agent(self, thought_calls: str,
+                           function_response: str,
+                           temperature: float,
+                           max_new_tokens: int,
+                           max_token: int) -> str:
+         print("\033[1;32;40mSummarized Tool Result:\033[0m")
+         generate_tool_result_summary_training_prompt = """Thought and function calls:
+ {thought_calls}
+
+ Function calls' responses:
+ \"\"\"
+ {function_response}
+ \"\"\"
+
+ Based on the Thought and function calls, and the function calls' responses, you need to generate a summary of the function calls' responses that fulfills the requirements of the thought. The summary MUST BE ONE sentence and include all necessary information.
+
+ Directly respond with the summarized sentence of the function calls' responses only.
+
+ Generate **one summarized sentence** about "function calls' responses" with necessary information, and respond with a string:
+ """.format(thought_calls=thought_calls, function_response=function_response)
+         conversation = []
+         conversation.append(
+             {"role": "user", "content": generate_tool_result_summary_training_prompt})
+         output = self.llm_infer(messages=conversation,
+                                 temperature=temperature,
+                                 tools=None,
+                                 max_new_tokens=max_new_tokens, max_token=max_token)
+
+         if '[' in output:
+             output = output.split('[')[0]
+         return output
+
+     def function_result_summary(self, input_list, status, enable_summary):
+         """
+         Scans the conversation and replaces runs of tool responses with
+         one-sentence summaries to keep the context short. Supports 'step' and
+         'length' summary modes and skips the last `summary_skip_last_k` groups.
+
+         Parameters:
+             input_list (list): The conversation, a list of role/content dictionaries.
+             status (dict): Bookkeeping state carried over from the previous call
+                 (step counters and the last summarized index).
+             enable_summary (bool): Whether summarization is currently active.
+
+         Returns:
+             dict: The updated status dictionary.
+         """
+         if 'tool_call_step' not in status:
+             status['tool_call_step'] = 0
+
+         for idx in range(len(input_list)):
+             pos_id = len(input_list) - idx - 1
+             if input_list[pos_id]['role'] == 'assistant':
+                 if 'tool_calls' in input_list[pos_id]:
+                     if 'Tool_RAG' in str(input_list[pos_id]['tool_calls']):
+                         status['tool_call_step'] += 1
+                 break
+
+         if 'step' in status:
+             status['step'] += 1
+         else:
+             status['step'] = 0
+
+         if not enable_summary:
+             return status
+
+         if 'summarized_index' not in status:
+             status['summarized_index'] = 0
+
+         if 'summarized_step' not in status:
+             status['summarized_step'] = 0
+
+         if 'previous_length' not in status:
+             status['previous_length'] = 0
+
+         if 'history' not in status:
+             status['history'] = []
+
+         function_response = ''
+         this_thought_calls = None  # set once the first assistant message is reached
+         current_summarized_index = status['summarized_index']
+
+         status['history'].append(self.summary_mode == 'step' and status['summarized_step']
+                                  < status['step'] - status['tool_call_step'] - self.summary_skip_last_k)
+
+         idx = current_summarized_index
+         while idx < len(input_list):
+             if (self.summary_mode == 'step' and status['summarized_step'] < status['step'] - status['tool_call_step'] - self.summary_skip_last_k) or (self.summary_mode == 'length' and status['previous_length'] > self.summary_context_length):
+
+                 if input_list[idx]['role'] == 'assistant':
+                     if 'Tool_RAG' in str(input_list[idx]['tool_calls']):
+                         this_thought_calls = None
+                     else:
+                         if len(function_response) != 0:
+                             print("internal summary")
+                             status['summarized_step'] += 1
+                             result_summary = self.run_summary_agent(
+                                 thought_calls=this_thought_calls,
+                                 function_response=function_response,
+                                 temperature=0.1,
+                                 max_new_tokens=1024,
+                                 max_token=99999
+                             )
+
+                             input_list.insert(
+                                 last_call_idx + 1, {'role': 'tool', 'content': result_summary})
+                             status['summarized_index'] = last_call_idx + 2
+                             idx += 1
+
+                         last_call_idx = idx
+                         this_thought_calls = input_list[idx]['content'] + \
+                             input_list[idx]['tool_calls']
+                         function_response = ''
+
+                 elif input_list[idx]['role'] == 'tool' and this_thought_calls is not None:
+                     function_response += input_list[idx]['content']
+                     del input_list[idx]
+                     idx -= 1
+
+             else:
+                 break
+             idx += 1
+
+         if len(function_response) != 0:
+             status['summarized_step'] += 1
+             result_summary = self.run_summary_agent(
+                 thought_calls=this_thought_calls,
+                 function_response=function_response,
+                 temperature=0.1,
+                 max_new_tokens=1024,
+                 max_token=99999
+             )
+
+             tool_calls = json.loads(input_list[last_call_idx]['tool_calls'])
+             for tool_call in tool_calls:
+                 del tool_call['call_id']
+             input_list[last_call_idx]['tool_calls'] = json.dumps(tool_calls)
+             input_list.insert(
+                 last_call_idx + 1, {'role': 'tool', 'content': result_summary})
+             status['summarized_index'] = last_call_idx + 2
+
+         return status
+
+     # Following are Gradio-related functions
+
+     # General update method that accepts any new arguments through kwargs
+     def update_parameters(self, **kwargs):
+         for key, value in kwargs.items():
+             if hasattr(self, key):
+                 setattr(self, key, value)
+
+         # Return the updated attributes
+         updated_attributes = {key: value for key,
+                               value in kwargs.items() if hasattr(self, key)}
+         return updated_attributes
+
+     def run_gradio_chat(self, message: str,
+                         history: list,
+                         temperature: float,
+                         max_new_tokens: int,
+                         max_token: int,
+                         call_agent: bool,
+                         conversation: gr.State,
+                         max_round: int = 20,
+                         seed: int = None,
+                         call_agent_level: int = 0,
+                         sub_agent_task: str = None) -> str:
+         """
+         Generate a streaming response for the Gradio UI.
+         Args:
+             message (str): The input message.
+             history (list): The conversation history used by ChatInterface.
+             temperature (float): The temperature for generating the response.
+             max_new_tokens (int): The maximum number of new tokens to generate.
+         Returns:
+             str: The generated response.
+         """
+         print("\033[1;32;40mstart\033[0m")
+         print("len(message)", len(message))
+         if len(message) <= 10:
+             yield "Hi, I am TxAgent, an assistant for answering biomedical questions. Please provide a valid message with a string longer than 10 characters."
+             return "Please provide a valid message."
+         outputs = []
+         outputs_str = ''
+         last_outputs = []
+         # Initialized up front so the exception handler below cannot hit a NameError.
+         last_outputs_str = ''
+         last_thought = ''
+
+         picked_tools_prompt, call_agent_level = self.initialize_tools_prompt(
+             call_agent,
+             call_agent_level,
+             message)
+
+         conversation = self.initialize_conversation(
+             message,
+             conversation=conversation,
+             history=history)
+         history = []
+
+         next_round = True
+         function_call_messages = []
+         current_round = 0
+         enable_summary = False
+         last_status = {}  # for summary
+         token_overflow = False
+         if self.enable_checker:
+             checker = ReasoningTraceChecker(
+                 message, conversation, init_index=len(conversation))
+
+         try:
+             while next_round and current_round < max_round:
+                 current_round += 1
+                 if len(last_outputs) > 0:
+                     function_call_messages, picked_tools_prompt, special_tool_call, current_gradio_history = yield from self.run_function_call_stream(
+                         last_outputs, return_message=True,
+                         existing_tools_prompt=picked_tools_prompt,
+                         message_for_call_agent=message,
+                         call_agent=call_agent,
+                         call_agent_level=call_agent_level,
+                         temperature=temperature)
+                     history.extend(current_gradio_history)
+                     if special_tool_call == 'Finish':
+                         yield history
+                         next_round = False
+                         conversation.extend(function_call_messages)
+                         return function_call_messages[0]['content']
+                     elif special_tool_call == 'RequireClarification' or special_tool_call == 'DirectResponse':
+                         history.append(
+                             ChatMessage(role="assistant", content=history[-1].content))
+                         yield history
+                         next_round = False
+                         return history[-1].content
+                     if (self.enable_summary or token_overflow) and not call_agent:
+                         if token_overflow:
+                             print("token_overflow, using summary")
+                         enable_summary = True
+                         last_status = self.function_result_summary(
+                             conversation, status=last_status,
+                             enable_summary=enable_summary)
+                     if function_call_messages is not None:
+                         conversation.extend(function_call_messages)
+                         formatted_md_function_call_messages = tool_result_format(
+                             function_call_messages)
+                         yield history
+                     else:
+                         next_round = False
+                         conversation.extend(
+                             [{"role": "assistant", "content": ''.join(last_outputs)}])
+                         return ''.join(last_outputs).replace("</s>", "")
+                 if self.enable_checker:
+                     good_status, wrong_info = checker.check_conversation()
+                     if not good_status:
+                         next_round = False
+                         print("Internal error in reasoning: " + wrong_info)
+                         break
+                 last_outputs = []
+                 last_outputs_str, token_overflow = self.llm_infer(
+                     messages=conversation,
+                     temperature=temperature,
+                     tools=picked_tools_prompt,
+                     skip_special_tokens=False,
+                     max_new_tokens=max_new_tokens,
+                     max_token=max_token,
+                     seed=seed,
+                     check_token_status=True)
+                 if last_outputs_str is None:
+                     # Token overflow: skip this round's output so the summary
+                     # branch above can compress the conversation next iteration.
+                     continue
+                 last_thought = last_outputs_str.split("[TOOL_CALLS]")[0]
+                 for each in history:
+                     if each.metadata is not None:
+                         each.metadata['status'] = 'done'
+                 if '[FinalAnswer]' in last_thought:
+                     final_thought, final_answer = last_thought.split(
+                         '[FinalAnswer]')
+                     history.append(
+                         ChatMessage(role="assistant",
+                                     content=final_thought.strip())
+                     )
+                     yield history
+                     history.append(
+                         ChatMessage(
+                             role="assistant", content="**Answer**:\n" + final_answer.strip())
+                     )
+                     yield history
+                 else:
+                     history.append(ChatMessage(
+                         role="assistant", content=last_thought))
+                     yield history
+
+                 last_outputs.append(last_outputs_str)
+
+             if next_round:
+                 if self.force_finish:
+                     last_outputs_str = self.get_answer_based_on_unfinished_reasoning(
+                         conversation, temperature, max_new_tokens, max_token)
+                     for each in history:
+                         if each.metadata is not None:
+                             each.metadata['status'] = 'done'
+                     # The forced answer is generated after a '[FinalAnswer]'
+                     # begin string, so the returned text is the answer itself
+                     # (the original code split a stale `last_thought` here).
+                     history.append(
+                         ChatMessage(
+                             role="assistant", content="**Answer**:\n" + last_outputs_str.strip())
+                     )
+                     yield history
+                 else:
+                     yield "The number of rounds exceeds the maximum limit!"
+
+         except Exception as e:
+             print(f"Error: {e}")
+             if self.force_finish:
+                 last_outputs_str = self.get_answer_based_on_unfinished_reasoning(
+                     conversation,
+                     temperature,
+                     max_new_tokens,
+                     max_token)
+                 for each in history:
+                     if each.metadata is not None:
+                         each.metadata['status'] = 'done'
+                 history.append(
+                     ChatMessage(
+                         role="assistant", content="**Answer**:\n" + last_outputs_str.strip())
+                 )
+                 yield history
+             else:
+                 return None
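
One detail worth calling out in llm_infer above: rather than calling the tokenizer's apply_chat_template, it renders the raw chat template string through Jinja2 so the tools list can be injected alongside the messages. A minimal sketch of that mechanism with a toy template (the real template ships with the model's tokenizer; this one is invented for illustration):

from jinja2 import Template

# Toy stand-in for a chat template.
toy_template = Template(
    "{% for m in messages %}<|{{ m.role }}|>{{ m.content }}\n{% endfor %}"
    "{% if add_generation_prompt %}<|assistant|>{% endif %}"
)
prompt = toy_template.render(
    messages=[{"role": "system", "content": "You are a helpful assistant."},
              {"role": "user", "content": "Hello"}],
    add_generation_prompt=True,
)
print(prompt)  # <|system|>You are a helpful assistant. / <|user|>Hello / <|assistant|>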
src/txagent/utils.py ADDED
@@ -0,0 +1,117 @@
+ import sys
+ import json
+ import hashlib
+ import torch
+ from typing import List
+
+
+ def get_md5(input_str):
+     # Create an MD5 hash object
+     md5_hash = hashlib.md5()
+
+     # Encode the string and update the hash object
+     md5_hash.update(input_str.encode('utf-8'))
+
+     # Return the hexadecimal MD5 digest
+     return md5_hash.hexdigest()
+
+
+ def tool_result_format(function_call_messages):
+     current_output = "\n\n<details>\n<summary> <strong>Verified Feedback from Tools</strong>, click to see details:</summary>\n\n"
+     for each_message in function_call_messages:
+         if each_message['role'] == 'tool':
+             current_output += f"{each_message['content']}\n\n"
+     current_output += "</details>\n\n\n"
+     return current_output
+
+
+ class NoRepeatSentenceProcessor:
+     def __init__(self, forbidden_sequences: List[List[int]], allowed_prefix_length: int):
+         """
+         Args:
+             forbidden_sequences (List[List[int]]): A list of token ID sequences corresponding to forbidden sentences.
+             allowed_prefix_length (int): The number k such that if the generated tokens match the first k tokens
+                 of a forbidden sequence, then the candidate token that would extend the match is blocked.
+         """
+         self.allowed_prefix_length = allowed_prefix_length
+         # Build a lookup dictionary: key is a tuple of the first k tokens, value is a set of tokens to block.
+         self.forbidden_prefix_dict = {}
+         for seq in forbidden_sequences:
+             if len(seq) > allowed_prefix_length:
+                 prefix = tuple(seq[:allowed_prefix_length])
+                 next_token = seq[allowed_prefix_length]
+                 self.forbidden_prefix_dict.setdefault(
+                     prefix, set()).add(next_token)
+
+     def __call__(self, token_ids: List[int], logits: torch.Tensor) -> torch.Tensor:
+         """
+         Modifies the logits to block tokens that would extend a forbidden sentence.
+
+         Args:
+             token_ids (List[int]): List of token IDs generated so far.
+             logits (torch.Tensor): Logits tensor for the next token (shape: [vocab_size]).
+
+         Returns:
+             torch.Tensor: Modified logits.
+         """
+         if len(token_ids) >= self.allowed_prefix_length:
+             prefix = tuple(token_ids[:self.allowed_prefix_length])
+             if prefix in self.forbidden_prefix_dict:
+                 for token_id in self.forbidden_prefix_dict[prefix]:
+                     logits[token_id] = -float("inf")
+         return logits
+
+
+ class ReasoningTraceChecker:
+     def __init__(self, question, conversation, init_index=None):
+         self.question = question
+         self.conversation = conversation
+         self.existing_thoughts = []
+         self.existing_actions = []
+         if init_index is not None:
+             self.index = init_index
+         else:
+             self.index = 1
+         self.question = self.question.lower()
+         self.new_thoughts = []
+         self.new_actions = []
+
+     def check_conversation(self):
+         info = ''
+         current_index = self.index
+         for i in range(current_index, len(self.conversation)):
+             each = self.conversation[i]
+             self.index = i
+             if each['role'] == 'assistant':
+                 print(each)
+                 thought = each['content']
+                 # Plain assistant messages carry no tool calls; default to an empty list.
+                 actions = each.get('tool_calls', '[]')
+
+                 good_status, current_info = self.check_repeat_thought(thought)
+                 info += current_info
+                 if not good_status:
+                     return False, info
+
+                 good_status, current_info = self.check_repeat_action(actions)
+                 info += current_info
+                 if not good_status:
+                     return False, info
+         return True, info
+
+     def check_repeat_thought(self, thought):
+         if thought in self.existing_thoughts:
+             return False, "repeat_thought"
+         self.existing_thoughts.append(thought)
+         return True, ''
+
+     def check_repeat_action(self, actions):
+         if not isinstance(actions, list):
+             actions = json.loads(actions)
+         for each_action in actions:
+             if 'call_id' in each_action:
+                 del each_action['call_id']
+             each_action = json.dumps(each_action)
+             if each_action in self.existing_actions:
+                 return False, "repeat_action"
+             self.existing_actions.append(each_action)
+         return True, ''
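
To see NoRepeatSentenceProcessor in action outside vLLM, here is a small self-contained demonstration (it assumes the txagent package is importable; otherwise the class can be copied into a scratch file). The forbidden sequence is [5, 6, 7, 8, 9, 10] with an allowed prefix length of 5, so once generation has produced tokens 5 through 9, token 10 is masked out of the next-token distribution:

import torch
from txagent.utils import NoRepeatSentenceProcessor

processor = NoRepeatSentenceProcessor([[5, 6, 7, 8, 9, 10]], allowed_prefix_length=5)
logits = torch.zeros(16)                      # toy logits over a 16-token vocabulary
blocked = processor([5, 6, 7, 8, 9], logits)  # generated prefix matches the forbidden one
print(blocked[10])                            # tensor(-inf): token 10 can no longer be sampled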