Spaces:

jogonba2
/

SmolAgentsCourse

Sleeping

App Files Files Community

José Ángel González committed 24 days ago

Commit

e0d808c

1 Parent(s): 1665a27

add more tools and fixes

Browse files

Files changed (3) hide show

agents/react_agent.py +18 -20
app.py +2 -6
requirements.txt +4 -2

agents/react_agent.py CHANGED Viewed

@@ -15,9 +15,11 @@ Based on two pillars:
 from smolagents import (
     OpenAIServerModel,
     CodeAgent,
-    DuckDuckGoSearchTool,
     PythonInterpreterTool,
-    Tool
 )
 from PIL import Image
 import requests
@@ -39,16 +41,20 @@ def parse_excel(content: bytes) -> str:
 def parse_text(content: str) -> str:
     return content
 def parse_mp3(content: bytes) -> str:
     speech_to_text = Tool.from_space(
         "maguid28/TranscriptTool",
         name="transcription_tool",
-        description="Transcribe speech to text"
     )
     with open("audio.mp3", "wb") as fw:
         fw.write(content)
     return speech_to_text("audio.mp3")
 def download(task_id: str) -> bytes:
     response = requests.get(FILE_URL.format(task_id=task_id))
     return response.content
@@ -73,22 +79,19 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 FILE_URL = f"{DEFAULT_API_URL}/files/{{task_id}}"
 # Tools
-TOOLS = [DuckDuckGoSearchTool(), PythonInterpreterTool()]
-AUTHORIZED_IMPORTS = [
-    "geopandas",
-    "plotly",
-    "shapely",
-    "json",
-    "pandas",
-    "numpy",
-    "datetime",
 ]
 class ReactAgent:
     def __init__(self):
         model = OpenAIServerModel(
-            model_id="gpt-4.5-preview",
             api_key=os.environ["OPENAI_API_KEY"],
             temperature=0,
         )
@@ -111,7 +114,7 @@ class ReactAgent:
             user_question = question["question"]
             if file_info["type"] == "image":
                 return self.agent.run(user_question, images=[parsed_content])
             user_question = (
                 f"{user_question}\n"
                 f"Here is the content of the file you have to consider to answer the question:\n"
@@ -125,11 +128,6 @@ class ReactAgent:
         prev_system_prompt = self.agent.system_prompt
         prompt_prefix = prev_system_prompt.split("Now Begin!")[0].strip()
         gaia_answer_rules = """\n\nYour final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
-        gaia_answer_rules = """It is crucial that you wrap your final answer in the ```code``` block by using the `final_answer` tool or your mom will die."""
         system_prompt = prompt_prefix + gaia_answer_rules + "\n\nNow Begin!"
         self.agent.system_prompt = system_prompt
-if __name__ == "__main__":
-    question = {'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be', 'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.', 'Level': '1', 'file_name': ''}
-    agent = ReactAgent()
-    response = agent(question)

 from smolagents import (
     OpenAIServerModel,
     CodeAgent,
+    # DuckDuckGoSearchTool,
+    GoogleSearchTool,
     PythonInterpreterTool,
+    VisitWebpageTool,
+    Tool,
 )
 from PIL import Image
 import requests
 def parse_text(content: str) -> str:
     return content
 def parse_mp3(content: bytes) -> str:
+    # Faster than loading and running a Whisper model
+    # ourselves or using the SpeechToTextTool
     speech_to_text = Tool.from_space(
         "maguid28/TranscriptTool",
         name="transcription_tool",
+        description="Transcribe speech to text",
     )
     with open("audio.mp3", "wb") as fw:
         fw.write(content)
     return speech_to_text("audio.mp3")
 def download(task_id: str) -> bytes:
     response = requests.get(FILE_URL.format(task_id=task_id))
     return response.content
 FILE_URL = f"{DEFAULT_API_URL}/files/{{task_id}}"
 # Tools
+TOOLS = [
+    GoogleSearchTool(provider="serper"),
+    PythonInterpreterTool(),
+    VisitWebpageTool(max_output_length=5000),
 ]
+# DuckDuckGoSearchTool()
+AUTHORIZED_IMPORTS = ["json", "pandas", "numpy", "datetime", "requests", "bs4"]
 class ReactAgent:
     def __init__(self):
         model = OpenAIServerModel(
+            model_id="gpt-4o",
             api_key=os.environ["OPENAI_API_KEY"],
             temperature=0,
         )
             user_question = question["question"]
             if file_info["type"] == "image":
                 return self.agent.run(user_question, images=[parsed_content])
+            print(parsed_content)
             user_question = (
                 f"{user_question}\n"
                 f"Here is the content of the file you have to consider to answer the question:\n"
         prev_system_prompt = self.agent.system_prompt
         prompt_prefix = prev_system_prompt.split("Now Begin!")[0].strip()
         gaia_answer_rules = """\n\nYour final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
+        gaia_answer_rules += """ You must wrap it in the ```code``` block by using the `final_answer` tool or your mom will die."""
         system_prompt = prompt_prefix + gaia_answer_rules + "\n\nNow Begin!"
         self.agent.system_prompt = system_prompt

app.py CHANGED Viewed

@@ -29,12 +29,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
-    try:
-        agent = ReactAgent()
-    except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
     # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
@@ -65,6 +59,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:

     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
+        # Instantiate the agent for each question in the data (new context for each question!)
+        agent = ReactAgent()
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:

requirements.txt CHANGED Viewed

@@ -8,7 +8,9 @@ langchain-experimental
 duckduckgo-search
 langchain
 pydantic
-smolagents
 pandas
 openpyxl
-tabulate

 duckduckgo-search
 langchain
 pydantic
+smolagents[transformers,audio]
 pandas
 openpyxl
+tabulate
+bs4
+transformers