José Ángel González committed on
Commit
e0d808c
·
1 Parent(s): 1665a27

add more tools and fixes

Browse files
Files changed (3) hide show
  1. agents/react_agent.py +18 -20
  2. app.py +2 -6
  3. requirements.txt +4 -2
agents/react_agent.py CHANGED
@@ -15,9 +15,11 @@ Based on two pillars:
15
  from smolagents import (
16
  OpenAIServerModel,
17
  CodeAgent,
18
- DuckDuckGoSearchTool,
 
19
  PythonInterpreterTool,
20
- Tool
 
21
  )
22
  from PIL import Image
23
  import requests
@@ -39,16 +41,20 @@ def parse_excel(content: bytes) -> str:
39
  def parse_text(content: str) -> str:
40
  return content
41
 
 
42
  def parse_mp3(content: bytes) -> str:
 
 
43
  speech_to_text = Tool.from_space(
44
  "maguid28/TranscriptTool",
45
  name="transcription_tool",
46
- description="Transcribe speech to text"
47
  )
48
  with open("audio.mp3", "wb") as fw:
49
  fw.write(content)
50
  return speech_to_text("audio.mp3")
51
 
 
52
  def download(task_id: str) -> bytes:
53
  response = requests.get(FILE_URL.format(task_id=task_id))
54
  return response.content
@@ -73,22 +79,19 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
73
  FILE_URL = f"{DEFAULT_API_URL}/files/{{task_id}}"
74
 
75
  # Tools
76
- TOOLS = [DuckDuckGoSearchTool(), PythonInterpreterTool()]
77
- AUTHORIZED_IMPORTS = [
78
- "geopandas",
79
- "plotly",
80
- "shapely",
81
- "json",
82
- "pandas",
83
- "numpy",
84
- "datetime",
85
  ]
 
 
86
 
87
 
88
  class ReactAgent:
89
  def __init__(self):
90
  model = OpenAIServerModel(
91
- model_id="gpt-4.5-preview",
92
  api_key=os.environ["OPENAI_API_KEY"],
93
  temperature=0,
94
  )
@@ -111,7 +114,7 @@ class ReactAgent:
111
  user_question = question["question"]
112
  if file_info["type"] == "image":
113
  return self.agent.run(user_question, images=[parsed_content])
114
-
115
  user_question = (
116
  f"{user_question}\n"
117
  f"Here is the content of the file you have to consider to answer the question:\n"
@@ -125,11 +128,6 @@ class ReactAgent:
125
  prev_system_prompt = self.agent.system_prompt
126
  prompt_prefix = prev_system_prompt.split("Now Begin!")[0].strip()
127
  gaia_answer_rules = """\n\nYour final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
128
- gaia_answer_rules = """It is crucial that you wrap your final answer in the ```code``` block by using the `final_answer` tool or your mom will die."""
129
  system_prompt = prompt_prefix + gaia_answer_rules + "\n\nNow Begin!"
130
  self.agent.system_prompt = system_prompt
131
-
132
- if __name__ == "__main__":
133
- question = {'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be', 'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.', 'Level': '1', 'file_name': ''}
134
- agent = ReactAgent()
135
- response = agent(question)
 
15
  from smolagents import (
16
  OpenAIServerModel,
17
  CodeAgent,
18
+ # DuckDuckGoSearchTool,
19
+ GoogleSearchTool,
20
  PythonInterpreterTool,
21
+ VisitWebpageTool,
22
+ Tool,
23
  )
24
  from PIL import Image
25
  import requests
 
41
  def parse_text(content: str) -> str:
42
  return content
43
 
44
+
45
  def parse_mp3(content: bytes) -> str:
46
+ # Faster than load & run a whisper model
47
+ # by ourselves or using the SpeechToTextTool
48
  speech_to_text = Tool.from_space(
49
  "maguid28/TranscriptTool",
50
  name="transcription_tool",
51
+ description="Transcribe speech to text",
52
  )
53
  with open("audio.mp3", "wb") as fw:
54
  fw.write(content)
55
  return speech_to_text("audio.mp3")
56
 
57
+
58
  def download(task_id: str) -> bytes:
59
  response = requests.get(FILE_URL.format(task_id=task_id))
60
  return response.content
 
79
  FILE_URL = f"{DEFAULT_API_URL}/files/{{task_id}}"
80
 
81
  # Tools
82
+ TOOLS = [
83
+ GoogleSearchTool(provider="serper"),
84
+ PythonInterpreterTool(),
85
+ VisitWebpageTool(max_output_length=5000),
 
 
 
 
 
86
  ]
87
+ # DuckDuckGoSearchTool()
88
+ AUTHORIZED_IMPORTS = ["json", "pandas", "numpy", "datetime", "requests", "bs4"]
89
 
90
 
91
  class ReactAgent:
92
  def __init__(self):
93
  model = OpenAIServerModel(
94
+ model_id="gpt-4o",
95
  api_key=os.environ["OPENAI_API_KEY"],
96
  temperature=0,
97
  )
 
114
  user_question = question["question"]
115
  if file_info["type"] == "image":
116
  return self.agent.run(user_question, images=[parsed_content])
117
+ print(parsed_content)
118
  user_question = (
119
  f"{user_question}\n"
120
  f"Here is the content of the file you have to consider to answer the question:\n"
 
128
  prev_system_prompt = self.agent.system_prompt
129
  prompt_prefix = prev_system_prompt.split("Now Begin!")[0].strip()
130
  gaia_answer_rules = """\n\nYour final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
131
+ gaia_answer_rules += """ You must wrap it in the ```code``` block by using the `final_answer` tool or your mom will die."""
132
  system_prompt = prompt_prefix + gaia_answer_rules + "\n\nNow Begin!"
133
  self.agent.system_prompt = system_prompt
 
 
 
 
 
app.py CHANGED
@@ -29,12 +29,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
29
  questions_url = f"{api_url}/questions"
30
  submit_url = f"{api_url}/submit"
31
 
32
- # 1. Instantiate Agent ( modify this part to create your agent)
33
- try:
34
- agent = ReactAgent()
35
- except Exception as e:
36
- print(f"Error instantiating agent: {e}")
37
- return f"Error initializing agent: {e}", None
38
  # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
39
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
40
  print(agent_code)
@@ -65,6 +59,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
65
  answers_payload = []
66
  print(f"Running agent on {len(questions_data)} questions...")
67
  for item in questions_data:
 
 
68
  task_id = item.get("task_id")
69
  question_text = item.get("question")
70
  if not task_id or question_text is None:
 
29
  questions_url = f"{api_url}/questions"
30
  submit_url = f"{api_url}/submit"
31
 
 
 
 
 
 
 
32
  # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
33
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
34
  print(agent_code)
 
59
  answers_payload = []
60
  print(f"Running agent on {len(questions_data)} questions...")
61
  for item in questions_data:
62
+ # Instantiate the agent for each question in the data (new context for each question!)
63
+ agent = ReactAgent()
64
  task_id = item.get("task_id")
65
  question_text = item.get("question")
66
  if not task_id or question_text is None:
requirements.txt CHANGED
@@ -8,7 +8,9 @@ langchain-experimental
8
  duckduckgo-search
9
  langchain
10
  pydantic
11
- smolagents
12
  pandas
13
  openpyxl
14
- tabulate
 
 
 
8
  duckduckgo-search
9
  langchain
10
  pydantic
11
+ smolagents[transformers,audio]
12
  pandas
13
  openpyxl
14
+ tabulate
15
+ bs4
16
+ transformers