---
# This file contains information about verified agent results for different
# benchmarks.
#
# Format:
#   benchmark_name:
#     - agent_name: "Name of the agent"
#       verification_date: YYYY-MM-DD
#
# Conventions:
#   - agent_name is always double-quoted; names may contain ": " or other
#     characters that are unsafe in plain scalars.
#   - verification_date is left unquoted. NOTE(review): YAML 1.1 loaders
#     (e.g. PyYAML) presumably resolve it to a date object rather than a
#     string; confirm against the consumer before quoting these values.

usaco:
  # gpt-4o-mini-2024-07-18, verified 2024-08-11 through 2024-08-20
  - agent_name: "USACO Reflexion + Episodic (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-20
  - agent_name: "USACO Reflexion + Episodic + Semantic (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-20
  - agent_name: "USACO Reflexion (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-20
  - agent_name: "USACO Episodic (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-12
  - agent_name: "USACO Reflexion + Semantic (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-20
  - agent_name: "USACO Zero-shot (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-11
  - agent_name: "USACO Semantic (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-12

  # Verified 2024-08-25
  - agent_name: "USACO Reflexion + Episodic + Semantic (gpt-4o-2024-05-13)"
    verification_date: 2024-08-25
  - agent_name: "USACO Reflexion + Episodic (gpt-4o-2024-05-13)"
    verification_date: 2024-08-25
  - agent_name: "USACO Reflexion + Semantic (gpt-4o-2024-05-13)"
    verification_date: 2024-08-25
  - agent_name: "Episodic Retrial (2x) (gpt-4o-2024-05-13)"
    verification_date: 2024-08-25
  - agent_name: "Episodic Retrial (3x) (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-25
  - agent_name: "Episodic Retrial (2x) (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-25
  - agent_name: "Episodic Retrial (5x) (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-25

  # Verified 2024-08-24
  - agent_name: "Episodic Warming (3 Steps) (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-24
  - agent_name: "USACO Episodic (gpt-4o-2024-05-13)"
    verification_date: 2024-08-24
  - agent_name: "USACO Semantic (gpt-4o-2024-05-13)"
    verification_date: 2024-08-24
  - agent_name: "Zero-shot Retrial (2x) (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-24
  - agent_name: "Zero-shot Retrial (3x) (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-24
  - agent_name: "Zero-shot Retrial (5x) (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-24
  - agent_name: "USACO Zero-shot (gpt-4o-2024-05-13)"
    verification_date: 2024-08-24

swebench_verified:
  - agent_name: "Agentless (gpt-4o-mini-2024-07-18) (50 Instances)"
    verification_date: 2024-08-17
  # This name contains ": " and therefore must stay quoted.
  - agent_name: "SWE-agent (gpt-4o-mini-2024-07-18) (Cost Limit: $1) (50 Instances)"
    verification_date: 2024-08-19

mlagentbench:
  - agent_name: "MLAgentBench ResearchAgent (gpt-4o-mini-2024-07-18)"
    verification_date: 2024-08-19