---
# This file contains information about verified agent results for different benchmarks.
# Format:
# benchmark_name:
#   - agent_name: "Name of the agent"
#     verification_date: YYYY-MM-DD
# Verified agent results for the corebench_easy benchmark.
# Each entry pairs an agent name with the date its result was verified.
# NOTE(review): unquoted YYYY-MM-DD values load as date objects under
# YAML 1.1 parsers; quote them if the consumer expects plain strings.
corebench_easy:
  - agent_name: "AutoGPT (GPT-4o)"
    verification_date: 2024-09-28
  - agent_name: "AutoGPT (GPT-4o-mini)"
    verification_date: 2024-09-28
  - agent_name: "CORE-Agent (GPT-4o)"
    verification_date: 2024-09-28
  - agent_name: "CORE-Agent (GPT-4o-mini)"
    verification_date: 2024-09-28
# Verified agent results for the corebench_medium benchmark.
# Each entry pairs an agent name with the date its result was verified.
# NOTE(review): unquoted YYYY-MM-DD values load as date objects under
# YAML 1.1 parsers; quote them if the consumer expects plain strings.
corebench_medium:
  - agent_name: "AutoGPT (GPT-4o)"
    verification_date: 2024-09-28
  - agent_name: "AutoGPT (GPT-4o-mini)"
    verification_date: 2024-09-28
  - agent_name: "CORE-Agent (GPT-4o)"
    verification_date: 2024-09-28
  - agent_name: "CORE-Agent (GPT-4o-mini)"
    verification_date: 2024-09-28
# Verified agent results for the corebench_hard benchmark.
# Each entry pairs an agent name with the date its result was verified.
# NOTE(review): unquoted YYYY-MM-DD values load as date objects under
# YAML 1.1 parsers; quote them if the consumer expects plain strings.
corebench_hard:
  - agent_name: "AutoGPT (GPT-4o)"
    verification_date: 2024-09-28
  - agent_name: "AutoGPT (GPT-4o-mini)"
    verification_date: 2024-09-28
  - agent_name: "CORE-Agent (GPT-4o)"
    verification_date: 2024-09-28
  - agent_name: "CORE-Agent (GPT-4o-mini)"
    verification_date: 2024-09-28