benediktstroebl committed on
Commit
e92240d
·
verified ·
1 Parent(s): 280969f

Upload verified_agents.yaml

Browse files
Files changed (1) hide show
  1. verified_agents.yaml +29 -0
verified_agents.yaml CHANGED
@@ -19,6 +19,35 @@ usaco:
19
  verification_date: 2024-08-11
20
  - agent_name: "USACO Semantic (gpt-4o-mini-2024-07-18)"
21
  verification_date: 2024-08-12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  swebench_verified:
24
  - agent_name: "Agentless (gpt-4o-mini-2024-07-18) (50 Instances)"
 
19
  verification_date: 2024-08-11
20
  - agent_name: "USACO Semantic (gpt-4o-mini-2024-07-18)"
21
  verification_date: 2024-08-12
22
+ - agent_name: USACO Reflexion + Episodic + Semantic (gpt-4o-2024-05-13)
23
+ verification_date: 2024-08-25
24
+ - agent_name: USACO Reflexion + Episodic (gpt-4o-2024-05-13)
25
+ verification_date: 2024-08-25
26
+ - agent_name: USACO Reflexion + Semantic (gpt-4o-2024-05-13)
27
+ verification_date: 2024-08-25
28
+ - agent_name: Episodic Retrial (2x) (gpt-4o-2024-05-13)
29
+ verification_date: 2024-08-25
30
+ - agent_name: Episodic Retrial (3x) (gpt-4o-mini-2024-07-18)
31
+ verification_date: 2024-08-25
32
+ - agent_name: Episodic Retrial (2x) (gpt-4o-mini-2024-07-18)
33
+ verification_date: 2024-08-25
34
+ - agent_name: Episodic Retrial (5x) (gpt-4o-mini-2024-07-18)
35
+ verification_date: 2024-08-25
36
+ - agent_name: Episodic Warming (3 Steps) (gpt-4o-mini-2024-07-18)
37
+ verification_date: 2024-08-24
38
+ - agent_name: USACO Episodic (gpt-4o-2024-05-13)
39
+ verification_date: 2024-08-24
40
+ - agent_name: USACO Semantic (gpt-4o-2024-05-13)
41
+ verification_date: 2024-08-24
42
+ - agent_name: Zero-shot Retrial (2x) (gpt-4o-mini-2024-07-18)
43
+ verification_date: 2024-08-24
44
+ - agent_name: Zero-shot Retrial (3x) (gpt-4o-mini-2024-07-18)
45
+ verification_date: 2024-08-24
46
+ - agent_name: Zero-shot Retrial (5x) (gpt-4o-mini-2024-07-18)
47
+ verification_date: 2024-08-24
48
+ - agent_name: USACO Zero-shot (gpt-4o-2024-05-13)
49
+ verification_date: 2024-08-24
50
+
51
 
52
  swebench_verified:
53
  - agent_name: "Agentless (gpt-4o-mini-2024-07-18) (50 Instances)"