benediktstroebl commited on
Commit
73428db
·
unverified ·
1 Parent(s): 317b884

Delete evals_live/usaco_usaco_example_agent_1722871527.json

Browse files
evals_live/usaco_usaco_example_agent_1722871527.json DELETED
@@ -1,127 +0,0 @@
1
- {
2
- "config": {
3
- "agent_name": "usaco_example_agent_2",
4
- "benchmark_name": "usaco",
5
- "date": "2024-08-05",
6
- "run_id": "usaco_usaco_example_agent_1722871527"
7
- },
8
- "results": {
9
- "accuracy": 0.420,
10
- "total_cost": 1.42
11
- },
12
- "raw_eval_results": {
13
- "rdict": {
14
- "1333_platinum_good_bitstrings": [
15
- {
16
- "result_type": 7,
17
- "status": "No submission, error during judging",
18
- "judge_output": "No submission, error during judging",
19
- "num_passed": 0,
20
- "fraction_passed": 0,
21
- "result_list": null,
22
- "num_tests": 10,
23
- "problem_id": "1333_platinum_good_bitstrings"
24
- }
25
- ]
26
- },
27
- "sdict": {
28
- "1333_platinum_good_bitstrings": [
29
- {
30
- "solution_code": "test",
31
- "result": {
32
- "result_type": 7,
33
- "status": "No submission, error during judging",
34
- "judge_output": "No submission, error during judging",
35
- "num_passed": 0,
36
- "fraction_passed": 0,
37
- "result_list": null,
38
- "num_tests": 10,
39
- "problem_id": "1333_platinum_good_bitstrings"
40
- },
41
- "problem_id": "1333_platinum_good_bitstrings"
42
- }
43
- ]
44
- },
45
- "rs": [
46
- [
47
- {
48
- "result_type": 7,
49
- "status": "No submission, error during judging",
50
- "judge_output": "No submission, error during judging",
51
- "num_passed": 0,
52
- "fraction_passed": 0,
53
- "result_list": null,
54
- "num_tests": 10,
55
- "problem_id": "1333_platinum_good_bitstrings"
56
- }
57
- ]
58
- ],
59
- "ss": [
60
- [
61
- {
62
- "solution_code": "test",
63
- "result": {
64
- "result_type": 7,
65
- "status": "No submission, error during judging",
66
- "judge_output": "No submission, error during judging",
67
- "num_passed": 0,
68
- "fraction_passed": 0,
69
- "result_list": null,
70
- "num_tests": 10,
71
- "problem_id": "1333_platinum_good_bitstrings"
72
- },
73
- "problem_id": "1333_platinum_good_bitstrings"
74
- }
75
- ]
76
- ]
77
- },
78
- "raw_logging_results": [
79
- {
80
- "task_id": "1333_platinum_good_bitstrings",
81
- "trace_id": "3aaa346a-30ee-4cb6-9b6d-e59930656d45",
82
- "project_id": "citp_agent_eval/usaco_1722871516",
83
- "inputs": {
84
- "self": "<openai.resources.chat.completions.Completions object at 0x75aea89672e0>",
85
- "messages": [
86
- {
87
- "role": "user",
88
- "content": "test"
89
- }
90
- ],
91
- "model": "gpt-4o-mini-2024-07-18",
92
- "max_tokens": 2000,
93
- "n": 1,
94
- "temperature": 1
95
- },
96
- "id": "9a995abc-5d34-478e-86b2-46ea71a55a96",
97
- "outputs": [
98
- "Test received! How can I assist you today?"
99
- ],
100
- "exception": null,
101
- "summary": {
102
- "usage": {
103
- "gpt-4o-mini-2024-07-18": {
104
- "requests": 1,
105
- "completion_tokens": 10,
106
- "prompt_tokens": 8,
107
- "total_tokens": 18
108
- }
109
- }
110
- },
111
- "display_name": null,
112
- "attributes": {
113
- "weave": {
114
- "client_version": "0.50.13",
115
- "source": "python-sdk",
116
- "os_name": "Linux",
117
- "os_version": "#10-Ubuntu SMP Mon Jun 17 15:31:00 UTC 2024",
118
- "os_release": "6.8.0-1010-azure",
119
- "sys_version": "3.9.19 (main, May 6 2024, 19:43:03) \n[GCC 11.2.0]"
120
- },
121
- "task_id": "1333_platinum_good_bitstrings"
122
- },
123
- "_children": [],
124
- "_feedback": null
125
- }
126
- ]
127
- }