Spaces:
Running
Running
Commit
·
ad4ec76
1
Parent(s):
575c750
updated visibility feature
Browse files- evals/usaco_traces/task_analyses.json +8 -2
- utils.py +31 -3
evals/usaco_traces/task_analyses.json
CHANGED
@@ -51,7 +51,10 @@
|
|
51 |
},
|
52 |
"analysis": {
|
53 |
"description": "In this step, the AI agent processed a user request to provide a Python solution to the specified problem regarding the function 'gen_string(a, b)'. The agent broke down the task into several steps, including: restating the problem in plain English, conceptualizing a solution, providing pseudocode, and finally outputting the complete Python code with comments explaining each part of the solution. The agent ensured that the output code adhered to the user's specifications regarding format and structure.",
|
54 |
-
"assessment": "The agent made progress by successfully addressing all parts of the task as requested by the user, providing a structured and clear solution to the problem. There are no indications that the agent got stuck or made a mistake."
|
|
|
|
|
|
|
55 |
}
|
56 |
},
|
57 |
{
|
@@ -104,7 +107,10 @@
|
|
104 |
},
|
105 |
"analysis": {
|
106 |
"description": "In this step, the AI agent provided a structured approach to solve a programming problem involving generating a specific bitstring and counting its 'good prefixes'. It restated the problem in plain English, conceptualized a solution, wrote pseudocode outlining the logic, and then provided a complete Python solution with comments for clarity. The output is formatted with Python Markdown code blocks as requested.",
|
107 |
-
"assessment": "The agent made significant progress by thoroughly addressing the task requirements, breaking down the problem systematically, and providing a well-defined solution. There was no indication of getting stuck or making mistakes."
|
|
|
|
|
|
|
108 |
}
|
109 |
}
|
110 |
],
|
|
|
51 |
},
|
52 |
"analysis": {
|
53 |
"description": "In this step, the AI agent processed a user request to provide a Python solution to the specified problem regarding the function 'gen_string(a, b)'. The agent broke down the task into several steps, including: restating the problem in plain English, conceptualizing a solution, providing pseudocode, and finally outputting the complete Python code with comments explaining each part of the solution. The agent ensured that the output code adhered to the user's specifications regarding format and structure.",
|
54 |
+
"assessment": "The agent made progress by successfully addressing all parts of the task as requested by the user, providing a structured and clear solution to the problem. There are no indications that the agent got stuck or made a mistake.",
|
55 |
+
"success": true,
|
56 |
+
"action_type": "plan",
|
57 |
+
"step_outline": "Structure problem"
|
58 |
}
|
59 |
},
|
60 |
{
|
|
|
107 |
},
|
108 |
"analysis": {
|
109 |
"description": "In this step, the AI agent provided a structured approach to solve a programming problem involving generating a specific bitstring and counting its 'good prefixes'. It restated the problem in plain English, conceptualized a solution, wrote pseudocode outlining the logic, and then provided a complete Python solution with comments for clarity. The output is formatted with Python Markdown code blocks as requested.",
|
110 |
+
"assessment": "The agent made significant progress by thoroughly addressing the task requirements, breaking down the problem systematically, and providing a well-defined solution. There was no indication of getting stuck or making mistakes.",
|
111 |
+
"success": false,
|
112 |
+
"action_type": "retrieve",
|
113 |
+
"step_outline": "Retrieve context from textbook"
|
114 |
}
|
115 |
}
|
116 |
],
|
utils.py
CHANGED
@@ -199,8 +199,14 @@ def create_flow_chart(steps):
|
|
199 |
margin=dict(b=20,l=5,r=5,t=40),
|
200 |
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
201 |
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
202 |
-
plot_bgcolor='white',
|
203 |
-
paper_bgcolor='white'
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
)
|
205 |
|
206 |
fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
|
@@ -231,6 +237,28 @@ def create_flow_chart(steps):
|
|
231 |
bgcolor='rgba(255,255,255,0.8)', # Set legend background to slightly transparent white
|
232 |
bordercolor='rgba(0,0,0,0.1)', # Add a light border to the legend
|
233 |
borderwidth=1
|
234 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
return fig
|
|
|
199 |
margin=dict(b=20,l=5,r=5,t=40),
|
200 |
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
201 |
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
202 |
+
plot_bgcolor='white',
|
203 |
+
paper_bgcolor='white',
|
204 |
+
modebar=dict(
|
205 |
+
activecolor='#1f77b4', # Color of active tool
|
206 |
+
orientation='h', # Horizontal orientation
|
207 |
+
bgcolor='rgba(255,255,255,0.8)', # Slightly transparent white background
|
208 |
+
color='#777', # Color of inactive tools
|
209 |
+
)
|
210 |
)
|
211 |
|
212 |
fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
|
|
|
237 |
bgcolor='rgba(255,255,255,0.8)', # Set legend background to slightly transparent white
|
238 |
bordercolor='rgba(0,0,0,0.1)', # Add a light border to the legend
|
239 |
borderwidth=1
|
240 |
+
),
|
241 |
+
dragmode='pan'
|
242 |
+
)
|
243 |
+
|
244 |
+
config = {
|
245 |
+
'add': ['pan2d'],
|
246 |
+
'remove': [
|
247 |
+
'zoom2d',
|
248 |
+
'zoomIn2d',
|
249 |
+
'zoomOut2d',
|
250 |
+
'resetScale2d',
|
251 |
+
'hoverClosestCartesian',
|
252 |
+
'hoverCompareCartesian',
|
253 |
+
'toggleSpikelines',
|
254 |
+
'lasso2d',
|
255 |
+
'lasso',
|
256 |
+
'select2d',
|
257 |
+
'select',
|
258 |
+
]
|
259 |
+
}
|
260 |
+
|
261 |
+
# Apply the config to the figure
|
262 |
+
fig.update_layout(modebar=config)
|
263 |
|
264 |
return fig
|