core_leaderboard

Running

App Files Files Community

benediktstroebl committed on Aug 9, 2024

Commit

ad4ec76

1 Parent(s): 575c750

updated visibility feature

Browse files

Files changed (2) hide show

evals/usaco_traces/task_analyses.json +8 -2
utils.py +31 -3

evals/usaco_traces/task_analyses.json CHANGED Viewed

@@ -51,7 +51,10 @@
                 },
                 "analysis": {
                     "description": "In this step, the AI agent processed a user request to provide a Python solution to the specified problem regarding the function 'gen_string(a, b)'. The agent broke down the task into several steps, including: restating the problem in plain English, conceptualizing a solution, providing pseudocode, and finally outputting the complete Python code with comments explaining each part of the solution. The agent ensured that the output code adhered to the user's specifications regarding format and structure.",
-                    "assessment": "The agent made progress by successfully addressing all parts of the task as requested by the user, providing a structured and clear solution to the problem. There are no indications that the agent got stuck or made a mistake."
                 }
             },
             {
@@ -104,7 +107,10 @@
                 },
                 "analysis": {
                     "description": "In this step, the AI agent provided a structured approach to solve a programming problem involving generating a specific bitstring and counting its 'good prefixes'. It restated the problem in plain English, conceptualized a solution, wrote pseudocode outlining the logic, and then provided a complete Python solution with comments for clarity. The output is formatted with Python Markdown code blocks as requested.",
-                    "assessment": "The agent made significant progress by thoroughly addressing the task requirements, breaking down the problem systematically, and providing a well-defined solution. There was no indication of getting stuck or making mistakes."
                 }
             }
         ],

                 },
                 "analysis": {
                     "description": "In this step, the AI agent processed a user request to provide a Python solution to the specified problem regarding the function 'gen_string(a, b)'. The agent broke down the task into several steps, including: restating the problem in plain English, conceptualizing a solution, providing pseudocode, and finally outputting the complete Python code with comments explaining each part of the solution. The agent ensured that the output code adhered to the user's specifications regarding format and structure.",
+                    "assessment": "The agent made progress by successfully addressing all parts of the task as requested by the user, providing a structured and clear solution to the problem. There are no indications that the agent got stuck or made a mistake.",
+                    "success": true,
+                    "action_type": "plan",
+                    "step_outline": "Structure problem"
                 }
             },
             {
                 },
                 "analysis": {
                     "description": "In this step, the AI agent provided a structured approach to solve a programming problem involving generating a specific bitstring and counting its 'good prefixes'. It restated the problem in plain English, conceptualized a solution, wrote pseudocode outlining the logic, and then provided a complete Python solution with comments for clarity. The output is formatted with Python Markdown code blocks as requested.",
+                    "assessment": "The agent made significant progress by thoroughly addressing the task requirements, breaking down the problem systematically, and providing a well-defined solution. There was no indication of getting stuck or making mistakes.",
+                    "success": false,
+                    "action_type": "retrieve",
+                    "step_outline": "Retrieve context from textbook"
                 }
             }
         ],

utils.py CHANGED Viewed

@@ -199,8 +199,14 @@ def create_flow_chart(steps):
         margin=dict(b=20,l=5,r=5,t=40),
         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
-        plot_bgcolor='white',  # Set plot background color to white
-        paper_bgcolor='white'  # Set paper background color to white
     )
     fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
@@ -231,6 +237,28 @@ def create_flow_chart(steps):
         bgcolor='rgba(255,255,255,0.8)',  # Set legend background to slightly transparent white
         bordercolor='rgba(0,0,0,0.1)',  # Add a light border to the legend
         borderwidth=1
-    ))
     return fig

         margin=dict(b=20,l=5,r=5,t=40),
         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+        plot_bgcolor='white',
+        paper_bgcolor='white',
+        modebar=dict(
+            activecolor='#1f77b4',  # Color of active tool
+            orientation='h',  # Horizontal orientation
+            bgcolor='rgba(255,255,255,0.8)',  # Slightly transparent white background
+            color='#777',  # Color of inactive tools
+        )
     )
     fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
         bgcolor='rgba(255,255,255,0.8)',  # Set legend background to slightly transparent white
         bordercolor='rgba(0,0,0,0.1)',  # Add a light border to the legend
         borderwidth=1
+    ),
+    dragmode='pan'
+    )
+    config = {
+        'add': ['pan2d'],
+        'remove': [
+            'zoom2d',
+            'zoomIn2d',
+            'zoomOut2d',
+            'resetScale2d',
+            'hoverClosestCartesian',
+            'hoverCompareCartesian',
+            'toggleSpikelines',
+            'lasso2d',
+            'lasso',
+            'select2d',
+            'select',
+        ]
+    }
+    # Apply the config to the figure
+    fig.update_layout(modebar=config)
     return fig