core_leaderboard

Running

App Files Files Community

benediktstroebl committed on Aug 11, 2024

Commit

f9140ad

1 Parent(s): 7bf3598

new data structure with global dict for faster processing

Browse files

Files changed (1) hide show

app.py +30 -18

app.py CHANGED Viewed

@@ -37,22 +37,31 @@ def download_latest_results():
 abs_path = Path(__file__).parent
-def load_analyzed_traces(agent_name, benchmark_name):
     processed_dir = abs_path / "evals_live"
-    try:
-        for file in processed_dir.glob('*.json'):
             with open(file, 'r') as f:
                 data = json.load(f)
-                if data['config']['agent_name'] == agent_name and data['config']['benchmark_name'] == benchmark_name:
-                    assert type(data['raw_logging_results']) == dict, f"Invalid format for raw_logging_results: {type(data['raw_logging_results'])}"
-                    return data['raw_logging_results']
-    except AssertionError as e:
-        return None
-    except Exception as e:
-        print(f"Error loading analyzed traces: {e}")
-        return None
 def update_agent_dropdown(benchmark_name, metric):
     df = parse_json_files(os.path.join(abs_path, "evals_live"), benchmark_name)
@@ -68,7 +77,7 @@ def update_task_analysis(benchmark_name, agent_name):
     if not agent_name:
         return "Please select an agent.", None, None, ""
-    analyzed_traces = load_analyzed_traces(agent_name, benchmark_name)
     if not analyzed_traces:
         return f"No analysis available for agent: {agent_name}", None, None, ""
@@ -80,7 +89,7 @@ def update_task_details(benchmark_name, agent_name, task_id):
     if not task_id:
         return "Please select a task.", None, ""
-    analyzed_traces = load_analyzed_traces(agent_name, benchmark_name)
     if not analyzed_traces or task_id not in analyzed_traces:
         return f"No analysis available for task: {task_id}", None, ""
@@ -276,7 +285,7 @@ with gr.Blocks() as demo:
                 raw_call_details = gr.HTML()
             def update_raw_task_dropdown(agent_name):
-                analyzed_traces = load_analyzed_traces(agent_name, "usaco")
                 if not analyzed_traces:
                     return gr.Dropdown(choices=[], label="Select Task"), gr.Dropdown(choices=[], label="Select Step"), f"No raw predictions data available for agent: {agent_name}."
                 task_ids = list(analyzed_traces.keys())
@@ -284,14 +293,14 @@ with gr.Blocks() as demo:
                 return gr.Dropdown(choices=task_ids, label="Select Task", value=task_ids[0]), gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0), update_raw_call_details(agent_name, task_ids[0], 0)
             def update_raw_step_dropdown(agent_name, task_id):
-                analyzed_traces = load_analyzed_traces(agent_name, "usaco")
                 if not analyzed_traces or task_id not in analyzed_traces:
                     return gr.Dropdown(choices=[], label="Select Step", value="No data available.")
                 steps = analyzed_traces[task_id]['steps']
                 return gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0)
             def update_raw_call_details(agent_name, task_id, step_index):
-                analyzed_traces = load_analyzed_traces(agent_name, "usaco")
                 if not analyzed_traces or task_id not in analyzed_traces:
                     return "No data available for this selection."
                 steps = analyzed_traces[task_id]['steps']
@@ -330,6 +339,9 @@ with gr.Blocks() as demo:
 async def main():
     # Download the results from the Hugging Face Hub
     await asyncio.to_thread(download_latest_results)

 abs_path = Path(__file__).parent
# Global cache filled by preprocess_traces():
#   {benchmark_name: {agent_name: raw_logging_results dict, or None when the
#    file's raw_logging_results is missing or is not a dict}}
preprocessed_traces = {}


def preprocess_traces():
    """Load every ``evals_live/*.json`` file into ``preprocessed_traces``.

    Each file is expected to provide ``config.agent_name``,
    ``config.benchmark_name`` and ``raw_logging_results``.  The traces are
    stored under ``preprocessed_traces[benchmark_name][agent_name]``; when
    ``raw_logging_results`` is missing or is not a dict, ``None`` is stored
    instead so lookups report "no analysis available".

    Files are handled independently: a file that cannot be read or parsed,
    or that lacks the agent/benchmark identity, is reported and skipped
    without touching entries recorded for other files.  (Previously the
    error handlers wrote ``None`` using the agent/benchmark names left over
    from the previous file, or raised ``NameError`` when no file had been
    parsed yet.)
    """
    processed_dir = abs_path / "evals_live"
    for file in processed_dir.glob('*.json'):
        try:
            with open(file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            config = data['config']
            agent_name = config['agent_name']
            benchmark_name = config['benchmark_name']
            raw_results = data.get('raw_logging_results')
            # Only dict-shaped traces are usable downstream; anything else is
            # recorded as "no data" rather than stored in a broken shape.
            if not isinstance(raw_results, dict):
                raw_results = None
            preprocessed_traces.setdefault(benchmark_name, {})[agent_name] = raw_results
        except Exception as e:
            # Best-effort per file: the identity of this file is unknown or
            # unusable, so there is no cache entry that could safely be set.
            print(f"Error preprocessing {file}: {e}")
def get_analyzed_traces(agent_name, benchmark_name):
    """Return the cached traces for an agent on a benchmark.

    Reads the module-level ``preprocessed_traces`` cache.  The result is
    ``None`` when the benchmark or the agent has no entry, or when the stored
    entry itself is ``None``.
    """
    traces_by_agent = preprocessed_traces.get(benchmark_name, {})
    return traces_by_agent.get(agent_name)
 def update_agent_dropdown(benchmark_name, metric):
     df = parse_json_files(os.path.join(abs_path, "evals_live"), benchmark_name)
     if not agent_name:
         return "Please select an agent.", None, None, ""
+    analyzed_traces = get_analyzed_traces(agent_name, benchmark_name)
     if not analyzed_traces:
         return f"No analysis available for agent: {agent_name}", None, None, ""
     if not task_id:
         return "Please select a task.", None, ""
+    analyzed_traces = get_analyzed_traces(agent_name, benchmark_name)
     if not analyzed_traces or task_id not in analyzed_traces:
         return f"No analysis available for task: {task_id}", None, ""
                 raw_call_details = gr.HTML()
             def update_raw_task_dropdown(agent_name):
+                analyzed_traces = get_analyzed_traces(agent_name, "usaco")
                 if not analyzed_traces:
                     return gr.Dropdown(choices=[], label="Select Task"), gr.Dropdown(choices=[], label="Select Step"), f"No raw predictions data available for agent: {agent_name}."
                 task_ids = list(analyzed_traces.keys())
                 return gr.Dropdown(choices=task_ids, label="Select Task", value=task_ids[0]), gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0), update_raw_call_details(agent_name, task_ids[0], 0)
             def update_raw_step_dropdown(agent_name, task_id):
                 """Build the "Select Step" dropdown for one task of an agent's "usaco" traces."""
                 traces = get_analyzed_traces(agent_name, "usaco")
                 # No traces for this agent, or the task is not among them:
                 # return a dropdown with no choices.
                 if not (traces and task_id in traces):
                     return gr.Dropdown(choices=[], label="Select Step", value="No data available.")
                 step_count = len(traces[task_id]['steps'])
                 # Labels are 1-based for display; the choice values stay 0-based.
                 step_choices = [(f"Step {position+1}", position) for position in range(step_count)]
                 return gr.Dropdown(choices=step_choices, label="Select Step", value=0)
             def update_raw_call_details(agent_name, task_id, step_index):
+                analyzed_traces = get_analyzed_traces(agent_name, "usaco")
                 if not analyzed_traces or task_id not in analyzed_traces:
                     return "No data available for this selection."
                 steps = analyzed_traces[task_id]['steps']
 async def main():
+    # Preprocess traces
+    preprocess_traces()
     # Download the results from the Hugging Face Hub
     await asyncio.to_thread(download_latest_results)