Spaces:
Running
Running
Commit
·
f9140ad
1
Parent(s):
7bf3598
new data structure with global dict for faster processing
Browse files
app.py
CHANGED
@@ -37,22 +37,31 @@ def download_latest_results():
|
|
37 |
abs_path = Path(__file__).parent
|
38 |
|
39 |
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
processed_dir = abs_path / "evals_live"
|
42 |
-
|
43 |
-
|
44 |
with open(file, 'r') as f:
|
45 |
data = json.load(f)
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
def update_agent_dropdown(benchmark_name, metric):
|
58 |
df = parse_json_files(os.path.join(abs_path, "evals_live"), benchmark_name)
|
@@ -68,7 +77,7 @@ def update_task_analysis(benchmark_name, agent_name):
|
|
68 |
if not agent_name:
|
69 |
return "Please select an agent.", None, None, ""
|
70 |
|
71 |
-
analyzed_traces =
|
72 |
if not analyzed_traces:
|
73 |
return f"No analysis available for agent: {agent_name}", None, None, ""
|
74 |
|
@@ -80,7 +89,7 @@ def update_task_details(benchmark_name, agent_name, task_id):
|
|
80 |
if not task_id:
|
81 |
return "Please select a task.", None, ""
|
82 |
|
83 |
-
analyzed_traces =
|
84 |
if not analyzed_traces or task_id not in analyzed_traces:
|
85 |
return f"No analysis available for task: {task_id}", None, ""
|
86 |
|
@@ -276,7 +285,7 @@ with gr.Blocks() as demo:
|
|
276 |
raw_call_details = gr.HTML()
|
277 |
|
278 |
def update_raw_task_dropdown(agent_name):
|
279 |
-
analyzed_traces =
|
280 |
if not analyzed_traces:
|
281 |
return gr.Dropdown(choices=[], label="Select Task"), gr.Dropdown(choices=[], label="Select Step"), f"No raw predictions data available for agent: {agent_name}."
|
282 |
task_ids = list(analyzed_traces.keys())
|
@@ -284,14 +293,14 @@ with gr.Blocks() as demo:
|
|
284 |
return gr.Dropdown(choices=task_ids, label="Select Task", value=task_ids[0]), gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0), update_raw_call_details(agent_name, task_ids[0], 0)
|
285 |
|
286 |
def update_raw_step_dropdown(agent_name, task_id):
|
287 |
-
analyzed_traces =
|
288 |
if not analyzed_traces or task_id not in analyzed_traces:
|
289 |
return gr.Dropdown(choices=[], label="Select Step", value="No data available.")
|
290 |
steps = analyzed_traces[task_id]['steps']
|
291 |
return gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0)
|
292 |
|
293 |
def update_raw_call_details(agent_name, task_id, step_index):
|
294 |
-
analyzed_traces =
|
295 |
if not analyzed_traces or task_id not in analyzed_traces:
|
296 |
return "No data available for this selection."
|
297 |
steps = analyzed_traces[task_id]['steps']
|
@@ -330,6 +339,9 @@ with gr.Blocks() as demo:
|
|
330 |
|
331 |
|
332 |
async def main():
|
|
|
|
|
|
|
333 |
# Download the results from the Hugging Face Hub
|
334 |
await asyncio.to_thread(download_latest_results)
|
335 |
|
|
|
37 |
abs_path = Path(__file__).parent
|
38 |
|
39 |
|
40 |
+
# Global cache of preprocessed traces, filled by preprocess_traces().
# Layout: {benchmark_name: {agent_name: raw_logging_results dict, or None}}
preprocessed_traces = {}


def preprocess_traces(processed_dir=None):
    """Load every ``*.json`` result file into the ``preprocessed_traces`` cache.

    For each file, ``config.agent_name`` and ``config.benchmark_name`` select
    the cache slot and ``raw_logging_results`` is stored there. The slot is
    set to ``None`` when ``raw_logging_results`` is missing or is not a dict.

    A file that cannot be read or parsed far enough to know its agent and
    benchmark names is reported and skipped. It must never touch the cache:
    the only names available at that point would belong to a previously
    processed file (or not exist at all for the first file).

    Args:
        processed_dir: Directory to scan for ``*.json`` files. Defaults to
            ``abs_path / "evals_live"``.

    Returns:
        None. The module-level ``preprocessed_traces`` dict is updated in
        place, so existing references to it stay valid.
    """
    if processed_dir is None:
        processed_dir = abs_path / "evals_live"

    for file in Path(processed_dir).glob('*.json'):
        # Reset per file so a failure is never attributed to another file.
        agent_name = None
        benchmark_name = None
        raw_results = None
        try:
            # JSON interchange files are UTF-8; do not rely on the locale.
            with open(file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            agent_name = data['config']['agent_name']
            benchmark_name = data['config']['benchmark_name']
            raw_results = data['raw_logging_results']
        except Exception as e:
            # Best-effort loading: one bad file must not abort the others.
            print(f"Error preprocessing {file}: {e}")

        if agent_name is None or benchmark_name is None:
            # Nothing trustworthy to key the cache on; skip this file.
            continue

        # Explicit check instead of `assert`, which is stripped under -O.
        if not isinstance(raw_results, dict):
            raw_results = None

        preprocessed_traces.setdefault(benchmark_name, {})[agent_name] = raw_results
|
62 |
+
|
63 |
+
def get_analyzed_traces(agent_name, benchmark_name):
    """Return the cached traces for ``agent_name`` on ``benchmark_name``.

    Reads the module-level ``preprocessed_traces`` cache. Returns ``None``
    when the benchmark or the agent has no entry, or when the stored entry
    itself is ``None``.
    """
    if benchmark_name not in preprocessed_traces:
        return None
    return preprocessed_traces[benchmark_name].get(agent_name)
|
65 |
|
66 |
def update_agent_dropdown(benchmark_name, metric):
|
67 |
df = parse_json_files(os.path.join(abs_path, "evals_live"), benchmark_name)
|
|
|
77 |
if not agent_name:
|
78 |
return "Please select an agent.", None, None, ""
|
79 |
|
80 |
+
analyzed_traces = get_analyzed_traces(agent_name, benchmark_name)
|
81 |
if not analyzed_traces:
|
82 |
return f"No analysis available for agent: {agent_name}", None, None, ""
|
83 |
|
|
|
89 |
if not task_id:
|
90 |
return "Please select a task.", None, ""
|
91 |
|
92 |
+
analyzed_traces = get_analyzed_traces(agent_name, benchmark_name)
|
93 |
if not analyzed_traces or task_id not in analyzed_traces:
|
94 |
return f"No analysis available for task: {task_id}", None, ""
|
95 |
|
|
|
285 |
raw_call_details = gr.HTML()
|
286 |
|
287 |
def update_raw_task_dropdown(agent_name):
|
288 |
+
analyzed_traces = get_analyzed_traces(agent_name, "usaco")
|
289 |
if not analyzed_traces:
|
290 |
return gr.Dropdown(choices=[], label="Select Task"), gr.Dropdown(choices=[], label="Select Step"), f"No raw predictions data available for agent: {agent_name}."
|
291 |
task_ids = list(analyzed_traces.keys())
|
|
|
293 |
return gr.Dropdown(choices=task_ids, label="Select Task", value=task_ids[0]), gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0), update_raw_call_details(agent_name, task_ids[0], 0)
|
294 |
|
295 |
def update_raw_step_dropdown(agent_name, task_id):
    """Build the "Select Step" dropdown for one agent/task pair.

    Traces are looked up for the hard-coded "usaco" benchmark. When the
    agent has no traces, or ``task_id`` is not among them, an empty dropdown
    is returned. Otherwise there is one choice per entry in the task's
    ``'steps'``, labelled "Step N" (1-based) with the 0-based index as its
    value, and the first step is preselected.
    """
    traces = get_analyzed_traces(agent_name, "usaco")
    if traces and task_id in traces:
        task_steps = traces[task_id]['steps']
        step_choices = [(f"Step {i+1}", i) for i in range(len(task_steps))]
        return gr.Dropdown(choices=step_choices, label="Select Step", value=0)
    return gr.Dropdown(choices=[], label="Select Step", value="No data available.")
|
301 |
|
302 |
def update_raw_call_details(agent_name, task_id, step_index):
|
303 |
+
analyzed_traces = get_analyzed_traces(agent_name, "usaco")
|
304 |
if not analyzed_traces or task_id not in analyzed_traces:
|
305 |
return "No data available for this selection."
|
306 |
steps = analyzed_traces[task_id]['steps']
|
|
|
339 |
|
340 |
|
341 |
async def main():
|
342 |
+
# Preprocess traces
|
343 |
+
preprocess_traces()
|
344 |
+
|
345 |
# Download the results from the Hugging Face Hub
|
346 |
await asyncio.to_thread(download_latest_results)
|
347 |
|