benediktstroebl committed on
Commit
f9140ad
·
1 Parent(s): 7bf3598

new data structure with global dict for faster processing

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -37,22 +37,31 @@ def download_latest_results():
37
  abs_path = Path(__file__).parent
38
 
39
 
40
- def load_analyzed_traces(agent_name, benchmark_name):
 
 
 
 
41
  processed_dir = abs_path / "evals_live"
42
- try:
43
- for file in processed_dir.glob('*.json'):
44
  with open(file, 'r') as f:
45
  data = json.load(f)
46
- if data['config']['agent_name'] == agent_name and data['config']['benchmark_name'] == benchmark_name:
47
- assert type(data['raw_logging_results']) == dict, f"Invalid format for raw_logging_results: {type(data['raw_logging_results'])}"
48
-
49
- return data['raw_logging_results']
50
-
51
- except AssertionError as e:
52
- return None
53
- except Exception as e:
54
- print(f"Error loading analyzed traces: {e}")
55
- return None
 
 
 
 
 
56
 
57
  def update_agent_dropdown(benchmark_name, metric):
58
  df = parse_json_files(os.path.join(abs_path, "evals_live"), benchmark_name)
@@ -68,7 +77,7 @@ def update_task_analysis(benchmark_name, agent_name):
68
  if not agent_name:
69
  return "Please select an agent.", None, None, ""
70
 
71
- analyzed_traces = load_analyzed_traces(agent_name, benchmark_name)
72
  if not analyzed_traces:
73
  return f"No analysis available for agent: {agent_name}", None, None, ""
74
 
@@ -80,7 +89,7 @@ def update_task_details(benchmark_name, agent_name, task_id):
80
  if not task_id:
81
  return "Please select a task.", None, ""
82
 
83
- analyzed_traces = load_analyzed_traces(agent_name, benchmark_name)
84
  if not analyzed_traces or task_id not in analyzed_traces:
85
  return f"No analysis available for task: {task_id}", None, ""
86
 
@@ -276,7 +285,7 @@ with gr.Blocks() as demo:
276
  raw_call_details = gr.HTML()
277
 
278
  def update_raw_task_dropdown(agent_name):
279
- analyzed_traces = load_analyzed_traces(agent_name, "usaco")
280
  if not analyzed_traces:
281
  return gr.Dropdown(choices=[], label="Select Task"), gr.Dropdown(choices=[], label="Select Step"), f"No raw predictions data available for agent: {agent_name}."
282
  task_ids = list(analyzed_traces.keys())
@@ -284,14 +293,14 @@ with gr.Blocks() as demo:
284
  return gr.Dropdown(choices=task_ids, label="Select Task", value=task_ids[0]), gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0), update_raw_call_details(agent_name, task_ids[0], 0)
285
 
286
  def update_raw_step_dropdown(agent_name, task_id):
287
- analyzed_traces = load_analyzed_traces(agent_name, "usaco")
288
  if not analyzed_traces or task_id not in analyzed_traces:
289
  return gr.Dropdown(choices=[], label="Select Step", value="No data available.")
290
  steps = analyzed_traces[task_id]['steps']
291
  return gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0)
292
 
293
  def update_raw_call_details(agent_name, task_id, step_index):
294
- analyzed_traces = load_analyzed_traces(agent_name, "usaco")
295
  if not analyzed_traces or task_id not in analyzed_traces:
296
  return "No data available for this selection."
297
  steps = analyzed_traces[task_id]['steps']
@@ -330,6 +339,9 @@ with gr.Blocks() as demo:
330
 
331
 
332
  async def main():
 
 
 
333
  # Download the results from the Hugging Face Hub
334
  await asyncio.to_thread(download_latest_results)
335
 
 
37
  abs_path = Path(__file__).parent
38
 
39
 
40
# Global cache of preprocessed traces, laid out as
#   preprocessed_traces[benchmark_name][agent_name] -> raw_logging_results
# The stored value is None when a file's raw_logging_results is missing or
# is not a dict.
preprocessed_traces = {}


def preprocess_traces(processed_dir=None):
    """Read every ``*.json`` result file once and index it in ``preprocessed_traces``.

    Each file is expected to carry ``config.agent_name``,
    ``config.benchmark_name`` and ``raw_logging_results``. The traces are
    stored under ``preprocessed_traces[benchmark_name][agent_name]`` so that
    later lookups are plain dict accesses instead of directory scans.

    Only files that already exist in the directory at call time are indexed.

    Args:
        processed_dir: Directory to scan (``str`` or ``Path``). Defaults to
            ``abs_path / "evals_live"``.

    Returns:
        None. The module-level ``preprocessed_traces`` dict is updated in place.
    """
    if processed_dir is None:
        processed_dir = abs_path / "evals_live"

    for file in Path(processed_dir).glob('*.json'):
        try:
            with open(file, 'r') as f:
                data = json.load(f)
            agent_name = data['config']['agent_name']
            benchmark_name = data['config']['benchmark_name']
            raw_results = data.get('raw_logging_results')
            # A non-dict payload is recorded as None so callers can report
            # "no analysis available" for this agent.
            preprocessed_traces.setdefault(benchmark_name, {})[agent_name] = (
                raw_results if isinstance(raw_results, dict) else None
            )
        except Exception as e:
            # Best effort per file: a file that cannot be read or attributed
            # to a benchmark/agent is logged and skipped. Nothing is written
            # to the cache here, because the benchmark/agent names may be
            # undefined or left over from a previously processed file.
            print(f"Error preprocessing {file}: {e}")
62
+
63
def get_analyzed_traces(agent_name, benchmark_name):
    """Look up the cached traces for one agent on one benchmark.

    Reads the module-level ``preprocessed_traces`` dict and returns the value
    stored under ``[benchmark_name][agent_name]``, or None when either the
    benchmark or the agent has no entry.
    """
    if benchmark_name not in preprocessed_traces:
        return None
    traces_by_agent = preprocessed_traces[benchmark_name]
    return traces_by_agent.get(agent_name)
65
 
66
  def update_agent_dropdown(benchmark_name, metric):
67
  df = parse_json_files(os.path.join(abs_path, "evals_live"), benchmark_name)
 
77
  if not agent_name:
78
  return "Please select an agent.", None, None, ""
79
 
80
+ analyzed_traces = get_analyzed_traces(agent_name, benchmark_name)
81
  if not analyzed_traces:
82
  return f"No analysis available for agent: {agent_name}", None, None, ""
83
 
 
89
  if not task_id:
90
  return "Please select a task.", None, ""
91
 
92
+ analyzed_traces = get_analyzed_traces(agent_name, benchmark_name)
93
  if not analyzed_traces or task_id not in analyzed_traces:
94
  return f"No analysis available for task: {task_id}", None, ""
95
 
 
285
  raw_call_details = gr.HTML()
286
 
287
  def update_raw_task_dropdown(agent_name):
288
+ analyzed_traces = get_analyzed_traces(agent_name, "usaco")
289
  if not analyzed_traces:
290
  return gr.Dropdown(choices=[], label="Select Task"), gr.Dropdown(choices=[], label="Select Step"), f"No raw predictions data available for agent: {agent_name}."
291
  task_ids = list(analyzed_traces.keys())
 
293
  return gr.Dropdown(choices=task_ids, label="Select Task", value=task_ids[0]), gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0), update_raw_call_details(agent_name, task_ids[0], 0)
294
 
295
  def update_raw_step_dropdown(agent_name, task_id):
296
+ analyzed_traces = get_analyzed_traces(agent_name, "usaco")
297
  if not analyzed_traces or task_id not in analyzed_traces:
298
  return gr.Dropdown(choices=[], label="Select Step", value="No data available.")
299
  steps = analyzed_traces[task_id]['steps']
300
  return gr.Dropdown(choices=[(f"Step {i+1}", i) for i in range(len(steps))], label="Select Step", value=0)
301
 
302
  def update_raw_call_details(agent_name, task_id, step_index):
303
+ analyzed_traces = get_analyzed_traces(agent_name, "usaco")
304
  if not analyzed_traces or task_id not in analyzed_traces:
305
  return "No data available for this selection."
306
  steps = analyzed_traces[task_id]['steps']
 
339
 
340
 
341
  async def main():
342
+ # Preprocess traces
343
+ preprocess_traces()
344
+
345
  # Download the results from the Hugging Face Hub
346
  await asyncio.to_thread(download_latest_results)
347