benediktstroebl committed
Commit: ff06039
Parent: f400b47
Files changed (3):
  1. app.py +1 -1
  2. utils/data.py +44 -44
  3. utils/processing.py +4 -3
app.py CHANGED
@@ -591,7 +591,7 @@ with gr.Blocks() as demo:
 
 async def main():
     # Preprocess traces
-    preprocessor.preprocess_traces('evals_live')
+    # preprocessor.preprocess_traces('evals_live')
 
     # # Download the results from the Hugging Face Hub
     await asyncio.to_thread(download_latest_results)
utils/data.py CHANGED
@@ -6,61 +6,61 @@ from utils.pareto import Agent, compute_pareto_frontier
 import plotly.graph_objects as go
 import textwrap
 
-def parse_json_files(folder_path, benchmark_name):
-    # Convert folder path to Path object
-    folder = Path(folder_path)
+# def parse_json_files(folder_path, benchmark_name):
+#     # Convert folder path to Path object
+#     folder = Path(folder_path)
 
-    # List to store data from each file
-    data_list = []
+#     # List to store data from each file
+#     data_list = []
 
-    # Iterate through all JSON files in the folder
-    for json_file in folder.glob('*.json'):
-        try:
-            with open(json_file, 'r') as file:
-                data = json.load(file)
+#     # Iterate through all JSON files in the folder
+#     for json_file in folder.glob('*.json'):
+#         try:
+#             with open(json_file, 'r') as file:
+#                 data = json.load(file)
 
-            # Extract config and results
-            config = data['config']
-            results = data['results']
+#             # Extract config and results
+#             config = data['config']
+#             results = data['results']
 
-            # Combine config and results into a single dictionary
-            combined_data = {
-                'agent_name': config['agent_name'],
-                'benchmark_name': config['benchmark_name'],
-                'date': config['date']
-            }
+#             # Combine config and results into a single dictionary
+#             combined_data = {
+#                 'agent_name': config['agent_name'],
+#                 'benchmark_name': config['benchmark_name'],
+#                 'date': config['date']
+#             }
 
-            # Add results with 'results_' prefix
-            for key, value in results.items():
-                combined_data[f'results_{key}'] = value
+#             # Add results with 'results_' prefix
+#             for key, value in results.items():
+#                 combined_data[f'results_{key}'] = value
 
-            data_list.append(combined_data)
-        except Exception as e:
-            print(f"Error processing {json_file}: {e}. Skipping!")
+#             data_list.append(combined_data)
+#         except Exception as e:
+#             print(f"Error processing {json_file}: {e}. Skipping!")
 
-    # Create DataFrame from the list of dictionaries
-    df = pd.DataFrame(data_list)
-    df = df[df['benchmark_name'] == benchmark_name]
+#     # Create DataFrame from the list of dictionaries
+#     df = pd.DataFrame(data_list)
+#     df = df[df['benchmark_name'] == benchmark_name]
 
-    # sort df by descending accuracy
-    df = df.sort_values(by='results_accuracy', ascending=False)
+#     # sort df by descending accuracy
+#     df = df.sort_values(by='results_accuracy', ascending=False)
 
-    # round all float columns to 2 decimal places
-    for column in df.select_dtypes(include='float').columns:
-        df[column] = df[column].round(3)
+#     # round all float columns to 2 decimal places
+#     for column in df.select_dtypes(include='float').columns:
+#         df[column] = df[column].round(3)
 
-    # Rename columns
-    df = df.rename(columns={
-        'agent_name': 'Agent Name',
-        'results_total_cost': 'Total Cost',
-        'results_accuracy': 'Accuracy',
-        'results_precision': 'Precision',
-        'results_recall': 'Recall',
-        'results_f1_score': 'F1 Score',
-        'results_auc': 'AUC',
-    })
+#     # Rename columns
+#     df = df.rename(columns={
+#         'agent_name': 'Agent Name',
+#         'results_total_cost': 'Total Cost',
+#         'results_accuracy': 'Accuracy',
+#         'results_precision': 'Precision',
+#         'results_recall': 'Recall',
+#         'results_f1_score': 'F1 Score',
+#         'results_auc': 'AUC',
+#     })
 
-    return df
+#     return df
 
 
 def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
utils/processing.py CHANGED
@@ -123,10 +123,11 @@ async def process_upload(input_path, output_path):
     openai_client = AsyncOpenAIClient(model="gpt-4o-mini")
 
     try:
-        processed_calls = await analyze_agent_steps(data['raw_logging_results'], openai_client, llm_eval=True)
-        failure_report = await analyze_agent_performance(data['raw_logging_results'], data['results']['failed_tasks'], openai_client)
+        processed_calls = await analyze_agent_steps(data['raw_logging_results'], openai_client, llm_eval=False)
         data['raw_logging_results'] = processed_calls
-        data['failure_report'] = failure_report
+
+        # failure_report = await analyze_agent_performance(data['raw_logging_results'], data['results']['failed_tasks'], openai_client)
+        # data['failure_report'] = None
     except Exception as e:
         traceback.print_exc()
         print(f"Error in processing: {str(e)}")