Commit ff06039
Parent(s): f400b47
Update

Files changed:
- app.py +1 -1
- utils/data.py +44 -44
- utils/processing.py +4 -3
app.py CHANGED

@@ -591,7 +591,7 @@ with gr.Blocks() as demo:
 
 async def main():
     # Preprocess traces
-    preprocessor.preprocess_traces('evals_live')
+    # preprocessor.preprocess_traces('evals_live')
 
     # # Download the results from the Hugging Face Hub
     await asyncio.to_thread(download_latest_results)
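For context on the unchanged surrounding lines: await asyncio.to_thread(...) runs a blocking callable in a worker thread so the event loop stays responsive while the download completes. A minimal, self-contained sketch of that pattern; the download function below is a stand-in, not the app's real implementation:

import asyncio
import time

def download_latest_results() -> str:
    # Stand-in for a blocking download (e.g. pulling result files from
    # the Hugging Face Hub); time.sleep simulates the blocking I/O.
    time.sleep(1)
    return "results downloaded"

async def main():
    # asyncio.to_thread (Python 3.9+) offloads the blocking call to a
    # worker thread, so other coroutines keep running in the meantime.
    result = await asyncio.to_thread(download_latest_results)
    print(result)

asyncio.run(main())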
utils/data.py CHANGED

@@ -6,61 +6,61 @@ from utils.pareto import Agent, compute_pareto_frontier
 import plotly.graph_objects as go
 import textwrap
 
-def parse_json_files(folder_path, benchmark_name):
-    # Convert folder path to Path object
-    folder = Path(folder_path)
+# def parse_json_files(folder_path, benchmark_name):
+#     # Convert folder path to Path object
+#     folder = Path(folder_path)
 
-    # List to store data from each file
-    data_list = []
+#     # List to store data from each file
+#     data_list = []
 
-    # Iterate through all JSON files in the folder
-    for json_file in folder.glob('*.json'):
-        try:
-            with open(json_file, 'r') as file:
-                data = json.load(file)
+#     # Iterate through all JSON files in the folder
+#     for json_file in folder.glob('*.json'):
+#         try:
+#             with open(json_file, 'r') as file:
+#                 data = json.load(file)
 
-            # Extract config and results
-            config = data['config']
-            results = data['results']
+#             # Extract config and results
+#             config = data['config']
+#             results = data['results']
 
-            # Combine config and results into a single dictionary
-            combined_data = {
-                'agent_name': config['agent_name'],
-                'benchmark_name': config['benchmark_name'],
-                'date': config['date']
-            }
+#             # Combine config and results into a single dictionary
+#             combined_data = {
+#                 'agent_name': config['agent_name'],
+#                 'benchmark_name': config['benchmark_name'],
+#                 'date': config['date']
+#             }
 
-            # Add results with 'results_' prefix
-            for key, value in results.items():
-                combined_data[f'results_{key}'] = value
+#             # Add results with 'results_' prefix
+#             for key, value in results.items():
+#                 combined_data[f'results_{key}'] = value
 
-            data_list.append(combined_data)
-        except Exception as e:
-            print(f"Error processing {json_file}: {e}. Skipping!")
+#             data_list.append(combined_data)
+#         except Exception as e:
+#             print(f"Error processing {json_file}: {e}. Skipping!")
 
-    # Create DataFrame from the list of dictionaries
-    df = pd.DataFrame(data_list)
-    df = df[df['benchmark_name'] == benchmark_name]
+#     # Create DataFrame from the list of dictionaries
+#     df = pd.DataFrame(data_list)
+#     df = df[df['benchmark_name'] == benchmark_name]
 
-    # sort df by descending accuracy
-    df = df.sort_values(by='results_accuracy', ascending=False)
+#     # sort df by descending accuracy
+#     df = df.sort_values(by='results_accuracy', ascending=False)
 
-    # round all float columns to 2 decimal places
-    for column in df.select_dtypes(include='float').columns:
-        df[column] = df[column].round(3)
+#     # round all float columns to 2 decimal places
+#     for column in df.select_dtypes(include='float').columns:
+#         df[column] = df[column].round(3)
 
-    # Rename columns
-    df = df.rename(columns={
-        'agent_name': 'Agent Name',
-        'results_total_cost': 'Total Cost',
-        'results_accuracy': 'Accuracy',
-        'results_precision': 'Precision',
-        'results_recall': 'Recall',
-        'results_f1_score': 'F1 Score',
-        'results_auc': 'AUC',
-    })
+#     # Rename columns
+#     df = df.rename(columns={
+#         'agent_name': 'Agent Name',
+#         'results_total_cost': 'Total Cost',
+#         'results_accuracy': 'Accuracy',
+#         'results_precision': 'Precision',
+#         'results_recall': 'Recall',
+#         'results_f1_score': 'F1 Score',
+#         'results_auc': 'AUC',
+#     })
 
-    return df
+#     return df
 
 
 def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
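For reference, the now-commented-out parse_json_files expected each *.json result file to look roughly like the dict below. This is a sketch inferred from the keys the function reads; the values are illustrative, not real benchmark data:

# Shape of one result file as read by parse_json_files (inferred from
# the keys it accesses); the values are made up for illustration.
example_result_file = {
    "config": {
        "agent_name": "example-agent",      # shown as 'Agent Name'
        "benchmark_name": "example-bench",  # rows are filtered on this
        "date": "2024-01-01",
    },
    "results": {                            # flattened to 'results_*' columns
        "accuracy": 0.912,                  # -> 'Accuracy'
        "total_cost": 1.234,                # -> 'Total Cost'
        "precision": 0.905,                 # -> 'Precision'
        "recall": 0.918,                    # -> 'Recall'
        "f1_score": 0.911,                  # -> 'F1 Score'
        "auc": 0.947,                       # -> 'AUC'
    },
}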
utils/processing.py CHANGED

@@ -123,10 +123,11 @@ async def process_upload(input_path, output_path):
     openai_client = AsyncOpenAIClient(model="gpt-4o-mini")
 
     try:
-        processed_calls = await analyze_agent_steps(data['raw_logging_results'], openai_client, llm_eval=True)
-        failure_report = await analyze_agent_performance(data['raw_logging_results'], data['results']['failed_tasks'], openai_client)
+        processed_calls = await analyze_agent_steps(data['raw_logging_results'], openai_client, llm_eval=False)
         data['raw_logging_results'] = processed_calls
-        data['failure_report'] = failure_report
+
+        # failure_report = await analyze_agent_performance(data['raw_logging_results'], data['results']['failed_tasks'], openai_client)
+        # data['failure_report'] = None
     except Exception as e:
         traceback.print_exc()
         print(f"Error in processing: {str(e)}")
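Net effect of this hunk: step analysis now runs with llm_eval=False, and the failure-report path is disabled entirely. A minimal sketch of the resulting control flow, with analyze_agent_steps stubbed out; the stub is illustrative, the real helper lives elsewhere in the repo:

import asyncio
import traceback

async def analyze_agent_steps(calls, client, llm_eval=False):
    # Illustrative stub: annotate each call; the llm_eval flag would
    # gate the (now disabled) LLM-based evaluation pass.
    return [dict(call, llm_eval_used=llm_eval) for call in calls]

async def process(data, client=None):
    try:
        processed_calls = await analyze_agent_steps(
            data['raw_logging_results'], client, llm_eval=False
        )
        data['raw_logging_results'] = processed_calls
        # Failure-report generation stays commented out in this commit,
        # so data['failure_report'] is never set here.
    except Exception as e:
        traceback.print_exc()
        print(f"Error in processing: {str(e)}")
    return data

print(asyncio.run(process({'raw_logging_results': [{'step': 1}]})))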