# Spaces: Sleeping  (web-page header residue, not part of the script)
import json
import os

import pandas as pd

from models_info import model_info
# Folder scanned for per-model evaluation result files (*.json).
directory = 'data/raw-eval-outputs'

# Accumulates one dict per model; turned into a DataFrame further down.
data = []
def model_hyperlink(link: str, model_name: str) -> str:
    """Return an HTML anchor that shows *model_name* and points at *link*.

    The anchor opens in a new tab (``target="_blank"``) and carries an
    inline style giving it a dotted underline.

    Args:
        link: URL placed in the ``href`` attribute, inserted verbatim.
        model_name: Text shown inside the anchor, inserted verbatim.

    Returns:
        The ``<a ...>...</a>`` markup as a string.
    """
    # NOTE(review): neither value is HTML-escaped; callers are presumably
    # passing trusted values -- confirm before feeding external input.
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
def make_clickable_names(df: pd.DataFrame) -> pd.DataFrame:
    """Replace each value in the "Model" column with an HTML link.

    Every row's "Model" text is wrapped by ``model_hyperlink`` using that
    row's "Link" value as the target.

    Args:
        df: Frame that has both a "Model" and a "Link" column.

    Returns:
        The same ``df`` object; the "Model" column is overwritten in place.

    Raises:
        KeyError: If either the "Link" or the "Model" column is missing.
    """
    # Walk the two columns in lockstep instead of a row-wise df.apply(axis=1):
    # same values in the same row positions, without building a Series per row.
    df["Model"] = [
        model_hyperlink(link, name)
        for link, name in zip(df["Link"], df["Model"])
    ]
    return df
# Build one row per evaluation-result file found in `directory`.
# sorted() keeps the row order (and therefore the exported CSV) stable
# between runs; os.listdir() returns entries in arbitrary order.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".json"):
        continue

    filepath = os.path.join(directory, filename)
    with open(filepath, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # The model name is the file name with the "_results.json" part removed.
    model_name = filename.replace("_results.json", "")

    # One column per entry under "results": its 'acc,none' value scaled by
    # 100 and rounded to two decimals.
    results = json_data['results']
    row = {'Model': model_name}
    for key, value in results.items():
        row[key] = round(value['acc,none'] * 100, 2)

    # Add the tuning type and link to the row. A single lookup, with an
    # error that names the offending file when the model is not registered.
    try:
        info = model_info[model_name]
    except KeyError:
        raise KeyError(
            f"model {model_name!r} (from file {filename!r}) has no entry in model_info"
        ) from None
    row['T'] = info['tuning']
    row['Link'] = info['link']

    data.append(row)
# Turn the collected rows into a table, make model names clickable, and drop
# the raw "Link" column once it has been folded into the "Model" markup.
df = pd.DataFrame(data)
df = make_clickable_names(df)
df = df.drop(columns=["Link"])

# Per-benchmark difference: "g2b" score minus "orig_filtered" score.
df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)

# Reorder columns: the fixed leading columns first, then every other column
# in its existing order. The leading list is defined once so the ordering
# and the "everything else" filter cannot drift apart.
leading_cols = [
    "T",
    "Model",
    "b4bqa",
    "b4b",
    "medmcqa_g2b",
    "medmcqa_orig_filtered",
    "medmcqa_diff",
    "medqa_4options_g2b",
    "medqa_4options_orig_filtered",
    "medqa_diff",
]
cols = leading_cols + [col for col in df.columns if col not in leading_cols]
df = df[cols]

output_csv = 'data/csv/models_data.csv'
# Create the target folder if needed so to_csv does not fail on a fresh checkout.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)
print(f"DataFrame saved to {output_csv}")