File size: 19,927 Bytes
d04912b
 
ff4f460
d04912b
 
148c1e7
ff4f460
d04912b
 
c933ce0
c6356a2
c933ce0
 
aac20b5
 
 
 
148c1e7
aac20b5
 
ff4f460
148c1e7
aac20b5
148c1e7
 
aac20b5
148c1e7
 
 
 
aac20b5
 
148c1e7
 
aac20b5
148c1e7
ff4f460
aac20b5
148c1e7
d04912b
 
148c1e7
 
 
 
 
 
 
ff4f460
 
 
 
 
 
 
 
 
 
4790bc5
ff4f460
4790bc5
 
d04912b
ff4f460
 
 
 
 
 
 
 
 
 
 
 
 
c933ce0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148c1e7
 
 
 
 
 
 
 
4790bc5
 
148c1e7
 
 
 
ff4f460
 
148c1e7
ff4f460
 
 
 
 
148c1e7
ff4f460
148c1e7
4790bc5
ff4f460
4790bc5
 
c933ce0
 
ff4f460
 
 
 
148c1e7
ff4f460
 
 
148c1e7
 
 
ff4f460
 
 
 
 
 
 
148c1e7
ff4f460
 
 
c933ce0
ff4f460
 
 
 
 
 
c6356a2
ff4f460
 
 
 
 
c6356a2
 
 
 
 
 
ff4f460
 
 
 
148c1e7
ff4f460
 
 
5a252e0
ff4f460
 
c933ce0
c6356a2
ff4f460
 
 
 
 
 
aac20b5
 
 
e9371b8
c6356a2
 
ff4f460
c6356a2
ff4f460
c6356a2
 
ff4f460
c6356a2
ff4f460
c6356a2
 
 
 
ff4f460
c6356a2
ff4f460
 
c6356a2
ff4f460
c6356a2
 
ff4f460
c6356a2
ff4f460
c6356a2
 
ff4f460
c6356a2
ff4f460
 
 
 
 
 
 
aac20b5
 
 
 
 
c6356a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff4f460
148c1e7
 
 
 
cd4766a
c6356a2
 
148c1e7
 
cd4766a
c6356a2
 
 
 
148c1e7
ff4f460
 
 
 
 
 
 
 
 
 
 
 
 
d04912b
 
 
148c1e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff4f460
 
 
148c1e7
ff4f460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9371b8
 
ff4f460
 
 
 
 
148c1e7
 
 
ff4f460
c6356a2
 
 
 
 
 
 
 
 
 
ff4f460
148c1e7
 
 
ff4f460
 
 
 
148c1e7
ff4f460
 
 
148c1e7
ff4f460
 
 
c933ce0
 
 
 
 
 
 
ff4f460
c933ce0
 
ff4f460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148c1e7
 
 
 
c6356a2
 
 
 
 
 
 
 
 
 
148c1e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff4f460
 
148c1e7
 
 
 
 
ff4f460
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
import json
import os
from dataclasses import dataclass, field

import pandas as pd
from huggingface_hub import model_info, ModelInfo
from transformers import AutoConfig


# Size (in B params) displayed for proprietary/unknown models so they still sort.
UNKNOWN_MODEL_SHOW_SIZE = 150
# Decimal digits kept when converting rate columns to percentages.
PERCENT_ROUND_DIGITS = 1


# In-process memo caches for Hugging Face Hub lookups (keyed by model name;
# failed lookups are cached as None to avoid repeated network calls).
MODEL_CONFIG_CACHE = {}
MODEL_CARD_CACHE = {}


def get_hf_model_info_card_or_none(model_name: str) -> ModelInfo | None:
    """Fetch the Hub model card info for `model_name`, memoized in MODEL_CARD_CACHE.

    Returns None (and caches the miss) when the lookup fails for any reason,
    e.g. the repo does not exist or the network is unavailable.
    """
    if model_name not in MODEL_CARD_CACHE:
        try:
            MODEL_CARD_CACHE[model_name] = model_info(repo_id=model_name)
        except Exception:
            # Best-effort lookup: remember the failure so we never retry.
            MODEL_CARD_CACHE[model_name] = None
    return MODEL_CARD_CACHE[model_name]


def get_hf_hub_config_or_none(model_name: str) -> AutoConfig | None:
    """Load the model's AutoConfig from the Hub, memoized in MODEL_CONFIG_CACHE.

    Returns None (and caches the miss) when the config cannot be loaded.
    """
    if model_name not in MODEL_CONFIG_CACHE:
        try:
            MODEL_CONFIG_CACHE[model_name] = AutoConfig.from_pretrained(
                model_name, revision="main", trust_remote_code=True
            )
        except Exception:
            # Best-effort lookup: remember the failure so we never retry.
            MODEL_CONFIG_CACHE[model_name] = None
    return MODEL_CONFIG_CACHE[model_name]


def get_hf_model_info_card_or_none(model_name: str) -> ModelInfo | None:
    """Fetch the Hub model card info for `model_name`, or None on any failure.

    NOTE(review): this duplicate definition shadows the cached implementation
    defined earlier in this module, which silently disabled MODEL_CARD_CACHE.
    Caching is restored here; ideally one of the two definitions is deleted.
    """
    if model_name in MODEL_CARD_CACHE:
        return MODEL_CARD_CACHE[model_name]
    try:
        info = model_info(repo_id=model_name)
    except Exception:
        # Best-effort lookup: cache the failure so we do not retry.
        info = None
    MODEL_CARD_CACHE[model_name] = info
    return info

def model_size_to_symbol(model_size_in_b_params: int | None) -> str:
    """Converts model size to a symbol"""
    if model_size_in_b_params is None or model_size_in_b_params == 0 or not model_size_in_b_params:
        return "❓"
    if model_size_in_b_params < 5:
        return "🛴"
    elif model_size_in_b_params < 50:
        return "🚗"
    elif model_size_in_b_params < 100:
        return "🚚"
    elif model_size_in_b_params < 1000:
        return "🚀"
    else:
        return "❓"


def model_type_to_symbol(model_type: str) -> str:
    """Map a model openness type to its display emoji (❓ for anything else)."""
    type_symbols = {
        "Open-Weights": "💙",
        "Proprietary": "🟥",
    }
    return type_symbols.get(model_type, "❓")


def get_hf_data_by_model_name(model_name: str) -> dict:
    """Get model data from Hugging Face API by model name.

    Returns a dict with keys: model_architecture, model_type, model_size,
    model_url, model_license, model_family. Proprietary (no Hub card) models
    get model_type "Proprietary" and size/url of None.
    """
    # Heuristic family classification by substring of the (lowercased) name.
    model_family = "Unknown"
    if 'mistral' in model_name.lower() or 'numina' in model_name.lower():
        model_family = "Mistral"
    elif 'meta-llama' in model_name.lower():
        model_family = "LLaMA"
    elif 'claude' in model_name.lower():
        model_family = "Claude"
    elif 'qwen' in model_name.lower() or 'athene' in model_name.lower() or 'qwq' in model_name.lower() or 'qvq' in model_name.lower():
        model_family = "Qwen"
    elif 'gpt' in model_name.lower() or 'o1' in model_name.lower():
        model_family = "GPT"
    elif 'gemini' in model_name.lower():
        model_family = "Gemini"
    elif 'deepseek' in model_name.lower():
        model_family = "DeepSeek"

    print(model_name, model_family)
    model_config = get_hf_hub_config_or_none(model_name)
    model_info_card = get_hf_model_info_card_or_none(model_name)
    print('model_config', type(model_config))
    print('model_info_card', type(model_info_card))

    # If model name is a path, try to get the model name from the hub
    # NOTE(review): model_name is rewritten to "unsloth/<tail>" whenever it
    # contains "/", even when the original lookup above succeeded — so the
    # returned model_url may point at the unsloth mirror. Confirm intended.
    if '/' in model_name:
        print(f"Model {model_name} is not on the hub, try unsloth/...")
        model_name = "unsloth/" + model_name.split("/")[-1]
        if not model_config:
            model_config = get_hf_hub_config_or_none(model_name)
        if not model_info_card:
            model_info_card = get_hf_model_info_card_or_none(model_name)

    # Join multiple reported architectures into one display string.
    architecture = "Unknown"
    if model_config:
        architectures = getattr(model_config, "architectures", None)
        if architectures:
            architecture = ";".join(architectures)

    # Parameter count in billions, derived from the safetensors metadata;
    # hard-coded fallbacks for Pixtral repos that lack that metadata.
    num_params = None
    if model_info_card:
        try:
            num_params = round(model_info_card.safetensors["total"] / 1e9, 1)
        except Exception as e:
            print("SafeTensors not found in", model_name, e)
            if 'Pixtral-12B' in model_name:
                num_params = 12
            elif 'Pixtral-Large-Instruct-2411' in model_name:
                num_params = 123.3
            pass
        print("num_params", model_name, num_params)

    model_url = None
    if model_config or model_info_card:
        model_url = f"https://huggingface.co/{model_name}"

    # License as a markdown link when a license URL is present.
    # NOTE(review): card_data may be None or lack these keys for some repos,
    # which would raise here — verify against the models actually listed.
    model_license = "Unknown"
    if model_info_card:
        model_license = model_info_card.card_data["license_name"]
        model_license_link = model_info_card.card_data["license_link"]
        if model_license_link:
            model_license = f"[{model_license}]({model_license_link})"
        if not model_license:
            model_license = "Unknown"

    return {
        "model_architecture": architecture,
        "model_type": "Open-Weights" if model_info_card else "Proprietary",
        "model_size": num_params if num_params else None,
        "model_url": model_url,
        "model_license": model_license,
        "model_family": model_family,
    }


@dataclass
class Field:
    """Declarative description of a single leaderboard column."""

    pretty_name: str
    column_type: str  # rate (auto-convert to percent number), number, str, markdown
    displayed_by_default: bool = True
    never_hidden: bool = False
    fully_hidden: bool = False
    tags: list[str] = field(default_factory=list)

    @property
    def gradio_column_type(self) -> str:
        # Gradio has no "rate" column type; rates are rendered as plain numbers.
        return "number" if self.column_type == "rate" else self.column_type


# Shared per-model metadata columns; merged into every leaderboard table below.
MODEL_COLUMNS_DICT = {
    "model_type_symbol": Field("T", "str", never_hidden=True),
    "model_size_symbol": Field("S", "str", never_hidden=True),
    "full_model_name": Field("Full Model Name", "markdown", fully_hidden=True, displayed_by_default=False),
    "model_name": Field("Model Name", "markdown", never_hidden=True),
    "model_type": Field("Type", "str", displayed_by_default=False),
    "model_size": Field("#Params (B)", "number", displayed_by_default=False),
    "model_size_including_unknown": Field("#Params inc. Proprietary (B)", "number", fully_hidden=True, displayed_by_default=False),
    "model_architecture": Field("Architecture", "str", displayed_by_default=False),
    "model_license": Field("License", "markdown", displayed_by_default=False),
    "model_family": Field("Family", "str", displayed_by_default=False),
    "model_url": Field("Model URL", "str", fully_hidden=True, displayed_by_default=False),
}

# U-MATH leaderboard: overall/text/visual accuracy plus per-subject breakdowns.
# Key order here also defines the display column order.
U_MATH_COLUMNS_DICT = {
    "rank": Field("Rank", "number", never_hidden=True),
    **MODEL_COLUMNS_DICT,
    "judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
    "u_math_acc": Field("U-MATH Acc", "rate", never_hidden=True, tags=["default"]),
    "u_math_text_acc": Field("U-MATH Text Acc", "rate", tags=["default", "text"]),
    "u_math_visual_acc": Field("U-MATH Visual Acc", "rate", tags=["default", "visual"]),
    "u_math_text_hard_acc": Field("U-MATH TextHard Acc", "rate", tags=["default", "text"]),
    "differential_calc_acc": Field("Diff Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
    "differential_calc_text_acc": Field("Diff Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
    "differential_calc_visual_acc": Field(
        "Diff Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
    ),
    "integral_calc_acc": Field("Integral Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
    "integral_calc_text_acc": Field("Integral Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
    "integral_calc_visual_acc": Field(
        "Integral Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
    ),
    "algebra_acc": Field("Algebra Acc", "rate", displayed_by_default=False, tags=["subjects"]),
    "algebra_text_acc": Field("Algebra Text Acc", "rate", displayed_by_default=False, tags=["text"]),
    "algebra_visual_acc": Field("Algebra Visual Acc", "rate", displayed_by_default=False, tags=["visual"]),
    "multivariable_calculus_acc": Field("Multivar Calc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
    "multivariable_calculus_text_acc": Field(
        "Multivar Calc Text Acc", "rate", displayed_by_default=False, tags=["text"]
    ),
    "multivariable_calculus_visual_acc": Field(
        "Multivar Calc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
    ),
    "precalculus_review_acc": Field("Precalc Acc", "rate", displayed_by_default=False, tags=["subjects"]),
    "precalculus_review_text_acc": Field("Precalc Text Acc", "rate", displayed_by_default=False, tags=["text"]),
    "precalculus_review_visual_acc": Field(
        "Precalc Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
    ),
    "sequences_series_acc": Field("Seq & Series Acc", "rate", displayed_by_default=False, tags=["subjects"]),
    "sequences_series_text_acc": Field("Seq & Series Text Acc", "rate", displayed_by_default=False, tags=["text"]),
    "sequences_series_visual_acc": Field(
        "Seq & Series Visual Acc", "rate", displayed_by_default=False, tags=["visual"]
    ),
}

# μ-MATH judge leaderboard: F1/TPR/TNR/PPV/NPV overall and per judged-model subset.
MU_MATH_COLUMNS_DICT = {
    "rank": Field("Rank", "number", never_hidden=True),
    **MODEL_COLUMNS_DICT,
    "extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
    "mu_math_f1": Field("μ-MATH F1", "rate", never_hidden=True, tags=["default", "splits"]),
    "mu_math_tpr": Field("μ-MATH TPR", "rate", displayed_by_default=False, tags=["default"]),
    "mu_math_tnr": Field("μ-MATH TNR", "rate", displayed_by_default=False, tags=["default"]),
    "mu_math_ppv": Field("μ-MATH PPV", "rate", displayed_by_default=False, tags=["default"]),
    "mu_math_npv": Field("μ-MATH NPV", "rate", displayed_by_default=False, tags=["default"]),
    "GPT-4o_f1": Field("GPT-4o Subset F1", "rate", tags=["splits"]),
    "GPT-4o_tpr": Field("GPT-4o Subset TPR", "rate", displayed_by_default=False),
    "GPT-4o_tnr": Field("GPT-4o Subset TNR", "rate", displayed_by_default=False),
    "GPT-4o_ppv": Field("GPT-4o Subset PPV", "rate", displayed_by_default=False),
    "GPT-4o_npv": Field("GPT-4o Subset NPV", "rate", displayed_by_default=False),
    "Gemini-1.5-Pro_f1": Field("Gemini-1.5-Pro Subset F1", "rate", tags=["splits"]),
    "Gemini-1.5-Pro_tpr": Field("Gemini-1.5-Pro Subset TPR", "rate", displayed_by_default=False),
    "Gemini-1.5-Pro_tnr": Field("Gemini-1.5-Pro Subset TNR", "rate", displayed_by_default=False),
    "Gemini-1.5-Pro_ppv": Field("Gemini-1.5-Pro Subset PPV", "rate", displayed_by_default=False),
    "Gemini-1.5-Pro_npv": Field("Gemini-1.5-Pro Subset NPV", "rate", displayed_by_default=False),
    "Llama-3.1-70B-Instruct_f1": Field("Llama-3.1-70B Subset F1", "rate", tags=["splits"]),
    "Llama-3.1-70B-Instruct_tpr": Field("Llama-3.1-70B Subset TPR", "rate", displayed_by_default=False),
    "Llama-3.1-70B-Instruct_tnr": Field("Llama-3.1-70B Subset TNR", "rate", displayed_by_default=False),
    "Llama-3.1-70B-Instruct_ppv": Field("Llama-3.1-70B Subset PPV", "rate", displayed_by_default=False),
    "Llama-3.1-70B-Instruct_npv": Field("Llama-3.1-70B Subset NPV", "rate", displayed_by_default=False),
    "Qwen2.5-72B-Instruct_f1": Field("Qwen2.5-72B Subset F1", "rate", tags=["splits"]),
    "Qwen2.5-72B-Instruct_tpr": Field("Qwen2.5-72B Subset TPR", "rate", displayed_by_default=False),
    "Qwen2.5-72B-Instruct_tnr": Field("Qwen2.5-72B Subset TNR", "rate", displayed_by_default=False),
    "Qwen2.5-72B-Instruct_ppv": Field("Qwen2.5-72B Subset PPV", "rate", displayed_by_default=False),
    "Qwen2.5-72B-Instruct_npv": Field("Qwen2.5-72B Subset NPV", "rate", displayed_by_default=False),
}
# Combined view joining the two leaderboards on full_model_name.
U_MATH_AND_MU_MATH_COLUMNS_DICT = {
    "u_math_rank": Field("U-MATH Rank", "number", never_hidden=True),
    "mu_math_rank": Field("μ-MATH Rank", "number", never_hidden=True),
    **MODEL_COLUMNS_DICT,
    "u_math_acc": Field("U-MATH Acc", "rate", tags=["default", "u_math", "mu_math"]),
    "u_math_text_acc": Field("U-MATH Text Acc", "rate", displayed_by_default=False, tags=["u_math"]),
    "u_math_visual_acc": Field("U-MATH Visual Acc", "rate", displayed_by_default=False, tags=["u_math"]),
    "judge_model_name": Field("Judge Model Name", "markdown", displayed_by_default=False),
    "extract_model_name": Field("Extract Model Name", "markdown", displayed_by_default=False),
    "mu_math_f1": Field("μ-MATH F1", "rate", tags=["default", "u_math", "mu_math"]),
    "mu_math_tpr": Field("μ-MATH TPR", "rate", displayed_by_default=False, tags=["mu_math"]),
    "mu_math_tnr": Field("μ-MATH TNR", "rate", displayed_by_default=False, tags=["mu_math"]),
    "mu_math_ppv": Field("μ-MATH PPV", "rate", displayed_by_default=False, tags=["mu_math"]),
    "mu_math_npv": Field("μ-MATH NPV", "rate", displayed_by_default=False, tags=["mu_math"]),
}


def load_json_data(json_path: str, main_col: str | None = None) -> pd.DataFrame:
    """Loads json data from a file"""
    with open(json_path, "r") as f:
        data = json.load(f)
    df = pd.DataFrame.from_records(data)
    if main_col:
        df = df.sort_values(by=[main_col], ascending=False)

    for col in df.columns:
        if df.dtypes[col] == "float64":
            df[col] = df[col].round(decimals=2)
    return df


def get_model_meta_info_df(model_full_names: list[str]) -> pd.DataFrame:
    """Given a list of model names, returns a dataframe with model meta info.

    Rows are re-indexed 0..n-1; the original identifier is kept in the
    `full_model_name` column, and `model_name` is a markdown link when a
    Hub URL is known.
    """
    meta_by_name = {name: get_hf_data_by_model_name(name) for name in model_full_names}

    df = pd.DataFrame.from_dict(meta_by_name, orient="index")
    df["model_type_symbol"] = df["model_type"].apply(model_type_to_symbol)
    df["model_size_symbol"] = df["model_size"].apply(model_size_to_symbol)
    # Proprietary models have no known size; substitute a fixed display size.
    df["model_size_including_unknown"] = (
        df["model_size"]
        .apply(lambda size: size if size and pd.notna(size) else UNKNOWN_MODEL_SHOW_SIZE)
        .astype(float)
    )
    df["full_model_name"] = df.index
    df = df.reset_index(drop=True)

    def _linked_name(full_name: str) -> str:
        url = meta_by_name[full_name]["model_url"]
        return f"[{full_name}]({url})" if url else full_name

    df["model_name"] = df["full_model_name"].apply(_linked_name)

    return df


def get_u_math_leaderboard_df(use_pretty_names: bool = True, add_meta: bool = True) -> pd.DataFrame:
    """Creates a dataframe from json with U-MATH eval results.

    Args:
        use_pretty_names: Rename/reorder columns per U_MATH_COLUMNS_DICT.
        add_meta: Join Hugging Face model metadata onto each row (network).
    """
    json_path = os.path.join("data", "u_math_eval_results.json")
    df = load_json_data(json_path)
    df = df.rename(columns={"model_name": "full_model_name"})

    # flatten list [x, y, z] in columns as ["_acc", "_text_acc", "_visual_acc"] suffixes for columns
    for col in [
        "u_math",
        "differential_calc",
        "integral_calc",
        "algebra",
        "multivariable_calculus",
        "precalculus_review",
        "sequences_series",
    ]:
        df[col + "_acc"] = df[col].apply(lambda x: x[0])
        df[col + "_text_acc"] = df[col].apply(lambda x: x[1])
        df[col + "_visual_acc"] = df[col].apply(lambda x: x[2])
        del df[col]

    # "TextHard" = mean of the four hardest text-only subject accuracies.
    df["u_math_text_hard_acc"] = (df["differential_calc_text_acc"] + df["integral_calc_text_acc"] + df["multivariable_calculus_text_acc"] + df["sequences_series_text_acc"]) / 4

    # Sort and add rank
    df = df.sort_values(by=["u_math_acc"], ascending=False)
    df["rank"] = range(1, len(df) + 1)

    # populate with model info
    if add_meta:
        df_meta = get_model_meta_info_df(df["full_model_name"].unique())
        df = pd.merge(df, df_meta, on=["full_model_name"], how="left")

    # resolve rate columns to percent
    # NOTE(review): raises KeyError if the json introduces a column not listed
    # in U_MATH_COLUMNS_DICT — every dataframe column must be declared there.
    for col in df.columns:
        if U_MATH_COLUMNS_DICT[col].column_type == "rate":
            # Heuristic: fractions in [0, 1] are scaled to percent; values
            # already in [0, 100] are only rounded.
            if all(df[col] <= 1):
                df[col] = (df[col] * 100).round(PERCENT_ROUND_DIGITS)
            elif any(df[col] > 1) and all(df[col] <= 100):
                df[col] = df[col].round(PERCENT_ROUND_DIGITS)
            else:
                raise ValueError(f"Column {col} has values {df[col]} that are not in [0, 1] or [0, 100]")

    # convert to pretty names and sort columns by order in dict
    if use_pretty_names:
        df = df[U_MATH_COLUMNS_DICT.keys()]
        df = df.rename(columns={key: col.pretty_name for key, col in U_MATH_COLUMNS_DICT.items() if key in df.columns})

    return df


def get_mu_math_leaderboard_df(use_pretty_names: bool = True, add_meta: bool = True) -> pd.DataFrame:
    """Creates a dataframe from json with mu-MATH eval results.

    Args:
        use_pretty_names: Rename/reorder columns per MU_MATH_COLUMNS_DICT.
        add_meta: Join Hugging Face model metadata onto each row (network).
    """
    json_path = os.path.join("data", "mu_math_eval_results.json")
    df = load_json_data(json_path)
    df = df.rename(columns={"model_name": "full_model_name"})

    # Each raw column holds [acc, f1, mcc, tpr, tnr, ppv, npv]; keep only the
    # metrics the leaderboard displays. (The original also built _acc/_mcc
    # columns and immediately deleted them; they are simply skipped here.)
    for col in ["mu_math", "GPT-4o", "Gemini-1.5-Pro", "Llama-3.1-70B-Instruct", "Qwen2.5-72B-Instruct"]:
        df[col + "_f1"] = df[col].apply(lambda x: x[1])
        df[col + "_tpr"] = df[col].apply(lambda x: x[3])
        df[col + "_tnr"] = df[col].apply(lambda x: x[4])
        df[col + "_ppv"] = df[col].apply(lambda x: x[5])
        df[col + "_npv"] = df[col].apply(lambda x: x[6])
        del df[col]

    # Sort and add rank
    df = df.sort_values(by=["mu_math_f1"], ascending=False)
    df["rank"] = range(1, len(df) + 1)

    # populate with model info
    if add_meta:
        df_meta = get_model_meta_info_df(df["full_model_name"].unique())
        df = pd.merge(df, df_meta, on=["full_model_name"], how="left")

    # resolve rate columns to percent
    # NOTE(review): rounds to 2 digits while the U-MATH table uses
    # PERCENT_ROUND_DIGITS (=1) — confirm whether this asymmetry is intended.
    for col in df.columns:
        if MU_MATH_COLUMNS_DICT[col].column_type == "rate":
            if all(df[col] <= 1):
                df[col] = (df[col] * 100).round(2)
            elif any(df[col] > 1) and all(df[col] <= 100):
                df[col] = df[col].round(2)
            else:
                raise ValueError(f"Column {col} has values {df[col]} that are not in [0, 1] or [0, 100]")

    # convert to pretty names and sort columns by order in dict
    if use_pretty_names:
        df = df[MU_MATH_COLUMNS_DICT.keys()]
        df = df.rename(columns={key: col.pretty_name for key, col in MU_MATH_COLUMNS_DICT.items() if key in df.columns})

    return df


def get_joined_leaderboard_df(use_pretty_names: bool = True, add_meta: bool = True) -> pd.DataFrame:
    """Creates a dataframe from json with U-MATH and mu-MATH eval results."""
    # Build both boards with raw column names so the join keys line up.
    u_math_df = get_u_math_leaderboard_df(use_pretty_names=False, add_meta=False).rename(
        columns={"rank": "u_math_rank"}
    )
    mu_math_df = get_mu_math_leaderboard_df(use_pretty_names=False, add_meta=False).rename(
        columns={"rank": "mu_math_rank"}
    )
    # The join key must be the only shared column, otherwise the merge would
    # silently suffix/duplicate columns.
    shared = set(u_math_df.columns).intersection(set(mu_math_df.columns))
    assert shared == {"full_model_name"}, f"Columns overlap in {u_math_df.columns} and {mu_math_df.columns}"

    # merge U-MATH and mu-MATH dataframes, keeping only models present in both
    df = pd.merge(u_math_df, mu_math_df, on=["full_model_name"], how="inner", suffixes=("", ""))

    # sort by rank on u_math
    df = df.sort_values(by=["u_math_rank"], ascending=True)

    # add meta info
    if add_meta:
        df_meta = get_model_meta_info_df(df["full_model_name"].unique())
        df = pd.merge(df, df_meta, on=["full_model_name"], how="left")

    # convert to pretty names and sort columns by order in dict
    if use_pretty_names:
        df = df[U_MATH_AND_MU_MATH_COLUMNS_DICT.keys()]
        pretty_names = {
            key: col.pretty_name for key, col in U_MATH_AND_MU_MATH_COLUMNS_DICT.items() if key in df.columns
        }
        df = df.rename(columns=pretty_names)

    return df