Use preprocessed table dataset (WIP)
src/populate.py CHANGED (+18 -19)
@@ -1,6 +1,5 @@
 import json
 import os
-from decimal import Decimal
 
 import datasets
 import pandas as pd
@@ -9,9 +8,21 @@ from src.about import Tasks
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 
+# The values of these columns are in the range of 0-100
+# We normalize them to 0-1
+COLUMNS_TO_NORMALIZE = [
+    "ALT E to J BLEU",
+    "ALT J to E BLEU",
+    "WikiCorpus E to J BLEU",
+    "WikiCorpus J to E BLEU",
+    "XL-Sum JA BLEU",
+    "XL-Sum ROUGE1",
+    "XL-Sum ROUGE2",
+    "XL-Sum ROUGE-Lsum",
+]
+
 
 def get_leaderboard_df(contents_repo: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
-    """Creates a dataframe from all the individual experiment results"""
     df = datasets.load_dataset(contents_repo, split="train").to_pandas()
     df["Model"] = df["model"].map(make_clickable_model)
     df["T"] = df["model_type"].map(lambda x: x.split(":")[0].strip())
@@ -33,28 +44,16 @@ def get_leaderboard_df(contents_repo: str, cols: list, benchmark_cols: list) ->
             "model": "model_name_for_query",
         }
     )
-    df[[task.value.col_name for task in Tasks]] = df[[task.value.col_name for task in Tasks]].map(lambda x: Decimal(x))
 
     # Add a row ID column
     df[AutoEvalColumn.row_id.name] = range(len(df))
 
-    score_cols = [
-        "ALT E to J BLEU",
-        "ALT J to E BLEU",
-        "WikiCorpus E to J BLEU",
-        "WikiCorpus J to E BLEU",
-        "XL-Sum JA BLEU",
-        "XL-Sum ROUGE1",
-        "XL-Sum ROUGE2",
-        "XL-Sum ROUGE-Lsum",
-    ]
-
-    existing_score_cols = [col for col in score_cols if col in df.columns]
-
-    # Divide the score columns by 100 and format them as .4f
-    df[existing_score_cols] = (df[existing_score_cols] / 100).map(lambda x: f"{x:.4f}")
+    # Normalize the columns
+    available_columns_to_normalize = [col for col in COLUMNS_TO_NORMALIZE if col in df.columns]
+    df[available_columns_to_normalize] = df[available_columns_to_normalize] / 100
+
     df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
-    df = df[cols].round(decimals=
+    df = df[cols].round(decimals=4)
 
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
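
For context, a minimal sketch (not part of the commit) of what the new normalization path does on a toy DataFrame. The column names and values below are made up for illustration; only the COLUMNS_TO_NORMALIZE filtering, the division by 100, and the later round(decimals=4) come from the change. The removed code formatted the scores as strings with f"{x:.4f}", which made the columns non-numeric; the new code keeps them as floats so the final rounding still operates on numbers.

# Minimal sketch (illustrative only): new normalization vs. the removed string formatting.
import pandas as pd

COLUMNS_TO_NORMALIZE = ["XL-Sum ROUGE1", "XL-Sum ROUGE2"]  # subset for the example

df = pd.DataFrame(
    {
        "model": ["model-a", "model-b"],   # hypothetical rows
        "XL-Sum ROUGE1": [48.21, 52.904],  # raw scores on a 0-100 scale
        "XL-Sum ROUGE2": [21.5, 25.333],
    }
)

# New behaviour: divide in place; the columns stay float64, so the later
# df[cols].round(decimals=4) still rounds them as numeric data.
available = [col for col in COLUMNS_TO_NORMALIZE if col in df.columns]
df[available] = df[available] / 100
print(df.dtypes["XL-Sum ROUGE1"])  # float64
print(df.round(decimals=4))

# Removed behaviour: the same division followed by string formatting,
# which turned the score columns into object (string) dtype:
# df[available] = (df[available] / 100).map(lambda x: f"{x:.4f}")

Net effect on the leaderboard table: the normalized columns are displayed as 0-1 values rounded to four decimals rather than as fixed ".4f" strings.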