|
import pandas as pd |
|
import json |
|
|
|
from .config import base_info, language_list, delimiter, avg_label, round_precision |
|
|
|
def load_tables(file_path: str) -> pd.DataFrame: |
|
""" |
|
Load and process the leaderboard data from a JSONL file. |
|
- Flattens nested JSON structures. |
|
- Computes total scores for each language. |
|
- Sorts models by their performance. |
|
""" |
|
data = [] |
|
with open(file_path, "r", encoding="utf-8") as f: |
|
for line in f: |
|
json_obj = json.loads(line) |
|
flattened = pd.json_normalize(json_obj, sep=delimiter) |
|
data.append(flattened) |
|
|
|
|
|
df = pd.concat(data, ignore_index=True) |
|
|
|
|
|
df = df.map(lambda x: round(x, round_precision) if isinstance(x, (int, float)) else x) |
|
|
|
base = pd.DataFrame() |
|
|
|
|
|
for info in base_info: |
|
base[info["display"]] = df[info["key"]] |
|
|
|
|
|
main_table = base.copy() |
|
|
|
detailed_tables = [] |
|
|
|
for lang in language_list: |
|
|
|
main_table[lang['display']] = df[f"{lang['key']}{delimiter}total"] |
|
|
|
|
|
cols = [col for col in df.columns if col.startswith(lang["key"])] |
|
total_col = None |
|
table = base.copy() |
|
|
|
for col in cols: |
|
display_col = col.split(delimiter)[:-1] |
|
|
|
|
|
if len(display_col) == 1: |
|
total_col = col |
|
|
|
|
|
display_col = col if len(display_col) < 2 else " - ".join(display_col[1:]) |
|
table[display_col] = df[col] |
|
|
|
|
|
if total_col: |
|
total_col_data = table.pop(total_col) |
|
table.insert(len(base.columns), "Total", total_col_data) |
|
table = table.sort_values(by="Total", ascending=False) |
|
|
|
detailed_tables.append(table) |
|
|
|
|
|
main_table[avg_label] = sum( |
|
[main_table[lang["display"]] if lang["main_table_avg"] else 0 for lang in language_list] |
|
) |
|
main_table[avg_label] = round( |
|
main_table[avg_label] / sum(lang["main_table_avg"] for lang in language_list), round_precision |
|
) |
|
|
|
|
|
last_col = main_table.pop(main_table.columns[-1]) |
|
main_table.insert(len(base.columns), last_col.name, last_col) |
|
|
|
|
|
main_table = main_table.sort_values(by=avg_label, ascending=False) |
|
|
|
|
|
return [{"name": "Overall", "table": main_table, "hidden_col": []}] + [ |
|
{"name": lang["tab"], "table": table, "hidden_col": lang["hidden_col"]} |
|
for lang, table in zip(language_list, detailed_tables) |
|
] |
|
|