from __future__ import annotations

from pathlib import Path

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from constants import Constants, model_type_emoji
from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns

TITLE = """<h1 align="center" id="space-title">TabArena: Public Leaderboard for Tabular Methods</h1>"""
INTRODUCTION_TEXT = (
    "The TabArena leaderboard measures the performance of tabular models on a "
    "manually curated collection of tabular datasets. Datasets are selected to "
    "ensure that they are truly tabular, carry permissive licenses, and are free "
    "of ethical issues; we refer to the paper for a full description of our approach."
)
ABOUT_TEXT = """
## How It Works

To reproduce the leaderboard, follow the installation instructions at
`https://github.com/autogluon/tabrepo/tree/tabarena` and run
`https://github.com/autogluon/tabrepo/blob/tabarena/examples/tabarena/run_tabarena_eval.py`.
This will generate a leaderboard. You can add your own method and contact the authors if you
want it added to the leaderboard. We require methods to have publicly available code to be
considered for the leaderboard.
"""
CITATION_BUTTON_LABEL = (
    "If you use this leaderboard in your research, please cite the following:"
)
CITATION_BUTTON_TEXT = r"""
@article{
TBA,
}
"""


def get_model_family(model_name: str) -> str:
    prefixes_mapping = {
        Constants.automl: ["AutoGluon"],
        Constants.neural_network: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
        Constants.tree: ["GBM", "CAT", "EBM", "XGB", "XT", "RF"],
        Constants.foundational: ["TABDPT", "TABICL", "TABPFN"],
        Constants.baseline: ["KNN", "LR"],
    }
    for method_type, prefixes in prefixes_mapping.items():
        for prefix in prefixes:
            if prefix.lower() in model_name.lower():
                return method_type
    return Constants.other
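

# Example: get_model_family("GBM (tuned)") matches the "GBM" prefix and returns
# Constants.tree; names without a known prefix fall back to Constants.other.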


def rename_map(model_name: str) -> str:
    # Local dict renamed to avoid shadowing the function name.
    mapping = {
        "TABM": "TabM",
        "REALMLP": "RealMLP",
        "GBM": "LightGBM",
        "CAT": "CatBoost",
        "XGB": "XGBoost",
        "XT": "ExtraTrees",
        "RF": "RandomForest",
        "MNCA": "ModernNCA",
        "NN_TORCH": "TorchMLP",
        "FASTAI": "FastaiMLP",
        "TABPFN": "TabPFNv2",
        "EBM": "EBM",
        "TABDPT": "TabDPT",
        "TABICL": "TabICL",
        "KNN": "KNN",
        "LR": "Linear",
    }
    for prefix, display_name in mapping.items():
        if prefix in model_name:
            return model_name.replace(prefix, display_name)
    return model_name
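

# Example: rename_map("GBM (default)") -> "LightGBM (default)"; names that
# contain no known raw prefix are returned unchanged.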


def load_data(filename: str):
    df_leaderboard = pd.read_csv(Path(__file__).parent / "data" / f"{filename}.csv.zip")
    print(
        f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}"
    )

    # sort by Elo (higher is better)
    df_leaderboard = df_leaderboard.sort_values(by="elo", ascending=False)

    # add model family information
    df_leaderboard["Type"] = df_leaderboard["method"].apply(
        lambda s: model_type_emoji[get_model_family(s)]
    )
    df_leaderboard["TypeName"] = df_leaderboard["method"].apply(get_model_family)
    df_leaderboard["method"] = df_leaderboard["method"].apply(rename_map)

    # select only the columns we want to display
    df_leaderboard = df_leaderboard.loc[
        :, ["Type", "TypeName", "method", "elo", "rank", "time_train_s", "time_infer_s"]
    ]

    # round for better display
    df_leaderboard = df_leaderboard.round(1)

    # rename some columns
    return df_leaderboard.rename(
        columns={
            "time_train_s": "training time (s) [⬇️]",
            "time_infer_s": "inference time (s) [⬇️]",
            "method": "Model",
            "elo": "Elo [⬆️]",
            "rank": "Rank [⬇️]",
        }
    )
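

# load_data expects data/<filename>.csv.zip to contain at least the columns
# method, elo, rank, time_train_s and time_infer_s; all other columns are
# dropped by the selection above.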

# TODO: show Elo +/- SEM
# TODO: rename and re-order columns


def make_leaderboard(df_leaderboard: pd.DataFrame) -> Leaderboard:
    df_leaderboard["TypeFilter"] = df_leaderboard["TypeName"].apply(
        lambda m: f"{m} {model_type_emoji[m]}"
    )

    # De-selects but does not filter...
    # default = df_leaderboard["TypeFilter"].unique().tolist()
    # default = [(s, s) for s in default if "AutoML" not in s]

    df_leaderboard["Only Default"] = df_leaderboard["Model"].str.endswith("(default)")
    df_leaderboard["Only Tuned"] = df_leaderboard["Model"].str.endswith("(tuned)")
    df_leaderboard["Only Tuned + Ensemble"] = df_leaderboard["Model"].str.endswith(
        "(tuned + ensemble)"
    ) | df_leaderboard["Model"].str.endswith("(4h)")

    return Leaderboard(
        value=df_leaderboard,
        select_columns=SelectColumns(
            default_selection=list(df_leaderboard.columns),
            cant_deselect=["Type", "Model"],
            label="Select Columns to Display:",
        ),
        hide_columns=[
            "TypeName",
            "TypeFilter",
            "RefModel",
            "Only Default",
            "Only Tuned",
            "Only Tuned + Ensemble",
        ],
        search_columns=["Model", "Type"],
        filter_columns=[
            ColumnFilter(
                "TypeFilter", type="checkboxgroup", label="Filter by Model Type"
            ),
            ColumnFilter("Only Default", type="boolean", default=False),
            ColumnFilter("Only Tuned", type="boolean", default=False),
            ColumnFilter("Only Tuned + Ensemble", type="boolean", default=False),
        ],
        bool_checkboxgroup_label="Custom Views (Exclusive, only toggle one at a time):",
    )
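

# Note: methods ending in "(4h)" (time-budgeted entries, e.g. AutoGluon with a
# 4-hour budget) are folded into the "Only Tuned + Ensemble" view, as they have
# no "(tuned + ensemble)" suffix of their own.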


def main():
    demo = gr.Blocks()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-tab-table", id=2):
                df_leaderboard = load_data("leaderboard-all")
                make_leaderboard(df_leaderboard)

            # TODO: decide on which subsets we want to support here.
            # with gr.TabItem('🏅 Regression', elem_id="llm-benchmark-tab-table", id=0):
            #     df_leaderboard = load_data("leaderboard-regression")
            #     leaderboard = make_leaderboard(df_leaderboard)
            #
            # with gr.TabItem('🏅 Classification', elem_id="llm-benchmark-tab-table", id=1):
            #     df_leaderboard = load_data("leaderboard-classification")
            #     leaderboard = make_leaderboard(df_leaderboard)
            #
            # with gr.TabItem('🏅 TabPFNv2-Compatible', elem_id="llm-benchmark-tab-table", id=1):
            #     df_leaderboard = load_data("leaderboard-classification")
            #     leaderboard = make_leaderboard(df_leaderboard)
            #
            # with gr.TabItem('🏅 TabICL-Compatible', elem_id="llm-benchmark-tab-table", id=1):
            #     df_leaderboard = load_data("leaderboard-classification")
            #     leaderboard = make_leaderboard(df_leaderboard)

            with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=4):
                gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

        with gr.Row(), gr.Accordion("📙 Citation", open=False):
            gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )
    scheduler = BackgroundScheduler()
    # scheduler.add_job(restart_space, "interval", seconds=1800)
    scheduler.start()

    # Launch once; a second launch() call after queue().launch() is redundant.
    demo.queue(default_concurrency_limit=40).launch()


if __name__ == "__main__":
    main()