Spaces:
Running
Running
add files
Browse files- .gitignore +10 -0
- README.md +17 -14
- constants.py +19 -0
- data/leaderboard-all.csv.zip +3 -0
- data/leaderboard-classification.csv.zip +3 -0
- data/leaderboard-regression.csv.zip +3 -0
- main.py +118 -0
- pyproject.toml +13 -0
.gitignore
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python-generated files
|
2 |
+
__pycache__/
|
3 |
+
*.py[oc]
|
4 |
+
build/
|
5 |
+
dist/
|
6 |
+
wheels/
|
7 |
+
*.egg-info
|
8 |
+
|
9 |
+
# Virtual environments
|
10 |
+
.venv
|
README.md
CHANGED
@@ -1,14 +1,17 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
1 |
+
# TabArena Leaderboard code
|
2 |
+
|
3 |
+
This repository contains the frontend code to display the TabArena leaderboard. The leaderboard is hosted on a
|
4 |
+
HuggingFace space.
|
5 |
+
|
6 |
+
Reference:
|
7 |
+
* website: tabarena.ai
|
8 |
+
* paper: TODO
|
9 |
+
* codebase to compute the leaderboard: https://github.com/autogluon/tabrepo/tree/tabarena
|
10 |
+
|
11 |
+
TODOS:
|
12 |
+
* add regression / binary classification / multi-class classification views
|
13 |
+
|
14 |
+
DONE:
|
15 |
+
* readme title and information
|
16 |
+
* pull data from leaderboard
|
17 |
+
* update columns
|
constants.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class MethodTypes:
    """Canonical method-category labels used to group leaderboard entries.

    The string values are shown verbatim in the leaderboard's "family"
    column, so changing them changes the UI text.
    """

    # Name of the dataframe column that stores the method type.
    col_name: str = "method_type"
    automl: str = "AutoML"
    tree: str = "Tree-based"
    foundational: str = "Foundational"
    # NOTE(review): attribute is named "finetuned" but its label is
    # "Neural-network" — confirm the mismatch is intentional.
    finetuned: str = "Neural-network"
    baseline: str = "Baseline"
    other: str = "Other"


# Emoji appended to each family label for display in the UI.
# NOTE(review): these literals look mojibake-garbled (multi-byte emoji
# mis-decoded, e.g. "π΄" is plausibly a palm-tree emoji) — confirm the
# intended characters against the original file's encoding.
model_type_emoji = {
    MethodTypes.tree: "π΄",
    MethodTypes.foundational: "π§ ",
    MethodTypes.finetuned: "π",
    MethodTypes.automl: "π€",
    MethodTypes.baseline: "π",
    MethodTypes.other: "β",
}
|
data/leaderboard-all.csv.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9e301dfeeb8cc7092301268aad4e4c9922517b6288101a588b5a15f5a0aaca9
|
3 |
+
size 4679
|
data/leaderboard-classification.csv.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9e301dfeeb8cc7092301268aad4e4c9922517b6288101a588b5a15f5a0aaca9
|
3 |
+
size 4679
|
data/leaderboard-regression.csv.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9e301dfeeb8cc7092301268aad4e4c9922517b6288101a588b5a15f5a0aaca9
|
3 |
+
size 4679
|
main.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
+
from apscheduler.schedulers.background import BackgroundScheduler
|
4 |
+
import pandas as pd
|
5 |
+
import gradio as gr
|
6 |
+
from gradio_leaderboard import Leaderboard, ColumnFilter
|
7 |
+
|
8 |
+
from constants import MethodTypes, model_type_emoji
|
9 |
+
|
10 |
+
|
11 |
+
# Page title rendered as raw HTML at the top of the Gradio app.
TITLE = """<h1 align="center" id="space-title">TabArena: Public leaderboard for Tabular methods</h1>"""

# Markdown blurb shown under the title.
# NOTE(review): "paper XXX" is a placeholder — update once the paper is out.
INTRODUCTION_TEXT = ("TabArena Leaderboard measures the performance of tabular models on a collection of tabular "
                     "datasets manually curated. The datasets are collected to make sure they are tabular, with "
                     "permissive license without ethical issues and so on, we refer to the paper XXX for a "
                     "description of our approach.")

# Content of the "About" tab.
# NOTE(review): the f-string prefix is unnecessary — the literal contains no
# placeholders (the backtick URLs are plain text).
ABOUT_TEXT = f"""
## How It Works.

To evaluate the leaderboard, follow install instructions in
`https://github.com/autogluon/tabrepo/tree/tabarena` and run
`https://github.com/autogluon/tabrepo/blob/tabarena/examples/tabarena/run_tabarena_eval.py`.


This will generate a leaderboard. You can add your own method and contact the authors if you want it to be added
to the leaderboard. We require method to have public code available to be considered in the leaderboard.
"""

# Label and BibTeX body of the citation box shown in the UI.
CITATION_BUTTON_LABEL = "If you use this leaderboard in your research please cite the following:"
CITATION_BUTTON_TEXT = r"""
@article{
TODO update when arxiv version is ready,
}
"""
|
36 |
+
|
37 |
+
|
38 |
+
def get_model_family(model_name: str) -> str:
    """Map a raw method name to one of the ``MethodTypes`` categories.

    Matching is case-insensitive substring containment: the first category
    with a marker string occurring anywhere in *model_name* wins, falling
    back to ``MethodTypes.other``.

    :param model_name: method identifier as found in the leaderboard CSV.
    :return: one of the ``MethodTypes`` label strings.
    """
    # Marker substrings per category. NOTE: despite the name "prefixes",
    # these are matched anywhere in the model name, not only at the start.
    prefixes_mapping = {
        MethodTypes.automl: ["AutoGluon"],
        MethodTypes.finetuned: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
        MethodTypes.tree: ["GBM", "CAT", "EBM", "XGB"],
        MethodTypes.foundational: ["TABDPT", "TABICL", "TABPFN"],
        MethodTypes.baseline: ["KNN", "LR"],
    }
    # Hoisted out of the loop: lowercase the name once instead of per prefix.
    name_lower = model_name.lower()
    for method_type, prefixes in prefixes_mapping.items():
        if any(prefix.lower() in name_lower for prefix in prefixes):
            return method_type
    return MethodTypes.other
|
51 |
+
|
52 |
+
|
53 |
+
def load_data(filename: str):
    """Load one leaderboard CSV and prepare it for display.

    :param filename: stem of a zipped CSV under ``data/`` next to this file
        (e.g. ``"leaderboard-all"``); ``.csv.zip`` is appended automatically.
    :return: dataframe restricted to the displayed columns, with a derived
        ``family`` column (category label + emoji), values rounded to one
        decimal, and the time columns renamed for the UI.
    """
    # BUG FIX: the format string previously contained no placeholder, so the
    # `filename` argument was ignored and every tab loaded the same file.
    df_leaderboard = pd.read_csv(Path(__file__).parent / "data" / f"{filename}.csv.zip")
    print(f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}")
    # Derive the method family from the method name, then append its emoji.
    df_leaderboard["family"] = df_leaderboard.loc[:, "method"].apply(get_model_family)
    df_leaderboard["family"] = df_leaderboard.loc[:, "family"].apply(lambda s: s + " " + model_type_emoji[s])
    # Keep only the columns shown in the UI, in display order.
    df_leaderboard = df_leaderboard.loc[:, ["method", "family", "time_train_s", "time_infer_s", "rank", "elo"]]
    df_leaderboard = df_leaderboard.round(1)
    df_leaderboard.rename(columns={
        "time_train_s": "training time (s)",
        "time_infer_s": "inference time (s)",
    }, inplace=True)
    return df_leaderboard
|
65 |
+
|
66 |
+
|
67 |
+
def make_leaderboard(df_leaderboard: pd.DataFrame) -> Leaderboard:
    """Wrap a prepared dataframe in a ``Leaderboard`` widget.

    Enables free-text search on the method name and a dropdown filter on
    the method family.
    """
    family_filter = ColumnFilter("family", type="dropdown", label="Filter by family")
    return Leaderboard(
        value=df_leaderboard,
        search_columns=["method"],
        filter_columns=[family_filter],
    )
|
76 |
+
|
77 |
+
|
78 |
+
def main():
    """Assemble the Gradio Blocks UI for the TabArena leaderboard and launch it."""
    demo = gr.Blocks()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # One tab per leaderboard split, plus an "About" tab.
        # NOTE(review): the tab-label emoji characters look mojibake-garbled
        # ("π") — confirm the intended emoji before shipping.
        with gr.Tabs(elem_classes="tab-buttons") as tabs:
            with gr.TabItem('π Overall', elem_id="llm-benchmark-tab-table", id=2):
                df_leaderboard = load_data("leaderboard-all")
                leaderboard = make_leaderboard(df_leaderboard)

            with gr.TabItem('π Regression', elem_id="llm-benchmark-tab-table", id=0):
                df_leaderboard = load_data("leaderboard-regression")
                leaderboard = make_leaderboard(df_leaderboard)

            with gr.TabItem('π Classification', elem_id="llm-benchmark-tab-table", id=1):
                df_leaderboard = load_data("leaderboard-classification")
                leaderboard = make_leaderboard(df_leaderboard)

            with gr.TabItem("π About", elem_id="llm-benchmark-tab-table", id=4):
                gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
        with gr.Row():
            with gr.Accordion("π Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )

    # Background scheduler kept for the (currently disabled) periodic
    # space restart below.
    scheduler = BackgroundScheduler()
    # scheduler.add_job(restart_space, "interval", seconds=1800)
    scheduler.start()
    # BUG FIX: the app was previously launched twice — queue().launch()
    # followed by a second demo.launch() — which errors once the first
    # launch returns. A single launch on the queued app is correct.
    demo.queue(default_concurrency_limit=40).launch()


if __name__ == "__main__":
    main()
|
pyproject.toml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "tabarenaleaderboard"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Frontend code for the TabArena tabular-methods leaderboard (HuggingFace Space)"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.12"
|
7 |
+
dependencies = [
|
8 |
+
"apscheduler>=3.11.0",
|
9 |
+
"gradio-client>=1.3.0",
|
10 |
+
"gradio-leaderboard==0.0.9",
|
11 |
+
"gradio[oauth]==4.44.0",
|
12 |
+
"pandas>=2.2.3",
|
13 |
+
]
|