Spaces:
Running
Running
add comments
Browse files- constants.py +7 -7
- main.py +24 -10
constants.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
class
|
2 |
col_name: str = "method_type"
|
3 |
automl: str = "AutoML"
|
4 |
tree: str = "Tree-based"
|
@@ -10,10 +10,10 @@ class MethodTypes:
|
|
10 |
|
11 |
|
12 |
model_type_emoji = {
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
}
|
|
|
1 |
+
class Constants:
|
2 |
col_name: str = "method_type"
|
3 |
automl: str = "AutoML"
|
4 |
tree: str = "Tree-based"
|
|
|
10 |
|
11 |
|
12 |
model_type_emoji = {
|
13 |
+
Constants.tree: "π΄",
|
14 |
+
Constants.foundational: "π§ ",
|
15 |
+
Constants.finetuned: "π",
|
16 |
+
Constants.automl: "π€",
|
17 |
+
Constants.baseline: "π",
|
18 |
+
Constants.other: "β",
|
19 |
}
|
main.py
CHANGED
@@ -5,14 +5,14 @@ import pandas as pd
|
|
5 |
import gradio as gr
|
6 |
from gradio_leaderboard import Leaderboard, ColumnFilter
|
7 |
|
8 |
-
from constants import
|
9 |
|
10 |
|
11 |
TITLE = """<h1 align="center" id="space-title">TabArena: Public leaderboard for Tabular methods</h1>"""
|
12 |
|
13 |
INTRODUCTION_TEXT = ("TabArena Leaderboard measures the performance of tabular models on a collection of tabular "
|
14 |
"datasets manually curated. The datasets are collected to make sure they are tabular, with "
|
15 |
-
"permissive license without ethical issues and so on, we refer to the paper
|
16 |
"description of our approach.")
|
17 |
|
18 |
ABOUT_TEXT = f"""
|
@@ -37,30 +37,44 @@ TODO update when arxiv version is ready,
|
|
37 |
|
38 |
def get_model_family(model_name: str) -> str:
|
39 |
prefixes_mapping = {
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
}
|
46 |
for method_type, prefixes in prefixes_mapping.items():
|
47 |
for prefix in prefixes:
|
48 |
if prefix.lower() in model_name.lower():
|
49 |
return method_type
|
50 |
-
return
|
51 |
|
52 |
|
53 |
def load_data(filename: str):
|
54 |
df_leaderboard = pd.read_csv(Path(__file__).parent / "data" / f"{filename}.csv.zip")
|
55 |
print(f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}")
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
df_leaderboard = df_leaderboard.loc[:, ["method", "family", "time_train_s", "time_infer_s", "rank", "elo"]]
|
|
|
|
|
59 |
df_leaderboard = df_leaderboard.round(1)
|
|
|
|
|
60 |
df_leaderboard.rename(columns={
|
61 |
"time_train_s": "training time (s)",
|
62 |
"time_infer_s": "inference time (s)",
|
63 |
}, inplace=True)
|
|
|
|
|
64 |
return df_leaderboard
|
65 |
|
66 |
|
|
|
5 |
import gradio as gr
|
6 |
from gradio_leaderboard import Leaderboard, ColumnFilter
|
7 |
|
8 |
+
from constants import Constants, model_type_emoji
|
9 |
|
10 |
|
11 |
TITLE = """<h1 align="center" id="space-title">TabArena: Public leaderboard for Tabular methods</h1>"""
|
12 |
|
13 |
INTRODUCTION_TEXT = ("TabArena Leaderboard measures the performance of tabular models on a collection of tabular "
|
14 |
"datasets manually curated. The datasets are collected to make sure they are tabular, with "
|
15 |
+
"permissive license without ethical issues and so on, we refer to the paper for a full "
|
16 |
"description of our approach.")
|
17 |
|
18 |
ABOUT_TEXT = f"""
|
|
|
37 |
|
38 |
def get_model_family(model_name: str) -> str:
    """Map a model name to its method-type label.

    Each family is associated with a list of name fragments. The first
    family (in declaration order) having a fragment that occurs anywhere in
    ``model_name`` -- compared case-insensitively -- is returned. When no
    fragment matches, ``Constants.other`` is returned.
    """
    # Declaration order matters: earlier families win when several match.
    family_fragments = {
        Constants.automl: ["AutoGluon"],
        Constants.finetuned: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
        Constants.tree: ["GBM", "CAT", "EBM", "XGB"],
        Constants.foundational: ["TABDPT", "TABICL", "TABPFN"],
        Constants.baseline: ["KNN", "LR"],
    }
    lowered_name = model_name.lower()
    for family, fragments in family_fragments.items():
        # Substring (not strict prefix) match, ignoring case.
        if any(fragment.lower() in lowered_name for fragment in fragments):
            return family
    return Constants.other
|
51 |
|
52 |
|
53 |
def load_data(filename: str) -> pd.DataFrame:
    """Load a leaderboard CSV and prepare it for display.

    Reads ``data/<filename>.csv.zip`` located next to this module, sorts the
    rows by ``elo`` in descending order, adds a ``family`` column built from
    each row's ``method`` value (family label followed by its emoji), keeps
    only the displayed columns, rounds numeric values to one decimal and
    renames the timing columns to human-readable headers.

    Args:
        filename: Base name of the file inside the ``data`` directory,
            without the ``.csv.zip`` suffix.

    Returns:
        The formatted leaderboard dataframe.
    """
    df_leaderboard = pd.read_csv(Path(__file__).parent / "data" / f"{filename}.csv.zip")
    print(f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}")

    # sort by ELO
    df_leaderboard = df_leaderboard.sort_values(by="elo", ascending=False)

    def _family_label(method_name: str) -> str:
        # Resolve the family once per row and reuse it for the emoji lookup.
        family = get_model_family(method_name)
        return family + " " + model_type_emoji[family]

    # add model family information
    df_leaderboard["family"] = df_leaderboard["method"].apply(_family_label)

    # select only the columns we want to display
    df_leaderboard = df_leaderboard.loc[:, ["method", "family", "time_train_s", "time_infer_s", "rank", "elo"]]

    # round for better display
    df_leaderboard = df_leaderboard.round(1)

    # rename some columns
    df_leaderboard = df_leaderboard.rename(columns={
        "time_train_s": "training time (s)",
        "time_infer_s": "inference time (s)",
    })

    # TODO show ELO +/- sem
    return df_leaderboard
|
79 |
|
80 |
|