Spaces:
Running
Running
Commit
·
e3793a3
1
Parent(s):
22b0fe7
maint: make more nice looking
Browse files
data/leaderboard-classification.csv.zip
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d9e301dfeeb8cc7092301268aad4e4c9922517b6288101a588b5a15f5a0aaca9
|
3 |
-
size 4679
|
|
|
|
|
|
|
|
data/leaderboard-regression.csv.zip
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d9e301dfeeb8cc7092301268aad4e4c9922517b6288101a588b5a15f5a0aaca9
|
3 |
-
size 4679
|
|
|
|
|
|
|
|
data/{leaderboard-all.csv.zip → tabarena_leaderboard.csv.zip}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c95306a347a69e561a82562c1a6306f5a9f6819a60f458d5e350639c35cde848
|
3 |
+
size 10202
|
main.py
CHANGED
@@ -67,7 +67,7 @@ def rename_map(model_name: str) -> str:
|
|
67 |
"MNCA": "ModernNCA",
|
68 |
"NN_TORCH": "TorchMLP",
|
69 |
"FASTAI": "FastaiMLP",
|
70 |
-
"
|
71 |
"EBM": "EBM",
|
72 |
"TABDPT": "TabDPT",
|
73 |
"TABICL": "TabICL",
|
@@ -88,8 +88,6 @@ def load_data(filename: str):
|
|
88 |
f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}"
|
89 |
)
|
90 |
|
91 |
-
# sort by ELO
|
92 |
-
df_leaderboard = df_leaderboard.sort_values(by="elo", ascending=False)
|
93 |
|
94 |
# add model family information
|
95 |
|
@@ -101,22 +99,51 @@ def load_data(filename: str):
|
|
101 |
)
|
102 |
df_leaderboard["method"] = df_leaderboard["method"].apply(rename_map)
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
# select only the columns we want to display
|
105 |
df_leaderboard = df_leaderboard.loc[
|
106 |
-
:,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
]
|
108 |
|
109 |
# round for better display
|
110 |
-
df_leaderboard = df_leaderboard.round(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
# rename some columns
|
113 |
return df_leaderboard.rename(
|
114 |
columns={
|
115 |
-
"
|
116 |
-
"
|
117 |
"method": "Model",
|
118 |
"elo": "Elo [⬆️]",
|
119 |
"rank": "Rank [⬇️]",
|
|
|
120 |
}
|
121 |
)
|
122 |
|
@@ -138,6 +165,26 @@ def make_leaderboard(df_leaderboard: pd.DataFrame) -> Leaderboard:
|
|
138 |
"(tuned + ensemble)"
|
139 |
) | df_leaderboard["Model"].str.endswith("(4h)")
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
return Leaderboard(
|
142 |
value=df_leaderboard,
|
143 |
select_columns=SelectColumns(
|
@@ -152,17 +199,29 @@ def make_leaderboard(df_leaderboard: pd.DataFrame) -> Leaderboard:
|
|
152 |
"Only Default",
|
153 |
"Only Tuned",
|
154 |
"Only Tuned + Ensemble",
|
|
|
155 |
],
|
156 |
search_columns=["Model", "Type"],
|
157 |
filter_columns=[
|
158 |
-
ColumnFilter(
|
159 |
-
"TypeFiler", type="checkboxgroup", label="Filter by Model Type"
|
160 |
-
),
|
161 |
ColumnFilter("Only Default", type="boolean", default=False),
|
162 |
ColumnFilter("Only Tuned", type="boolean", default=False),
|
163 |
ColumnFilter("Only Tuned + Ensemble", type="boolean", default=False),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
],
|
165 |
-
bool_checkboxgroup_label="Custom Views (
|
166 |
)
|
167 |
|
168 |
|
@@ -174,7 +233,7 @@ def main():
|
|
174 |
|
175 |
with gr.Tabs(elem_classes="tab-buttons"):
|
176 |
with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-tab-table", id=2):
|
177 |
-
df_leaderboard = load_data("
|
178 |
make_leaderboard(df_leaderboard)
|
179 |
|
180 |
# TODO: decide on which subsets we want to support here.
|
|
|
67 |
"MNCA": "ModernNCA",
|
68 |
"NN_TORCH": "TorchMLP",
|
69 |
"FASTAI": "FastaiMLP",
|
70 |
+
"TABPFNV2": "TabPFNv2",
|
71 |
"EBM": "EBM",
|
72 |
"TABDPT": "TabDPT",
|
73 |
"TABICL": "TabICL",
|
|
|
88 |
f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}"
|
89 |
)
|
90 |
|
|
|
|
|
91 |
|
92 |
# add model family information
|
93 |
|
|
|
99 |
)
|
100 |
df_leaderboard["method"] = df_leaderboard["method"].apply(rename_map)
|
101 |
|
102 |
+
# elo,elo+,elo-,mrr
|
103 |
+
df_leaderboard["Elo 95% CI"] = (
|
104 |
+
"+"
|
105 |
+
+ df_leaderboard["elo+"].round(0).astype(int).astype(str)
|
106 |
+
+ "/-"
|
107 |
+
+ df_leaderboard["elo-"].round(0).astype(int).astype(str)
|
108 |
+
)
|
109 |
# select only the columns we want to display
|
110 |
df_leaderboard = df_leaderboard.loc[
|
111 |
+
:,
|
112 |
+
[
|
113 |
+
"Type",
|
114 |
+
"TypeName",
|
115 |
+
"method",
|
116 |
+
"elo",
|
117 |
+
"Elo 95% CI",
|
118 |
+
"rank",
|
119 |
+
"normalized-error",
|
120 |
+
"median_time_train_s_per_1K",
|
121 |
+
"median_time_infer_s_per_1K",
|
122 |
+
],
|
123 |
]
|
124 |
|
125 |
# round for better display
|
126 |
+
df_leaderboard[["elo", "Elo 95% CI"]] = df_leaderboard[["elo", "Elo 95% CI"]].round(0)
|
127 |
+
df_leaderboard[["median_time_train_s_per_1K", "rank"]] = df_leaderboard[
|
128 |
+
["median_time_train_s_per_1K", "rank"]
|
129 |
+
].round(2)
|
130 |
+
df_leaderboard[["normalized-error", "median_time_infer_s_per_1K"]] = df_leaderboard[
|
131 |
+
["normalized-error", "median_time_infer_s_per_1K"]
|
132 |
+
].round(3)
|
133 |
+
|
134 |
+
df_leaderboard = df_leaderboard.sort_values(by="elo", ascending=False)
|
135 |
+
df_leaderboard = df_leaderboard.reset_index(drop=True)
|
136 |
+
df_leaderboard = df_leaderboard.reset_index(names="#")
|
137 |
|
138 |
# rename some columns
|
139 |
return df_leaderboard.rename(
|
140 |
columns={
|
141 |
+
"median_time_train_s_per_1K": "Median Train Time (s/1K) [⬇️]",
|
142 |
+
"median_time_infer_s_per_1K": "Median Predict Time (s/1K)) [⬇️]",
|
143 |
"method": "Model",
|
144 |
"elo": "Elo [⬆️]",
|
145 |
"rank": "Rank [⬇️]",
|
146 |
+
"normalized-error": "Normalized Error [⬇️]",
|
147 |
}
|
148 |
)
|
149 |
|
|
|
165 |
"(tuned + ensemble)"
|
166 |
) | df_leaderboard["Model"].str.endswith("(4h)")
|
167 |
|
168 |
+
# Add Imputed count postfix
|
169 |
+
mask = df_leaderboard["Model"].str.startswith("TabPFNv2")
|
170 |
+
df_leaderboard.loc[mask, "Model"] = (
|
171 |
+
df_leaderboard.loc[mask, "Model"] + " [35.29% IMPUTED]"
|
172 |
+
)
|
173 |
+
mask = df_leaderboard["Model"].str.startswith("TabICL")
|
174 |
+
df_leaderboard.loc[mask, "Model"] = (
|
175 |
+
df_leaderboard.loc[mask, "Model"] + " [29.41% IMPUTED]"
|
176 |
+
)
|
177 |
+
|
178 |
+
df_leaderboard["Imputed"] = df_leaderboard["Model"].str.startswith(
|
179 |
+
"TabPFNv2"
|
180 |
+
) | df_leaderboard["Model"].str.startswith("TabICL")
|
181 |
+
df_leaderboard["Imputed"] = df_leaderboard["Imputed"].replace(
|
182 |
+
{
|
183 |
+
True: "Imputed",
|
184 |
+
False: "Not Imputed",
|
185 |
+
}
|
186 |
+
)
|
187 |
+
|
188 |
return Leaderboard(
|
189 |
value=df_leaderboard,
|
190 |
select_columns=SelectColumns(
|
|
|
199 |
"Only Default",
|
200 |
"Only Tuned",
|
201 |
"Only Tuned + Ensemble",
|
202 |
+
"Imputed",
|
203 |
],
|
204 |
search_columns=["Model", "Type"],
|
205 |
filter_columns=[
|
206 |
+
ColumnFilter("TypeFiler", type="checkboxgroup", label="Model Types."),
|
|
|
|
|
207 |
ColumnFilter("Only Default", type="boolean", default=False),
|
208 |
ColumnFilter("Only Tuned", type="boolean", default=False),
|
209 |
ColumnFilter("Only Tuned + Ensemble", type="boolean", default=False),
|
210 |
+
ColumnFilter(
|
211 |
+
"Imputed",
|
212 |
+
type="checkboxgroup",
|
213 |
+
label="(Not) Imputed Models.",
|
214 |
+
info="We impute the performance for models that cannot run on all"
|
215 |
+
" datasets due to task or dataset size constraints (e.g. TabPFN,"
|
216 |
+
" TabICL). We impute with the performance of a defaultRandomForest. "
|
217 |
+
" We add a postfix [X% IMPUTED] to the model if any results were "
|
218 |
+
"imputed. The X% shows the percentage of"
|
219 |
+
" datasets that were imputed. In general, imputation negatively"
|
220 |
+
" represents the model performance, punishing the model for not"
|
221 |
+
" being able to run on all datasets.",
|
222 |
+
),
|
223 |
],
|
224 |
+
bool_checkboxgroup_label="Custom Views (exclusive, only toggle one at a time):",
|
225 |
)
|
226 |
|
227 |
|
|
|
233 |
|
234 |
with gr.Tabs(elem_classes="tab-buttons"):
|
235 |
with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-tab-table", id=2):
|
236 |
+
df_leaderboard = load_data("tabarena_leaderboard")
|
237 |
make_leaderboard(df_leaderboard)
|
238 |
|
239 |
# TODO: decide on which subsets we want to support here.
|