LennartPurucker committed
Commit 1dedb52 · Parent(s): 16d8300

maint: iterate on the LB

Files changed (2):
  1. constants.py +9 -9
  2. main.py +140 -52
constants.py CHANGED
@@ -1,19 +1,19 @@
 class Constants:
     col_name: str = "method_type"
-    automl: str = "AutoML"
     tree: str = "Tree-based"
-    foundational: str = "Foundational"
-    finetuned: str = "Neural-network"
+    foundational: str = "Foundation Model"
+    neural_network: str = "Neural Network"
     baseline: str = "Baseline"
+    # Not used
     other: str = "Other"
-
-
+    automl: str = "AutoML"
 
 model_type_emoji = {
-    Constants.tree: "🌴",
-    Constants.foundational: "🧠",
-    Constants.finetuned: "🌐",
-    Constants.automl: "🤖",
+    Constants.tree: "🌳",
+    Constants.foundational: "🧠⚡",
+    Constants.neural_network: "🧠🔁",
     Constants.baseline: "📏",
+    # Not used
     Constants.other: "❓",
+    Constants.automl: "🤖",
 }
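
For context, the emoji map is keyed by the attributes of the `Constants` class, so a type label for a leaderboard row is a plain dictionary lookup. A minimal sketch of that usage (the chosen `method_type` is an illustrative example, not taken from the leaderboard data):

    from constants import Constants, model_type_emoji

    # Compose the "<type name> <emoji>" label for one method type,
    # mirroring how main.py builds its "TypeFilter" column below.
    method_type = Constants.foundational  # "Foundation Model"
    label = f"{method_type} {model_type_emoji[method_type]}"
    print(label)  # Foundation Model 🧠⚡
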
main.py CHANGED
@@ -1,33 +1,37 @@
+from __future__ import annotations
+
 from pathlib import Path
 
-from apscheduler.schedulers.background import BackgroundScheduler
-import pandas as pd
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter
-
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
 from constants import Constants, model_type_emoji
-
+from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
 
 TITLE = """<h1 align="center" id="space-title">TabArena: Public leaderboard for Tabular methods</h1>"""
 
-INTRODUCTION_TEXT = ("TabArena Leaderboard measures the performance of tabular models on a collection of tabular "
-                     "datasets manually curated. The datasets are collected to make sure they are tabular, with "
-                     "permissive license without ethical issues and so on, we refer to the paper for a full "
-                     "description of our approach.")
+INTRODUCTION_TEXT = (
+    "The TabArena leaderboard measures the performance of tabular models on a collection of "
+    "manually curated tabular datasets. The datasets are selected to ensure that they are "
+    "tabular, permissively licensed, and free of ethical issues; we refer to the paper for a "
+    "full description of our approach."
+)
 
-ABOUT_TEXT = f"""
+ABOUT_TEXT = """
 ## How It Works
 
-To evaluate the leaderboard, follow install instructions in
-`https://github.com/autogluon/tabrepo/tree/tabarena` and run
+To reproduce the leaderboard, follow the installation instructions at
+`https://github.com/autogluon/tabrepo/tree/tabarena` and run
 `https://github.com/autogluon/tabrepo/blob/tabarena/examples/tabarena/run_tabarena_eval.py`.
 
 
 This will generate a leaderboard. You can add your own method and contact the authors if you want it to be added
-to the leaderboard. We require method to have public code available to be considered in the leaderboard.
+to the leaderboard. We require methods to have public code available to be considered for the leaderboard.
 """
 
-CITATION_BUTTON_LABEL = "If you use this leaderboard in your research please cite the following:"
+CITATION_BUTTON_LABEL = (
+    "If you use this leaderboard in your research, please cite the following:"
+)
 CITATION_BUTTON_TEXT = r"""
 @article{
 TODO update when arxiv version is ready,
@@ -38,11 +42,12 @@ TODO update when arxiv version is ready,
 def get_model_family(model_name: str) -> str:
     prefixes_mapping = {
         Constants.automl: ["AutoGluon"],
-        Constants.finetuned: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
-        Constants.tree: ["GBM", "CAT", "EBM", "XGB"],
+        Constants.neural_network: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
+        Constants.tree: ["GBM", "CAT", "EBM", "XGB", "XT", "RF"],
         Constants.foundational: ["TABDPT", "TABICL", "TABPFN"],
-        Constants.baseline: ["KNN", "LR"]
+        Constants.baseline: ["KNN", "LR"],
     }
+
     for method_type, prefixes in prefixes_mapping.items():
         for prefix in prefixes:
             if prefix.lower() in model_name.lower():
@@ -50,76 +55,159 @@ def get_model_family(model_name: str) -> str:
     return Constants.other
 
 
+def rename_map(model_name: str) -> str:
+    rename_map = {
+        "TABM": "TabM",
+        "REALMLP": "RealMLP",
+        "GBM": "LightGBM",
+        "CAT": "CatBoost",
+        "XGB": "XGBoost",
+        "XT": "ExtraTrees",
+        "RF": "RandomForest",
+        "MNCA": "ModernNCA",
+        "NN_TORCH": "TorchMLP",
+        "FASTAI": "FastaiMLP",
+        "TABPFN": "TabPFNv2",
+        "EBM": "EBM",
+        "TABDPT": "TabDPT",
+        "TABICL": "TabICL",
+        "KNN": "KNN",
+        "LR": "Linear",
+    }
+
+    for prefix in rename_map:
+        if prefix in model_name:
+            return model_name.replace(prefix, rename_map[prefix])
+
+    return model_name
+
+
 def load_data(filename: str):
     df_leaderboard = pd.read_csv(Path(__file__).parent / "data" / f"{filename}.csv.zip")
-    print(f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}")
+    print(
+        f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}"
+    )
 
     # sort by ELO
-    df_leaderboard.sort_values(by="elo", ascending=False, inplace=True)
+    df_leaderboard = df_leaderboard.sort_values(by="elo", ascending=False)
 
     # add model family information
-    df_leaderboard["family"] = df_leaderboard.loc[:, "method"].apply(
-        lambda s: get_model_family(s) + " " + model_type_emoji[get_model_family(s)]
+
+    df_leaderboard["Type"] = df_leaderboard.loc[:, "method"].apply(
+        lambda s: model_type_emoji[get_model_family(s)]
     )
+    df_leaderboard["TypeName"] = df_leaderboard.loc[:, "method"].apply(
+        lambda s: get_model_family(s)
+    )
+    df_leaderboard["method"] = df_leaderboard["method"].apply(rename_map)
 
     # select only the columns we want to display
-    df_leaderboard = df_leaderboard.loc[:, ["method", "family", "time_train_s", "time_infer_s", "rank", "elo"]]
+    df_leaderboard = df_leaderboard.loc[
+        :, ["Type", "TypeName", "method", "elo", "rank", "time_train_s", "time_infer_s"]
+    ]
 
     # round for better display
     df_leaderboard = df_leaderboard.round(1)
 
     # rename some columns
-    df_leaderboard.rename(columns={
-        "time_train_s": "training time (s)",
-        "time_infer_s": "inference time (s)",
-    }, inplace=True)
+    return df_leaderboard.rename(
+        columns={
+            "time_train_s": "training time (s) [⬇️]",
+            "time_infer_s": "inference time (s) [⬇️]",
+            "method": "Model",
+            "elo": "Elo [⬆️]",
+            "rank": "Rank [⬇️]",
+        }
+    )
 
     # TODO show ELO +/- sem
-    return df_leaderboard
+    # TODO: rename and re-order columns
 
 
 def make_leaderboard(df_leaderboard: pd.DataFrame) -> Leaderboard:
+    df_leaderboard["TypeFilter"] = df_leaderboard["TypeName"].apply(
+        lambda m: f"{m} {model_type_emoji[m]}"
+    )
+    # De-selects but does not filter...
+    # default = df_leaderboard["TypeFilter"].unique().tolist()
+    # default = [(s, s) for s in default if "AutoML" not in s]
+
+    df_leaderboard["Only Default"] = df_leaderboard["Model"].str.endswith("(default)")
+    df_leaderboard["Only Tuned"] = df_leaderboard["Model"].str.endswith("(tuned)")
+    df_leaderboard["Only Tuned + Ensemble"] = df_leaderboard["Model"].str.endswith(
+        "(tuned + ensemble)"
+    ) | df_leaderboard["Model"].str.endswith("(4h)")
+
     return Leaderboard(
         value=df_leaderboard,
-        search_columns=["method"],
+        select_columns=SelectColumns(
+            default_selection=list(df_leaderboard.columns),
+            cant_deselect=["Type", "Model"],
+            label="Select Columns to Display:",
+        ),
+        hide_columns=[
+            "TypeName",
+            "TypeFilter",
+            "RefModel",
+            "Only Default",
+            "Only Tuned",
+            "Only Tuned + Ensemble",
+        ],
+        search_columns=["Model", "Type"],
        filter_columns=[
-            # "method",
-            ColumnFilter("family", type="dropdown", label="Filter by family"),
-        ]
+            ColumnFilter(
+                "TypeFilter", type="checkboxgroup", label="Filter by Model Type"
+            ),
+            ColumnFilter("Only Default", type="boolean", default=False),
+            ColumnFilter("Only Tuned", type="boolean", default=False),
+            ColumnFilter("Only Tuned + Ensemble", type="boolean", default=False),
+        ],
+        bool_checkboxgroup_label="Custom Views (Exclusive, only toggle one at a time):",
     )
 
 
 def main():
-
     demo = gr.Blocks()
     with demo:
         gr.HTML(TITLE)
         gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
-        with gr.Tabs(elem_classes="tab-buttons") as tabs:
-            with gr.TabItem('🏅 Overall', elem_id="llm-benchmark-tab-table", id=2):
+        with gr.Tabs(elem_classes="tab-buttons"):
+            with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-tab-table", id=2):
                 df_leaderboard = load_data("leaderboard-all")
-                leaderboard = make_leaderboard(df_leaderboard)
-
-            with gr.TabItem('🏅 Regression', elem_id="llm-benchmark-tab-table", id=0):
-                df_leaderboard = load_data("leaderboard-regression")
-                leaderboard = make_leaderboard(df_leaderboard)
-
-            with gr.TabItem('🏅 Classification', elem_id="llm-benchmark-tab-table", id=1):
-                df_leaderboard = load_data("leaderboard-classification")
-                leaderboard = make_leaderboard(df_leaderboard)
+                make_leaderboard(df_leaderboard)
+
+            # TODO: decide on which subsets we want to support here.
+            # with gr.TabItem('🏅 Regression', elem_id="llm-benchmark-tab-table", id=0):
+            #     df_leaderboard = load_data("leaderboard-regression")
+            #     leaderboard = make_leaderboard(df_leaderboard)
+            #
+            # with gr.TabItem('🏅 Classification', elem_id="llm-benchmark-tab-table", id=1):
+            #     df_leaderboard = load_data("leaderboard-classification")
+            #     leaderboard = make_leaderboard(df_leaderboard)
+            #
+            # with gr.TabItem('🏅 Classification', elem_id="llm-benchmark-tab-table", id=1):
+            #     df_leaderboard = load_data("leaderboard-classification")
+            #     leaderboard = make_leaderboard(df_leaderboard)
+            #
+            # with gr.TabItem('🏅 TabPFNv2-Compatible', elem_id="llm-benchmark-tab-table", id=1):
+            #     df_leaderboard = load_data("leaderboard-classification")
+            #     leaderboard = make_leaderboard(df_leaderboard)
+            #
+            # with gr.TabItem('🏅 TabICL-Compatible', elem_id="llm-benchmark-tab-table", id=1):
+            #     df_leaderboard = load_data("leaderboard-classification")
+            #     leaderboard = make_leaderboard(df_leaderboard)
 
             with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=4):
                 gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
-                with gr.Row():
-                    with gr.Accordion("📙 Citation", open=False):
-                        citation_button = gr.Textbox(
-                            value=CITATION_BUTTON_TEXT,
-                            label=CITATION_BUTTON_LABEL,
-                            lines=20,
-                            elem_id="citation-button",
-                            show_copy_button=True,
-                        )
+                with gr.Row(), gr.Accordion("📙 Citation", open=False):
+                    gr.Textbox(
+                        value=CITATION_BUTTON_TEXT,
+                        label=CITATION_BUTTON_LABEL,
+                        lines=20,
+                        elem_id="citation-button",
+                        show_copy_button=True,
+                    )
 
     scheduler = BackgroundScheduler()
     # scheduler.add_job(restart_space, "interval", seconds=1800)
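
The custom views added in `make_leaderboard` reduce to suffix checks on the renamed `Model` column, with the AutoML `(4h)` runs folded into the tuned-plus-ensemble view. A minimal sketch of that masking logic on toy data (the method names below are hypothetical):

    import pandas as pd

    # Toy frame mimicking the "Model" column after rename_map has run.
    df = pd.DataFrame({"Model": [
        "LightGBM (default)",
        "LightGBM (tuned)",
        "CatBoost (tuned + ensemble)",
        "AutoGluon (4h)",
    ]})

    # Same masks as make_leaderboard: each boolean column backs one
    # ColumnFilter(type="boolean") toggle in the UI.
    df["Only Default"] = df["Model"].str.endswith("(default)")
    df["Only Tuned"] = df["Model"].str.endswith("(tuned)")
    df["Only Tuned + Ensemble"] = df["Model"].str.endswith(
        "(tuned + ensemble)"
    ) | df["Model"].str.endswith("(4h)")

    print(df.loc[df["Only Tuned + Ensemble"], "Model"].tolist())
    # ['CatBoost (tuned + ensemble)', 'AutoGluon (4h)']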