Commit 1dedb52 · maint: iterate on the LB
Parent(s): 16d8300

Files changed:
- constants.py +9 -9
- main.py +140 -52
constants.py
CHANGED
@@ -1,19 +1,19 @@
 class Constants:
     col_name: str = "method_type"
-    automl: str = "AutoML"
     tree: str = "Tree-based"
-    foundational: str = "…"
-    …
+    foundational: str = "Foundation Model"
+    neural_network: str = "Neural Network"
     baseline: str = "Baseline"
+    # Not Used
     other: str = "Other"
-    …
+    automl: str = "AutoML"

 model_type_emoji = {
-    Constants.tree: "…",
-    Constants.foundational: "…",
-    Constants.…
-    Constants.automl: "🤖",
+    Constants.tree: "🌳",
+    Constants.foundational: "🧠⚡",
+    Constants.neural_network: "🧠🔁",
     Constants.baseline: "📏",
+    # Not used
     Constants.other: "❓",
+    Constants.automl: "🤖",
 }
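As a usage note: downstream code looks display emoji up through the `Constants` names rather than raw strings. A minimal sketch (emoji values as reconstructed above):

    from constants import Constants, model_type_emoji

    # Family name -> display emoji, e.g. for tagging leaderboard rows.
    print(model_type_emoji[Constants.tree])            # 🌳
    print(model_type_emoji[Constants.neural_network])  # 🧠🔁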
main.py
CHANGED
@@ -1,33 +1,37 @@
+from __future__ import annotations
+
 from pathlib import Path

-from apscheduler.schedulers.background import BackgroundScheduler
-import pandas as pd
 import gradio as gr
-…
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
 from constants import Constants, model_type_emoji
-…
+from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns

 TITLE = """<h1 align="center" id="space-title">TabArena: Public leaderboard for Tabular methods</h1>"""

-INTRODUCTION_TEXT = (
-    …
+INTRODUCTION_TEXT = (
+    "TabArena Leaderboard measures the performance of tabular models on a collection of tabular "
+    "datasets manually curated. The datasets are collected to make sure they are tabular, with "
+    "permissive license without ethical issues and so on, we refer to the paper for a full "
+    "description of our approach."
+)

-ABOUT_TEXT = …
+ABOUT_TEXT = """
 ## How It Works.

-To evaluate the leaderboard, follow install instructions in
-`https://github.com/autogluon/tabrepo/tree/tabarena` and run
+To evaluate the leaderboard, follow install instructions in
+`https://github.com/autogluon/tabrepo/tree/tabarena` and run
 `https://github.com/autogluon/tabrepo/blob/tabarena/examples/tabarena/run_tabarena_eval.py`.


 This will generate a leaderboard. You can add your own method and contact the authors if you want it to be added
-to the leaderboard. We require method to have public code available to be considered in the leaderboard.
+to the leaderboard. We require method to have public code available to be considered in the leaderboard.
 """

-CITATION_BUTTON_LABEL = …
+CITATION_BUTTON_LABEL = (
+    "If you use this leaderboard in your research please cite the following:"
+)
 CITATION_BUTTON_TEXT = r"""
 @article{
 TODO update when arxiv version is ready,
@@ -38,11 +42,12 @@ TODO update when arxiv version is ready,
 def get_model_family(model_name: str) -> str:
     prefixes_mapping = {
         Constants.automl: ["AutoGluon"],
-        Constants.…
-        Constants.tree: ["GBM", "CAT", "EBM", "XGB"],
+        Constants.neural_network: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
+        Constants.tree: ["GBM", "CAT", "EBM", "XGB", "XT", "RF"],
         Constants.foundational: ["TABDPT", "TABICL", "TABPFN"],
-        Constants.baseline: ["KNN", "LR"]
+        Constants.baseline: ["KNN", "LR"],
     }
+
     for method_type, prefixes in prefixes_mapping.items():
         for prefix in prefixes:
             if prefix.lower() in model_name.lower():
@@ -50,76 +55,159 @@ def get_model_family(model_name: str) -> str:
     return Constants.other


+def rename_map(model_name: str) -> str:
+    rename_map = {
+        "TABM": "TabM",
+        "REALMLP": "RealMLP",
+        "GBM": "LightGBM",
+        "CAT": "CatBoost",
+        "XGB": "XGBoost",
+        "XT": "ExtraTrees",
+        "RF": "RandomForest",
+        "MNCA": "ModernNCA",
+        "NN_TORCH": "TorchMLP",
+        "FASTAI": "FastaiMLP",
+        "TABPFN": "TabPFNv2",
+        "EBM": "EBM",
+        "TABDPT": "TabDPT",
+        "TABICL": "TabICL",
+        "KNN": "KNN",
+        "LR": "Linear",
+    }
+
+    for prefix in rename_map:
+        if prefix in model_name:
+            return model_name.replace(prefix, rename_map[prefix])
+
+    return model_name
+
+
 def load_data(filename: str):
     df_leaderboard = pd.read_csv(Path(__file__).parent / "data" / f"{filename}.csv.zip")
-    print(…
+    print(
+        f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}"
+    )

     # sort by ELO
-    df_leaderboard.sort_values(by="elo", ascending=False…
+    df_leaderboard = df_leaderboard.sort_values(by="elo", ascending=False)

     # add model family information
-    …
+
+    df_leaderboard["Type"] = df_leaderboard.loc[:, "method"].apply(
+        lambda s: model_type_emoji[get_model_family(s)]
     )
+    df_leaderboard["TypeName"] = df_leaderboard.loc[:, "method"].apply(
+        lambda s: get_model_family(s)
+    )
+    df_leaderboard["method"] = df_leaderboard["method"].apply(rename_map)

     # select only the columns we want to display
-    df_leaderboard = df_leaderboard.loc[…
+    df_leaderboard = df_leaderboard.loc[
+        :, ["Type", "TypeName", "method", "elo", "rank", "time_train_s", "time_infer_s"]
+    ]

     # round for better display
     df_leaderboard = df_leaderboard.round(1)

     # rename some columns
-    df_leaderboard.rename(
-        …
+    return df_leaderboard.rename(
+        columns={
+            "time_train_s": "training time (s) [⬇️]",
+            "time_infer_s": "inference time (s) [⬇️]",
+            "method": "Model",
+            "elo": "Elo [⬆️]",
+            "rank": "Rank [⬇️]",
+        }
+    )

     # TODO show ELO +/- sem
-    …
+    # TODO: rename and re-order columns


 def make_leaderboard(df_leaderboard: pd.DataFrame) -> Leaderboard:
+    df_leaderboard["TypeFiler"] = df_leaderboard["TypeName"].apply(
+        lambda m: f"{m} {model_type_emoji[m]}"
+    )
+    # De-selects but does not filter...
+    # default = df_leaderboard["TypeFiler"].unique().tolist()
+    # default = [(s, s) for s in default if "AutoML" not in s]
+
+    df_leaderboard["Only Default"] = df_leaderboard["Model"].str.endswith("(default)")
+    df_leaderboard["Only Tuned"] = df_leaderboard["Model"].str.endswith("(tuned)")
+    df_leaderboard["Only Tuned + Ensemble"] = df_leaderboard["Model"].str.endswith(
+        "(tuned + ensemble)"
+    ) | df_leaderboard["Model"].str.endswith("(4h)")
+
     return Leaderboard(
         value=df_leaderboard,
-        …
+        select_columns=SelectColumns(
+            default_selection=list(df_leaderboard.columns),
+            cant_deselect=["Type", "Model"],
+            label="Select Columns to Display:",
+        ),
+        hide_columns=[
+            "TypeName",
+            "TypeFiler",
+            "RefModel",
+            "Only Default",
+            "Only Tuned",
+            "Only Tuned + Ensemble",
+        ],
+        search_columns=["Model", "Type"],
         filter_columns=[
-            …
+            ColumnFilter(
+                "TypeFiler", type="checkboxgroup", label="Filter by Model Type"
+            ),
+            ColumnFilter("Only Default", type="boolean", default=False),
+            ColumnFilter("Only Tuned", type="boolean", default=False),
+            ColumnFilter("Only Tuned + Ensemble", type="boolean", default=False),
+        ],
+        bool_checkboxgroup_label="Custom Views (Exclusive, only toggle one at a time):",
     )


 def main():
-    …
     demo = gr.Blocks()
     with demo:
         gr.HTML(TITLE)
         gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

-        with gr.Tabs(elem_classes="tab-buttons")…
-            with gr.TabItem(…
+        with gr.Tabs(elem_classes="tab-buttons"):
+            with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-tab-table", id=2):
                 df_leaderboard = load_data("leaderboard-all")
-                …
+                make_leaderboard(df_leaderboard)
+
+            # TODO: decide on which subsets we want to support here.
+            # with gr.TabItem('🏅 Regression', elem_id="llm-benchmark-tab-table", id=0):
+            #     df_leaderboard = load_data("leaderboard-regression")
+            #     leaderboard = make_leaderboard(df_leaderboard)
+            #
+            # with gr.TabItem('🏅 Classification', elem_id="llm-benchmark-tab-table", id=1):
+            #     df_leaderboard = load_data("leaderboard-classification")
+            #     leaderboard = make_leaderboard(df_leaderboard)
+            #
+            # with gr.TabItem('🏅 Classification', elem_id="llm-benchmark-tab-table", id=1):
+            #     df_leaderboard = load_data("leaderboard-classification")
+            #     leaderboard = make_leaderboard(df_leaderboard)
+            #
+            # with gr.TabItem('🏅 TabPFNv2-Compatible', elem_id="llm-benchmark-tab-table", id=1):
+            #     df_leaderboard = load_data("leaderboard-classification")
+            #     leaderboard = make_leaderboard(df_leaderboard)
+            #
+            # with gr.TabItem('🏅 TabICL-Compatible', elem_id="llm-benchmark-tab-table", id=1):
+            #     df_leaderboard = load_data("leaderboard-classification")
+            #     leaderboard = make_leaderboard(df_leaderboard)

             with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=4):
                 gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
-        with gr.Row():
-            …
-            )
+        with gr.Row(), gr.Accordion("📙 Citation", open=False):
+            gr.Textbox(
+                value=CITATION_BUTTON_TEXT,
+                label=CITATION_BUTTON_LABEL,
+                lines=20,
+                elem_id="citation-button",
+                show_copy_button=True,
+            )

     scheduler = BackgroundScheduler()
     # scheduler.add_job(restart_space, "interval", seconds=1800)
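Because `get_model_family` matches by case-insensitive substring, config suffixes such as "(tuned)" do not disturb the family lookup, and anything unmatched falls through to `Constants.other`. A quick sketch (the method names below are hypothetical):

    from main import get_model_family

    # Case-insensitive substring matching against the prefix lists above.
    get_model_family("GBM (tuned)")         # -> "Tree-based"
    get_model_family("TabPFNv2 (default)")  # -> "Foundation Model"
    get_model_family("SomeNewMethod")       # -> "Other" (fallback)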
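Note that `rename_map`, unlike `get_model_family`, matches case-sensitively and returns after the first hit, so each raw name gets exactly one prefix replaced. A quick sketch (hypothetical inputs):

    from main import rename_map

    # First matching key wins; the suffix is preserved by str.replace.
    rename_map("GBM (tuned + ensemble)")  # -> "LightGBM (tuned + ensemble)"
    rename_map("NN_TORCH (default)")      # -> "TorchMLP (default)"
    rename_map("UnknownMethod")           # -> "UnknownMethod" (unchanged)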
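The reshaping in `load_data` is plain pandas; here is a self-contained sketch of the same pipeline on a toy frame (the column names follow the CSV schema implied by the code; the values are invented):

    import pandas as pd

    # Toy stand-in for data/leaderboard-all.csv.zip.
    df = pd.DataFrame({
        "method": ["GBM (tuned)", "KNN (default)"],
        "elo": [1203.47, 901.12],
        "rank": [2.31, 9.87],
        "time_train_s": [52.44, 0.91],
        "time_infer_s": [0.61, 3.24],
    })
    # Same steps as load_data: sort by Elo, round, rename for display.
    df = df.sort_values(by="elo", ascending=False).round(1)
    df = df.rename(columns={"method": "Model", "elo": "Elo [⬆️]", "rank": "Rank [⬇️]"})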
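The custom views in `make_leaderboard` work by materializing one hidden boolean helper column per view and exposing each through a boolean `ColumnFilter`. The same pattern in isolation (toy data; only API names that appear in this commit are used):

    import gradio as gr
    import pandas as pd
    from gradio_leaderboard import ColumnFilter, Leaderboard

    df = pd.DataFrame({"Model": ["GBM (default)", "GBM (tuned)"]})
    df["Only Default"] = df["Model"].str.endswith("(default)")

    with gr.Blocks():
        # The helper column is hidden from display but still drives the filter.
        Leaderboard(
            value=df,
            hide_columns=["Only Default"],
            filter_columns=[ColumnFilter("Only Default", type="boolean", default=False)],
        )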