Spaces:

TabArena
/

leaderboard

Running

App Files Files Community

geoalgo committed 11 days ago

Commit

16d8300

1 Parent(s): 6d5f6e0

add comments

Browse files

Files changed (2) hide show

constants.py +7 -7
main.py +24 -10

constants.py CHANGED Viewed

@@ -1,4 +1,4 @@
-class MethodTypes:
     col_name: str = "method_type"
     automl: str = "AutoML"
     tree: str = "Tree-based"
@@ -10,10 +10,10 @@ class MethodTypes:
 model_type_emoji = {
-    MethodTypes.tree: "🌴",
-    MethodTypes.foundational: "🧠",
-    MethodTypes.finetuned: "🌐",
-    MethodTypes.automl: "🤖",
-    MethodTypes.baseline: "📏",
-    MethodTypes.other: "❓",
 }

+class Constants:
     col_name: str = "method_type"
     automl: str = "AutoML"
     tree: str = "Tree-based"
 model_type_emoji = {
+    Constants.tree: "🌴",
+    Constants.foundational: "🧠",
+    Constants.finetuned: "🌐",
+    Constants.automl: "🤖",
+    Constants.baseline: "📏",
+    Constants.other: "❓",
 }

main.py CHANGED Viewed

@@ -5,14 +5,14 @@ import pandas as pd
 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter
-from constants import MethodTypes, model_type_emoji
 TITLE = """<h1 align="center" id="space-title">TabArena: Public leaderboard for Tabular methods</h1>"""
 INTRODUCTION_TEXT = ("TabArena Leaderboard measures the performance of tabular models on a collection of tabular "
                      "datasets manually curated. The datasets are collected to make sure they are tabular, with "
-                     "permissive license without ethical issues and so on, we refer to the paper XXX for a "
                      "description of our approach.")
 ABOUT_TEXT = f"""
@@ -37,30 +37,44 @@ TODO update when arxiv version is ready,
 def get_model_family(model_name: str) -> str:
     prefixes_mapping = {
-        MethodTypes.automl: ["AutoGluon"],
-        MethodTypes.finetuned: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
-        MethodTypes.tree: ["GBM", "CAT", "EBM", "XGB"],
-        MethodTypes.foundational: ["TABDPT", "TABICL", "TABPFN"],
-        MethodTypes.baseline: ["KNN", "LR"]
     }
     for method_type, prefixes in prefixes_mapping.items():
         for prefix in prefixes:
             if prefix.lower() in model_name.lower():
                 return method_type
-    return MethodTypes.other
 def load_data(filename: str):
     df_leaderboard = pd.read_csv(Path(__file__).parent / "data" / f"{filename}.csv.zip")
     print(f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}")
-    df_leaderboard["family"] = df_leaderboard.loc[:, "method"].apply(get_model_family)
-    df_leaderboard["family"] = df_leaderboard.loc[:, "family"].apply(lambda s: s + " " + model_type_emoji[s])
     df_leaderboard = df_leaderboard.loc[:, ["method", "family", "time_train_s", "time_infer_s", "rank", "elo"]]
     df_leaderboard = df_leaderboard.round(1)
     df_leaderboard.rename(columns={
         "time_train_s": "training time (s)",
         "time_infer_s": "inference time (s)",
     }, inplace=True)
     return df_leaderboard

 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter
+from constants import Constants, model_type_emoji
 TITLE = """<h1 align="center" id="space-title">TabArena: Public leaderboard for Tabular methods</h1>"""
 INTRODUCTION_TEXT = ("TabArena Leaderboard measures the performance of tabular models on a collection of tabular "
                      "datasets manually curated. The datasets are collected to make sure they are tabular, with "
+                     "permissive license without ethical issues and so on, we refer to the paper for a full "
                      "description of our approach.")
 ABOUT_TEXT = f"""
 def get_model_family(model_name: str) -> str:
     prefixes_mapping = {
+        Constants.automl: ["AutoGluon"],
+        Constants.finetuned: ["REALMLP", "TabM", "FASTAI", "MNCA", "NN_TORCH"],
+        Constants.tree: ["GBM", "CAT", "EBM", "XGB"],
+        Constants.foundational: ["TABDPT", "TABICL", "TABPFN"],
+        Constants.baseline: ["KNN", "LR"]
     }
     for method_type, prefixes in prefixes_mapping.items():
         for prefix in prefixes:
             if prefix.lower() in model_name.lower():
                 return method_type
+    return Constants.other
 def load_data(filename: str):
     df_leaderboard = pd.read_csv(Path(__file__).parent / "data" / f"{filename}.csv.zip")
     print(f"Loaded dataframe with {len(df_leaderboard)} rows and columns {df_leaderboard.columns}")
+    # sort by ELO
+    df_leaderboard.sort_values(by="elo", ascending=False, inplace=True)
+    # add model family information
+    df_leaderboard["family"] = df_leaderboard.loc[:, "method"].apply(
+        lambda s: get_model_family(s) + " " + model_type_emoji[get_model_family(s)]
+    )
+    # select only the columns we want to display
     df_leaderboard = df_leaderboard.loc[:, ["method", "family", "time_train_s", "time_infer_s", "rank", "elo"]]
+    # round for better display
     df_leaderboard = df_leaderboard.round(1)
+    # rename some columns
     df_leaderboard.rename(columns={
         "time_train_s": "training time (s)",
         "time_infer_s": "inference time (s)",
     }, inplace=True)
+    # TODO show ELO +/- sem
     return df_leaderboard