fev-leaderboard

Running

App Files Files Community

shchuro committed 11 days ago

Commit

72f0175

1 Parent(s): a30697b

Fix colorbar

Browse files

Files changed (1) hide show

src/utils.py +80 -19

src/utils.py CHANGED Viewed

@@ -53,12 +53,22 @@ MODEL_CONFIG = {
     "sundial-base": ("thuml/sundial-base-128m", "Tsinghua University", True, "DL"),
     "ttm-r2": ("ibm-granite/granite-timeseries-ttm-r2", "IBM", True, "DL"),
     # Task-specific models
-    "stat. ensemble": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "autoarima": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "autotheta": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "autoets": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "seasonalnaive": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
-    "seasonal naive": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "drift": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "naive": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
 }
@@ -130,7 +140,10 @@ def format_leaderboard(df: pd.DataFrame):
     df["zero_shot"] = df["model_name"].apply(get_zero_shot_status)
     # Format leakage column: rounded integer percentage for zero-shot models, 0 for all other models
     df["training_corpus_overlap"] = df.apply(
-        lambda row: int(round(row["training_corpus_overlap"] * 100)) if row["zero_shot"] == "✓" else 0, axis=1
     )
     df["link"] = df["model_name"].apply(get_model_link)
     df["org"] = df["model_name"].apply(get_model_organization)
@@ -150,7 +163,12 @@ def format_leaderboard(df: pd.DataFrame):
     return (
         df.style.map(highlight_model_type_color, subset=["model_name"])
         .map(lambda x: "font-weight: bold", subset=["zero_shot"])
-        .apply(lambda x: ["background-color: #f8f9fa" if i % 2 == 1 else "" for i in range(len(x))], axis=0)
     )
@@ -164,12 +182,18 @@ def construct_bar_chart(df: pd.DataFrame, col: str, metric_name: str):
         alt.Tooltip(f"{col}_upper:Q", title="95% CI Upper", format=".2f"),
     ]
-    base_encode = {"y": alt.Y("model_name:N", title="Forecasting Model", sort=None), "tooltip": tooltip}
     bars = (
         alt.Chart(df)
         .mark_bar(color=COLORS["bar_fill"], cornerRadius=4)
-        .encode(x=alt.X(f"{col}:Q", title=f"{label} (%)", scale=alt.Scale(zero=False)), **base_encode)
     )
     error_bars = (
@@ -207,7 +231,9 @@ def construct_pairwise_chart(df: pd.DataFrame, col: str, metric_name: str):
     for c in [col, f"{col}_lower", f"{col}_upper"]:
         df[c] *= 100
-    model_order = df.groupby("model_1")[col].mean().sort_values(ascending=False).index.tolist()
     tooltip = [
         alt.Tooltip("model_1:N", title="Model 1"),
@@ -218,34 +244,56 @@ def construct_pairwise_chart(df: pd.DataFrame, col: str, metric_name: str):
     ]
     base = alt.Chart(df).encode(
-        x=alt.X("model_2:N", sort=model_order, title="Model 2", axis=alt.Axis(orient="top", labelAngle=-90)),
         y=alt.Y("model_1:N", sort=model_order, title="Model 1"),
     )
     heatmap = base.mark_rect().encode(
         color=alt.Color(
             f"{col}:Q",
-            legend=alt.Legend(title=f"{cbar_label} (%)", direction="vertical", orient="right"),
-            scale=alt.Scale(scheme=HEATMAP_COLOR_SCHEME, domain=domain, domainMid=domain_mid, clamp=True),
         ),
         tooltip=tooltip,
     )
     text_main = base.mark_text(dy=-8, fontSize=8, baseline="top", yOffset=5).encode(
         text=alt.Text(f"{col}:Q", format=".1f"),
-        color=alt.condition(text_condition, alt.value(COLORS["text_white"]), alt.value(COLORS["text_black"])),
         tooltip=tooltip,
     )
     return (
         (heatmap + text_main)
-        .properties(height=550, title={"text": f"Pairwise {cbar_label} ({metric_name}) with 95% CIs", "fontSize": 16})
         .configure_axis(labelFontSize=11, titleFontSize=13, titleFontWeight="bold")
         .resolve_scale(color="independent")
     )
-def construct_pivot_table_from_df(errors: pd.DataFrame, metric_name: str) -> pd.io.formats.style.Styler:
     """Construct styled pivot table from precomputed DataFrame."""
     def highlight_by_position(styler):
@@ -265,7 +313,8 @@ def construct_pivot_table_from_df(errors: pd.DataFrame, metric_name: str) -> pd.
                 if style_parts:
                     styler = styler.map(
-                        lambda x, s="; ".join(style_parts): s, subset=pd.IndexSlice[row_idx:row_idx, col_idx:col_idx]
                     )
         return styler
@@ -273,11 +322,20 @@ def construct_pivot_table_from_df(errors: pd.DataFrame, metric_name: str) -> pd.
 def construct_pivot_table(
-    summaries: pd.DataFrame, metric_name: str, baseline_model: str, leakage_imputation_model: str
 ) -> pd.io.formats.style.Styler:
-    errors = fev.pivot_table(summaries=summaries, metric_column=metric_name, task_columns=["task_name"])
     train_overlap = (
-        fev.pivot_table(summaries=summaries, metric_column="trained_on_this_dataset", task_columns=["task_name"])
         .fillna(False)
         .astype(bool)
     )
@@ -312,12 +370,15 @@ def construct_pivot_table(
                     style_parts.append(f"color: {COLORS['leakage_impute']}")
                 elif is_imputed_baseline.loc[row_idx, col_idx]:
                     style_parts.append(f"color: {COLORS['failure_impute']}")
-                elif not style_parts or (len(style_parts) == 1 and "font-weight" in style_parts[0]):
                     style_parts.append(f"color: {COLORS['text_default']}")
                 if style_parts:
                     styler = styler.map(
-                        lambda x, s="; ".join(style_parts): s, subset=pd.IndexSlice[row_idx:row_idx, col_idx:col_idx]
                     )
         return styler

     "sundial-base": ("thuml/sundial-base-128m", "Tsinghua University", True, "DL"),
     "ttm-r2": ("ibm-granite/granite-timeseries-ttm-r2", "IBM", True, "DL"),
     # Task-specific models
+    "stat. ensemble": (
+        "https://nixtlaverse.nixtla.io/statsforecast/",
+        "—",
+        False,
+        "ST",
+    ),
     "autoarima": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "autotheta": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "autoets": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "seasonalnaive": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
+    "seasonal naive": (
+        "https://nixtlaverse.nixtla.io/statsforecast/",
+        "—",
+        False,
+        "ST",
+    ),
     "drift": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
     "naive": ("https://nixtlaverse.nixtla.io/statsforecast/", "—", False, "ST"),
 }
     df["zero_shot"] = df["model_name"].apply(get_zero_shot_status)
     # Format leakage column: rounded integer percentage for zero-shot models, 0 for all other models
     df["training_corpus_overlap"] = df.apply(
+        lambda row: int(round(row["training_corpus_overlap"] * 100))
+        if row["zero_shot"] == "✓"
+        else 0,
+        axis=1,
     )
     df["link"] = df["model_name"].apply(get_model_link)
     df["org"] = df["model_name"].apply(get_model_organization)
     return (
         df.style.map(highlight_model_type_color, subset=["model_name"])
         .map(lambda x: "font-weight: bold", subset=["zero_shot"])
+        .apply(
+            lambda x: [
+                "background-color: #f8f9fa" if i % 2 == 1 else "" for i in range(len(x))
+            ],
+            axis=0,
+        )
     )
         alt.Tooltip(f"{col}_upper:Q", title="95% CI Upper", format=".2f"),
     ]
+    base_encode = {
+        "y": alt.Y("model_name:N", title="Forecasting Model", sort=None),
+        "tooltip": tooltip,
+    }
     bars = (
         alt.Chart(df)
         .mark_bar(color=COLORS["bar_fill"], cornerRadius=4)
+        .encode(
+            x=alt.X(f"{col}:Q", title=f"{label} (%)", scale=alt.Scale(zero=False)),
+            **base_encode,
+        )
     )
     error_bars = (
     for c in [col, f"{col}_lower", f"{col}_upper"]:
         df[c] *= 100
+    model_order = (
+        df.groupby("model_1")[col].mean().sort_values(ascending=False).index.tolist()
+    )
     tooltip = [
         alt.Tooltip("model_1:N", title="Model 1"),
     ]
     base = alt.Chart(df).encode(
+        x=alt.X(
+            "model_2:N",
+            sort=model_order,
+            title="Model 2",
+            axis=alt.Axis(orient="top", labelAngle=-90),
+        ),
         y=alt.Y("model_1:N", sort=model_order, title="Model 1"),
     )
     heatmap = base.mark_rect().encode(
         color=alt.Color(
             f"{col}:Q",
+            legend=None,
+            scale=alt.Scale(
+                scheme=HEATMAP_COLOR_SCHEME,
+                domain=domain,
+                domainMid=domain_mid,
+                clamp=True,
+            ),
         ),
         tooltip=tooltip,
     )
     text_main = base.mark_text(dy=-8, fontSize=8, baseline="top", yOffset=5).encode(
         text=alt.Text(f"{col}:Q", format=".1f"),
+        color=alt.condition(
+            text_condition,
+            alt.value(COLORS["text_white"]),
+            alt.value(COLORS["text_black"]),
+        ),
         tooltip=tooltip,
     )
     return (
         (heatmap + text_main)
+        .properties(
+            height=550,
+            title={
+                "text": f"Pairwise {cbar_label} ({metric_name}) with 95% CIs",
+                "fontSize": 16,
+            },
+        )
         .configure_axis(labelFontSize=11, titleFontSize=13, titleFontWeight="bold")
         .resolve_scale(color="independent")
     )
+def construct_pivot_table_from_df(
+    errors: pd.DataFrame, metric_name: str
+) -> pd.io.formats.style.Styler:
     """Construct styled pivot table from precomputed DataFrame."""
     def highlight_by_position(styler):
                 if style_parts:
                     styler = styler.map(
+                        lambda x, s="; ".join(style_parts): s,
+                        subset=pd.IndexSlice[row_idx:row_idx, col_idx:col_idx],
                     )
         return styler
 def construct_pivot_table(
+    summaries: pd.DataFrame,
+    metric_name: str,
+    baseline_model: str,
+    leakage_imputation_model: str,
 ) -> pd.io.formats.style.Styler:
+    errors = fev.pivot_table(
+        summaries=summaries, metric_column=metric_name, task_columns=["task_name"]
+    )
     train_overlap = (
+        fev.pivot_table(
+            summaries=summaries,
+            metric_column="trained_on_this_dataset",
+            task_columns=["task_name"],
+        )
         .fillna(False)
         .astype(bool)
     )
                     style_parts.append(f"color: {COLORS['leakage_impute']}")
                 elif is_imputed_baseline.loc[row_idx, col_idx]:
                     style_parts.append(f"color: {COLORS['failure_impute']}")
+                elif not style_parts or (
+                    len(style_parts) == 1 and "font-weight" in style_parts[0]
+                ):
                     style_parts.append(f"color: {COLORS['text_default']}")
                 if style_parts:
                     styler = styler.map(
+                        lambda x, s="; ".join(style_parts): s,
+                        subset=pd.IndexSlice[row_idx:row_idx, col_idx:col_idx],
                     )
         return styler