Spaces:
Running
Running
Update src/leaderboard.py
Browse files- src/leaderboard.py +100 -31
src/leaderboard.py
CHANGED
@@ -82,13 +82,27 @@ def load_leaderboard() -> pd.DataFrame:
|
|
82 |
else:
|
83 |
df[col] = ""
|
84 |
|
85 |
-
# Ensure proper data types for numeric columns
|
86 |
numeric_columns = [
|
87 |
col for col in df.columns
|
88 |
if any(x in col for x in ["quality", "bleu", "chrf", "ci_", "samples", "pairs"])
|
89 |
]
|
|
|
90 |
for col in numeric_columns:
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
print(f"✅ Loaded leaderboard with {len(df)} entries")
|
94 |
return df
|
@@ -270,31 +284,75 @@ def get_track_leaderboard(
|
|
270 |
) -> pd.DataFrame:
|
271 |
"""Get leaderboard for a specific track with filtering."""
|
272 |
|
|
|
|
|
273 |
if df.empty:
|
|
|
274 |
return df
|
275 |
|
276 |
track_quality_col = f"{track}_{metric}"
|
277 |
|
278 |
# Ensure columns exist
|
279 |
if track_quality_col not in df.columns:
|
280 |
-
print(f"Warning: Missing column for track {track}")
|
|
|
281 |
return pd.DataFrame()
|
282 |
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
|
300 |
def prepare_leaderboard_display(df: pd.DataFrame, track: str) -> pd.DataFrame:
|
@@ -320,20 +378,31 @@ def prepare_leaderboard_display(df: pd.DataFrame, track: str) -> pd.DataFrame:
|
|
320 |
available_columns = [col for col in base_columns + track_columns if col in df.columns]
|
321 |
display_df = df[available_columns].copy()
|
322 |
|
323 |
-
# Format numeric columns
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
331 |
|
332 |
-
|
333 |
-
|
334 |
-
display_df[col] = display_df[col].apply(
|
335 |
-
lambda x: fmt.format(float(x)) if pd.notnull(x) else "0.0000"
|
336 |
-
)
|
337 |
|
338 |
# Format confidence intervals
|
339 |
if f"{track}_ci_lower" in display_df.columns and f"{track}_ci_upper" in display_df.columns:
|
|
|
82 |
else:
|
83 |
df[col] = ""
|
84 |
|
85 |
+
# Ensure proper data types for numeric columns with robust conversion
|
86 |
numeric_columns = [
|
87 |
col for col in df.columns
|
88 |
if any(x in col for x in ["quality", "bleu", "chrf", "ci_", "samples", "pairs"])
|
89 |
]
|
90 |
+
|
91 |
for col in numeric_columns:
|
92 |
+
try:
|
93 |
+
# Convert to numeric, coercing errors to NaN, then fill NaN with 0
|
94 |
+
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0.0)
|
95 |
+
# Ensure it's float type for consistency
|
96 |
+
df[col] = df[col].astype(float)
|
97 |
+
except Exception as e:
|
98 |
+
print(f"Warning: Could not convert column {col} to numeric: {e}")
|
99 |
+
df[col] = 0.0
|
100 |
+
|
101 |
+
# Ensure string columns are properly typed
|
102 |
+
string_columns = ["model_name", "author", "model_category", "description", "submission_date", "evaluation_date"]
|
103 |
+
for col in string_columns:
|
104 |
+
if col in df.columns:
|
105 |
+
df[col] = df[col].fillna("").astype(str)
|
106 |
|
107 |
print(f"✅ Loaded leaderboard with {len(df)} entries")
|
108 |
return df
|
|
|
284 |
) -> pd.DataFrame:
|
285 |
"""Get leaderboard for a specific track with filtering."""
|
286 |
|
287 |
+
print(f"Getting track leaderboard for {track}, input df has {len(df)} rows")
|
288 |
+
|
289 |
if df.empty:
|
290 |
+
print("Input DataFrame is empty")
|
291 |
return df
|
292 |
|
293 |
track_quality_col = f"{track}_{metric}"
|
294 |
|
295 |
# Ensure columns exist
|
296 |
if track_quality_col not in df.columns:
|
297 |
+
print(f"Warning: Missing column {track_quality_col} for track {track}")
|
298 |
+
print(f"Available columns: {list(df.columns)}")
|
299 |
return pd.DataFrame()
|
300 |
|
301 |
+
try:
|
302 |
+
# Make a copy to avoid modifying original
|
303 |
+
df_filtered = df.copy()
|
304 |
+
print(f"Created copy with {len(df_filtered)} rows")
|
305 |
+
|
306 |
+
# Filter by category
|
307 |
+
if category_filter != "all":
|
308 |
+
original_count = len(df_filtered)
|
309 |
+
df_filtered = df_filtered[df_filtered["model_category"] == category_filter]
|
310 |
+
print(f"After category filter '{category_filter}': {len(df_filtered)} rows (was {original_count})")
|
311 |
+
|
312 |
+
# Ensure numeric columns are properly typed
|
313 |
+
numeric_columns = [
|
314 |
+
f"{track}_quality", f"{track}_bleu", f"{track}_chrf",
|
315 |
+
f"{track}_ci_lower", f"{track}_ci_upper",
|
316 |
+
f"{track}_samples", f"{track}_pairs"
|
317 |
+
]
|
318 |
+
|
319 |
+
print(f"Converting numeric columns: {[col for col in numeric_columns if col in df_filtered.columns]}")
|
320 |
+
|
321 |
+
for col in numeric_columns:
|
322 |
+
if col in df_filtered.columns:
|
323 |
+
try:
|
324 |
+
# Check original data type
|
325 |
+
print(f"Column {col} dtype: {df_filtered[col].dtype}, sample values: {df_filtered[col].head(3).tolist()}")
|
326 |
+
|
327 |
+
# Convert to numeric
|
328 |
+
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce').fillna(0.0)
|
329 |
+
print(f"Column {col} converted successfully")
|
330 |
+
except Exception as e:
|
331 |
+
print(f"Error converting column {col}: {e}")
|
332 |
+
df_filtered[col] = 0.0
|
333 |
+
|
334 |
+
# Filter to models that have this track
|
335 |
+
original_count = len(df_filtered)
|
336 |
+
quality_mask = df_filtered[track_quality_col] > 0
|
337 |
+
df_filtered = df_filtered[quality_mask]
|
338 |
+
print(f"After quality filter (>{track_quality_col} > 0): {len(df_filtered)} rows (was {original_count})")
|
339 |
+
|
340 |
+
if df_filtered.empty:
|
341 |
+
print("No models found with quality > 0 for this track")
|
342 |
+
return df_filtered
|
343 |
+
|
344 |
+
# Sort by track-specific metric
|
345 |
+
print(f"Sorting by {track_quality_col}")
|
346 |
+
df_filtered = df_filtered.sort_values(track_quality_col, ascending=False).reset_index(drop=True)
|
347 |
+
print(f"Sorted successfully, final result has {len(df_filtered)} rows")
|
348 |
+
|
349 |
+
return df_filtered
|
350 |
+
|
351 |
+
except Exception as e:
|
352 |
+
print(f"Error in get_track_leaderboard: {e}")
|
353 |
+
import traceback
|
354 |
+
traceback.print_exc()
|
355 |
+
return pd.DataFrame()
|
356 |
|
357 |
|
358 |
def prepare_leaderboard_display(df: pd.DataFrame, track: str) -> pd.DataFrame:
|
|
|
378 |
available_columns = [col for col in base_columns + track_columns if col in df.columns]
|
379 |
display_df = df[available_columns].copy()
|
380 |
|
381 |
+
# Format numeric columns safely
|
382 |
+
def safe_format(value, precision=4):
    """Format a numeric value as a fixed-point string.

    Args:
        value: Anything ``float()`` accepts. ``None``, NaN and ``pd.NA``
            are treated as missing.
        precision: Number of digits to show after the decimal point.

    Returns:
        ``value`` rendered with ``precision`` decimals. Missing or
        non-convertible values are rendered as zero at the same precision,
        so placeholders always line up with real values in the column.
    """
    try:
        if value is None or pd.isna(value):
            number = 0.0
        else:
            number = float(value)
    except (ValueError, TypeError):
        # Non-numeric text, or an array-like whose NA check is ambiguous.
        number = 0.0
    # Derive the placeholder from ``precision`` instead of hard-coding the
    # "0.0000"/"0.00" pair, which was only right for precisions 4 and 2.
    return f"{number:.{precision}f}"
|
390 |
+
|
391 |
+
# Apply formatting to numeric columns
|
392 |
+
if f"{track}_quality" in display_df.columns:
|
393 |
+
display_df[f"{track}_quality"] = display_df[f"{track}_quality"].apply(lambda x: safe_format(x, 4))
|
394 |
+
|
395 |
+
if f"{track}_bleu" in display_df.columns:
|
396 |
+
display_df[f"{track}_bleu"] = display_df[f"{track}_bleu"].apply(lambda x: safe_format(x, 2))
|
397 |
+
|
398 |
+
if f"{track}_chrf" in display_df.columns:
|
399 |
+
display_df[f"{track}_chrf"] = display_df[f"{track}_chrf"].apply(lambda x: safe_format(x, 4))
|
400 |
+
|
401 |
+
if f"{track}_ci_lower" in display_df.columns:
|
402 |
+
display_df[f"{track}_ci_lower"] = display_df[f"{track}_ci_lower"].apply(lambda x: safe_format(x, 4))
|
403 |
|
404 |
+
if f"{track}_ci_upper" in display_df.columns:
|
405 |
+
display_df[f"{track}_ci_upper"] = display_df[f"{track}_ci_upper"].apply(lambda x: safe_format(x, 4))
|
|
|
|
|
|
|
406 |
|
407 |
# Format confidence intervals
|
408 |
if f"{track}_ci_lower" in display_df.columns and f"{track}_ci_upper" in display_df.columns:
|