Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -98,6 +98,14 @@ def initialize_data():
|
|
98 |
print("π Loading leaderboard...")
|
99 |
current_leaderboard = load_leaderboard()
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
print(f"✅ Initialization complete!")
|
102 |
print(f" - Test set: {len(public_test_set):,} samples")
|
103 |
print(f" - Current models: {len(current_leaderboard)}")
|
@@ -106,6 +114,7 @@ def initialize_data():
|
|
106 |
|
107 |
except Exception as e:
|
108 |
print(f"❌ Initialization failed: {e}")
|
|
|
109 |
traceback.print_exc()
|
110 |
return False
|
111 |
|
@@ -263,42 +272,102 @@ def evaluate_submission(
|
|
263 |
def refresh_track_leaderboard(track: str, search_query: str = "", category_filter: str = "all") -> Tuple[pd.DataFrame, object, object, str]:
|
264 |
"""Refresh leaderboard for a specific track with filters."""
|
265 |
try:
|
|
|
|
|
266 |
global current_leaderboard
|
267 |
if current_leaderboard is None:
|
|
|
268 |
current_leaderboard = load_leaderboard()
|
269 |
|
270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
|
272 |
# Apply search filter
|
273 |
if search_query and not track_leaderboard.empty:
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
### π {track_config['name']} Statistics
|
288 |
|
289 |
- **Total Models**: {len(track_leaderboard)}
|
290 |
-
- **Best Model**: {
|
291 |
-
- **Best Score**: {
|
292 |
|
293 |
### 🔬 Track Information:
|
294 |
-
{track_config
|
295 |
-
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
297 |
return display_df, ranking_plot, comparison_plot, stats_text
|
298 |
|
299 |
except Exception as e:
|
300 |
error_msg = f"Error loading {track} leaderboard: {str(e)}"
|
301 |
-
print(error_msg)
|
|
|
|
|
302 |
return pd.DataFrame(), None, None, error_msg
|
303 |
|
304 |
def get_language_pair_comparison(track: str) -> Tuple[pd.DataFrame, object]:
|
@@ -726,10 +795,33 @@ with gr.Blocks(
|
|
726 |
outputs=[pairs_table, pairs_comparison_plot]
|
727 |
)
|
728 |
|
729 |
-
# Load initial data
|
730 |
def load_initial_data():
|
731 |
-
|
732 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
733 |
|
734 |
demo.load(
|
735 |
fn=load_initial_data,
|
|
|
98 |
print("π Loading leaderboard...")
|
99 |
current_leaderboard = load_leaderboard()
|
100 |
|
101 |
+
# Debug leaderboard content
|
102 |
+
print(f"Leaderboard loaded with {len(current_leaderboard)} entries")
|
103 |
+
if not current_leaderboard.empty:
|
104 |
+
print(f"Leaderboard columns: {list(current_leaderboard.columns)}")
|
105 |
+
print(f"Sample row types: {current_leaderboard.dtypes.to_dict()}")
|
106 |
+
else:
|
107 |
+
print("Leaderboard is empty - will show empty interface")
|
108 |
+
|
109 |
print(f"✅ Initialization complete!")
|
110 |
print(f" - Test set: {len(public_test_set):,} samples")
|
111 |
print(f" - Current models: {len(current_leaderboard)}")
|
|
|
114 |
|
115 |
except Exception as e:
|
116 |
print(f"❌ Initialization failed: {e}")
|
117 |
+
import traceback
|
118 |
traceback.print_exc()
|
119 |
return False
|
120 |
|
|
|
272 |
def refresh_track_leaderboard(track: str, search_query: str = "", category_filter: str = "all") -> Tuple[pd.DataFrame, object, object, str]:
|
273 |
"""Refresh leaderboard for a specific track with filters."""
|
274 |
try:
|
275 |
+
print(f"Refreshing {track} leaderboard...")
|
276 |
+
|
277 |
global current_leaderboard
|
278 |
if current_leaderboard is None:
|
279 |
+
print("Loading leaderboard...")
|
280 |
current_leaderboard = load_leaderboard()
|
281 |
|
282 |
+
print(f"Leaderboard loaded with {len(current_leaderboard)} entries")
|
283 |
+
|
284 |
+
# Get track leaderboard with robust error handling
|
285 |
+
try:
|
286 |
+
print(f"Getting track leaderboard for {track}...")
|
287 |
+
track_leaderboard = get_track_leaderboard(current_leaderboard, track, category_filter=category_filter)
|
288 |
+
print(f"Track leaderboard has {len(track_leaderboard)} entries")
|
289 |
+
except Exception as e:
|
290 |
+
print(f"Error getting track leaderboard: {e}")
|
291 |
+
track_leaderboard = pd.DataFrame()
|
292 |
|
293 |
# Apply search filter
|
294 |
if search_query and not track_leaderboard.empty:
|
295 |
+
try:
|
296 |
+
print(f"Applying search filter: {search_query}")
|
297 |
+
query_lower = search_query.lower()
|
298 |
+
mask = (
|
299 |
+
track_leaderboard['model_name'].str.lower().str.contains(query_lower, na=False) |
|
300 |
+
track_leaderboard['author'].str.lower().str.contains(query_lower, na=False)
|
301 |
+
)
|
302 |
+
track_leaderboard = track_leaderboard[mask]
|
303 |
+
print(f"After search filter: {len(track_leaderboard)} entries")
|
304 |
+
except Exception as e:
|
305 |
+
print(f"Error applying search filter: {e}")
|
306 |
+
|
307 |
+
# Prepare display with error handling
|
308 |
+
try:
|
309 |
+
print("Preparing display...")
|
310 |
+
display_df = prepare_leaderboard_display(track_leaderboard, track)
|
311 |
+
print(f"Display prepared with {len(display_df)} rows")
|
312 |
+
except Exception as e:
|
313 |
+
print(f"Error preparing display: {e}")
|
314 |
+
display_df = pd.DataFrame()
|
315 |
+
|
316 |
+
# Create plots with error handling
|
317 |
+
try:
|
318 |
+
print("Creating ranking plot...")
|
319 |
+
ranking_plot = create_leaderboard_plot(track_leaderboard, track)
|
320 |
+
except Exception as e:
|
321 |
+
print(f"Error creating ranking plot: {e}")
|
322 |
+
ranking_plot = None
|
323 |
+
|
324 |
+
try:
|
325 |
+
print("Creating comparison plot...")
|
326 |
+
comparison_plot = create_performance_comparison_plot(track_leaderboard, track)
|
327 |
+
except Exception as e:
|
328 |
+
print(f"Error creating comparison plot: {e}")
|
329 |
+
comparison_plot = None
|
330 |
+
|
331 |
+
# Generate stats text with safe formatting
|
332 |
+
try:
|
333 |
+
print("Generating stats...")
|
334 |
+
track_config = EVALUATION_TRACKS[track]
|
335 |
+
best_model = "None"
|
336 |
+
best_score = 0.0
|
337 |
+
|
338 |
+
if not track_leaderboard.empty:
|
339 |
+
best_model = str(track_leaderboard.iloc[0]['model_name'])
|
340 |
+
quality_col = f'{track}_quality'
|
341 |
+
if quality_col in track_leaderboard.columns:
|
342 |
+
try:
|
343 |
+
score_val = track_leaderboard.iloc[0][quality_col]
|
344 |
+
best_score = float(score_val) if pd.notnull(score_val) else 0.0
|
345 |
+
except (ValueError, TypeError):
|
346 |
+
best_score = 0.0
|
347 |
+
|
348 |
+
stats_text = f"""
|
349 |
### π {track_config['name']} Statistics
|
350 |
|
351 |
- **Total Models**: {len(track_leaderboard)}
|
352 |
+
- **Best Model**: {best_model}
|
353 |
+
- **Best Score**: {best_score:.4f}
|
354 |
|
355 |
### 🔬 Track Information:
|
356 |
+
{track_config.get('description', 'No description available')}
|
357 |
+
"""
|
358 |
+
print("Stats generated successfully")
|
359 |
+
except Exception as e:
|
360 |
+
print(f"Error generating stats: {e}")
|
361 |
+
stats_text = f"Error loading {track} statistics: {str(e)}"
|
362 |
+
|
363 |
+
print("Track refresh completed successfully")
|
364 |
return display_df, ranking_plot, comparison_plot, stats_text
|
365 |
|
366 |
except Exception as e:
|
367 |
error_msg = f"Error loading {track} leaderboard: {str(e)}"
|
368 |
+
print(f"MAIN ERROR: {error_msg}")
|
369 |
+
import traceback
|
370 |
+
traceback.print_exc()
|
371 |
return pd.DataFrame(), None, None, error_msg
|
372 |
|
373 |
def get_language_pair_comparison(track: str) -> Tuple[pd.DataFrame, object]:
|
|
|
795 |
outputs=[pairs_table, pairs_comparison_plot]
|
796 |
)
|
797 |
|
798 |
+
# Load initial data and update dropdowns
|
799 |
def load_initial_data():
|
800 |
+
try:
|
801 |
+
print("Loading initial data...")
|
802 |
+
global current_leaderboard
|
803 |
+
|
804 |
+
# Make sure we have a leaderboard
|
805 |
+
if current_leaderboard is None:
|
806 |
+
current_leaderboard = load_leaderboard()
|
807 |
+
|
808 |
+
print(f"Current leaderboard has {len(current_leaderboard)} entries")
|
809 |
+
|
810 |
+
# Try to load Google track data
|
811 |
+
try:
|
812 |
+
google_data = refresh_track_leaderboard("google_comparable", "", "all")
|
813 |
+
print("Successfully loaded Google track data")
|
814 |
+
return google_data
|
815 |
+
except Exception as e:
|
816 |
+
print(f"Error loading Google track: {e}")
|
817 |
+
# Return empty data if there's an error
|
818 |
+
empty_df = pd.DataFrame()
|
819 |
+
return (empty_df, None, None, "No data available")
|
820 |
+
|
821 |
+
except Exception as e:
|
822 |
+
print(f"Error in load_initial_data: {e}")
|
823 |
+
empty_df = pd.DataFrame()
|
824 |
+
return (empty_df, None, None, "Error loading data")
|
825 |
|
826 |
demo.load(
|
827 |
fn=load_initial_data,
|