akera committed on
Commit
e179a7b
·
verified ·
1 Parent(s): b9c4788

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -23
app.py CHANGED
@@ -98,6 +98,14 @@ def initialize_data():
98
  print("🏆 Loading leaderboard...")
99
  current_leaderboard = load_leaderboard()
100
 
 
 
 
 
 
 
 
 
101
  print(f"✅ Initialization complete!")
102
  print(f" - Test set: {len(public_test_set):,} samples")
103
  print(f" - Current models: {len(current_leaderboard)}")
@@ -106,6 +114,7 @@ def initialize_data():
106
 
107
  except Exception as e:
108
  print(f"❌ Initialization failed: {e}")
 
109
  traceback.print_exc()
110
  return False
111
 
@@ -263,42 +272,102 @@ def evaluate_submission(
263
  def refresh_track_leaderboard(track: str, search_query: str = "", category_filter: str = "all") -> Tuple[pd.DataFrame, object, object, str]:
264
  """Refresh leaderboard for a specific track with filters."""
265
  try:
 
 
266
  global current_leaderboard
267
  if current_leaderboard is None:
 
268
  current_leaderboard = load_leaderboard()
269
 
270
- track_leaderboard = get_track_leaderboard(current_leaderboard, track, category_filter=category_filter)
 
 
 
 
 
 
 
 
 
271
 
272
  # Apply search filter
273
  if search_query and not track_leaderboard.empty:
274
- query_lower = search_query.lower()
275
- mask = (
276
- track_leaderboard['model_name'].str.lower().str.contains(query_lower, na=False) |
277
- track_leaderboard['author'].str.lower().str.contains(query_lower, na=False)
278
- )
279
- track_leaderboard = track_leaderboard[mask]
280
-
281
- display_df = prepare_leaderboard_display(track_leaderboard, track)
282
- ranking_plot = create_leaderboard_plot(track_leaderboard, track)
283
- comparison_plot = create_performance_comparison_plot(track_leaderboard, track)
284
-
285
- track_config = EVALUATION_TRACKS[track]
286
- stats_text = f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  ### 📊 {track_config['name']} Statistics
288
 
289
  - **Total Models**: {len(track_leaderboard)}
290
- - **Best Model**: {track_leaderboard.iloc[0]['model_name'] if not track_leaderboard.empty else 'None'}
291
- - **Best Score**: {track_leaderboard.iloc[0][f'{track}_quality']:.4f if not track_leaderboard.empty else 0.0}
292
 
293
  ### 🔬 Track Information:
294
- {track_config['description']}
295
- """
296
-
 
 
 
 
 
297
  return display_df, ranking_plot, comparison_plot, stats_text
298
 
299
  except Exception as e:
300
  error_msg = f"Error loading {track} leaderboard: {str(e)}"
301
- print(error_msg)
 
 
302
  return pd.DataFrame(), None, None, error_msg
303
 
304
  def get_language_pair_comparison(track: str) -> Tuple[pd.DataFrame, object]:
@@ -726,10 +795,33 @@ with gr.Blocks(
726
  outputs=[pairs_table, pairs_comparison_plot]
727
  )
728
 
729
- # Load initial data
730
  def load_initial_data():
731
- google_data = refresh_track_leaderboard("google_comparable", "", "all")
732
- return google_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
733
 
734
  demo.load(
735
  fn=load_initial_data,
 
98
  print("🏆 Loading leaderboard...")
99
  current_leaderboard = load_leaderboard()
100
 
101
+ # Debug leaderboard content
102
+ print(f"Leaderboard loaded with {len(current_leaderboard)} entries")
103
+ if not current_leaderboard.empty:
104
+ print(f"Leaderboard columns: {list(current_leaderboard.columns)}")
105
+ print(f"Sample row types: {current_leaderboard.dtypes.to_dict()}")
106
+ else:
107
+ print("Leaderboard is empty - will show empty interface")
108
+
109
  print(f"✅ Initialization complete!")
110
  print(f" - Test set: {len(public_test_set):,} samples")
111
  print(f" - Current models: {len(current_leaderboard)}")
 
114
 
115
  except Exception as e:
116
  print(f"❌ Initialization failed: {e}")
117
+ import traceback
118
  traceback.print_exc()
119
  return False
120
 
 
272
  def refresh_track_leaderboard(track: str, search_query: str = "", category_filter: str = "all") -> Tuple[pd.DataFrame, object, object, str]:
273
  """Refresh leaderboard for a specific track with filters."""
274
  try:
275
+ print(f"Refreshing {track} leaderboard...")
276
+
277
  global current_leaderboard
278
  if current_leaderboard is None:
279
+ print("Loading leaderboard...")
280
  current_leaderboard = load_leaderboard()
281
 
282
+ print(f"Leaderboard loaded with {len(current_leaderboard)} entries")
283
+
284
+ # Get track leaderboard with robust error handling
285
+ try:
286
+ print(f"Getting track leaderboard for {track}...")
287
+ track_leaderboard = get_track_leaderboard(current_leaderboard, track, category_filter=category_filter)
288
+ print(f"Track leaderboard has {len(track_leaderboard)} entries")
289
+ except Exception as e:
290
+ print(f"Error getting track leaderboard: {e}")
291
+ track_leaderboard = pd.DataFrame()
292
 
293
  # Apply search filter
294
  if search_query and not track_leaderboard.empty:
295
+ try:
296
+ print(f"Applying search filter: {search_query}")
297
+ query_lower = search_query.lower()
298
+ mask = (
299
+ track_leaderboard['model_name'].str.lower().str.contains(query_lower, na=False) |
300
+ track_leaderboard['author'].str.lower().str.contains(query_lower, na=False)
301
+ )
302
+ track_leaderboard = track_leaderboard[mask]
303
+ print(f"After search filter: {len(track_leaderboard)} entries")
304
+ except Exception as e:
305
+ print(f"Error applying search filter: {e}")
306
+
307
+ # Prepare display with error handling
308
+ try:
309
+ print("Preparing display...")
310
+ display_df = prepare_leaderboard_display(track_leaderboard, track)
311
+ print(f"Display prepared with {len(display_df)} rows")
312
+ except Exception as e:
313
+ print(f"Error preparing display: {e}")
314
+ display_df = pd.DataFrame()
315
+
316
+ # Create plots with error handling
317
+ try:
318
+ print("Creating ranking plot...")
319
+ ranking_plot = create_leaderboard_plot(track_leaderboard, track)
320
+ except Exception as e:
321
+ print(f"Error creating ranking plot: {e}")
322
+ ranking_plot = None
323
+
324
+ try:
325
+ print("Creating comparison plot...")
326
+ comparison_plot = create_performance_comparison_plot(track_leaderboard, track)
327
+ except Exception as e:
328
+ print(f"Error creating comparison plot: {e}")
329
+ comparison_plot = None
330
+
331
+ # Generate stats text with safe formatting
332
+ try:
333
+ print("Generating stats...")
334
+ track_config = EVALUATION_TRACKS[track]
335
+ best_model = "None"
336
+ best_score = 0.0
337
+
338
+ if not track_leaderboard.empty:
339
+ best_model = str(track_leaderboard.iloc[0]['model_name'])
340
+ quality_col = f'{track}_quality'
341
+ if quality_col in track_leaderboard.columns:
342
+ try:
343
+ score_val = track_leaderboard.iloc[0][quality_col]
344
+ best_score = float(score_val) if pd.notnull(score_val) else 0.0
345
+ except (ValueError, TypeError):
346
+ best_score = 0.0
347
+
348
+ stats_text = f"""
349
  ### 📊 {track_config['name']} Statistics
350
 
351
  - **Total Models**: {len(track_leaderboard)}
352
+ - **Best Model**: {best_model}
353
+ - **Best Score**: {best_score:.4f}
354
 
355
  ### 🔬 Track Information:
356
+ {track_config.get('description', 'No description available')}
357
+ """
358
+ print("Stats generated successfully")
359
+ except Exception as e:
360
+ print(f"Error generating stats: {e}")
361
+ stats_text = f"Error loading {track} statistics: {str(e)}"
362
+
363
+ print("Track refresh completed successfully")
364
  return display_df, ranking_plot, comparison_plot, stats_text
365
 
366
  except Exception as e:
367
  error_msg = f"Error loading {track} leaderboard: {str(e)}"
368
+ print(f"MAIN ERROR: {error_msg}")
369
+ import traceback
370
+ traceback.print_exc()
371
  return pd.DataFrame(), None, None, error_msg
372
 
373
  def get_language_pair_comparison(track: str) -> Tuple[pd.DataFrame, object]:
 
795
  outputs=[pairs_table, pairs_comparison_plot]
796
  )
797
 
798
+ # Load initial data and update dropdowns
799
  def load_initial_data():
800
+ try:
801
+ print("Loading initial data...")
802
+ global current_leaderboard
803
+
804
+ # Make sure we have a leaderboard
805
+ if current_leaderboard is None:
806
+ current_leaderboard = load_leaderboard()
807
+
808
+ print(f"Current leaderboard has {len(current_leaderboard)} entries")
809
+
810
+ # Try to load Google track data
811
+ try:
812
+ google_data = refresh_track_leaderboard("google_comparable", "", "all")
813
+ print("Successfully loaded Google track data")
814
+ return google_data
815
+ except Exception as e:
816
+ print(f"Error loading Google track: {e}")
817
+ # Return empty data if there's an error
818
+ empty_df = pd.DataFrame()
819
+ return (empty_df, None, None, "No data available")
820
+
821
+ except Exception as e:
822
+ print(f"Error in load_initial_data: {e}")
823
+ empty_df = pd.DataFrame()
824
+ return (empty_df, None, None, "Error loading data")
825
 
826
  demo.load(
827
  fn=load_initial_data,