akera committed on
Commit
a4523eb
·
verified ·
1 Parent(s): ceca234

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -30
app.py CHANGED
@@ -408,32 +408,53 @@ def refresh_track_leaderboard(
408
  if current_leaderboard is None:
409
  current_leaderboard = load_scientific_leaderboard()
410
 
411
- # Get track-specific leaderboard
412
- track_leaderboard = get_track_leaderboard(
413
- current_leaderboard, track, category_filter=category_filter, min_adequacy=min_adequacy
414
- )
 
 
 
 
415
 
416
  # Apply search filter
417
- if search_query:
418
- query_lower = search_query.lower()
419
- mask = (
420
- track_leaderboard['model_name'].str.lower().str.contains(query_lower, na=False) |
421
- track_leaderboard['author'].str.lower().str.contains(query_lower, na=False)
422
- )
423
- track_leaderboard = track_leaderboard[mask]
 
 
 
424
 
425
  # Prepare for display
426
- display_df = prepare_track_leaderboard_display(track_leaderboard, track)
 
 
 
 
427
 
428
- # Create plots
429
- ranking_plot = create_scientific_leaderboard_plot(track_leaderboard, track)
430
- comparison_plot = create_statistical_comparison_plot(track_leaderboard, track)
 
 
 
431
 
432
- # Get track statistics
433
- track_stats = get_scientific_leaderboard_stats(track_leaderboard, track)
434
- track_config = EVALUATION_TRACKS[track]
 
 
435
 
436
- stats_text = f"""
 
 
 
 
 
437
  ### 📊 {track_config['name']} Statistics
438
 
439
  - **Total Models**: {track_stats.get('total_models', 0)}
@@ -447,12 +468,16 @@ def refresh_track_leaderboard(
447
  - All metrics include 95% confidence intervals
448
  - Statistical adequacy verified for reliable comparisons
449
  - {track_config['description']}
450
- """
 
 
 
451
 
452
  return display_df, ranking_plot, comparison_plot, stats_text
453
 
454
  except Exception as e:
455
  error_msg = f"Error loading {track} leaderboard: {str(e)}"
 
456
  empty_df = pd.DataFrame()
457
  return empty_df, None, None, error_msg
458
 
@@ -659,46 +684,125 @@ with gr.Blocks(
659
  margin-bottom: 2rem;
660
  padding: 2rem;
661
  background: linear-gradient(135deg, #1e3a8a 0%, #3730a3 50%, #1e40af 100%);
662
- color: white;
663
  border-radius: 10px;
664
  box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
665
  }
 
 
 
 
 
 
 
 
666
  .track-tab {
667
  border-radius: 8px;
668
  margin: 0.5rem;
669
  padding: 1rem;
670
  border: 2px solid transparent;
 
 
671
  }
672
  .track-tab.google-comparable {
673
  border-color: #1f77b4;
674
- background: linear-gradient(45deg, #f0f9ff, #e0f2fe);
 
675
  }
676
  .track-tab.ug40-complete {
677
  border-color: #ff7f0e;
678
- background: linear-gradient(45deg, #fff7ed, #fed7aa);
 
679
  }
680
  .track-tab.language-pair-matrix {
681
  border-color: #2ca02c;
682
- background: linear-gradient(45deg, #f0fdf4, #dcfce7);
 
683
  }
684
  .metric-box {
685
- background: #f8fafc;
686
  padding: 1rem;
687
  border-radius: 8px;
688
  margin: 0.5rem 0;
689
  border-left: 4px solid #3b82f6;
 
690
  }
691
  .scientific-note {
692
- background: #fef3c7;
693
  border: 1px solid #f59e0b;
694
  border-radius: 8px;
695
  padding: 1rem;
696
  margin: 1rem 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
  }
698
- .adequacy-excellent { border-left-color: #22c55e; }
699
- .adequacy-good { border-left-color: #eab308; }
700
- .adequacy-fair { border-left-color: #f97316; }
701
- .adequacy-insufficient { border-left-color: #ef4444; }
702
  """
703
  ) as demo:
704
 
 
408
  if current_leaderboard is None:
409
  current_leaderboard = load_scientific_leaderboard()
410
 
411
+ # Get track-specific leaderboard with better error handling
412
+ try:
413
+ track_leaderboard = get_track_leaderboard(
414
+ current_leaderboard, track, category_filter=category_filter, min_adequacy=min_adequacy
415
+ )
416
+ except Exception as e:
417
+ print(f"Error getting track leaderboard for {track}: {e}")
418
+ track_leaderboard = pd.DataFrame()
419
 
420
  # Apply search filter
421
+ if search_query and not track_leaderboard.empty:
422
+ try:
423
+ query_lower = search_query.lower()
424
+ mask = (
425
+ track_leaderboard['model_name'].str.lower().str.contains(query_lower, na=False) |
426
+ track_leaderboard['author'].str.lower().str.contains(query_lower, na=False)
427
+ )
428
+ track_leaderboard = track_leaderboard[mask]
429
+ except Exception as e:
430
+ print(f"Error applying search filter: {e}")
431
 
432
  # Prepare for display
433
+ try:
434
+ display_df = prepare_track_leaderboard_display(track_leaderboard, track)
435
+ except Exception as e:
436
+ print(f"Error preparing display: {e}")
437
+ display_df = pd.DataFrame()
438
 
439
+ # Create plots with error handling
440
+ try:
441
+ ranking_plot = create_scientific_leaderboard_plot(track_leaderboard, track)
442
+ except Exception as e:
443
+ print(f"Error creating ranking plot: {e}")
444
+ ranking_plot = None
445
 
446
+ try:
447
+ comparison_plot = create_statistical_comparison_plot(track_leaderboard, track)
448
+ except Exception as e:
449
+ print(f"Error creating comparison plot: {e}")
450
+ comparison_plot = None
451
 
452
+ # Get track statistics
453
+ try:
454
+ track_stats = get_scientific_leaderboard_stats(track_leaderboard, track)
455
+ track_config = EVALUATION_TRACKS[track]
456
+
457
+ stats_text = f"""
458
  ### 📊 {track_config['name']} Statistics
459
 
460
  - **Total Models**: {track_stats.get('total_models', 0)}
 
468
  - All metrics include 95% confidence intervals
469
  - Statistical adequacy verified for reliable comparisons
470
  - {track_config['description']}
471
+ """
472
+ except Exception as e:
473
+ print(f"Error generating stats: {e}")
474
+ stats_text = f"Error loading {track} statistics: {str(e)}"
475
 
476
  return display_df, ranking_plot, comparison_plot, stats_text
477
 
478
  except Exception as e:
479
  error_msg = f"Error loading {track} leaderboard: {str(e)}"
480
+ print(error_msg)
481
  empty_df = pd.DataFrame()
482
  return empty_df, None, None, error_msg
483
 
 
684
  margin-bottom: 2rem;
685
  padding: 2rem;
686
  background: linear-gradient(135deg, #1e3a8a 0%, #3730a3 50%, #1e40af 100%);
687
+ color: white !important;
688
  border-radius: 10px;
689
  box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
690
  }
691
+ .scientific-header h1 {
692
+ color: white !important;
693
+ margin-bottom: 1rem;
694
+ }
695
+ .scientific-header p {
696
+ color: #e0f2fe !important;
697
+ margin: 0.5rem 0;
698
+ }
699
  .track-tab {
700
  border-radius: 8px;
701
  margin: 0.5rem;
702
  padding: 1rem;
703
  border: 2px solid transparent;
704
+ background: var(--background-fill-primary) !important;
705
+ color: var(--body-text-color) !important;
706
  }
707
  .track-tab.google-comparable {
708
  border-color: #1f77b4;
709
+ background: linear-gradient(45deg, #f0f9ff, #e0f2fe) !important;
710
+ color: #1e40af !important;
711
  }
712
  .track-tab.ug40-complete {
713
  border-color: #ff7f0e;
714
+ background: linear-gradient(45deg, #fff7ed, #fed7aa) !important;
715
+ color: #9a3412 !important;
716
  }
717
  .track-tab.language-pair-matrix {
718
  border-color: #2ca02c;
719
+ background: linear-gradient(45deg, #f0fdf4, #dcfce7) !important;
720
+ color: #166534 !important;
721
  }
722
  .metric-box {
723
+ background: var(--background-fill-secondary) !important;
724
  padding: 1rem;
725
  border-radius: 8px;
726
  margin: 0.5rem 0;
727
  border-left: 4px solid #3b82f6;
728
+ color: var(--body-text-color) !important;
729
  }
730
  .scientific-note {
731
+ background: var(--background-fill-secondary) !important;
732
  border: 1px solid #f59e0b;
733
  border-radius: 8px;
734
  padding: 1rem;
735
  margin: 1rem 0;
736
+ color: var(--body-text-color) !important;
737
+ }
738
+ .adequacy-excellent { border-left-color: #22c55e !important; }
739
+ .adequacy-good { border-left-color: #eab308 !important; }
740
+ .adequacy-fair { border-left-color: #f97316 !important; }
741
+ .adequacy-insufficient { border-left-color: #ef4444 !important; }
742
+
743
+ /* Force text visibility in both light and dark modes */
744
+ .markdown {
745
+ color: var(--body-text-color) !important;
746
+ }
747
+ .markdown h1, .markdown h2, .markdown h3, .markdown h4, .markdown h5, .markdown h6 {
748
+ color: var(--body-text-color) !important;
749
+ }
750
+ .markdown p, .markdown li, .markdown td, .markdown th {
751
+ color: var(--body-text-color) !important;
752
+ }
753
+ .markdown strong {
754
+ color: var(--body-text-color) !important;
755
+ font-weight: bold;
756
+ }
757
+ .markdown em {
758
+ color: var(--body-text-color) !important;
759
+ font-style: italic;
760
+ }
761
+ .markdown code {
762
+ background: var(--background-fill-secondary) !important;
763
+ color: var(--body-text-color) !important;
764
+ padding: 0.2em 0.4em;
765
+ border-radius: 4px;
766
+ }
767
+ .markdown pre {
768
+ background: var(--background-fill-secondary) !important;
769
+ color: var(--body-text-color) !important;
770
+ padding: 1rem;
771
+ border-radius: 8px;
772
+ overflow-x: auto;
773
+ }
774
+ .markdown blockquote {
775
+ border-left: 4px solid var(--border-color-primary);
776
+ padding-left: 1rem;
777
+ margin-left: 0;
778
+ color: var(--body-text-color) !important;
779
+ }
780
+
781
+ /* Ensure all text elements are visible */
782
+ * {
783
+ color: var(--body-text-color) !important;
784
+ }
785
+
786
+ /* Override any problematic text colors */
787
+ .gr-markdown, .gr-markdown *,
788
+ .gradio-html, .gradio-html *,
789
+ .gr-textbox, .gr-dropdown,
790
+ .gr-button, label {
791
+ color: var(--body-text-color) !important;
792
+ }
793
+
794
+ /* Special handling for buttons */
795
+ .gr-button {
796
+ background: var(--button-primary-background-fill) !important;
797
+ color: var(--button-primary-text-color) !important;
798
+ border: 1px solid var(--border-color-primary) !important;
799
+ }
800
+
801
+ /* Tables */
802
+ .gr-dataframe, .gr-dataframe * {
803
+ color: var(--body-text-color) !important;
804
+ background: var(--background-fill-primary) !important;
805
  }
 
 
 
 
806
  """
807
  ) as demo:
808