Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -408,32 +408,53 @@ def refresh_track_leaderboard(
|
|
408 |
if current_leaderboard is None:
|
409 |
current_leaderboard = load_scientific_leaderboard()
|
410 |
|
411 |
-
# Get track-specific leaderboard
|
412 |
-
|
413 |
-
|
414 |
-
|
|
|
|
|
|
|
|
|
415 |
|
416 |
# Apply search filter
|
417 |
-
if search_query:
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
|
|
|
|
|
|
424 |
|
425 |
# Prepare for display
|
426 |
-
|
|
|
|
|
|
|
|
|
427 |
|
428 |
-
# Create plots
|
429 |
-
|
430 |
-
|
|
|
|
|
|
|
431 |
|
432 |
-
|
433 |
-
|
434 |
-
|
|
|
|
|
435 |
|
436 |
-
|
|
|
|
|
|
|
|
|
|
|
437 |
### 📊 {track_config['name']} Statistics
|
438 |
|
439 |
- **Total Models**: {track_stats.get('total_models', 0)}
|
@@ -447,12 +468,16 @@ def refresh_track_leaderboard(
|
|
447 |
- All metrics include 95% confidence intervals
|
448 |
- Statistical adequacy verified for reliable comparisons
|
449 |
- {track_config['description']}
|
450 |
-
|
|
|
|
|
|
|
451 |
|
452 |
return display_df, ranking_plot, comparison_plot, stats_text
|
453 |
|
454 |
except Exception as e:
|
455 |
error_msg = f"Error loading {track} leaderboard: {str(e)}"
|
|
|
456 |
empty_df = pd.DataFrame()
|
457 |
return empty_df, None, None, error_msg
|
458 |
|
@@ -659,46 +684,125 @@ with gr.Blocks(
|
|
659 |
margin-bottom: 2rem;
|
660 |
padding: 2rem;
|
661 |
background: linear-gradient(135deg, #1e3a8a 0%, #3730a3 50%, #1e40af 100%);
|
662 |
-
color: white;
|
663 |
border-radius: 10px;
|
664 |
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
665 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
666 |
.track-tab {
|
667 |
border-radius: 8px;
|
668 |
margin: 0.5rem;
|
669 |
padding: 1rem;
|
670 |
border: 2px solid transparent;
|
|
|
|
|
671 |
}
|
672 |
.track-tab.google-comparable {
|
673 |
border-color: #1f77b4;
|
674 |
-
background: linear-gradient(45deg, #f0f9ff, #e0f2fe);
|
|
|
675 |
}
|
676 |
.track-tab.ug40-complete {
|
677 |
border-color: #ff7f0e;
|
678 |
-
background: linear-gradient(45deg, #fff7ed, #fed7aa);
|
|
|
679 |
}
|
680 |
.track-tab.language-pair-matrix {
|
681 |
border-color: #2ca02c;
|
682 |
-
background: linear-gradient(45deg, #f0fdf4, #dcfce7);
|
|
|
683 |
}
|
684 |
.metric-box {
|
685 |
-
background:
|
686 |
padding: 1rem;
|
687 |
border-radius: 8px;
|
688 |
margin: 0.5rem 0;
|
689 |
border-left: 4px solid #3b82f6;
|
|
|
690 |
}
|
691 |
.scientific-note {
|
692 |
-
background:
|
693 |
border: 1px solid #f59e0b;
|
694 |
border-radius: 8px;
|
695 |
padding: 1rem;
|
696 |
margin: 1rem 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
697 |
}
|
698 |
-
.adequacy-excellent { border-left-color: #22c55e; }
|
699 |
-
.adequacy-good { border-left-color: #eab308; }
|
700 |
-
.adequacy-fair { border-left-color: #f97316; }
|
701 |
-
.adequacy-insufficient { border-left-color: #ef4444; }
|
702 |
"""
|
703 |
) as demo:
|
704 |
|
|
|
408 |
if current_leaderboard is None:
|
409 |
current_leaderboard = load_scientific_leaderboard()
|
410 |
|
411 |
+
# Get track-specific leaderboard with better error handling
|
412 |
+
try:
|
413 |
+
track_leaderboard = get_track_leaderboard(
|
414 |
+
current_leaderboard, track, category_filter=category_filter, min_adequacy=min_adequacy
|
415 |
+
)
|
416 |
+
except Exception as e:
|
417 |
+
print(f"Error getting track leaderboard for {track}: {e}")
|
418 |
+
track_leaderboard = pd.DataFrame()
|
419 |
|
420 |
# Apply search filter
|
421 |
+
if search_query and not track_leaderboard.empty:
|
422 |
+
try:
|
423 |
+
query_lower = search_query.lower()
|
424 |
+
mask = (
|
425 |
+
track_leaderboard['model_name'].str.lower().str.contains(query_lower, na=False) |
|
426 |
+
track_leaderboard['author'].str.lower().str.contains(query_lower, na=False)
|
427 |
+
)
|
428 |
+
track_leaderboard = track_leaderboard[mask]
|
429 |
+
except Exception as e:
|
430 |
+
print(f"Error applying search filter: {e}")
|
431 |
|
432 |
# Prepare for display
|
433 |
+
try:
|
434 |
+
display_df = prepare_track_leaderboard_display(track_leaderboard, track)
|
435 |
+
except Exception as e:
|
436 |
+
print(f"Error preparing display: {e}")
|
437 |
+
display_df = pd.DataFrame()
|
438 |
|
439 |
+
# Create plots with error handling
|
440 |
+
try:
|
441 |
+
ranking_plot = create_scientific_leaderboard_plot(track_leaderboard, track)
|
442 |
+
except Exception as e:
|
443 |
+
print(f"Error creating ranking plot: {e}")
|
444 |
+
ranking_plot = None
|
445 |
|
446 |
+
try:
|
447 |
+
comparison_plot = create_statistical_comparison_plot(track_leaderboard, track)
|
448 |
+
except Exception as e:
|
449 |
+
print(f"Error creating comparison plot: {e}")
|
450 |
+
comparison_plot = None
|
451 |
|
452 |
+
# Get track statistics
|
453 |
+
try:
|
454 |
+
track_stats = get_scientific_leaderboard_stats(track_leaderboard, track)
|
455 |
+
track_config = EVALUATION_TRACKS[track]
|
456 |
+
|
457 |
+
stats_text = f"""
|
458 |
### 📊 {track_config['name']} Statistics
|
459 |
|
460 |
- **Total Models**: {track_stats.get('total_models', 0)}
|
|
|
468 |
- All metrics include 95% confidence intervals
|
469 |
- Statistical adequacy verified for reliable comparisons
|
470 |
- {track_config['description']}
|
471 |
+
"""
|
472 |
+
except Exception as e:
|
473 |
+
print(f"Error generating stats: {e}")
|
474 |
+
stats_text = f"Error loading {track} statistics: {str(e)}"
|
475 |
|
476 |
return display_df, ranking_plot, comparison_plot, stats_text
|
477 |
|
478 |
except Exception as e:
|
479 |
error_msg = f"Error loading {track} leaderboard: {str(e)}"
|
480 |
+
print(error_msg)
|
481 |
empty_df = pd.DataFrame()
|
482 |
return empty_df, None, None, error_msg
|
483 |
|
|
|
684 |
margin-bottom: 2rem;
|
685 |
padding: 2rem;
|
686 |
background: linear-gradient(135deg, #1e3a8a 0%, #3730a3 50%, #1e40af 100%);
|
687 |
+
color: white !important;
|
688 |
border-radius: 10px;
|
689 |
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
690 |
}
|
691 |
+
.scientific-header h1 {
|
692 |
+
color: white !important;
|
693 |
+
margin-bottom: 1rem;
|
694 |
+
}
|
695 |
+
.scientific-header p {
|
696 |
+
color: #e0f2fe !important;
|
697 |
+
margin: 0.5rem 0;
|
698 |
+
}
|
699 |
.track-tab {
|
700 |
border-radius: 8px;
|
701 |
margin: 0.5rem;
|
702 |
padding: 1rem;
|
703 |
border: 2px solid transparent;
|
704 |
+
background: var(--background-fill-primary) !important;
|
705 |
+
color: var(--body-text-color) !important;
|
706 |
}
|
707 |
.track-tab.google-comparable {
|
708 |
border-color: #1f77b4;
|
709 |
+
background: linear-gradient(45deg, #f0f9ff, #e0f2fe) !important;
|
710 |
+
color: #1e40af !important;
|
711 |
}
|
712 |
.track-tab.ug40-complete {
|
713 |
border-color: #ff7f0e;
|
714 |
+
background: linear-gradient(45deg, #fff7ed, #fed7aa) !important;
|
715 |
+
color: #9a3412 !important;
|
716 |
}
|
717 |
.track-tab.language-pair-matrix {
|
718 |
border-color: #2ca02c;
|
719 |
+
background: linear-gradient(45deg, #f0fdf4, #dcfce7) !important;
|
720 |
+
color: #166534 !important;
|
721 |
}
|
722 |
.metric-box {
|
723 |
+
background: var(--background-fill-secondary) !important;
|
724 |
padding: 1rem;
|
725 |
border-radius: 8px;
|
726 |
margin: 0.5rem 0;
|
727 |
border-left: 4px solid #3b82f6;
|
728 |
+
color: var(--body-text-color) !important;
|
729 |
}
|
730 |
.scientific-note {
|
731 |
+
background: var(--background-fill-secondary) !important;
|
732 |
border: 1px solid #f59e0b;
|
733 |
border-radius: 8px;
|
734 |
padding: 1rem;
|
735 |
margin: 1rem 0;
|
736 |
+
color: var(--body-text-color) !important;
|
737 |
+
}
|
738 |
+
.adequacy-excellent { border-left-color: #22c55e !important; }
|
739 |
+
.adequacy-good { border-left-color: #eab308 !important; }
|
740 |
+
.adequacy-fair { border-left-color: #f97316 !important; }
|
741 |
+
.adequacy-insufficient { border-left-color: #ef4444 !important; }
|
742 |
+
|
743 |
+
/* Force text visibility in both light and dark modes */
|
744 |
+
.markdown {
|
745 |
+
color: var(--body-text-color) !important;
|
746 |
+
}
|
747 |
+
.markdown h1, .markdown h2, .markdown h3, .markdown h4, .markdown h5, .markdown h6 {
|
748 |
+
color: var(--body-text-color) !important;
|
749 |
+
}
|
750 |
+
.markdown p, .markdown li, .markdown td, .markdown th {
|
751 |
+
color: var(--body-text-color) !important;
|
752 |
+
}
|
753 |
+
.markdown strong {
|
754 |
+
color: var(--body-text-color) !important;
|
755 |
+
font-weight: bold;
|
756 |
+
}
|
757 |
+
.markdown em {
|
758 |
+
color: var(--body-text-color) !important;
|
759 |
+
font-style: italic;
|
760 |
+
}
|
761 |
+
.markdown code {
|
762 |
+
background: var(--background-fill-secondary) !important;
|
763 |
+
color: var(--body-text-color) !important;
|
764 |
+
padding: 0.2em 0.4em;
|
765 |
+
border-radius: 4px;
|
766 |
+
}
|
767 |
+
.markdown pre {
|
768 |
+
background: var(--background-fill-secondary) !important;
|
769 |
+
color: var(--body-text-color) !important;
|
770 |
+
padding: 1rem;
|
771 |
+
border-radius: 8px;
|
772 |
+
overflow-x: auto;
|
773 |
+
}
|
774 |
+
.markdown blockquote {
|
775 |
+
border-left: 4px solid var(--border-color-primary);
|
776 |
+
padding-left: 1rem;
|
777 |
+
margin-left: 0;
|
778 |
+
color: var(--body-text-color) !important;
|
779 |
+
}
|
780 |
+
|
781 |
+
/* Ensure all text elements are visible */
|
782 |
+
* {
|
783 |
+
color: var(--body-text-color) !important;
|
784 |
+
}
|
785 |
+
|
786 |
+
/* Override any problematic text colors */
|
787 |
+
.gr-markdown, .gr-markdown *,
|
788 |
+
.gradio-html, .gradio-html *,
|
789 |
+
.gr-textbox, .gr-dropdown,
|
790 |
+
.gr-button, label {
|
791 |
+
color: var(--body-text-color) !important;
|
792 |
+
}
|
793 |
+
|
794 |
+
/* Special handling for buttons */
|
795 |
+
.gr-button {
|
796 |
+
background: var(--button-primary-background-fill) !important;
|
797 |
+
color: var(--button-primary-text-color) !important;
|
798 |
+
border: 1px solid var(--border-color-primary) !important;
|
799 |
+
}
|
800 |
+
|
801 |
+
/* Tables */
|
802 |
+
.gr-dataframe, .gr-dataframe * {
|
803 |
+
color: var(--body-text-color) !important;
|
804 |
+
background: var(--background-fill-primary) !important;
|
805 |
}
|
|
|
|
|
|
|
|
|
806 |
"""
|
807 |
) as demo:
|
808 |
|