Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -210,10 +210,10 @@ enhanced_css = """
|
|
210 |
--accent-color: #e5a50a;
|
211 |
--warning-color: #ff7800;
|
212 |
--text-color: #333333;
|
213 |
-
--background-color: #
|
214 |
-
--card-background: #
|
215 |
--border-color: #e0e0e0;
|
216 |
-
--shadow-color: rgba(0, 0, 0, 0.
|
217 |
}
|
218 |
|
219 |
/* Typography */
|
@@ -259,6 +259,7 @@ h3 {
|
|
259 |
border-radius: 12px !important;
|
260 |
overflow: hidden !important;
|
261 |
box-shadow: 0 4px 12px var(--shadow-color) !important;
|
|
|
262 |
}
|
263 |
|
264 |
.tab-nav button {
|
@@ -267,12 +268,16 @@ h3 {
|
|
267 |
padding: 0.8rem 1.5rem !important;
|
268 |
border-radius: 0 !important;
|
269 |
transition: all 0.2s ease !important;
|
|
|
|
|
|
|
270 |
}
|
271 |
|
272 |
.tab-nav button.selected {
|
273 |
-
background-color:
|
274 |
-
color:
|
275 |
font-weight: 600 !important;
|
|
|
276 |
}
|
277 |
|
278 |
/* Card styling */
|
@@ -281,6 +286,7 @@ h3 {
|
|
281 |
border: 1px solid var(--border-color) !important;
|
282 |
box-shadow: 0 4px 12px var(--shadow-color) !important;
|
283 |
overflow: hidden !important;
|
|
|
284 |
}
|
285 |
|
286 |
/* Table styling */
|
@@ -292,6 +298,7 @@ table {
|
|
292 |
border-radius: 8px !important;
|
293 |
overflow: hidden !important;
|
294 |
box-shadow: 0 4px 12px var(--shadow-color) !important;
|
|
|
295 |
}
|
296 |
|
297 |
th {
|
@@ -309,18 +316,19 @@ td {
|
|
309 |
border-bottom: 1px solid var(--border-color) !important;
|
310 |
font-size: 1rem !important;
|
311 |
vertical-align: middle !important;
|
|
|
312 |
}
|
313 |
|
314 |
-
tr:
|
315 |
-
|
316 |
}
|
317 |
|
318 |
-
tr:
|
319 |
-
background-color: #
|
320 |
}
|
321 |
|
322 |
-
tr:
|
323 |
-
|
324 |
}
|
325 |
|
326 |
/* Button styling */
|
@@ -362,6 +370,7 @@ button.primary:hover, .gr-button.primary:hover {
|
|
362 |
display: flex !important;
|
363 |
align-items: center !important;
|
364 |
gap: 8px !important;
|
|
|
365 |
}
|
366 |
|
367 |
.gr-radio label:hover {
|
@@ -383,6 +392,8 @@ input, textarea, select {
|
|
383 |
border-radius: 8px !important;
|
384 |
border: 1px solid var(--border-color) !important;
|
385 |
transition: all 0.2s ease !important;
|
|
|
|
|
386 |
}
|
387 |
|
388 |
input:focus, textarea:focus, select:focus {
|
@@ -397,6 +408,7 @@ input:focus, textarea:focus, select:focus {
|
|
397 |
overflow: hidden !important;
|
398 |
margin: 1rem 0 !important;
|
399 |
border: 1px solid var(--border-color) !important;
|
|
|
400 |
}
|
401 |
|
402 |
.gr-accordion-header {
|
@@ -405,17 +417,19 @@ input:focus, textarea:focus, select:focus {
|
|
405 |
font-weight: 600 !important;
|
406 |
font-size: 1.1rem !important;
|
407 |
color: var(--text-color) !important;
|
|
|
408 |
}
|
409 |
|
410 |
.gr-accordion-content {
|
411 |
padding: 1rem !important;
|
412 |
-
background-color:
|
413 |
}
|
414 |
|
415 |
/* Markdown text improvements */
|
416 |
.markdown-text {
|
417 |
font-size: 1.05rem !important;
|
418 |
line-height: 1.7 !important;
|
|
|
419 |
}
|
420 |
|
421 |
.markdown-text p {
|
@@ -433,7 +447,7 @@ input:focus, textarea:focus, select:focus {
|
|
433 |
|
434 |
.markdown-text strong {
|
435 |
font-weight: 600 !important;
|
436 |
-
color: #
|
437 |
}
|
438 |
|
439 |
/* Status indicators */
|
@@ -467,21 +481,25 @@ input:focus, textarea:focus, select:focus {
|
|
467 |
/* Footer */
|
468 |
.footer {
|
469 |
margin-top: 2rem;
|
470 |
-
padding: 1rem;
|
471 |
text-align: center;
|
472 |
font-size: 0.9rem;
|
473 |
-
color: #
|
474 |
border-top: 1px solid var(--border-color);
|
|
|
475 |
}
|
476 |
|
477 |
-
/* Enhanced leaderboard title */
|
478 |
.leaderboard-header {
|
479 |
display: flex;
|
480 |
align-items: center;
|
481 |
justify-content: space-between;
|
482 |
margin-bottom: 1.5rem;
|
483 |
-
padding
|
484 |
-
|
|
|
|
|
|
|
485 |
}
|
486 |
|
487 |
.leaderboard-title {
|
@@ -504,6 +522,9 @@ input:focus, textarea:focus, select:focus {
|
|
504 |
font-size: 0.85rem;
|
505 |
color: #666;
|
506 |
font-style: italic;
|
|
|
|
|
|
|
507 |
}
|
508 |
|
509 |
/* Category selector buttons */
|
@@ -559,10 +580,35 @@ input:focus, textarea:focus, select:focus {
|
|
559 |
color: #cd7f32;
|
560 |
font-weight: bold;
|
561 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
562 |
"""
|
563 |
|
564 |
# Combine with any existing CSS
|
565 |
-
custom_css = enhanced_css
|
566 |
|
567 |
# --- Gradio App Definition ---
|
568 |
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
|
@@ -586,17 +632,19 @@ with demo:
|
|
586 |
""")
|
587 |
|
588 |
# Introduction with enhanced styling
|
589 |
-
gr.
|
|
|
590 |
|
591 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
592 |
with gr.TabItem("π Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
|
593 |
with gr.Column():
|
594 |
-
gr.
|
595 |
-
|
596 |
-
<
|
597 |
-
|
598 |
-
|
599 |
-
|
|
|
600 |
|
601 |
# Enhanced category selector
|
602 |
category_selector = gr.Radio(
|
@@ -604,7 +652,7 @@ with demo:
|
|
604 |
label="Select Performance Domain:",
|
605 |
value="π Overall",
|
606 |
interactive=True,
|
607 |
-
elem_classes="
|
608 |
)
|
609 |
|
610 |
# Visual separator
|
@@ -623,31 +671,32 @@ with demo:
|
|
623 |
)
|
624 |
|
625 |
# Stats cards (visual enhancement)
|
626 |
-
with gr.
|
627 |
-
with gr.
|
628 |
-
gr.
|
629 |
-
|
630 |
-
<div style="
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
gr.
|
637 |
-
|
638 |
-
<div style="
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
gr.
|
645 |
-
|
646 |
-
<div style="
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
|
|
651 |
|
652 |
# Link the radio button change to the update function
|
653 |
category_selector.change(
|
@@ -657,145 +706,147 @@ with demo:
|
|
657 |
)
|
658 |
|
659 |
with gr.TabItem("π About", elem_id="llm-benchmark-tab-about", id=1):
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
<div style="
|
664 |
-
|
665 |
-
<
|
666 |
-
|
667 |
-
|
668 |
-
</div>
|
669 |
-
""")
|
670 |
-
|
671 |
-
# Use the LLM_BENCHMARKS_TEXT variable
|
672 |
-
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
673 |
-
|
674 |
-
# Add methodology cards for visual enhancement
|
675 |
-
with gr.Row():
|
676 |
-
with gr.Column():
|
677 |
-
gr.HTML("""
|
678 |
-
<div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
|
679 |
-
<div style="font-size: 2em; text-align: center; margin-bottom: 15px;">π‘</div>
|
680 |
-
<h3 style="text-align: center; margin-top: 0;">MLE-Lite</h3>
|
681 |
-
<p>Evaluates a model's ability to handle basic machine learning engineering tasks including
|
682 |
-
data preprocessing, feature engineering, model selection, and basic deployment.</p>
|
683 |
-
</div>
|
684 |
-
""")
|
685 |
-
with gr.Column():
|
686 |
-
gr.HTML("""
|
687 |
-
<div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
|
688 |
-
<div style="font-size: 2em; text-align: center; margin-bottom: 15px;">π</div>
|
689 |
-
<h3 style="text-align: center; margin-top: 0;">Tabular</h3>
|
690 |
-
<p>Tests a model's ability to process, analyze and model structured data, including
|
691 |
-
statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
|
692 |
-
</div>
|
693 |
-
""")
|
694 |
-
|
695 |
-
with gr.Row():
|
696 |
-
with gr.Column():
|
697 |
-
gr.HTML("""
|
698 |
-
<div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
|
699 |
-
<div style="font-size: 2em; text-align: center; margin-bottom: 15px;">π</div>
|
700 |
-
<h3 style="text-align: center; margin-top: 0;">NLP</h3>
|
701 |
-
<p>Evaluates natural language processing capabilities including text classification,
|
702 |
-
sentiment analysis, entity recognition, text generation, and language understanding.</p>
|
703 |
-
</div>
|
704 |
-
""")
|
705 |
-
with gr.Column():
|
706 |
-
gr.HTML("""
|
707 |
-
<div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
|
708 |
-
<div style="font-size: 2em; text-align: center; margin-bottom: 15px;">ποΈ</div>
|
709 |
-
<h3 style="text-align: center; margin-top: 0;">CV</h3>
|
710 |
-
<p>Tests computer vision capabilities including image classification, object detection,
|
711 |
-
image generation, and visual understanding tasks across various domains.</p>
|
712 |
</div>
|
713 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
714 |
|
715 |
# Optional: Uncomment if you want to re-enable the Submit tab
|
716 |
# with gr.TabItem("π Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
|
717 |
-
# with gr.
|
718 |
-
# gr.
|
719 |
-
#
|
720 |
-
# <div style="
|
721 |
-
#
|
722 |
-
# <
|
723 |
-
#
|
|
|
|
|
724 |
# </div>
|
725 |
-
#
|
726 |
-
# """)
|
727 |
#
|
728 |
-
#
|
729 |
-
#
|
730 |
-
#
|
731 |
-
# with gr.Column():
|
732 |
-
# with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
|
733 |
-
# finished_eval_table = gr.components.Dataframe(
|
734 |
-
# value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
735 |
-
# )
|
736 |
-
# with gr.Accordion(f"π Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
|
737 |
-
# running_eval_table = gr.components.Dataframe(
|
738 |
-
# value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
739 |
-
# )
|
740 |
-
# with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
|
741 |
-
# pending_eval_table = gr.components.Dataframe(
|
742 |
-
# value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
743 |
-
# )
|
744 |
-
#
|
745 |
-
# gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
|
746 |
-
#
|
747 |
-
# gr.HTML("""
|
748 |
-
# <h2 style="display: flex; align-items: center; gap: 10px;">
|
749 |
-
# <span style="font-size: 1.3em;">π</span> Model Submission Form
|
750 |
-
# </h2>
|
751 |
-
# """)
|
752 |
-
#
|
753 |
-
# with gr.Row():
|
754 |
-
# with gr.Column():
|
755 |
-
# model_name_textbox = gr.Textbox(
|
756 |
-
# label="Model Name (on Hugging Face Hub)",
|
757 |
-
# placeholder="Enter your model name...",
|
758 |
-
# elem_classes="enhanced-input"
|
759 |
-
# )
|
760 |
-
# revision_name_textbox = gr.Textbox(
|
761 |
-
# label="Revision / Commit Hash",
|
762 |
-
# placeholder="main",
|
763 |
-
# elem_classes="enhanced-input"
|
764 |
-
# )
|
765 |
-
# model_type = gr.Dropdown(
|
766 |
-
# choices=["Type A", "Type B", "Type C"],
|
767 |
-
# label="Model Type",
|
768 |
-
# multiselect=False,
|
769 |
-
# value=None,
|
770 |
-
# interactive=True,
|
771 |
-
# elem_classes="enhanced-dropdown"
|
772 |
-
# )
|
773 |
# with gr.Column():
|
774 |
-
#
|
775 |
-
#
|
776 |
-
#
|
777 |
-
#
|
778 |
-
#
|
779 |
-
#
|
780 |
-
#
|
781 |
-
#
|
782 |
-
#
|
783 |
-
#
|
784 |
-
#
|
785 |
-
#
|
786 |
-
#
|
787 |
-
#
|
788 |
-
#
|
789 |
-
#
|
790 |
-
#
|
791 |
-
#
|
792 |
-
#
|
793 |
-
#
|
794 |
-
#
|
795 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
796 |
# submit_button = gr.Button(
|
797 |
# "Submit for Evaluation",
|
798 |
-
# elem_classes="primary
|
799 |
# )
|
800 |
# submission_result = gr.Markdown()
|
801 |
# submit_button.click(
|
@@ -805,24 +856,25 @@ with demo:
|
|
805 |
# )
|
806 |
|
807 |
# Enhanced citation section
|
808 |
-
with gr.
|
809 |
-
gr.
|
810 |
-
|
811 |
-
<div style="
|
812 |
-
|
813 |
-
<
|
814 |
-
|
|
|
|
|
815 |
</div>
|
816 |
-
|
817 |
-
|
818 |
-
|
819 |
-
|
820 |
-
|
821 |
-
|
822 |
-
|
823 |
-
|
824 |
-
|
825 |
-
)
|
826 |
|
827 |
# Footer
|
828 |
gr.HTML("""
|
|
|
210 |
--accent-color: #e5a50a;
|
211 |
--warning-color: #ff7800;
|
212 |
--text-color: #333333;
|
213 |
+
--background-color: #f4f6f8;
|
214 |
+
--card-background: #ffffff;
|
215 |
--border-color: #e0e0e0;
|
216 |
+
--shadow-color: rgba(0, 0, 0, 0.08);
|
217 |
}
|
218 |
|
219 |
/* Typography */
|
|
|
259 |
border-radius: 12px !important;
|
260 |
overflow: hidden !important;
|
261 |
box-shadow: 0 4px 12px var(--shadow-color) !important;
|
262 |
+
background-color: var(--card-background);
|
263 |
}
|
264 |
|
265 |
.tab-nav button {
|
|
|
268 |
padding: 0.8rem 1.5rem !important;
|
269 |
border-radius: 0 !important;
|
270 |
transition: all 0.2s ease !important;
|
271 |
+
border-bottom: 2px solid transparent !important;
|
272 |
+
background-color: transparent !important;
|
273 |
+
color: var(--text-color) !important;
|
274 |
}
|
275 |
|
276 |
.tab-nav button.selected {
|
277 |
+
background-color: transparent !important;
|
278 |
+
color: var(--primary-color) !important;
|
279 |
font-weight: 600 !important;
|
280 |
+
border-bottom: 2px solid var(--primary-color) !important;
|
281 |
}
|
282 |
|
283 |
/* Card styling */
|
|
|
286 |
border: 1px solid var(--border-color) !important;
|
287 |
box-shadow: 0 4px 12px var(--shadow-color) !important;
|
288 |
overflow: hidden !important;
|
289 |
+
background-color: var(--card-background) !important;
|
290 |
}
|
291 |
|
292 |
/* Table styling */
|
|
|
298 |
border-radius: 8px !important;
|
299 |
overflow: hidden !important;
|
300 |
box-shadow: 0 4px 12px var(--shadow-color) !important;
|
301 |
+
background-color: var(--card-background);
|
302 |
}
|
303 |
|
304 |
th {
|
|
|
316 |
border-bottom: 1px solid var(--border-color) !important;
|
317 |
font-size: 1rem !important;
|
318 |
vertical-align: middle !important;
|
319 |
+
background-color: var(--card-background);
|
320 |
}
|
321 |
|
322 |
+
tr:last-child td {
|
323 |
+
border-bottom: none !important;
|
324 |
}
|
325 |
|
326 |
+
tr:nth-child(even) td {
|
327 |
+
background-color: #f8fafd !important;
|
328 |
}
|
329 |
|
330 |
+
tr:hover td {
|
331 |
+
background-color: #edf2fb !important;
|
332 |
}
|
333 |
|
334 |
/* Button styling */
|
|
|
370 |
display: flex !important;
|
371 |
align-items: center !important;
|
372 |
gap: 8px !important;
|
373 |
+
color: var(--text-color) !important;
|
374 |
}
|
375 |
|
376 |
.gr-radio label:hover {
|
|
|
392 |
border-radius: 8px !important;
|
393 |
border: 1px solid var(--border-color) !important;
|
394 |
transition: all 0.2s ease !important;
|
395 |
+
background-color: #ffffff !important;
|
396 |
+
color: var(--text-color) !important;
|
397 |
}
|
398 |
|
399 |
input:focus, textarea:focus, select:focus {
|
|
|
408 |
overflow: hidden !important;
|
409 |
margin: 1rem 0 !important;
|
410 |
border: 1px solid var(--border-color) !important;
|
411 |
+
background-color: var(--card-background);
|
412 |
}
|
413 |
|
414 |
.gr-accordion-header {
|
|
|
417 |
font-weight: 600 !important;
|
418 |
font-size: 1.1rem !important;
|
419 |
color: var(--text-color) !important;
|
420 |
+
border-bottom: 1px solid var(--border-color) !important;
|
421 |
}
|
422 |
|
423 |
.gr-accordion-content {
|
424 |
padding: 1rem !important;
|
425 |
+
background-color: var(--card-background) !important;
|
426 |
}
|
427 |
|
428 |
/* Markdown text improvements */
|
429 |
.markdown-text {
|
430 |
font-size: 1.05rem !important;
|
431 |
line-height: 1.7 !important;
|
432 |
+
color: var(--text-color) !important;
|
433 |
}
|
434 |
|
435 |
.markdown-text p {
|
|
|
447 |
|
448 |
.markdown-text strong {
|
449 |
font-weight: 600 !important;
|
450 |
+
color: #111 !important;
|
451 |
}
|
452 |
|
453 |
/* Status indicators */
|
|
|
481 |
/* Footer */
|
482 |
.footer {
|
483 |
margin-top: 2rem;
|
484 |
+
padding: 1.5rem 1rem;
|
485 |
text-align: center;
|
486 |
font-size: 0.9rem;
|
487 |
+
color: #555;
|
488 |
border-top: 1px solid var(--border-color);
|
489 |
+
background-color: #e9edf1;
|
490 |
}
|
491 |
|
492 |
+
/* Enhanced leaderboard title area */
|
493 |
.leaderboard-header {
|
494 |
display: flex;
|
495 |
align-items: center;
|
496 |
justify-content: space-between;
|
497 |
margin-bottom: 1.5rem;
|
498 |
+
padding: 1.5rem;
|
499 |
+
background-color: var(--card-background);
|
500 |
+
border-radius: 12px;
|
501 |
+
border: 1px solid var(--border-color);
|
502 |
+
box-shadow: 0 4px 12px var(--shadow-color);
|
503 |
}
|
504 |
|
505 |
.leaderboard-title {
|
|
|
522 |
font-size: 0.85rem;
|
523 |
color: #666;
|
524 |
font-style: italic;
|
525 |
+
background-color: #f5f7fa;
|
526 |
+
padding: 5px 10px;
|
527 |
+
border-radius: 6px;
|
528 |
}
|
529 |
|
530 |
/* Category selector buttons */
|
|
|
580 |
color: #cd7f32;
|
581 |
font-weight: bold;
|
582 |
}
|
583 |
+
|
584 |
+
/* Style for About section cards */
|
585 |
+
.about-card {
|
586 |
+
background-color: #f5f7fa;
|
587 |
+
padding: 20px;
|
588 |
+
border-radius: 12px;
|
589 |
+
height: 100%;
|
590 |
+
border: 1px solid var(--border-color);
|
591 |
+
}
|
592 |
+
.about-card h3 {
|
593 |
+
text-align: center;
|
594 |
+
margin-top: 0;
|
595 |
+
color: var(--primary-color);
|
596 |
+
}
|
597 |
+
.about-card p {
|
598 |
+
color: var(--text-color);
|
599 |
+
font-size: 0.95rem;
|
600 |
+
line-height: 1.6;
|
601 |
+
}
|
602 |
+
.about-card-icon {
|
603 |
+
font-size: 2.5em;
|
604 |
+
text-align: center;
|
605 |
+
margin-bottom: 15px;
|
606 |
+
display: block;
|
607 |
+
}
|
608 |
"""
|
609 |
|
610 |
# Combine with any existing CSS
|
611 |
+
custom_css = enhanced_css
|
612 |
|
613 |
# --- Gradio App Definition ---
|
614 |
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
|
|
|
632 |
""")
|
633 |
|
634 |
# Introduction with enhanced styling
|
635 |
+
with gr.Blocks():
|
636 |
+
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
637 |
|
638 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
639 |
with gr.TabItem("π Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
|
640 |
with gr.Column():
|
641 |
+
with gr.Blocks():
|
642 |
+
gr.HTML("""
|
643 |
+
<h2 style="display: flex; align-items: center; gap: 10px; margin-bottom: 0.5rem;">
|
644 |
+
<span style="font-size: 1.3em;">π</span> Model Performance Rankings
|
645 |
+
</h2>
|
646 |
+
<p class="leaderboard-subtitle" style="margin-top: 0;">Select a category to view specialized performance metrics</p>
|
647 |
+
""")
|
648 |
|
649 |
# Enhanced category selector
|
650 |
category_selector = gr.Radio(
|
|
|
652 |
label="Select Performance Domain:",
|
653 |
value="π Overall",
|
654 |
interactive=True,
|
655 |
+
elem_classes="gr-radio"
|
656 |
)
|
657 |
|
658 |
# Visual separator
|
|
|
671 |
)
|
672 |
|
673 |
# Stats cards (visual enhancement)
|
674 |
+
with gr.Blocks():
|
675 |
+
with gr.Row(equal_height=True):
|
676 |
+
with gr.Column(scale=1):
|
677 |
+
gr.HTML(f"""
|
678 |
+
<div class="about-card" style="text-align: center;">
|
679 |
+
<div class="about-card-icon">π</div>
|
680 |
+
<div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
|
681 |
+
<div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
|
682 |
+
</div>
|
683 |
+
""")
|
684 |
+
with gr.Column(scale=1):
|
685 |
+
gr.HTML(f"""
|
686 |
+
<div class="about-card" style="text-align: center;">
|
687 |
+
<div class="about-card-icon">π</div>
|
688 |
+
<div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
|
689 |
+
<div style="font-size: 1.1em; color: #666;">Organizations</div>
|
690 |
+
</div>
|
691 |
+
""")
|
692 |
+
with gr.Column(scale=1):
|
693 |
+
gr.HTML(f"""
|
694 |
+
<div class="about-card" style="text-align: center;">
|
695 |
+
<div class="about-card-icon">π
</div>
|
696 |
+
<div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
|
697 |
+
<div style="font-size: 1.1em; color: #666;">Performance Domains</div>
|
698 |
+
</div>
|
699 |
+
""")
|
700 |
|
701 |
# Link the radio button change to the update function
|
702 |
category_selector.change(
|
|
|
706 |
)
|
707 |
|
708 |
with gr.TabItem("π About", elem_id="llm-benchmark-tab-about", id=1):
|
709 |
+
with gr.Blocks():
|
710 |
+
# Enhanced about section header
|
711 |
+
gr.HTML("""
|
712 |
+
<div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
|
713 |
+
<div style="font-size: 4em;">π§ͺ</div>
|
714 |
+
<div>
|
715 |
+
<h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
|
716 |
+
<p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
717 |
</div>
|
718 |
+
</div>
|
719 |
+
""")
|
720 |
+
|
721 |
+
# Use the LLM_BENCHMARKS_TEXT variable
|
722 |
+
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
723 |
+
|
724 |
+
# Add methodology cards for visual enhancement
|
725 |
+
with gr.Row(equal_height=True):
|
726 |
+
with gr.Column():
|
727 |
+
gr.HTML("""
|
728 |
+
<div class="about-card">
|
729 |
+
<div class="about-card-icon">π‘</div>
|
730 |
+
<h3>MLE-Lite</h3>
|
731 |
+
<p>Evaluates a model's ability to handle basic machine learning engineering tasks including
|
732 |
+
data preprocessing, feature engineering, model selection, and basic deployment.</p>
|
733 |
+
</div>
|
734 |
+
""")
|
735 |
+
with gr.Column():
|
736 |
+
gr.HTML("""
|
737 |
+
<div class="about-card">
|
738 |
+
<div class="about-card-icon">π</div>
|
739 |
+
<h3>Tabular</h3>
|
740 |
+
<p>Tests a model's ability to process, analyze and model structured data, including
|
741 |
+
statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
|
742 |
+
</div>
|
743 |
+
""")
|
744 |
+
|
745 |
+
with gr.Row(equal_height=True):
|
746 |
+
with gr.Column():
|
747 |
+
gr.HTML("""
|
748 |
+
<div class="about-card">
|
749 |
+
<div class="about-card-icon">π</div>
|
750 |
+
<h3>NLP</h3>
|
751 |
+
<p>Evaluates natural language processing capabilities including text classification,
|
752 |
+
sentiment analysis, entity recognition, text generation, and language understanding.</p>
|
753 |
+
</div>
|
754 |
+
""")
|
755 |
+
with gr.Column():
|
756 |
+
gr.HTML("""
|
757 |
+
<div class="about-card">
|
758 |
+
<div class="about-card-icon">ποΈ</div>
|
759 |
+
<h3>CV</h3>
|
760 |
+
<p>Tests computer vision capabilities including image classification, object detection,
|
761 |
+
image generation, and visual understanding tasks across various domains.</p>
|
762 |
+
</div>
|
763 |
+
""")
|
764 |
|
765 |
# Optional: Uncomment if you want to re-enable the Submit tab
|
766 |
# with gr.TabItem("π Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
|
767 |
+
# with gr.Blocks():
|
768 |
+
# with gr.Column():
|
769 |
+
# gr.HTML("""
|
770 |
+
# <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
|
771 |
+
# <div style="font-size: 4em;">π</div>
|
772 |
+
# <div>
|
773 |
+
# <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
|
774 |
+
# <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
|
775 |
+
# </div>
|
776 |
# </div>
|
777 |
+
# """)
|
|
|
778 |
#
|
779 |
+
# with gr.Row():
|
780 |
+
# gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
781 |
+
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
782 |
# with gr.Column():
|
783 |
+
# with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
|
784 |
+
# finished_eval_table = gr.components.Dataframe(
|
785 |
+
# value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
786 |
+
# )
|
787 |
+
# with gr.Accordion(f"π Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
|
788 |
+
# running_eval_table = gr.components.Dataframe(
|
789 |
+
# value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
790 |
+
# )
|
791 |
+
# with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
|
792 |
+
# pending_eval_table = gr.components.Dataframe(
|
793 |
+
# value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
794 |
+
# )
|
795 |
+
#
|
796 |
+
# gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
|
797 |
+
#
|
798 |
+
# gr.HTML("""
|
799 |
+
# <h2 style="display: flex; align-items: center; gap: 10px;">
|
800 |
+
# <span style="font-size: 1.3em;">π</span> Model Submission Form
|
801 |
+
# </h2>
|
802 |
+
# """)
|
803 |
+
#
|
804 |
+
# with gr.Row():
|
805 |
+
# with gr.Column():
|
806 |
+
# model_name_textbox = gr.Textbox(
|
807 |
+
# label="Model Name (on Hugging Face Hub)",
|
808 |
+
# placeholder="Enter your model name...",
|
809 |
+
# elem_classes="enhanced-input"
|
810 |
+
# )
|
811 |
+
# revision_name_textbox = gr.Textbox(
|
812 |
+
# label="Revision / Commit Hash",
|
813 |
+
# placeholder="main",
|
814 |
+
# elem_classes="enhanced-input"
|
815 |
+
# )
|
816 |
+
# model_type = gr.Dropdown(
|
817 |
+
# choices=["Type A", "Type B", "Type C"],
|
818 |
+
# label="Model Type",
|
819 |
+
# multiselect=False,
|
820 |
+
# value=None,
|
821 |
+
# interactive=True,
|
822 |
+
# elem_classes="enhanced-dropdown"
|
823 |
+
# )
|
824 |
+
# with gr.Column():
|
825 |
+
# precision = gr.Dropdown(
|
826 |
+
# choices=["float16", "bfloat16", "float32", "int8", "auto"],
|
827 |
+
# label="Precision",
|
828 |
+
# multiselect=False,
|
829 |
+
# value="auto",
|
830 |
+
# interactive=True,
|
831 |
+
# elem_classes="enhanced-dropdown"
|
832 |
+
# )
|
833 |
+
# weight_type = gr.Dropdown(
|
834 |
+
# choices=["Original", "Adapter", "Delta"],
|
835 |
+
# label="Weights Type",
|
836 |
+
# multiselect=False,
|
837 |
+
# value="Original",
|
838 |
+
# interactive=True,
|
839 |
+
# elem_classes="enhanced-dropdown"
|
840 |
+
# )
|
841 |
+
# base_model_name_textbox = gr.Textbox(
|
842 |
+
# label="Base Model (for delta or adapter weights)",
|
843 |
+
# placeholder="Only needed for adapter/delta weights",
|
844 |
+
# elem_classes="enhanced-input"
|
845 |
+
# )
|
846 |
+
#
|
847 |
# submit_button = gr.Button(
|
848 |
# "Submit for Evaluation",
|
849 |
+
# elem_classes="primary"
|
850 |
# )
|
851 |
# submission_result = gr.Markdown()
|
852 |
# submit_button.click(
|
|
|
856 |
# )
|
857 |
|
858 |
# Enhanced citation section
|
859 |
+
with gr.Blocks():
|
860 |
+
with gr.Accordion("π Citation", open=False, elem_classes="citation-accordion"):
|
861 |
+
gr.HTML("""
|
862 |
+
<div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
|
863 |
+
<div style="font-size: 2.5em;">π</div>
|
864 |
+
<div>
|
865 |
+
<h3 style="margin: 0;">How to Cite This Benchmark</h3>
|
866 |
+
<p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
|
867 |
+
</div>
|
868 |
</div>
|
869 |
+
""")
|
870 |
+
|
871 |
+
citation_button = gr.Textbox(
|
872 |
+
value=CITATION_BUTTON_TEXT,
|
873 |
+
label=CITATION_BUTTON_LABEL,
|
874 |
+
lines=10,
|
875 |
+
elem_id="citation-button",
|
876 |
+
show_copy_button=True,
|
877 |
+
)
|
|
|
878 |
|
879 |
# Footer
|
880 |
gr.HTML("""
|