Jerrycool committed on
Commit
7be1d85
·
verified ·
1 Parent(s): 6e3d36f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +251 -199
app.py CHANGED
@@ -210,10 +210,10 @@ enhanced_css = """
210
  --accent-color: #e5a50a;
211
  --warning-color: #ff7800;
212
  --text-color: #333333;
213
- --background-color: #ffffff;
214
- --card-background: #f9f9f9;
215
  --border-color: #e0e0e0;
216
- --shadow-color: rgba(0, 0, 0, 0.1);
217
  }
218
 
219
  /* Typography */
@@ -259,6 +259,7 @@ h3 {
259
  border-radius: 12px !important;
260
  overflow: hidden !important;
261
  box-shadow: 0 4px 12px var(--shadow-color) !important;
 
262
  }
263
 
264
  .tab-nav button {
@@ -267,12 +268,16 @@ h3 {
267
  padding: 0.8rem 1.5rem !important;
268
  border-radius: 0 !important;
269
  transition: all 0.2s ease !important;
 
 
 
270
  }
271
 
272
  .tab-nav button.selected {
273
- background-color: var(--primary-color) !important;
274
- color: white !important;
275
  font-weight: 600 !important;
 
276
  }
277
 
278
  /* Card styling */
@@ -281,6 +286,7 @@ h3 {
281
  border: 1px solid var(--border-color) !important;
282
  box-shadow: 0 4px 12px var(--shadow-color) !important;
283
  overflow: hidden !important;
 
284
  }
285
 
286
  /* Table styling */
@@ -292,6 +298,7 @@ table {
292
  border-radius: 8px !important;
293
  overflow: hidden !important;
294
  box-shadow: 0 4px 12px var(--shadow-color) !important;
 
295
  }
296
 
297
  th {
@@ -309,18 +316,19 @@ td {
309
  border-bottom: 1px solid var(--border-color) !important;
310
  font-size: 1rem !important;
311
  vertical-align: middle !important;
 
312
  }
313
 
314
- tr:nth-child(even) {
315
- background-color: #f8fafd !important;
316
  }
317
 
318
- tr:hover {
319
- background-color: #edf2fb !important;
320
  }
321
 
322
- tr:first-child td {
323
- border-top: none !important;
324
  }
325
 
326
  /* Button styling */
@@ -362,6 +370,7 @@ button.primary:hover, .gr-button.primary:hover {
362
  display: flex !important;
363
  align-items: center !important;
364
  gap: 8px !important;
 
365
  }
366
 
367
  .gr-radio label:hover {
@@ -383,6 +392,8 @@ input, textarea, select {
383
  border-radius: 8px !important;
384
  border: 1px solid var(--border-color) !important;
385
  transition: all 0.2s ease !important;
 
 
386
  }
387
 
388
  input:focus, textarea:focus, select:focus {
@@ -397,6 +408,7 @@ input:focus, textarea:focus, select:focus {
397
  overflow: hidden !important;
398
  margin: 1rem 0 !important;
399
  border: 1px solid var(--border-color) !important;
 
400
  }
401
 
402
  .gr-accordion-header {
@@ -405,17 +417,19 @@ input:focus, textarea:focus, select:focus {
405
  font-weight: 600 !important;
406
  font-size: 1.1rem !important;
407
  color: var(--text-color) !important;
 
408
  }
409
 
410
  .gr-accordion-content {
411
  padding: 1rem !important;
412
- background-color: white !important;
413
  }
414
 
415
  /* Markdown text improvements */
416
  .markdown-text {
417
  font-size: 1.05rem !important;
418
  line-height: 1.7 !important;
 
419
  }
420
 
421
  .markdown-text p {
@@ -433,7 +447,7 @@ input:focus, textarea:focus, select:focus {
433
 
434
  .markdown-text strong {
435
  font-weight: 600 !important;
436
- color: #333 !important;
437
  }
438
 
439
  /* Status indicators */
@@ -467,21 +481,25 @@ input:focus, textarea:focus, select:focus {
467
  /* Footer */
468
  .footer {
469
  margin-top: 2rem;
470
- padding: 1rem;
471
  text-align: center;
472
  font-size: 0.9rem;
473
- color: #666;
474
  border-top: 1px solid var(--border-color);
 
475
  }
476
 
477
- /* Enhanced leaderboard title */
478
  .leaderboard-header {
479
  display: flex;
480
  align-items: center;
481
  justify-content: space-between;
482
  margin-bottom: 1.5rem;
483
- padding-bottom: 1rem;
484
- border-bottom: 2px solid var(--border-color);
 
 
 
485
  }
486
 
487
  .leaderboard-title {
@@ -504,6 +522,9 @@ input:focus, textarea:focus, select:focus {
504
  font-size: 0.85rem;
505
  color: #666;
506
  font-style: italic;
 
 
 
507
  }
508
 
509
  /* Category selector buttons */
@@ -559,10 +580,35 @@ input:focus, textarea:focus, select:focus {
559
  color: #cd7f32;
560
  font-weight: bold;
561
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
  """
563
 
564
  # Combine with any existing CSS
565
- custom_css = enhanced_css + custom_css
566
 
567
  # --- Gradio App Definition ---
568
  demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
@@ -586,17 +632,19 @@ with demo:
586
  """)
587
 
588
  # Introduction with enhanced styling
589
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
590
 
591
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
592
  with gr.TabItem("📊 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
593
  with gr.Column():
594
- gr.HTML("""
595
- <h2 style="display: flex; align-items: center; gap: 10px;">
596
- <span style="font-size: 1.3em;">📈</span> Model Performance Rankings
597
- </h2>
598
- <p class="leaderboard-subtitle">Select a category to view specialized performance metrics</p>
599
- """)
 
600
 
601
  # Enhanced category selector
602
  category_selector = gr.Radio(
@@ -604,7 +652,7 @@ with demo:
604
  label="Select Performance Domain:",
605
  value="🏆 Overall",
606
  interactive=True,
607
- elem_classes="fancy-radio"
608
  )
609
 
610
  # Visual separator
@@ -623,31 +671,32 @@ with demo:
623
  )
624
 
625
  # Stats cards (visual enhancement)
626
- with gr.Row():
627
- with gr.Column(scale=1):
628
- gr.HTML(f"""
629
- <div style="background-color: #f0f5ff; padding: 20px; border-radius: 12px; text-align: center;">
630
- <div style="font-size: 2em;">🔍</div>
631
- <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
632
- <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
633
- </div>
634
- """)
635
- with gr.Column(scale=1):
636
- gr.HTML(f"""
637
- <div style="background-color: #e6f7ef; padding: 20px; border-radius: 12px; text-align: center;">
638
- <div style="font-size: 2em;">🌐</div>
639
- <div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
640
- <div style="font-size: 1.1em; color: #666;">Organizations</div>
641
- </div>
642
- """)
643
- with gr.Column(scale=1):
644
- gr.HTML(f"""
645
- <div style="background-color: #fff8e0; padding: 20px; border-radius: 12px; text-align: center;">
646
- <div style="font-size: 2em;">🏅</div>
647
- <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
648
- <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
649
- </div>
650
- """)
 
651
 
652
  # Link the radio button change to the update function
653
  category_selector.change(
@@ -657,145 +706,147 @@ with demo:
657
  )
658
 
659
  with gr.TabItem("📚 About", elem_id="llm-benchmark-tab-about", id=1):
660
- # Enhanced about section
661
- gr.HTML("""
662
- <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
663
- <div style="font-size: 4em;">🧪</div>
664
- <div>
665
- <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
666
- <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
667
- </div>
668
- </div>
669
- """)
670
-
671
- # Use the LLM_BENCHMARKS_TEXT variable
672
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
673
-
674
- # Add methodology cards for visual enhancement
675
- with gr.Row():
676
- with gr.Column():
677
- gr.HTML("""
678
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
679
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">💡</div>
680
- <h3 style="text-align: center; margin-top: 0;">MLE-Lite</h3>
681
- <p>Evaluates a model's ability to handle basic machine learning engineering tasks including
682
- data preprocessing, feature engineering, model selection, and basic deployment.</p>
683
- </div>
684
- """)
685
- with gr.Column():
686
- gr.HTML("""
687
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
688
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📊</div>
689
- <h3 style="text-align: center; margin-top: 0;">Tabular</h3>
690
- <p>Tests a model's ability to process, analyze and model structured data, including
691
- statistical analysis,statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
692
- </div>
693
- """)
694
-
695
- with gr.Row():
696
- with gr.Column():
697
- gr.HTML("""
698
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
699
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">📝</div>
700
- <h3 style="text-align: center; margin-top: 0;">NLP</h3>
701
- <p>Evaluates natural language processing capabilities including text classification,
702
- sentiment analysis, entity recognition, text generation, and language understanding.</p>
703
- </div>
704
- """)
705
- with gr.Column():
706
- gr.HTML("""
707
- <div style="background-color: #f5f7fa; padding: 20px; border-radius: 12px; height: 100%;">
708
- <div style="font-size: 2em; text-align: center; margin-bottom: 15px;">👁️</div>
709
- <h3 style="text-align: center; margin-top: 0;">CV</h3>
710
- <p>Tests computer vision capabilities including image classification, object detection,
711
- image generation, and visual understanding tasks across various domains.</p>
712
  </div>
713
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
 
715
  # Optional: Uncomment if you want to re-enable the Submit tab
716
  # with gr.TabItem("🚀 Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
717
- # with gr.Column():
718
- # gr.HTML("""
719
- # <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
720
- # <div style="font-size: 4em;">🚀</div>
721
- # <div>
722
- # <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
723
- # <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
 
 
724
  # </div>
725
- # </div>
726
- # """)
727
  #
728
- # with gr.Row():
729
- # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
730
- #
731
- # with gr.Column():
732
- # with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
733
- # finished_eval_table = gr.components.Dataframe(
734
- # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
735
- # )
736
- # with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
737
- # running_eval_table = gr.components.Dataframe(
738
- # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
739
- # )
740
- # with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
741
- # pending_eval_table = gr.components.Dataframe(
742
- # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
743
- # )
744
- #
745
- # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
746
- #
747
- # gr.HTML("""
748
- # <h2 style="display: flex; align-items: center; gap: 10px;">
749
- # <span style="font-size: 1.3em;">📝</span> Model Submission Form
750
- # </h2>
751
- # """)
752
- #
753
- # with gr.Row():
754
- # with gr.Column():
755
- # model_name_textbox = gr.Textbox(
756
- # label="Model Name (on Hugging Face Hub)",
757
- # placeholder="Enter your model name...",
758
- # elem_classes="enhanced-input"
759
- # )
760
- # revision_name_textbox = gr.Textbox(
761
- # label="Revision / Commit Hash",
762
- # placeholder="main",
763
- # elem_classes="enhanced-input"
764
- # )
765
- # model_type = gr.Dropdown(
766
- # choices=["Type A", "Type B", "Type C"],
767
- # label="Model Type",
768
- # multiselect=False,
769
- # value=None,
770
- # interactive=True,
771
- # elem_classes="enhanced-dropdown"
772
- # )
773
  # with gr.Column():
774
- # precision = gr.Dropdown(
775
- # choices=["float16", "bfloat16", "float32", "int8", "auto"],
776
- # label="Precision",
777
- # multiselect=False,
778
- # value="auto",
779
- # interactive=True,
780
- # elem_classes="enhanced-dropdown"
781
- # )
782
- # weight_type = gr.Dropdown(
783
- # choices=["Original", "Adapter", "Delta"],
784
- # label="Weights Type",
785
- # multiselect=False,
786
- # value="Original",
787
- # interactive=True,
788
- # elem_classes="enhanced-dropdown"
789
- # )
790
- # base_model_name_textbox = gr.Textbox(
791
- # label="Base Model (for delta or adapter weights)",
792
- # placeholder="Only needed for adapter/delta weights",
793
- # elem_classes="enhanced-input"
794
- # )
795
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
  # submit_button = gr.Button(
797
  # "Submit for Evaluation",
798
- # elem_classes="primary-button"
799
  # )
800
  # submission_result = gr.Markdown()
801
  # submit_button.click(
@@ -805,24 +856,25 @@ with demo:
805
  # )
806
 
807
  # Enhanced citation section
808
- with gr.Accordion("📄 Citation", open=False, elem_classes="citation-accordion"):
809
- gr.HTML("""
810
- <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
811
- <div style="font-size: 2.5em;">📄</div>
812
- <div>
813
- <h3 style="margin: 0;">How to Cite This Benchmark</h3>
814
- <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
 
 
815
  </div>
816
- </div>
817
- """)
818
-
819
- citation_button = gr.Textbox(
820
- value=CITATION_BUTTON_TEXT,
821
- label=CITATION_BUTTON_LABEL,
822
- lines=10,
823
- elem_id="citation-button",
824
- show_copy_button=True,
825
- )
826
 
827
  # Footer
828
  gr.HTML("""
 
210
  --accent-color: #e5a50a;
211
  --warning-color: #ff7800;
212
  --text-color: #333333;
213
+ --background-color: #f4f6f8;
214
+ --card-background: #ffffff;
215
  --border-color: #e0e0e0;
216
+ --shadow-color: rgba(0, 0, 0, 0.08);
217
  }
218
 
219
  /* Typography */
 
259
  border-radius: 12px !important;
260
  overflow: hidden !important;
261
  box-shadow: 0 4px 12px var(--shadow-color) !important;
262
+ background-color: var(--card-background);
263
  }
264
 
265
  .tab-nav button {
 
268
  padding: 0.8rem 1.5rem !important;
269
  border-radius: 0 !important;
270
  transition: all 0.2s ease !important;
271
+ border-bottom: 2px solid transparent !important;
272
+ background-color: transparent !important;
273
+ color: var(--text-color) !important;
274
  }
275
 
276
  .tab-nav button.selected {
277
+ background-color: transparent !important;
278
+ color: var(--primary-color) !important;
279
  font-weight: 600 !important;
280
+ border-bottom: 2px solid var(--primary-color) !important;
281
  }
282
 
283
  /* Card styling */
 
286
  border: 1px solid var(--border-color) !important;
287
  box-shadow: 0 4px 12px var(--shadow-color) !important;
288
  overflow: hidden !important;
289
+ background-color: var(--card-background) !important;
290
  }
291
 
292
  /* Table styling */
 
298
  border-radius: 8px !important;
299
  overflow: hidden !important;
300
  box-shadow: 0 4px 12px var(--shadow-color) !important;
301
+ background-color: var(--card-background);
302
  }
303
 
304
  th {
 
316
  border-bottom: 1px solid var(--border-color) !important;
317
  font-size: 1rem !important;
318
  vertical-align: middle !important;
319
+ background-color: var(--card-background);
320
  }
321
 
322
+ tr:last-child td {
323
+ border-bottom: none !important;
324
  }
325
 
326
+ tr:nth-child(even) td {
327
+ background-color: #f8fafd !important;
328
  }
329
 
330
+ tr:hover td {
331
+ background-color: #edf2fb !important;
332
  }
333
 
334
  /* Button styling */
 
370
  display: flex !important;
371
  align-items: center !important;
372
  gap: 8px !important;
373
+ color: var(--text-color) !important;
374
  }
375
 
376
  .gr-radio label:hover {
 
392
  border-radius: 8px !important;
393
  border: 1px solid var(--border-color) !important;
394
  transition: all 0.2s ease !important;
395
+ background-color: #ffffff !important;
396
+ color: var(--text-color) !important;
397
  }
398
 
399
  input:focus, textarea:focus, select:focus {
 
408
  overflow: hidden !important;
409
  margin: 1rem 0 !important;
410
  border: 1px solid var(--border-color) !important;
411
+ background-color: var(--card-background);
412
  }
413
 
414
  .gr-accordion-header {
 
417
  font-weight: 600 !important;
418
  font-size: 1.1rem !important;
419
  color: var(--text-color) !important;
420
+ border-bottom: 1px solid var(--border-color) !important;
421
  }
422
 
423
  .gr-accordion-content {
424
  padding: 1rem !important;
425
+ background-color: var(--card-background) !important;
426
  }
427
 
428
  /* Markdown text improvements */
429
  .markdown-text {
430
  font-size: 1.05rem !important;
431
  line-height: 1.7 !important;
432
+ color: var(--text-color) !important;
433
  }
434
 
435
  .markdown-text p {
 
447
 
448
  .markdown-text strong {
449
  font-weight: 600 !important;
450
+ color: #111 !important;
451
  }
452
 
453
  /* Status indicators */
 
481
  /* Footer */
482
  .footer {
483
  margin-top: 2rem;
484
+ padding: 1.5rem 1rem;
485
  text-align: center;
486
  font-size: 0.9rem;
487
+ color: #555;
488
  border-top: 1px solid var(--border-color);
489
+ background-color: #e9edf1;
490
  }
491
 
492
+ /* Enhanced leaderboard title area */
493
  .leaderboard-header {
494
  display: flex;
495
  align-items: center;
496
  justify-content: space-between;
497
  margin-bottom: 1.5rem;
498
+ padding: 1.5rem;
499
+ background-color: var(--card-background);
500
+ border-radius: 12px;
501
+ border: 1px solid var(--border-color);
502
+ box-shadow: 0 4px 12px var(--shadow-color);
503
  }
504
 
505
  .leaderboard-title {
 
522
  font-size: 0.85rem;
523
  color: #666;
524
  font-style: italic;
525
+ background-color: #f5f7fa;
526
+ padding: 5px 10px;
527
+ border-radius: 6px;
528
  }
529
 
530
  /* Category selector buttons */
 
580
  color: #cd7f32;
581
  font-weight: bold;
582
  }
583
+
584
+ /* Style for About section cards */
585
+ .about-card {
586
+ background-color: #f5f7fa;
587
+ padding: 20px;
588
+ border-radius: 12px;
589
+ height: 100%;
590
+ border: 1px solid var(--border-color);
591
+ }
592
+ .about-card h3 {
593
+ text-align: center;
594
+ margin-top: 0;
595
+ color: var(--primary-color);
596
+ }
597
+ .about-card p {
598
+ color: var(--text-color);
599
+ font-size: 0.95rem;
600
+ line-height: 1.6;
601
+ }
602
+ .about-card-icon {
603
+ font-size: 2.5em;
604
+ text-align: center;
605
+ margin-bottom: 15px;
606
+ display: block;
607
+ }
608
  """
609
 
610
  # Combine with any existing CSS
611
+ custom_css = enhanced_css
612
 
613
  # --- Gradio App Definition ---
614
  demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
 
632
  """)
633
 
634
  # Introduction with enhanced styling
635
+ with gr.Blocks():
636
+ gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
637
 
638
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
639
  with gr.TabItem("📊 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
640
  with gr.Column():
641
+ with gr.Blocks():
642
+ gr.HTML("""
643
+ <h2 style="display: flex; align-items: center; gap: 10px; margin-bottom: 0.5rem;">
644
+ <span style="font-size: 1.3em;">📈</span> Model Performance Rankings
645
+ </h2>
646
+ <p class="leaderboard-subtitle" style="margin-top: 0;">Select a category to view specialized performance metrics</p>
647
+ """)
648
 
649
  # Enhanced category selector
650
  category_selector = gr.Radio(
 
652
  label="Select Performance Domain:",
653
  value="🏆 Overall",
654
  interactive=True,
655
+ elem_classes="gr-radio"
656
  )
657
 
658
  # Visual separator
 
671
  )
672
 
673
  # Stats cards (visual enhancement)
674
+ with gr.Blocks():
675
+ with gr.Row(equal_height=True):
676
+ with gr.Column(scale=1):
677
+ gr.HTML(f"""
678
+ <div class="about-card" style="text-align: center;">
679
+ <div class="about-card-icon">🔍</div>
680
+ <div style="font-size: 2em; font-weight: bold; color: #1a5fb4;">{len(master_df)}</div>
681
+ <div style="font-size: 1.1em; color: #666;">Models Evaluated</div>
682
+ </div>
683
+ """)
684
+ with gr.Column(scale=1):
685
+ gr.HTML(f"""
686
+ <div class="about-card" style="text-align: center;">
687
+ <div class="about-card-icon">🌐</div>
688
+ <div style="font-size: 2em; font-weight: bold; color: #00875a;">{master_df['organizer'].nunique()}</div>
689
+ <div style="font-size: 1.1em; color: #666;">Organizations</div>
690
+ </div>
691
+ """)
692
+ with gr.Column(scale=1):
693
+ gr.HTML(f"""
694
+ <div class="about-card" style="text-align: center;">
695
+ <div class="about-card-icon">🏅</div>
696
+ <div style="font-size: 2em; font-weight: bold; color: #b58a00;">{len(CATEGORIES)}</div>
697
+ <div style="font-size: 1.1em; color: #666;">Performance Domains</div>
698
+ </div>
699
+ """)
700
 
701
  # Link the radio button change to the update function
702
  category_selector.change(
 
706
  )
707
 
708
  with gr.TabItem("📚 About", elem_id="llm-benchmark-tab-about", id=1):
709
+ with gr.Blocks():
710
+ # Enhanced about section header
711
+ gr.HTML("""
712
+ <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
713
+ <div style="font-size: 4em;">🧪</div>
714
+ <div>
715
+ <h2 style="margin: 0;">About the MLE-Dojo Benchmark</h2>
716
+ <p style="margin: 5px 0 0 0; color: #666;">A comprehensive evaluation framework for AI models</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
717
  </div>
718
+ </div>
719
+ """)
720
+
721
+ # Use the LLM_BENCHMARKS_TEXT variable
722
+ gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
723
+
724
+ # Add methodology cards for visual enhancement
725
+ with gr.Row(equal_height=True):
726
+ with gr.Column():
727
+ gr.HTML("""
728
+ <div class="about-card">
729
+ <div class="about-card-icon">💡</div>
730
+ <h3>MLE-Lite</h3>
731
+ <p>Evaluates a model's ability to handle basic machine learning engineering tasks including
732
+ data preprocessing, feature engineering, model selection, and basic deployment.</p>
733
+ </div>
734
+ """)
735
+ with gr.Column():
736
+ gr.HTML("""
737
+ <div class="about-card">
738
+ <div class="about-card-icon">📊</div>
739
+ <h3>Tabular</h3>
740
+ <p>Tests a model's ability to process, analyze and model structured data, including
741
+ statistical analysis, predictive modeling, and data visualization with tabular datasets.</p>
742
+ </div>
743
+ """)
744
+
745
+ with gr.Row(equal_height=True):
746
+ with gr.Column():
747
+ gr.HTML("""
748
+ <div class="about-card">
749
+ <div class="about-card-icon">📝</div>
750
+ <h3>NLP</h3>
751
+ <p>Evaluates natural language processing capabilities including text classification,
752
+ sentiment analysis, entity recognition, text generation, and language understanding.</p>
753
+ </div>
754
+ """)
755
+ with gr.Column():
756
+ gr.HTML("""
757
+ <div class="about-card">
758
+ <div class="about-card-icon">👁️</div>
759
+ <h3>CV</h3>
760
+ <p>Tests computer vision capabilities including image classification, object detection,
761
+ image generation, and visual understanding tasks across various domains.</p>
762
+ </div>
763
+ """)
764
 
765
  # Optional: Uncomment if you want to re-enable the Submit tab
766
  # with gr.TabItem("🚀 Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
767
+ # with gr.Blocks():
768
+ # with gr.Column():
769
+ # gr.HTML("""
770
+ # <div class="about-header" style="display: flex; align-items: center; gap: 20px; margin-bottom: 20px;">
771
+ # <div style="font-size: 4em;">🚀</div>
772
+ # <div>
773
+ # <h2 style="margin: 0;">Submit Your Model for Evaluation</h2>
774
+ # <p style="margin: 5px 0 0 0; color: #666;">Add your model to the MLE-Dojo leaderboard</p>
775
+ # </div>
776
  # </div>
777
+ # """)
 
778
  #
779
+ # with gr.Row():
780
+ # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
781
+ #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  # with gr.Column():
783
+ # with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
784
+ # finished_eval_table = gr.components.Dataframe(
785
+ # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
786
+ # )
787
+ # with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
788
+ # running_eval_table = gr.components.Dataframe(
789
+ # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
790
+ # )
791
+ # with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
792
+ # pending_eval_table = gr.components.Dataframe(
793
+ # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
794
+ # )
795
+ #
796
+ # gr.HTML('<div style="height: 1px; background-color: #e0e0e0; margin: 20px 0;"></div>')
797
+ #
798
+ # gr.HTML("""
799
+ # <h2 style="display: flex; align-items: center; gap: 10px;">
800
+ # <span style="font-size: 1.3em;">📝</span> Model Submission Form
801
+ # </h2>
802
+ # """)
803
+ #
804
+ # with gr.Row():
805
+ # with gr.Column():
806
+ # model_name_textbox = gr.Textbox(
807
+ # label="Model Name (on Hugging Face Hub)",
808
+ # placeholder="Enter your model name...",
809
+ # elem_classes="enhanced-input"
810
+ # )
811
+ # revision_name_textbox = gr.Textbox(
812
+ # label="Revision / Commit Hash",
813
+ # placeholder="main",
814
+ # elem_classes="enhanced-input"
815
+ # )
816
+ # model_type = gr.Dropdown(
817
+ # choices=["Type A", "Type B", "Type C"],
818
+ # label="Model Type",
819
+ # multiselect=False,
820
+ # value=None,
821
+ # interactive=True,
822
+ # elem_classes="enhanced-dropdown"
823
+ # )
824
+ # with gr.Column():
825
+ # precision = gr.Dropdown(
826
+ # choices=["float16", "bfloat16", "float32", "int8", "auto"],
827
+ # label="Precision",
828
+ # multiselect=False,
829
+ # value="auto",
830
+ # interactive=True,
831
+ # elem_classes="enhanced-dropdown"
832
+ # )
833
+ # weight_type = gr.Dropdown(
834
+ # choices=["Original", "Adapter", "Delta"],
835
+ # label="Weights Type",
836
+ # multiselect=False,
837
+ # value="Original",
838
+ # interactive=True,
839
+ # elem_classes="enhanced-dropdown"
840
+ # )
841
+ # base_model_name_textbox = gr.Textbox(
842
+ # label="Base Model (for delta or adapter weights)",
843
+ # placeholder="Only needed for adapter/delta weights",
844
+ # elem_classes="enhanced-input"
845
+ # )
846
+ #
847
  # submit_button = gr.Button(
848
  # "Submit for Evaluation",
849
+ # elem_classes="primary"
850
  # )
851
  # submission_result = gr.Markdown()
852
  # submit_button.click(
 
856
  # )
857
 
858
  # Enhanced citation section
859
+ with gr.Blocks():
860
+ with gr.Accordion("📄 Citation", open=False, elem_classes="citation-accordion"):
861
+ gr.HTML("""
862
+ <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
863
+ <div style="font-size: 2.5em;">📄</div>
864
+ <div>
865
+ <h3 style="margin: 0;">How to Cite This Benchmark</h3>
866
+ <p style="margin: 5px 0 0 0; color: #666;">Please use the following citation if you use this benchmark in your research</p>
867
+ </div>
868
  </div>
869
+ """)
870
+
871
+ citation_button = gr.Textbox(
872
+ value=CITATION_BUTTON_TEXT,
873
+ label=CITATION_BUTTON_LABEL,
874
+ lines=10,
875
+ elem_id="citation-button",
876
+ show_copy_button=True,
877
+ )
 
878
 
879
  # Footer
880
  gr.HTML("""