Aaron Mueller committed on
Commit
44212b3
·
1 Parent(s): 7086c8a

dynamic averages

Browse files
Files changed (1) hide show
  1. app.py +138 -24
app.py CHANGED
@@ -3,6 +3,7 @@ import gzip
3
  import gradio as gr
4
  from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
5
  import pandas as pd
 
6
  from apscheduler.schedulers.background import BackgroundScheduler
7
  from huggingface_hub import snapshot_download
8
  from io import StringIO
@@ -416,6 +417,18 @@ def init_leaderboard_mib_causalgraph(dataframe, track):
416
  # print(dataframe)
417
  renamed_df = dataframe.rename(columns=display_mapping)
418
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  # Create only necessary columns
420
  return Leaderboard(
421
  value=renamed_df,
@@ -476,28 +489,81 @@ def process_json(temp_file):
476
 
477
  # Define the preset substrings for filtering
478
  PRESET_SUBSTRINGS = ["IOI", "MCQA", "Arithmetic", "ARC", "GPT-2", "Qwen-2.5", "Gemma-2", "Llama-3.1"]
 
 
479
 
480
- def filter_columns_by_substrings(dataframe: pd.DataFrame, selected_substrings: List[str]) -> pd.DataFrame:
 
481
  """
482
  Filter columns based on the selected substrings.
483
  """
484
  original_dataframe = deepcopy(dataframe)
485
- if not selected_substrings:
486
  return dataframe # No filtering if no substrings are selected
487
 
488
- # Filter columns that contain any of the selected substrings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  filtered_columns = [
490
  col for col in dataframe.columns
491
- if any(sub.lower() in col.lower() for sub in selected_substrings)
492
  or col == "Method"
493
  ]
 
 
 
 
 
 
494
  return dataframe[filtered_columns]
495
 
496
- def update_leaderboard(dataframe: pd.DataFrame, selected_substrings: List[str]):
 
497
  """
498
  Update the leaderboard based on the selected substrings.
499
  """
500
- filtered_dataframe = filter_columns_by_substrings(dataframe, selected_substrings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  return filtered_dataframe
502
 
503
  demo = gr.Blocks(css=custom_css)
@@ -533,17 +599,33 @@ with demo:
533
  You can combine filters to see specific task-model combinations.
534
  """)
535
  # CheckboxGroup for selecting substrings
536
- substring_checkbox = gr.CheckboxGroup(
537
- choices=PRESET_SUBSTRINGS,
538
- label="Filter results:",
539
- value=PRESET_SUBSTRINGS, # Default to all substrings selected
 
 
 
 
 
 
 
 
 
 
540
  )
 
541
  leaderboard, data = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH_FPL, "Subgraph")
542
  original_leaderboard = gr.State(value=data)
543
  # Update the leaderboard when the user selects/deselects substrings
544
- substring_checkbox.change(
545
  fn=update_leaderboard,
546
- inputs=[original_leaderboard, substring_checkbox],
 
 
 
 
 
547
  outputs=leaderboard
548
  )
549
  print(f"Leaderboard is {leaderboard}")
@@ -555,17 +637,33 @@ with demo:
555
  You can combine filters to see specific task-model combinations.
556
  """)
557
  # CheckboxGroup for selecting substrings
558
- substring_checkbox = gr.CheckboxGroup(
559
- choices=PRESET_SUBSTRINGS,
560
- label="Filter results:",
561
- value=PRESET_SUBSTRINGS, # Default to all substrings selected
 
 
 
 
 
 
 
 
 
 
562
  )
 
563
  leaderboard, data = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH_FEQ, "Subgraph")
564
  original_leaderboard = gr.State(value=data)
565
  # Update the leaderboard when the user selects/deselects substrings
566
- substring_checkbox.change(
567
  fn=update_leaderboard,
568
- inputs=[original_leaderboard, substring_checkbox],
 
 
 
 
 
569
  outputs=leaderboard
570
  )
571
  print(f"Leaderboard is {leaderboard}")
@@ -584,19 +682,35 @@ with demo:
584
  Use the dropdown menus below to filter results by specific tasks or models.
585
  You can combine filters to see specific task-model combinations.
586
  """)
587
- substring_checkbox = gr.CheckboxGroup(
588
- choices=PRESET_SUBSTRINGS,
589
- label="Filter results:",
590
- value=PRESET_SUBSTRINGS, # Default to all substrings selected
 
 
 
 
 
 
 
 
 
 
591
  )
 
592
  leaderboard_aggregated, data = init_leaderboard_mib_causalgraph(
593
  LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED,
594
  "Causal Graph"
595
  )
596
  original_leaderboard = gr.State(value=data)
597
- substring_checkbox.change(
 
 
 
 
 
598
  fn=update_leaderboard,
599
- inputs=[original_leaderboard, substring_checkbox],
600
  outputs=leaderboard_aggregated
601
  )
602
  with gr.TabItem("Intervention Averaged", id=2):
 
3
  import gradio as gr
4
  from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
5
  import pandas as pd
6
+ import numpy as np
7
  from apscheduler.schedulers.background import BackgroundScheduler
8
  from huggingface_hub import snapshot_download
9
  from io import StringIO
 
417
  # print(dataframe)
418
  renamed_df = dataframe.rename(columns=display_mapping)
419
 
420
+ # idx_to_method = {0: "Full Vector", 1: "DAS", 2: "DBM", 3: "PCA", 4: "SAE"}
421
+ # idx_to_scores = {0: [0.38, 0.36, 0.38, 0.42],
422
+ # 1: [0.56, 0.62, 0.54, 0.51],
423
+ # 2: [0.43, 0.41, 0.53, 0.49],
424
+ # 3: [0.26, 0.20, 0.32, 0.40],
425
+ # 4: ["-", "-", 0.33, "-"]}
426
+ # renamed_df.loc[0]["Method"] = "Full Vector"
427
+ # for i in range(5):
428
+ # renamed_df.loc[i] = [idx_to_method[i]] + idx_to_scores[i]
429
+
430
+ print(renamed_df)
431
+
432
  # Create only necessary columns
433
  return Leaderboard(
434
  value=renamed_df,
 
489
 
490
  # Define the preset substrings for filtering
491
  PRESET_SUBSTRINGS = ["IOI", "MCQA", "Arithmetic", "ARC", "GPT-2", "Qwen-2.5", "Gemma-2", "Llama-3.1"]
492
+ TASK_SUBSTRINGS = ["IOI", "MCQA", "Arithmetic", "ARC"]
493
+ MODEL_SUBSTRINGS = ["GPT-2", "Qwen-2.5", "Gemma-2", "Llama-3.1"]
494
 
495
def filter_columns_by_substrings(dataframe: pd.DataFrame, selected_task_substrings: List[str],
                                 selected_model_substrings: List[str]) -> pd.DataFrame:
    """
    Keep only the columns that match the selected task and model substrings.

    Matching is a case-insensitive substring test against the column name.
    The "Method" column is always kept.

    - Both selections empty: the dataframe is returned unchanged (same object).
    - Only one selection non-empty: a column is kept if it matches any
      substring of that selection.
    - Both non-empty: a column is kept only if it matches at least one task
      substring AND at least one model substring.

    Args:
        dataframe: Table whose columns are filtered.
        selected_task_substrings: Task substrings to match against column names.
        selected_model_substrings: Model substrings to match against column names.

    Returns:
        The input dataframe itself when nothing is selected, otherwise a
        column subset of it (original column order preserved).
    """
    if not selected_task_substrings and not selected_model_substrings:
        return dataframe  # No filtering if no substrings are selected

    def _matches(column, substrings) -> bool:
        # An empty selection places no constraint on the column.
        if not substrings:
            return True
        lowered = str(column).lower()
        return any(sub.lower() in lowered for sub in substrings)

    # AND the two selections together; an empty one is a no-op, which
    # reproduces the "tasks only" and "models only" cases without duplication.
    filtered_columns = [
        col for col in dataframe.columns
        if col == "Method"
        or (_matches(col, selected_task_substrings)
            and _matches(col, selected_model_substrings))
    ]
    return dataframe[filtered_columns]
534
 
535
def update_leaderboard(dataframe: pd.DataFrame, selected_task_substrings: List[str],
                       selected_model_substrings: List[str]):
    """
    Update the leaderboard based on the selected substrings.

    The columns are first filtered with ``filter_columns_by_substrings``.
    An "Average" column is then added, and rows sorted by it in descending
    order, only when neither selection contains exactly one entry (i.e. each
    selection is empty or has two or more entries).

    Args:
        dataframe: Full leaderboard table; it is not modified.
        selected_task_substrings: Task substrings currently selected.
        selected_model_substrings: Model substrings currently selected.

    Returns:
        The filtered dataframe. When the average is shown, missing values
        (including an average that could not be computed because a score in
        the row is missing or "-") are rendered as "-".
    """
    filtered_dataframe = filter_columns_by_substrings(dataframe, selected_task_substrings, selected_model_substrings)

    # Show the average only when neither selection has exactly one entry.
    show_average = all(
        len(selection) != 1
        for selection in (selected_task_substrings, selected_model_substrings)
    )
    if not show_average:
        return filtered_dataframe

    # The filter may hand back the caller's own dataframe or a slice of it;
    # copy before adding a column so the shared data is never mutated.
    filtered_dataframe = filtered_dataframe.copy()

    # Average over score columns only. "Method" holds strings, and a stale
    # "Average" column (if the input already carries one) must not feed
    # back into the new mean.
    score_columns = [
        col for col in filtered_dataframe.columns
        if col not in ("Method", "Average")
    ]
    # Placeholders such as "-" become NaN; with skipna=False a row with any
    # missing score gets a missing average rather than a partial one.
    scores = filtered_dataframe[score_columns].apply(pd.to_numeric, errors="coerce")
    filtered_dataframe["Average"] = scores.mean(axis=1, skipna=False).round(2)

    filtered_dataframe = filtered_dataframe.sort_values(by=["Average"], ascending=False, na_position='last')
    # Render every missing value as "-" for display.
    filtered_dataframe = filtered_dataframe.astype(object).where(filtered_dataframe.notna(), "-")

    return filtered_dataframe
568
 
569
  demo = gr.Blocks(css=custom_css)
 
599
  You can combine filters to see specific task-model combinations.
600
  """)
601
  # CheckboxGroup for selecting substrings
602
+ # substring_checkbox = gr.CheckboxGroup(
603
+ # choices=PRESET_SUBSTRINGS,
604
+ # label="Filter results:",
605
+ # value=PRESET_SUBSTRINGS, # Default to all substrings selected
606
+ # )
607
+ task_substring_checkbox = gr.CheckboxGroup(
608
+ choices=TASK_SUBSTRINGS,
609
+ label="View tasks:",
610
+ value=TASK_SUBSTRINGS, # Default to all substrings selected
611
+ )
612
+ model_substring_checkbox = gr.CheckboxGroup(
613
+ choices = MODEL_SUBSTRINGS,
614
+ label = "View models:",
615
+ value = MODEL_SUBSTRINGS
616
  )
617
+
618
  leaderboard, data = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH_FPL, "Subgraph")
619
  original_leaderboard = gr.State(value=data)
620
  # Update the leaderboard when the user selects/deselects substrings
621
+ task_substring_checkbox.change(
622
  fn=update_leaderboard,
623
+ inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
624
+ outputs=leaderboard
625
+ )
626
+ model_substring_checkbox.change(
627
+ fn=update_leaderboard,
628
+ inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
629
  outputs=leaderboard
630
  )
631
  print(f"Leaderboard is {leaderboard}")
 
637
  You can combine filters to see specific task-model combinations.
638
  """)
639
  # CheckboxGroup for selecting substrings
640
+ # substring_checkbox = gr.CheckboxGroup(
641
+ # choices=PRESET_SUBSTRINGS,
642
+ # label="Filter results:",
643
+ # value=PRESET_SUBSTRINGS, # Default to all substrings selected
644
+ # )
645
+ task_substring_checkbox = gr.CheckboxGroup(
646
+ choices=TASK_SUBSTRINGS,
647
+ label="View tasks:",
648
+ value=TASK_SUBSTRINGS, # Default to all substrings selected
649
+ )
650
+ model_substring_checkbox = gr.CheckboxGroup(
651
+ choices = MODEL_SUBSTRINGS,
652
+ label = "View models:",
653
+ value = MODEL_SUBSTRINGS
654
  )
655
+
656
  leaderboard, data = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH_FEQ, "Subgraph")
657
  original_leaderboard = gr.State(value=data)
658
  # Update the leaderboard when the user selects/deselects substrings
659
+ task_substring_checkbox.change(
660
  fn=update_leaderboard,
661
+ inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
662
+ outputs=leaderboard
663
+ )
664
+ model_substring_checkbox.change(
665
+ fn=update_leaderboard,
666
+ inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
667
  outputs=leaderboard
668
  )
669
  print(f"Leaderboard is {leaderboard}")
 
682
  Use the dropdown menus below to filter results by specific tasks or models.
683
  You can combine filters to see specific task-model combinations.
684
  """)
685
+ # substring_checkbox = gr.CheckboxGroup(
686
+ # choices=PRESET_SUBSTRINGS,
687
+ # label="Filter results:",
688
+ # value=PRESET_SUBSTRINGS, # Default to all substrings selected
689
+ # )
690
+ task_substring_checkbox = gr.CheckboxGroup(
691
+ choices=TASK_SUBSTRINGS,
692
+ label="View tasks:",
693
+ value=TASK_SUBSTRINGS, # Default to all substrings selected
694
+ )
695
+ model_substring_checkbox = gr.CheckboxGroup(
696
+ choices = MODEL_SUBSTRINGS,
697
+ label = "View models:",
698
+ value = MODEL_SUBSTRINGS
699
  )
700
+
701
  leaderboard_aggregated, data = init_leaderboard_mib_causalgraph(
702
  LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED,
703
  "Causal Graph"
704
  )
705
  original_leaderboard = gr.State(value=data)
706
+ task_substring_checkbox.change(
707
+ fn=update_leaderboard,
708
+ inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
709
+ outputs=leaderboard_aggregated
710
+ )
711
+ model_substring_checkbox.change(
712
  fn=update_leaderboard,
713
+ inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
714
  outputs=leaderboard_aggregated
715
  )
716
  with gr.TabItem("Intervention Averaged", id=2):