jasonshaoshun committed on
Commit
a2e0e8f
·
1 Parent(s): 2ba536b

Update causal graph page to display separate views for average and highest values

Browse files
Files changed (2) hide show
  1. app.py +35 -7
  2. src/leaderboard/read_evals.py +1 -1
app.py CHANGED
@@ -681,12 +681,12 @@ with demo:
681
  # Then modify the Causal Graph tab section
682
  with gr.TabItem("Causal Variable Localization", elem_id="causalgraph", id=1):
683
  with gr.Tabs() as causalgraph_tabs:
684
- with gr.TabItem("Detailed View", id=0):
685
- leaderboard_detailed, data = init_leaderboard_mib_causalgraph(
686
- LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED,
687
- "Causal Graph"
688
- )
689
- with gr.TabItem("Aggregated View", id=1):
690
  gr.Markdown("""
691
  ### Filtering Options
692
  Use the dropdown menus below to filter results by specific tasks or models.
@@ -718,11 +718,39 @@ with demo:
718
  inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
719
  outputs=leaderboard_aggregated
720
  )
721
- with gr.TabItem("Intervention Averaged", id=2):
 
 
 
 
 
 
 
 
 
 
 
 
722
  leaderboard_averaged, data = init_leaderboard_mib_causalgraph(
723
  LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
724
  "Causal Graph"
725
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
 
727
  with gr.TabItem("Submit", elem_id="llm-benchmark-tab-table", id=2):
728
  # Track selection
 
681
  # Then modify the Causal Graph tab section
682
  with gr.TabItem("Causal Variable Localization", elem_id="causalgraph", id=1):
683
  with gr.Tabs() as causalgraph_tabs:
684
+ # with gr.TabItem("Detailed View", id=0):
685
+ # leaderboard_detailed, data = init_leaderboard_mib_causalgraph(
686
+ # LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED,
687
+ # "Causal Graph"
688
+ # )
689
+ with gr.TabItem("Highest View", id=0):
690
  gr.Markdown("""
691
  ### Filtering Options
692
  Use the dropdown menus below to filter results by specific tasks or models.
 
718
  inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
719
  outputs=leaderboard_aggregated
720
  )
721
+ with gr.TabItem("Averaged View", id=1):
722
+
723
+ task_substring_checkbox = gr.CheckboxGroup(
724
+ choices=TASK_SUBSTRINGS,
725
+ label="View tasks:",
726
+ value=TASK_SUBSTRINGS, # Default to all substrings selected
727
+ )
728
+ model_substring_checkbox = gr.CheckboxGroup(
729
+ choices = MODEL_SUBSTRINGS,
730
+ label = "View models:",
731
+ value = MODEL_SUBSTRINGS
732
+ )
733
+
734
  leaderboard_averaged, data = init_leaderboard_mib_causalgraph(
735
  LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
736
  "Causal Graph"
737
  )
738
+ original_leaderboard = gr.State(value=data)
739
+ task_substring_checkbox.change(
740
+ fn=update_leaderboard,
741
+ inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
742
+ outputs=leaderboard_averaged
743
+ )
744
+ model_substring_checkbox.change(
745
+ fn=update_leaderboard,
746
+ inputs=[original_leaderboard, task_substring_checkbox, model_substring_checkbox],
747
+ outputs=leaderboard_averaged
748
+ )
749
+
750
+ # leaderboard_averaged, data = init_leaderboard_mib_causalgraph(
751
+ # LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
752
+ # "Causal Graph"
753
+ # )
754
 
755
  with gr.TabItem("Submit", elem_id="llm-benchmark-tab-table", id=2):
756
  # Track selection
src/leaderboard/read_evals.py CHANGED
@@ -508,7 +508,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
508
  # intervention_averaged_mean_df = create_intervention_averaged_df(detailed_df_mean)
509
 
510
  # return detailed_df_highest, detailed_df_mean, intervention_averaged_highest_df
511
- return detailed_df_highest, detailed_df_mean, detailed_df_mean
512
 
513
 
514
 
 
508
  # intervention_averaged_mean_df = create_intervention_averaged_df(detailed_df_mean)
509
 
510
  # return detailed_df_highest, detailed_df_mean, intervention_averaged_highest_df
511
+ return detailed_df_highest, detailed_df_highest, detailed_df_mean
512
 
513
 
514