Aaron Mueller committed · Commit 7086c8a · 1 Parent(s): 3a309c3

formatting/filtering for causal variable

app.py CHANGED
@@ -38,7 +38,7 @@ from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_leader
 from src.submission.submit import add_new_eval
 
 
-from src.about import TasksMib_Subgraph
+from src.about import TasksMib_Subgraph, TasksMib_Causalgraph
 
 # class SmartSelectColumns(SelectColumns):
 #     """
@@ -389,21 +389,47 @@ def init_leaderboard_mib_causalgraph(dataframe, track):
     # print("Debugging column issues:")
     # print("\nActual DataFrame columns:")
     # print(dataframe.columns.tolist())
+
+    model_name_mapping = {
+        "Qwen2ForCausalLM": "Qwen-2.5",
+        "GPT2ForCausalLM": "GPT-2",
+        "Gemma2ForCausalLM": "Gemma-2",
+        "LlamaForCausalLM": "Llama-3.1"
+    }
+
+    benchmark_mapping = {
+        "IOI": "IOI",
+        "MCQA": "MCQA",
+        "arithmetic_addition": "Arithmetic (+)",
+        "arithmetic_subtraction": "Arithmetic (-)",
+        "arc_easy": "ARC (Easy)",
+        "arc_challenge": "ARC (Challenge)"
+    }
+
+    display_mapping = {}
+    for task in TasksMib_Causalgraph:
+        for model in task.value.models:
+            field_name = f"{task.value.col_name}_{model}"
+            display_name = f"{benchmark_mapping[task.value.col_name]} - {model_name_mapping[model]}"
+            display_mapping[field_name] = display_name
+
+    # print(dataframe)
+    renamed_df = dataframe.rename(columns=display_mapping)
 
     # Create only necessary columns
     return Leaderboard(
-        value=dataframe,
+        value=renamed_df,
         datatype=[c.type for c in fields(AutoEvalColumn_mib_causalgraph)],
-        select_columns=SelectColumns(
-            default_selection=["Method"],  # Start with just Method column
-            cant_deselect=["Method"],  # Method column should always be visible
-            label="Select Columns to Display:",
-        ),
+        # select_columns=SelectColumns(
+        #     default_selection=["Method"],  # Start with just Method column
+        #     cant_deselect=["Method"],  # Method column should always be visible
+        #     label="Select Columns to Display:",
+        # ),
         search_columns=["Method"],
-        hide_columns=[],
+        hide_columns=["eval_name"],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
-    )
+    ), renamed_df
 
 
 def init_leaderboard(dataframe, track):
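For reference, the display_mapping loop above turns each raw {col_name}_{model} result field into a human-readable header. A sketch of what it evaluates to, derived from the four TasksMib_Causalgraph entries added in src/about.py below (the literal is illustrative, not code from this commit):

# Sketch: the field-to-display-name pairs the loop generates, given the
# enum entries defined in src/about.py below.
display_mapping = {
    "IOI_GPT2ForCausalLM": "IOI - GPT-2",
    "MCQA_Qwen2ForCausalLM": "MCQA - Qwen-2.5",
    "MCQA_Gemma2ForCausalLM": "MCQA - Gemma-2",
    "MCQA_LlamaForCausalLM": "MCQA - Llama-3.1",
    "arithmetic_addition_Gemma2ForCausalLM": "Arithmetic (+) - Gemma-2",
    "arithmetic_addition_LlamaForCausalLM": "Arithmetic (+) - Llama-3.1",
    "arc_easy_Gemma2ForCausalLM": "ARC (Easy) - Gemma-2",
    "arc_easy_LlamaForCausalLM": "ARC (Easy) - Llama-3.1",
}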
@@ -497,7 +523,7 @@ with demo:
 
     # with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
     #     leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
-    with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
+    with gr.TabItem("Circuit Localization", elem_id="subgraph", id=0):
         with gr.Tabs() as subgraph_tabs:
             with gr.TabItem("F+", id=0):
                 # Add description for filters
@@ -545,20 +571,36 @@ with demo:
                 print(f"Leaderboard is {leaderboard}")
 
     # Then modify the Causal Graph tab section
-    with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
+    with gr.TabItem("Causal Variable Localization", elem_id="causalgraph", id=1):
         with gr.Tabs() as causalgraph_tabs:
             with gr.TabItem("Detailed View", id=0):
-                leaderboard_detailed = init_leaderboard_mib_causalgraph(
+                leaderboard_detailed, data = init_leaderboard_mib_causalgraph(
                     LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED,
                     "Causal Graph"
                 )
             with gr.TabItem("Aggregated View", id=1):
+                gr.Markdown("""
+                ### Filtering Options
+                Use the dropdown menus below to filter results by specific tasks or models.
+                You can combine filters to see specific task-model combinations.
+                """)
+                substring_checkbox = gr.CheckboxGroup(
+                    choices=PRESET_SUBSTRINGS,
+                    label="Filter results:",
+                    value=PRESET_SUBSTRINGS,  # Default to all substrings selected
+                )
-                leaderboard_aggregated = init_leaderboard_mib_causalgraph(
+                leaderboard_aggregated, data = init_leaderboard_mib_causalgraph(
                     LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED,
                     "Causal Graph"
                 )
+                original_leaderboard = gr.State(value=data)
+                substring_checkbox.change(
+                    fn=update_leaderboard,
+                    inputs=[original_leaderboard, substring_checkbox],
+                    outputs=leaderboard_aggregated
+                )
             with gr.TabItem("Intervention Averaged", id=2):
-                leaderboard_averaged = init_leaderboard_mib_causalgraph(
+                leaderboard_averaged, data = init_leaderboard_mib_causalgraph(
                     LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
                     "Causal Graph"
                 )
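Note that update_leaderboard and PRESET_SUBSTRINGS are referenced by the wiring above but not defined in this diff; they presumably live elsewhere in app.py. A hypothetical sketch of a compatible callback, filtering the renamed columns by the checked substrings while keeping the always-visible Method column (names and behavior are assumptions, not taken from this commit):

import pandas as pd

# Hypothetical stand-in for the preset filter strings defined elsewhere in
# app.py; the values here mirror the display names built by display_mapping.
PRESET_SUBSTRINGS = ["IOI", "MCQA", "Arithmetic", "ARC",
                     "GPT-2", "Qwen-2.5", "Gemma-2", "Llama-3.1"]

def update_leaderboard(original_df: pd.DataFrame, selected_substrings: list[str]) -> pd.DataFrame:
    # Keep "Method" plus every column whose header contains at least one of
    # the checked substrings; with nothing checked, only Method remains.
    keep = ["Method"] + [
        col for col in original_df.columns
        if col != "Method" and any(s in col for s in selected_substrings)
    ]
    return original_df[keep]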
 
src/__pycache__/about.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/about.cpython-310.pyc and b/src/__pycache__/about.cpython-310.pyc differ
 
src/__pycache__/populate.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/populate.cpython-310.pyc and b/src/__pycache__/populate.cpython-310.pyc differ
 
src/about.py CHANGED
@@ -98,7 +98,10 @@ class TaskMIB_Causalgraph:
     metrics: list[str]  # metrics to store (average_score)
 
 class TasksMib_Causalgraph(Enum):
-    task0 = TaskMIB_Subgraph("mcqa", ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"], "MCQA", ["average_score"])
+    task0 = TaskMIB_Subgraph("ioi", ["GPT2ForCausalLM"], "IOI", ["average_score"])
+    task1 = TaskMIB_Subgraph("mcqa", ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"], "MCQA", ["average_score"])
+    task2 = TaskMIB_Subgraph("arithmetic_addition", ["Gemma2ForCausalLM", "LlamaForCausalLM"], "arithmetic_addition", ["average_score"])
+    task3 = TaskMIB_Subgraph("arc_easy", ["Gemma2ForCausalLM", "LlamaForCausalLM"], "arc_easy", ["average_score"])
 
     @classmethod
     def get_all_tasks(cls):
 
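The renaming loop in app.py consumes these entries via task.value.models and task.value.col_name. A quick sketch of iterating the expanded enum (assuming the field names implied by that app.py usage):

from src.about import TasksMib_Causalgraph

# Sketch: what the expanded enum exposes to the renaming loop in app.py.
for task in TasksMib_Causalgraph:
    print(task.value.col_name, task.value.models)
# IOI ['GPT2ForCausalLM']
# MCQA ['Qwen2ForCausalLM', 'Gemma2ForCausalLM', 'LlamaForCausalLM']
# arithmetic_addition ['Gemma2ForCausalLM', 'LlamaForCausalLM']
# arc_easy ['Gemma2ForCausalLM', 'LlamaForCausalLM']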
src/display/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/src/display/__pycache__/utils.cpython-310.pyc and b/src/display/__pycache__/utils.cpython-310.pyc differ
 
src/leaderboard/__pycache__/read_evals.cpython-310.pyc CHANGED
Binary files a/src/leaderboard/__pycache__/read_evals.cpython-310.pyc and b/src/leaderboard/__pycache__/read_evals.cpython-310.pyc differ