Commit 7086c8a · Aaron Mueller · Parent: 3a309c3

formatting/filtering for causal variable
app.py
CHANGED
@@ -38,7 +38,7 @@ from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_leader
 from src.submission.submit import add_new_eval


-from src.about import TasksMib_Subgraph
+from src.about import TasksMib_Subgraph, TasksMib_Causalgraph

 # class SmartSelectColumns(SelectColumns):
 #     """
@@ -389,21 +389,47 @@ def init_leaderboard_mib_causalgraph(dataframe, track):
     # print("Debugging column issues:")
     # print("\nActual DataFrame columns:")
     # print(dataframe.columns.tolist())
+
+    model_name_mapping = {
+        "Qwen2ForCausalLM": "Qwen-2.5",
+        "GPT2ForCausalLM": "GPT-2",
+        "Gemma2ForCausalLM": "Gemma-2",
+        "LlamaForCausalLM": "Llama-3.1"
+    }
+
+    benchmark_mapping = {
+        "IOI": "IOI",
+        "MCQA": "MCQA",
+        "arithmetic_addition": "Arithmetic (+)",
+        "arithmetic_subtraction": "Arithmetic (-)",
+        "arc_easy": "ARC (Easy)",
+        "arc_challenge": "ARC (Challenge)"
+    }
+
+    display_mapping = {}
+    for task in TasksMib_Causalgraph:
+        for model in task.value.models:
+            field_name = f"{task.value.col_name}_{model}"
+            display_name = f"{benchmark_mapping[task.value.col_name]} - {model_name_mapping[model]}"
+            display_mapping[field_name] = display_name
+
+    # print(dataframe)
+    renamed_df = dataframe.rename(columns=display_mapping)

     # Create only necessary columns
     return Leaderboard(
-        value=dataframe,
+        value=renamed_df,
         datatype=[c.type for c in fields(AutoEvalColumn_mib_causalgraph)],
-        select_columns=SelectColumns(
-            default_selection=["Method"], # Start with just Method column
-            cant_deselect=["Method"], # Method column should always be visible
-            label="Select Columns to Display:",
-        ),
+        # select_columns=SelectColumns(
+        #     default_selection=["Method"], # Start with just Method column
+        #     cant_deselect=["Method"], # Method column should always be visible
+        #     label="Select Columns to Display:",
+        # ),
         search_columns=["Method"],
-        hide_columns=[],
+        hide_columns=["eval_name"],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
-    )
+    ), renamed_df


 def init_leaderboard(dataframe, track):
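For reference: the mapping pass above turns raw result columns such as "IOI_GPT2ForCausalLM" into display labels such as "IOI - GPT-2" before the DataFrame reaches the Leaderboard widget, and the function now returns a (Leaderboard, DataFrame) pair so callers can keep the renamed frame for later filtering. A minimal, self-contained sketch of the renaming step (the toy DataFrame and score value are invented for illustration):

import pandas as pd

# Mappings as in the hunk above (abbreviated).
model_name_mapping = {"GPT2ForCausalLM": "GPT-2", "Qwen2ForCausalLM": "Qwen-2.5"}
benchmark_mapping = {"IOI": "IOI", "MCQA": "MCQA"}

# One (task, model) pair; the real loop iterates TasksMib_Causalgraph
# and task.value.models to build every column's display name.
display_mapping = {
    "IOI_GPT2ForCausalLM": f"{benchmark_mapping['IOI']} - {model_name_mapping['GPT2ForCausalLM']}"
}

# Toy frame standing in for LEADERBOARD_DF_MIB_CAUSALGRAPH_*.
df = pd.DataFrame({"Method": ["DAS"], "IOI_GPT2ForCausalLM": [0.71]})
renamed_df = df.rename(columns=display_mapping)
print(renamed_df.columns.tolist())  # ['Method', 'IOI - GPT-2']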
@@ -497,7 +523,7 @@ with demo:

     # with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
     #     leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
-    with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
+    with gr.TabItem("Circuit Localization", elem_id="subgraph", id=0):
         with gr.Tabs() as subgraph_tabs:
             with gr.TabItem("F+", id=0):
                 # Add description for filters
@@ -545,20 +571,36 @@ with demo:
     print(f"Leaderboard is {leaderboard}")

     # Then modify the Causal Graph tab section
-    with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
+    with gr.TabItem("Causal Variable Localization", elem_id="causalgraph", id=1):
         with gr.Tabs() as causalgraph_tabs:
             with gr.TabItem("Detailed View", id=0):
-                leaderboard_detailed = init_leaderboard_mib_causalgraph(
+                leaderboard_detailed, data = init_leaderboard_mib_causalgraph(
                     LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED,
                     "Causal Graph"
                 )
             with gr.TabItem("Aggregated View", id=1):
-                leaderboard_aggregated = init_leaderboard_mib_causalgraph(
+                gr.Markdown("""
+                ### Filtering Options
+                Use the dropdown menus below to filter results by specific tasks or models.
+                You can combine filters to see specific task-model combinations.
+                """)
+                substring_checkbox = gr.CheckboxGroup(
+                    choices=PRESET_SUBSTRINGS,
+                    label="Filter results:",
+                    value=PRESET_SUBSTRINGS,  # Default to all substrings selected
+                )
+                leaderboard_aggregated, data = init_leaderboard_mib_causalgraph(
                     LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED,
                     "Causal Graph"
                 )
+                original_leaderboard = gr.State(value=data)
+                substring_checkbox.change(
+                    fn=update_leaderboard,
+                    inputs=[original_leaderboard, substring_checkbox],
+                    outputs=leaderboard_aggregated
+                )
             with gr.TabItem("Intervention Averaged", id=2):
-                leaderboard_averaged = init_leaderboard_mib_causalgraph(
+                leaderboard_averaged, data = init_leaderboard_mib_causalgraph(
                     LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
                     "Causal Graph"
                 )
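The Aggregated View wiring references PRESET_SUBSTRINGS and update_leaderboard, which are defined elsewhere in app.py and do not appear in this diff. A hedged sketch of what that callback could look like, assuming the checkbox filters columns of the renamed frame by substring match (names, signature, and behavior here are guesses for illustration, not the Space's actual code):

import pandas as pd

# Hypothetical presets; the real PRESET_SUBSTRINGS lives elsewhere in app.py.
PRESET_SUBSTRINGS = ["IOI", "MCQA", "Arithmetic", "ARC",
                     "GPT-2", "Qwen-2.5", "Gemma-2", "Llama-3.1"]

def update_leaderboard(original_df: pd.DataFrame, selected: list[str]) -> pd.DataFrame:
    # Assumed behavior: always keep "Method", plus any column whose
    # display name contains at least one checked substring.
    keep = ["Method"] + [
        col for col in original_df.columns
        if col != "Method" and any(s in col for s in selected)
    ]
    return original_df[keep]

Because original_leaderboard is a gr.State holding the renamed DataFrame, a callback of this shape can re-derive the filtered view from the full frame on every change rather than filtering an already-filtered table.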
src/__pycache__/about.cpython-310.pyc
CHANGED
Binary files a/src/__pycache__/about.cpython-310.pyc and b/src/__pycache__/about.cpython-310.pyc differ
src/__pycache__/populate.cpython-310.pyc
CHANGED
Binary files a/src/__pycache__/populate.cpython-310.pyc and b/src/__pycache__/populate.cpython-310.pyc differ
src/about.py
CHANGED
@@ -98,7 +98,10 @@ class TaskMIB_Causalgraph:
     metrics: list[str]  # metrics to store (average_score)

 class TasksMib_Causalgraph(Enum):
-    task0 = TaskMIB_Subgraph("
+    task0 = TaskMIB_Subgraph("ioi", ["GPT2ForCausalLM"], "IOI", ["average_score"])
+    task1 = TaskMIB_Subgraph("mcqa", ["Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"], "MCQA", ["average_score"])
+    task2 = TaskMIB_Subgraph("arithmetic_addition", ["Gemma2ForCausalLM", "LlamaForCausalLM"], "arithmetic_addition", ["average_score"])
+    task3 = TaskMIB_Subgraph("arc_easy", ["Gemma2ForCausalLM", "LlamaForCausalLM"], "arc_easy", ["average_score"])

     @classmethod
     def get_all_tasks(cls):
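The column keys that app.py's display_mapping expects come straight from this enum: one f"{col_name}_{model}" key per task-model pair. A standalone sketch (the dataclass fields are assumed to mirror src/about.py and may be named differently there; only two tasks shown):

from dataclasses import dataclass
from enum import Enum

# Stand-in mirroring TaskMIB_Subgraph's apparent fields, for illustration only.
@dataclass
class TaskMIB_Subgraph:
    benchmark: str
    models: list[str]
    col_name: str
    metrics: list[str]

class TasksMib_Causalgraph(Enum):
    task0 = TaskMIB_Subgraph("ioi", ["GPT2ForCausalLM"], "IOI", ["average_score"])
    task1 = TaskMIB_Subgraph("mcqa", ["Qwen2ForCausalLM", "Gemma2ForCausalLM",
                                      "LlamaForCausalLM"], "MCQA", ["average_score"])

for task in TasksMib_Causalgraph:
    for model in task.value.models:
        print(f"{task.value.col_name}_{model}")
# IOI_GPT2ForCausalLM
# MCQA_Qwen2ForCausalLM
# MCQA_Gemma2ForCausalLM
# MCQA_LlamaForCausalLM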
src/display/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/src/display/__pycache__/utils.cpython-310.pyc and b/src/display/__pycache__/utils.cpython-310.pyc differ
src/leaderboard/__pycache__/read_evals.cpython-310.pyc
CHANGED
Binary files a/src/leaderboard/__pycache__/read_evals.cpython-310.pyc and b/src/leaderboard/__pycache__/read_evals.cpython-310.pyc differ