Aaron Mueller committed on
Commit e1faa87 · 1 Parent(s): f59c752

updates for causal variable track
app.py CHANGED

@@ -303,13 +303,15 @@ LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGAT
 (
     finished_eval_queue_df_subgraph,
     pending_eval_queue_df_subgraph,
-) = get_evaluation_queue_df(EVAL_REQUESTS_SUBGRAPH, EVAL_COLS)
+) = get_evaluation_queue_df(EVAL_REQUESTS_SUBGRAPH, EVAL_COLS, "Circuit")

-# (
-#     finished_eval_queue_df_causalvariable,
-#     pending_eval_queue_df_causalvariable,
-# ) = get_evaluation_queue_df(EVAL_REQUESTS_CAUSALGRAPH, EVAL_COLS)
+(
+    finished_eval_queue_df_causalvariable,
+    pending_eval_queue_df_causalvariable,
+) = get_evaluation_queue_df(EVAL_REQUESTS_CAUSALGRAPH, EVAL_COLS, "Causal Variable")

+finished_eval_queue = pd.concat((finished_eval_queue_df_subgraph, finished_eval_queue_df_causalvariable))
+pending_eval_queue = pd.concat((pending_eval_queue_df_subgraph, pending_eval_queue_df_causalvariable))

 def init_leaderboard_mib_subgraph(dataframe, track):
     """Initialize the subgraph leaderboard with display names for better readability."""

@@ -800,6 +802,7 @@ with demo:
         # with gr.Group(visible=False) as causal_ui:
         with gr.Column(visible=False, elem_id="bordered-column") as causal_ui:
             gr.Markdown(EVALUATION_QUEUE_TEXT_CAUSALVARIABLE, elem_classes="markdown-text")
+            """
             with gr.Row():
                 layer = gr.Number(
                     label="Layer Number",

@@ -813,15 +816,16 @@ with demo:
                     minimum=0,
                     info="Integer specifying token position"
                 )
+            """
            with gr.Row():
                 hf_repo_cg = gr.Textbox(
                     label="HuggingFace Repository URL",
                     placeholder="https://huggingface.co/username/repo/path",
                     info="Must be a valid HuggingFace URL pointing to a file containing the trained featurizer (.pt). " )
-                code_upload = gr.File(
-                    label="Upload Python file implementing your featurization function",
-                    file_types=[".py"],
-                )
+                # code_upload = gr.File(
+                #     label="Upload Python file implementing your featurization function",
+                #     file_types=[".py"],
+                # )

            # Common fields
            with gr.Group():

@@ -843,7 +847,7 @@ with demo:
        # Submission handling
        status = gr.Textbox(label="Submission Status", visible=False)

-        def handle_submission(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email):
+        def handle_submission(track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email):
            errors = []
            warnings = []

@@ -856,7 +860,7 @@ with demo:
                errors.append("Method name is required")
            if "@" not in contact_email or "." not in contact_email:
                errors.append("Valid email address is required")
-            if not level:
+            if "Circuit" in track and not level:
                errors.append("Level of granularity is required")

            if not hf_repo.startswith("https://huggingface.co/") and not hf_repo.startswith("http://huggingface.co/"):

@@ -880,12 +884,12 @@ with demo:
                submission_errors, submission_warnings = verify_circuit_submission(hf_repo, level)

            elif not breaking_error:
-                if not (isinstance(layer, int) and isinstance(token_position, int)):
-                    errors.append("Layer and token position must be integers")
-                if not code_upload:
-                    errors.append("Code file upload is required")
+                # if not (isinstance(layer, int) and isinstance(token_position, int)):
+                #     errors.append("Layer and token position must be integers")
+                # if not code_upload:
+                #     errors.append("Code file upload is required")

-                submission_errors, submission_warnings = verify_causal_variable_submission(hf_repo, layer, token_position, code_upload)
+                submission_errors, submission_warnings = verify_causal_variable_submission(hf_repo)

            if not breaking_error:
                errors.extend(submission_errors)

@@ -901,12 +905,12 @@ with demo:
            elif warnings:
                return [
                    gr.Textbox("Warnings:", visible=True),
-                    gr.Markdown("\n".join(f"• {w}" for w in warnings)),
-                    (track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email, _id),
+                    gr.Markdown("\n\n".join(f"• {w}" for w in warnings)),
+                    (track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email, _id),
                    gr.Column(visible=True)
                ]
            else:
-                return upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email, _id)
+                return upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email, _id)

        # New warning confirmation dialog
        warning_modal = gr.Column(visible=False, variant="panel")

@@ -922,7 +926,7 @@ with demo:
        submit_btn = gr.Button("Submit Entry", variant="primary")
        submit_btn.click(
            handle_submission,
-            inputs=[track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email],
+            inputs=[track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email],
            outputs=[status, warning_display, pending_submission, warning_modal]
        )

@@ -939,24 +943,24 @@ with demo:

        with gr.Column():
            with gr.Accordion(
-                f"✅ Finished Evaluations ({len(finished_eval_queue_df_subgraph)})",
+                f"✅ Finished Evaluations ({len(finished_eval_queue)})",
                open=False,
            ):
                with gr.Row():
                    finished_eval_table = gr.components.Dataframe(
-                        value=finished_eval_queue_df_subgraph,
+                        value=finished_eval_queue,
                        headers=EVAL_COLS,
                        datatype=EVAL_TYPES,
                        row_count=5,
                    )

            with gr.Accordion(
-                f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df_subgraph)})",
+                f"⏳ Pending Evaluation Queue ({len(pending_eval_queue)})",
                open=False,
            ):
                with gr.Row():
                    pending_eval_table = gr.components.Dataframe(
-                        value=pending_eval_queue_df_subgraph,
+                        value=pending_eval_queue,
                        headers=EVAL_COLS,
                        datatype=EVAL_TYPES,
                        row_count=5,
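
In effect, the submission handler now branches on the track with far fewer inputs. A minimal sketch of the resulting control flow (hand-written here for illustration; the real handle_submission above also accumulates errors and warnings and handles early exits):

def route_validation(track, hf_repo_circ, hf_repo_cg, level):
    # The circuit track still requires a granularity level; the causal
    # variable track now needs only the repository URL, since the layer,
    # token position, and code-upload inputs were dropped in this commit.
    if "Circuit" in track:
        return verify_circuit_submission(hf_repo_circ, level)
    return verify_causal_variable_submission(hf_repo_cg)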
src/about.py CHANGED

@@ -145,19 +145,20 @@ EVALUATION_QUEUE_TEXT_CAUSALVARIABLE = """

 ### 1. Collect your materials
 You'll need the following:
-* A trained featurizer saved as a .pt object.
-* A python function that can load and run forward passes with your featurizer.
-* A dynamic token alignment function.
-* A hypothesized feature location.
+* Trained featurizer, inverse featurizer, and indices objects.
+* A python file containing the implementation of your featurizer and inverse featurizer.
+* (Optional) Dynamic token alignment functions, provided in another python file.

 ### 2. Upload your materials
 Create a HuggingFace repository, and create a folder in that repository that will hold all of your materials.
-At the URL you provide, each of the above materials should be present. We will take the first python script lexicographically
-as your featurizer function, and the first .pt file lexicographically as your featurizer.
+At the URL you provide (we'll call this the "root"), each of the above materials should be present. At the linked folder,
+we will take the first python script lexicographically at the root as the featurizer script. Within that folder, we expect
+one subfolder per model/task/causal variable triplet. Each subfolder should contain the trained featurizer, inverse featurizer,
+and indices.

 ### 3. Manage your submission in the queue
 If your submission passes all checks, it will be added to the queue. You will receive a submission ID here when you do this; be sure to save it!
-This will allow you to remove your submission from the queue (e.g., if you find a bug in your circuits). This will prevent you from needing to wait until
+This will allow you to remove your submission from the queue (e.g., if you find a bug). This will prevent you from needing to wait until
 next week to resubmit.

 Before your submission has been validated by our backend, it will have the "PREVALIDATION" status in the queue. Once it has been validated, it will have the "PENDING" status.
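
To make the expected layout concrete, here is a hypothetical repository tree that matches these instructions and the filename checks in src/submission/check_validity.py (all file and folder names are illustrative, not required):

my-submission/                                    <- the linked "root"
    featurizer_impl.py                            <- featurizer + inverse featurizer classes
    token_positions.py                            <- optional token alignment functions
    ioi_task_GPT2LMHeadModel_output_token/        <- one subfolder per model/task/variable
        residual_layer:7_token:last_featurizer
        residual_layer:7_token:last_inverse_featurizer
        residual_layer:7_token:last_indices
    4_answer_MCQA_Qwen2ForCausalLM_answer_pointer/
        ...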
src/display/utils.py CHANGED

@@ -192,6 +192,7 @@ AutoEvalColumn_mib_causalgraph = make_dataclass(
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
+    track_name = ColumnContent("track", "str", True)
     method_name = ColumnContent("method_name", "str", True)
     repo_id = ColumnContent("hf_repo", "markdown", True)
     revision = ColumnContent("revision", "str", True)
src/leaderboard/read_evals.py CHANGED

@@ -202,16 +202,6 @@ def get_raw_eval_results_mib_subgraph(results_path: str) -> List[EvalResult_MIB_
     return eval_results


-
-
-
-
-
-
-
-
-
-
 def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
     """
     Aggregates rows with the same base method name by taking the max value for each column.

@@ -462,7 +452,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram

     # Create the detailed DataFrame for highest accuracy
     highest_records = list(highest_method_groups.values())
-    detailed_df_highest = pd.DataFrame(highest_records)
+    detailed_df_highest = pd.DataFrame(highest_records).round(3).fillna("-")

     # Process mean accuracy results
     # Group results by method

@@ -482,7 +472,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram

     # Create the detailed DataFrame for mean accuracy
     mean_records = list(mean_method_groups.values())
-    detailed_df_mean = pd.DataFrame(mean_records)
+    detailed_df_mean = pd.DataFrame(mean_records).round(3).fillna("-")

     if detailed_df_highest.empty or detailed_df_mean.empty:
         return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

@@ -492,17 +482,39 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
     score_columns_mean = [col for col in detailed_df_mean.columns if col not in ["eval_name", "Method"]]

     if score_columns_highest:
-        detailed_df_highest["Average"] = detailed_df_highest[score_columns_highest].mean(axis=1).round(3)
+        # detailed_df_highest["Average"] = detailed_df_highest[score_columns_highest].mean(axis=1).round(3)
+        # Check if any cell in the row contains '-'
+        has_dash = detailed_df_highest[score_columns_highest].eq('-').any(axis=1)
+        detailed_df_highest['Average'] = '-'
+        mask = ~has_dash
+        if mask.any():
+            numeric_values = detailed_df_highest.loc[mask, score_columns_highest].apply(pd.to_numeric)
+            detailed_df_highest.loc[mask, 'Average'] = numeric_values.mean(axis=1).round(3)

     if score_columns_mean:
-        detailed_df_mean["Average"] = detailed_df_mean[score_columns_mean].mean(axis=1).round(3)
+        # detailed_df_mean["Average"] = detailed_df_mean[score_columns_mean].mean(axis=1).round(3)
+        has_dash = detailed_df_mean[score_columns_mean].eq('-').any(axis=1)
+        detailed_df_mean['Average'] = '-'
+        mask = ~has_dash
+        if mask.any():
+            numeric_values = detailed_df_mean.loc[mask, score_columns_mean].apply(pd.to_numeric)
+            detailed_df_mean.loc[mask, 'Average'] = numeric_values.mean(axis=1).round(3)

     # Sort by Average descending
     if "Average" in detailed_df_highest.columns:
-        detailed_df_highest = detailed_df_highest.sort_values("Average", ascending=False)
+        # Convert '-' to NaN for sorting purposes
+        detailed_df_highest['Average'] = pd.to_numeric(detailed_df_highest['Average'], errors='coerce')
+        detailed_df_highest = detailed_df_highest.sort_values(by=['Average'], ascending=False, na_position='last')
+        # Convert NaN back to '-'
+        detailed_df_highest['Average'] = detailed_df_highest['Average'].fillna('-')
+        # detailed_df_highest = detailed_df_highest.sort_values("Average", ascending=False).round(3)

     if "Average" in detailed_df_mean.columns:
-        detailed_df_mean = detailed_df_mean.sort_values("Average", ascending=False)
+        detailed_df_mean['Average'] = pd.to_numeric(detailed_df_mean['Average'], errors='coerce')
+        detailed_df_mean = detailed_df_mean.sort_values(by=['Average'], ascending=False, na_position='last')
+        # Convert NaN back to '-'
+        detailed_df_mean['Average'] = detailed_df_mean['Average'].fillna('-')
+        # detailed_df_mean = detailed_df_mean.sort_values("Average", ascending=False).round(3)

     # # Create intervention-averaged DataFrames for both metrics
     # intervention_averaged_highest_df = create_intervention_averaged_df(detailed_df_highest)
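
The dash-aware averaging above keeps methods with missing scores out of the numeric ranking. A self-contained sketch of the behavior on toy data (the column names are invented for the example):

import pandas as pd

df = pd.DataFrame({"Method": ["A", "B"],
                   "ioi_gpt2": [0.91, "-"],
                   "mcqa_llama3": [0.84, 0.77]})
score_cols = ["ioi_gpt2", "mcqa_llama3"]

has_dash = df[score_cols].eq("-").any(axis=1)   # True only for method B
df["Average"] = "-"
mask = ~has_dash
if mask.any():
    numeric = df.loc[mask, score_cols].apply(pd.to_numeric)
    df.loc[mask, "Average"] = numeric.mean(axis=1).round(3)
# Method A averages to 0.875; method B keeps "-" because one score is missing.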
src/populate.py CHANGED

@@ -34,7 +34,6 @@ def get_leaderboard_df_mib_subgraph(results_path: str, cols: list, benchmark_col



-
 def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
     """Aggregates rows with the same base method name by taking the max value for each column"""
     df_copy = df.copy()

@@ -139,11 +138,13 @@ def get_leaderboard_df_mib_causalgraph(results_path: str) -> Tuple[pd.DataFrame,
     return detailed_df, aggregated_df, intervention_averaged_df


-def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
+def get_evaluation_queue_df(save_path: str, cols: list, track: str) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requests"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []

+    print(track)
+
     for entry in entries:
         if ".json" in entry:
             file_path = os.path.join(save_path, entry)

@@ -175,7 +176,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL" or e["status"] == "FAILED"]
     for list in (pending_list, finished_list):
         for item in list:
-            item["track"] = "Circuit Localization"
+            item["track"] = track
             item["hf_repo"] = parse_huggingface_url(item["hf_repo"])[0]
     df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
     df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
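
With the new track parameter, the app-level call sites (see the app.py hunks above) label each queue before merging the two tracks into one table, roughly:

finished_circ, pending_circ = get_evaluation_queue_df(EVAL_REQUESTS_SUBGRAPH, EVAL_COLS, "Circuit")
finished_cv, pending_cv = get_evaluation_queue_df(EVAL_REQUESTS_CAUSALGRAPH, EVAL_COLS, "Causal Variable")
finished_eval_queue = pd.concat((finished_circ, finished_cv))
pending_eval_queue = pd.concat((pending_circ, pending_cv))

Every row then carries a "track" value that the new EvalQueueColumn.track_name entry in src/display/utils.py surfaces in the queue tables.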
src/submission/check_validity.py CHANGED

@@ -8,7 +8,7 @@ import gradio as gr
 from urllib.parse import urlparse
 from collections import defaultdict
 from datetime import datetime, timedelta, timezone
-from typing import Literal
+from typing import Literal, Tuple, Union

 from huggingface_hub import HfApi, HfFileSystem, hf_hub_url, get_hf_file_metadata
 from huggingface_hub import ModelCard

@@ -19,6 +19,162 @@ from transformers.models.auto.tokenization_auto import AutoTokenizer
 from src.display.utils import TEXT_TASKS, VISION_TASKS, NUM_EXPECTED_EXAMPLES
 from src.envs import EVAL_REQUESTS_SUBGRAPH, EVAL_REQUESTS_CAUSALGRAPH

+TASKS = ["ioi", "mcqa", "arithmetic-addition", "arithmetic-subtraction", "arc-easy", "arc-challenge"]
+MODELS = ["gpt2", "qwen2.5", "gemma2", "llama3", "interpbench"]
+
+class FeaturizerValidator:
+    def __init__(self, base_featurizer_class):
+        self.base_featurizer_class = base_featurizer_class
+        self.featurizer_class_name = None
+
+        # torch.nn.Module
+        self.module_value, self.module_attr = "torch", "Module"
+        self.featurizer_module_class_name_1 = None
+        self.featurizer_module_class_name_2 = None
+
+
+    def find_featurizer_subclass(self, module_path: str) -> Tuple[bool, Union[str, None], str]:
+        """
+        Finds the first class in the module that inherits from Featurizer.
+
+        Args:
+            module_path: Path to the uploaded Python file
+
+        Returns:
+            Tuple of (success, class_name, message)
+        """
+        # First try with AST for safety
+        try:
+            with open(module_path, 'r') as file:
+                tree = ast.parse(file.read(), filename=module_path)
+
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    for base in node.bases:
+                        if isinstance(base, ast.Name) and base.id == self.base_featurizer_class.__name__:
+                            return True, node.name, f"Found class '{node.name}' that inherits from {self.base_featurizer_class.__name__}"
+
+            return False, None, f"No class inheriting from {self.base_featurizer_class.__name__} found"
+
+        except Exception as e:
+            return False, None, f"Error during static analysis: {str(e)}"
+
+
+    def find_featurizer_module_classes(self, module_path: str) -> Tuple[bool, str]:
+        try:
+            with open(module_path, 'r') as file:
+                tree = ast.parse(file.read(), filename=module_path)
+
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    for base in node.bases:
+                        if (isinstance(base, ast.Attribute) and base.attr == self.module_attr):
+                            if self.featurizer_module_class_name_1 is None:
+                                self.featurizer_module_class_name_1 = node.name
+                            else:
+                                self.featurizer_module_class_name_2 = node.name
+                                return True, f"Found two featurizer modules: {self.featurizer_module_class_name_1}, {self.featurizer_module_class_name_2}"
+
+            if self.featurizer_module_class_name_1:
+                return True, f"Found one featurizer module: {self.featurizer_module_class_name_1}"
+            return False, "Found no featurizer modules."
+
+        except Exception as e:
+            return False, f"Error during static analysis: {e}"
+
+
+    def validate_uploaded_module(self, module_path: str) -> Tuple[bool, str]:
+        """
+        Validates an uploaded module to ensure it properly extends the Featurizer class.
+
+        Args:
+            module_path: Path to the uploaded Python file
+
+        Returns:
+            Tuple of (is_valid, message)
+        """
+        # First, find the name of the featurizer class we're verifying
+        found, class_name, message = self.find_featurizer_subclass(module_path)
+        if not found:
+            return False, message
+        else:
+            print("Verified featurizer subclass.")
+
+        # Second, find the name of the featurizer and inverse featurizer modules
+        modules_found, modules_message = self.find_featurizer_module_classes(module_path)
+        if not modules_found:
+            return False, modules_message
+        else:
+            print(f"Verified featurizer module(s): {modules_message}")
+
+        # Then, perform static code analysis on the featurizer class for basic safety
+        inheritance_check, ast_message = self._verify_inheritance_with_ast(module_path, class_name)
+        if not inheritance_check:
+            return False, ast_message
+
+        # Then, try to load and validate the featurizer class
+        return self._verify_inheritance_with_import(module_path, class_name)
+
+        # TODO: try directly loading featurizer module and inverse featurizer module?
+
+
+    def _verify_inheritance_with_ast(self, module_path: str, class_name: str) -> Tuple[bool, str]:
+        """Verify inheritance using AST without executing code"""
+        try:
+            with open(module_path, 'r') as file:
+                tree = ast.parse(file.read(), filename=module_path)
+
+            # Look for class definitions that match the target class name
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef) and node.name == class_name:
+                    # Check if any base class name matches 'Featurizer'
+                    for base in node.bases:
+                        if isinstance(base, ast.Name) and base.id == self.base_featurizer_class.__name__:
+                            return True, "Static analysis indicates proper inheritance"
+
+                    return False, f"Class '{class_name}' does not appear to inherit from {self.base_featurizer_class.__name__}"
+
+            return False, f"Class '{class_name}' not found in the uploaded module"
+
+        except Exception as e:
+            return False, f"Error during static analysis: {str(e)}"
+
+
+    def _verify_inheritance_with_import(self, module_path: str, class_name: str) -> Tuple[bool, str]:
+        """Safely import the module and verify inheritance using Python's introspection"""
+        try:
+            # Dynamically import the module
+            spec = importlib.util.spec_from_file_location("uploaded_module", module_path)
+            if spec is None or spec.loader is None:
+                return False, "Could not load the module specification"
+
+            uploaded_module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(uploaded_module)
+
+            # Get the class from the module
+            if not hasattr(uploaded_module, class_name):
+                return False, f"Class '{class_name}' not found in the uploaded module"
+
+            uploaded_class = getattr(uploaded_module, class_name)
+
+            # Check if it's a proper subclass
+            if not inspect.isclass(uploaded_class):
+                return False, f"'{class_name}' is not a class"
+
+            if not issubclass(uploaded_class, self.base_featurizer_class):
+                return False, f"'{class_name}' does not inherit from {self.base_featurizer_class.__name__}"
+
+            # Optional: Check method resolution order
+            mro = inspect.getmro(uploaded_class)
+            if self.base_featurizer_class not in mro:
+                return False, f"{self.base_featurizer_class.__name__} not in the method resolution order"
+
+            return True, f"Class '{class_name}' properly extends {self.base_featurizer_class.__name__}"
+
+        except Exception as e:
+            return False, f"Error during dynamic validation: {str(e)}"
+

 def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""

@@ -260,7 +416,7 @@ def parse_huggingface_url(url: str):
     return repo_id, folder_path, revision


-def validate_directory(fs: HfFileSystem, repo_id: str, dirname: str, curr_tm: str, circuit_level: Literal['edge', 'node', 'neuron'] = 'edge'):
+def validate_directory_circuit(fs: HfFileSystem, repo_id: str, dirname: str, curr_tm: str, circuit_level: Literal['edge', 'node', 'neuron'] = 'edge'):
     errors = []
     warnings = []

@@ -307,9 +463,6 @@ def verify_circuit_submission(hf_repo, level, progress=gr.Progress()):
         "arc-challenge_llama3"
     ]

-    TASKS = ["ioi", "mcqa", "arithmetic-addition", "arithmetic-subtraction", "arc-easy", "arc-challenge"]
-    MODELS = ["gpt2", "qwen2.5", "gemma2", "llama3", "interpbench"]
-
     errors = []
     warnings = []

@@ -321,10 +474,9 @@ def verify_circuit_submission(hf_repo, level, progress=gr.Progress()):
     path = hf_repo
     level = level

-    repo_id, folder_path, revision = parse_huggingface_url(hf_repo)
-
-    folder_path = repo_id + "/" + folder_path
     try:
+        repo_id, folder_path, revision = parse_huggingface_url(hf_repo)
+        folder_path = repo_id + "/" + folder_path
         files = fs.listdir(folder_path, revision=revision)
     except Exception as e:
         errors.append(f"Could not open Huggingface URL: {e}")

@@ -361,7 +513,7 @@ def verify_circuit_submission(hf_repo, level, progress=gr.Progress()):

             # Parse circuits directory
             print(f"validating {circuit_dir}")
-            vd_errors, vd_warnings = validate_directory(fs, repo_id, circuit_dir, curr_tm, level)
+            vd_errors, vd_warnings = validate_directory_circuit(fs, repo_id, circuit_dir, curr_tm, level)
             errors.extend(vd_errors)
             warnings.extend(vd_warnings)

@@ -390,5 +542,184 @@ def verify_circuit_submission(hf_repo, level, progress=gr.Progress()):
     return errors, warnings


-def verify_causal_variable_submission(hf_repo, layer, position, code_upload):
-    return
+def validate_directory_causalgraph(fs: HfFileSystem, repo_id: str, dirname: str):
+    errors = []
+    warnings = []
+
+    files = fs.ls(dirname)
+    files = [f["name"] for f in files if "_featurizer" in f["name"] or "_indices" in f["name"]]
+
+    valid_triplet = False
+
+    offset = 0
+    for idx, file in enumerate(files):
+        file_suffix = file.split(repo_id + "/")[1]
+        file_url = hf_hub_url(repo_id=repo_id, filename=file_suffix)
+        file_info = get_hf_file_metadata(file_url)
+        file_size_mb = file_info.size / (1024 * 1024)
+        if file_size_mb > 150:
+            warnings.append(f"Will skip file >150MB: {file}")
+            offset -= 1
+            continue
+
+        if idx + offset > 30:
+            warnings.append("Many files in directory; stopping at 30")
+            break
+
+        if file.endswith("_featurizer") or file.endswith("_indices"):
+            prefix = "_".join(file.split("_")[:-1])
+            this_suffix = "_" + file.split("_")[-1]
+            suffixes = ("_featurizer", "_inverse_featurizer", "_indices")
+            for suffix_idx, suffix in enumerate(suffixes):
+                if file.replace(this_suffix, suffix) not in files:
+                    warnings.append(f"For {prefix}, found a {this_suffix} file but no associated {suffix}")
+                    break
+                if suffix_idx == len(suffixes) - 1:
+                    valid_triplet = True
+            if valid_triplet:
+                found_submodule = False
+                found_layer = False
+                found_token = False
+                if "residual" in prefix.lower() or "attention" in prefix.lower():
+                    found_submodule = True
+                if "layer:" in prefix.lower():
+                    found_layer = True
+                if "token:" in prefix.lower():
+                    found_token = True
+                if not found_submodule or not found_layer or not found_token:
+                    errors.append("Could not derive where featurizer should be applied from featurizer filenames.")
+
+        if valid_triplet:
+            break
+
+    if not valid_triplet:
+        errors.append("No valid featurizer/inverse featurizer/indices triplets.")
+    return errors, warnings
+
+
+def verify_causal_variable_submission(hf_repo, progress=gr.Progress()):
+    CV_TASKS = set(["ioi_task", "4_answer_MCQA", "ARC_easy", "arithmetic", "ravel_task"])
+    CV_TASK_VARIABLES = {"ioi_task": ["output_token", "output_position"],
+                         "4_answer_MCQA": ["answer_pointer", "answer"],
+                         "ARC_easy": ["answer_pointer", "answer"],
+                         "arithmetic": ["ones_carry"],
+                         "ravel_task": ["Country", "Continent", "Language"]}
+    CV_MODELS = set(["GPT2LMHeadModel", "Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"])
+    # create pairs of valid task/model combinations
+    CV_VALID_TASK_MODELS = set([("ioi_task", "GPT2LMHeadModel"),
+                                ("ioi_task", "Qwen2ForCausalLM"),
+                                ("ioi_task", "Gemma2ForCausalLM"),
+                                ("ioi_task", "LlamaForCausalLM"),
+                                ("4_answer_MCQA", "Qwen2ForCausalLM"),
+                                ("4_answer_MCQA", "Gemma2ForCausalLM"),
+                                ("4_answer_MCQA", "LlamaForCausalLM"),
+                                ("ARC_easy", "Gemma2ForCausalLM"),
+                                ("ARC_easy", "LlamaForCausalLM"),
+                                ("arithmetic", "Gemma2ForCausalLM"),
+                                ("arithmetic", "LlamaForCausalLM"),
+                                ("ravel_task", "Gemma2ForCausalLM"),
+                                ("ravel_task", "LlamaForCausalLM")])
+
+    errors = []
+    warnings = []
+
+    num_py_files = 0
+    directories_present = {tm: False for tm in CV_VALID_TASK_MODELS}
+    directories_valid = {tm: False for tm in CV_VALID_TASK_MODELS}
+    variables_valid = {}
+
+    fs = HfFileSystem()
+
+    path = hf_repo
+
+    try:
+        repo_id, folder_path, revision = parse_huggingface_url(hf_repo)
+        folder_path = repo_id + "/" + folder_path
+        files = fs.listdir(folder_path, revision=revision)
+    except Exception as e:
+        errors.append(f"Could not open Huggingface URL: {e}")
+        return errors, warnings
+
+    file_counts = 0
+    for file in progress.tqdm(files, desc="Validating files in repo"):
+        filename = file["name"]
+        file_counts += 1
+        if file_counts >= 30:
+            warnings.append("Folder contains many files/directories; stopped at 30.")
+            break
+
+        if filename.endswith(".py"):
+            num_py_files += 1
+
+        causalgraph_dir = filename
+        dirname_proc = causalgraph_dir.lower().split("/")[-1]
+        if not fs.isdir(causalgraph_dir):
+            continue
+        curr_task = None
+        curr_model = None
+        curr_variable = None
+        # Look for task names in filename
+        for task in CV_TASKS:
+            if dirname_proc.startswith(task.lower()) or f"_{task.lower()}" in dirname_proc:
+                curr_task = task
+                if curr_task not in variables_valid:
+                    variables_valid[curr_task] = {v: False for v in CV_TASK_VARIABLES[curr_task]}
+                for variable in CV_TASK_VARIABLES[curr_task]:
+                    if dirname_proc.startswith(variable.lower()) or f"_{variable.lower()}" in dirname_proc or f"_{variable.lower().replace('_', '-')}" in dirname_proc:
+                        curr_variable = variable
+                        break
+        # Look for model names in filename
+        for model in CV_MODELS:
+            if dirname_proc.startswith(model.lower()) or f"_{model.lower()}" in dirname_proc:
+                curr_model = model
+        if curr_task is not None and curr_model is not None and curr_variable is not None:
+            curr_tm = (curr_task, curr_model)
+            if curr_tm in CV_VALID_TASK_MODELS:
+                directories_present[curr_tm] = True
+            else:
+                continue
+        else:
+            continue
+
+        print(f"validating {causalgraph_dir}")
+        vd_errors, vd_warnings = validate_directory_causalgraph(fs, repo_id, causalgraph_dir)
+        errors.extend(vd_errors)
+        warnings.extend(vd_warnings)
+
+        if len(vd_errors) == 0:
+            directories_valid[curr_tm] = True
+            variables_valid[curr_task][curr_variable] = True
+
+    if num_py_files == 0:
+        errors.append("No featurizer .py file detected in root of provided repo.")
+    elif num_py_files == 1:
+        errors.append("Found one .py script, but expected two: one for the featurizer, and another for the token position functions.")
+
+    task_set, model_set = set(), set()
+    for tm in directories_present:
+        if not directories_present[tm]:
+            continue
+        if not directories_valid[tm]:
+            warnings.append(f"Directory found for {tm[0]}/{tm[1]}, but contents not valid")
+            continue
+
+    for tm in directories_valid:
+        if directories_valid[tm]:
+            task, model = tm
+            task_set.add(task)
+            model_set.add(model)
+
+    if len(task_set) == 0 or len(model_set) == 0:
+        errors.append("No valid directories found for any task/model.")
+
+    # no_tm_display = [f"{tm[0]}/{tm[1]}" for tm in directories_valid if not directories_valid[tm]]
+    # if len(no_tm_display) > 0:
+    #     warnings.append(f"No valid submission found for the following tasks/models: {*no_tm_display,}")
+
+    for task in variables_valid:
+        found_variable_display = [v for v in variables_valid[task] if variables_valid[task][v]]
+        no_variable_display = [v for v in variables_valid[task] if not variables_valid[task][v]]
+        if no_variable_display:
+            warnings.append(f"For {task}, found variables {*found_variable_display,}, but not variables {*no_variable_display,}")
+
+    return errors, warnings
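
A sketch of how the backend might drive FeaturizerValidator on a downloaded submission file (the base Featurizer import and the local path are assumptions for illustration; nothing in this commit wires the class up yet):

from featurizer_base import Featurizer  # hypothetical import of the base class

validator = FeaturizerValidator(base_featurizer_class=Featurizer)
is_valid, message = validator.validate_uploaded_module("downloads/featurizer_impl.py")  # illustrative path
if not is_valid:
    errors.append(f"Featurizer validation failed: {message}")

Note that validate_uploaded_module ends by importing and executing the uploaded module, so it should only run in a sandboxed backend process, not in the Space's frontend.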
src/submission/submit.py CHANGED

@@ -17,7 +17,7 @@ import gradio as gr
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None

-def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email, _id):
+def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email, _id):
     errors = []
     hf_repo = hf_repo_circ if "Circuit" in track else hf_repo_cg
     repo_id, folder_path, revision = parse_huggingface_url(hf_repo)

@@ -53,9 +53,6 @@ def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_positio
         "hf_repo": hf_repo,
         "user_name": user_name,
         "revision": commit_hash,
-        "layer": layer,
-        "token_position": token_position,
-        "code_upload": code_upload,
         "method_name": method_name,
         "contact_email": contact_email.lower(),
         "submit_time": current_time,

@@ -65,7 +62,6 @@ def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_positio
         QUEUE_REPO = QUEUE_REPO_CAUSALGRAPH
         EVAL_REQUESTS = EVAL_REQUESTS_CAUSALGRAPH

-
     OUT_DIR = f"{EVAL_REQUESTS}/"
     os.makedirs(OUT_DIR, exist_ok=True)
     out_path = f"{OUT_DIR}/{method_name}_{_id}_{current_time}.json"

@@ -84,7 +80,7 @@ def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_positio
         errors.append(f"Could not upload entry to eval queue: {e}")

     if errors:
-        status = gr.Textbox("\n".join(f"❌ {e}" for e in errors), visible=True)
+        status = gr.Textbox("\n\n".join(f"❌ {e}" for e in errors), visible=True)
     else:
         status = gr.Textbox(f"✅ Submission received! Your submission ID is \"{_id}\". Save this so that you can manage your submission on the queue.", visible=True)
     return [
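
For reference, the queue entry written by this function now omits the layer, token position, and code-upload fields. Restricted to the keys visible in these hunks, an entry looks roughly like this (all values invented; the full record contains additional keys assembled elsewhere in the function):

{
    "hf_repo": "https://huggingface.co/username/my-submission/tree/main",
    "user_name": "username",
    "revision": "abc123def456",
    "method_name": "DAS",
    "contact_email": "user@example.com",
    "submit_time": "2025-01-01T00-00-00"
}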