Aaron Mueller committed on
Commit e1faa87 · 1 Parent(s): f59c752

updates for causal variable track
app.py CHANGED

@@ -303,13 +303,15 @@ LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGAT
 (
     finished_eval_queue_df_subgraph,
     pending_eval_queue_df_subgraph,
-) = get_evaluation_queue_df(EVAL_REQUESTS_SUBGRAPH, EVAL_COLS)
+) = get_evaluation_queue_df(EVAL_REQUESTS_SUBGRAPH, EVAL_COLS, "Circuit")

-# (
-#     finished_eval_queue_df_causalvariable,
-#     pending_eval_queue_df_causalvariable,
-# ) = get_evaluation_queue_df(EVAL_REQUESTS_CAUSALGRAPH, EVAL_COLS)
+(
+    finished_eval_queue_df_causalvariable,
+    pending_eval_queue_df_causalvariable,
+) = get_evaluation_queue_df(EVAL_REQUESTS_CAUSALGRAPH, EVAL_COLS, "Causal Variable")

+finished_eval_queue = pd.concat((finished_eval_queue_df_subgraph, finished_eval_queue_df_causalvariable))
+pending_eval_queue = pd.concat((pending_eval_queue_df_subgraph, pending_eval_queue_df_causalvariable))

 def init_leaderboard_mib_subgraph(dataframe, track):
     """Initialize the subgraph leaderboard with display names for better readability."""

@@ -800,6 +802,7 @@ with demo:
         # with gr.Group(visible=False) as causal_ui:
         with gr.Column(visible=False, elem_id="bordered-column") as causal_ui:
             gr.Markdown(EVALUATION_QUEUE_TEXT_CAUSALVARIABLE, elem_classes="markdown-text")
+            """
             with gr.Row():
                 layer = gr.Number(
                     label="Layer Number",

@@ -813,15 +816,16 @@ with demo:
                     minimum=0,
                     info="Integer specifying token position"
                 )
+            """
            with gr.Row():
                 hf_repo_cg = gr.Textbox(
                     label="HuggingFace Repository URL",
                     placeholder="https://huggingface.co/username/repo/path",
                     info="Must be a valid HuggingFace URL pointing to a file containing the trained featurizer (.pt). " )
-                code_upload = gr.File(
-                    label="Upload Python file implementing your featurization function",
-                    file_types=[".py"],
-                )
+                # code_upload = gr.File(
+                #     label="Upload Python file implementing your featurization function",
+                #     file_types=[".py"],
+                # )

            # Common fields
            with gr.Group():

@@ -843,7 +847,7 @@ with demo:
        # Submission handling
        status = gr.Textbox(label="Submission Status", visible=False)

-        def handle_submission(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email):
+        def handle_submission(track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email):
            errors = []
            warnings = []

@@ -856,7 +860,7 @@ with demo:
                errors.append("Method name is required")
            if "@" not in contact_email or "." not in contact_email:
                errors.append("Valid email address is required")
-            if not level:
+            if "Circuit" in track and not level:
                errors.append("Level of granularity is required")

            if not hf_repo.startswith("https://huggingface.co/") and not hf_repo.startswith("http://huggingface.co/"):

@@ -880,12 +884,12 @@ with demo:
                submission_errors, submission_warnings = verify_circuit_submission(hf_repo, level)

            elif not breaking_error:
-                if not (isinstance(layer, int) and isinstance(token_position, int)):
-                    errors.append("Layer and token position must be integers")
-                if not code_upload:
-                    errors.append("Code file upload is required")
+                # if not (isinstance(layer, int) and isinstance(token_position, int)):
+                #     errors.append("Layer and token position must be integers")
+                # if not code_upload:
+                #     errors.append("Code file upload is required")

-                submission_errors, submission_warnings = verify_causal_variable_submission(hf_repo, layer, token_position, code_upload)
+                submission_errors, submission_warnings = verify_causal_variable_submission(hf_repo)

            if not breaking_error:
                errors.extend(submission_errors)

@@ -901,12 +905,12 @@ with demo:
            elif warnings:
                return [
                    gr.Textbox("Warnings:", visible=True),
-                    gr.Markdown("\n".join(f"• {w}" for w in warnings)),
-                    (track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email, _id),
+                    gr.Markdown("\n\n".join(f"• {w}" for w in warnings)),
+                    (track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email, _id),
                    gr.Column(visible=True)
                ]
            else:
-                return upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email, _id)
+                return upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email, _id)

        # New warning confirmation dialog
        warning_modal = gr.Column(visible=False, variant="panel")

@@ -922,7 +926,7 @@ with demo:
        submit_btn = gr.Button("Submit Entry", variant="primary")
        submit_btn.click(
            handle_submission,
-            inputs=[track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email],
+            inputs=[track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email],
            outputs=[status, warning_display, pending_submission, warning_modal]
        )

@@ -939,24 +943,24 @@ with demo:

        with gr.Column():
            with gr.Accordion(
-                f"✅ Finished Evaluations ({len(finished_eval_queue_df_subgraph)})",
+                f"✅ Finished Evaluations ({len(finished_eval_queue)})",
                open=False,
            ):
                with gr.Row():
                    finished_eval_table = gr.components.Dataframe(
-                        value=finished_eval_queue_df_subgraph,
+                        value=finished_eval_queue,
                        headers=EVAL_COLS,
                        datatype=EVAL_TYPES,
                        row_count=5,
                    )

            with gr.Accordion(
-                f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df_subgraph)})",
+                f"⏳ Pending Evaluation Queue ({len(pending_eval_queue)})",
                open=False,
            ):
                with gr.Row():
                    pending_eval_table = gr.components.Dataframe(
-                        value=pending_eval_queue_df_subgraph,
+                        value=pending_eval_queue,
                        headers=EVAL_COLS,
                        datatype=EVAL_TYPES,
                        row_count=5,
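
In effect, the submission handler now branches on the track with far fewer inputs. A minimal sketch of the resulting control flow (hand-written here for illustration; the real handle_submission above also accumulates errors and warnings and handles early exits):

def route_validation(track, hf_repo_circ, hf_repo_cg, level):
    # The circuit track still requires a granularity level; the causal
    # variable track now needs only the repository URL, since the layer,
    # token position, and code-upload inputs were dropped in this commit.
    if "Circuit" in track:
        return verify_circuit_submission(hf_repo_circ, level)
    return verify_causal_variable_submission(hf_repo_cg)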
src/about.py CHANGED

@@ -145,19 +145,20 @@ EVALUATION_QUEUE_TEXT_CAUSALVARIABLE = """

 ### 1. Collect your materials
 You'll need the following:
-* A trained featurizer saved as a .pt object.
-* A python function that can load and run forward passes with your featurizer.
-* A dynamic token alignment function.
-* A hypothesized feature location.
+* Trained featurizer, inverse featurizer, and indices objects.
+* A python file containing the implementation of your featurizer and inverse featurizer.
+* (Optional) Dynamic token alignment functions, provided in another python file.

 ### 2. Upload your materials
 Create a HuggingFace repository, and create a folder in that repository that will hold all of your materials.
-At the URL you provide, each of the above materials should be present. We will take the first python script lexicographically
-as your featurizer function, and the first .pt file lexicographically as your featurizer.
+At the URL you provide (we'll call this the "root"), each of the above materials should be present. At the linked folder,
+we will take the first python script lexicographically at the root as the featurizer script. Within that folder, we expect
+one subfolder per model/task/causal variable triplet. Each subfolder should contain the trained featurizer, inverse featurizer,
+and indices.

 ### 3. Manage your submission in the queue
 If your submission passes all checks, it will be added to the queue. You will receive a submission ID here when you do this; be sure to save it!
-This will allow you to remove your submission from the queue (e.g., if you find a bug in your circuits). This will prevent you from needing to wait until
+This will allow you to remove your submission from the queue (e.g., if you find a bug). This will prevent you from needing to wait until
 next week to resubmit.

 Before your submission has been validated by our backend, it will have the "PREVALIDATION" status in the queue. Once it has been validated, it will have the "PENDING" status.
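
To make the expected layout concrete, here is a hypothetical repository tree that matches these instructions and the filename checks in src/submission/check_validity.py (all file and folder names are illustrative, not required):

my-submission/                                    <- the linked "root"
    featurizer_impl.py                            <- featurizer + inverse featurizer classes
    token_positions.py                            <- optional token alignment functions
    ioi_task_GPT2LMHeadModel_output_token/        <- one subfolder per model/task/variable
        residual_layer:7_token:last_featurizer
        residual_layer:7_token:last_inverse_featurizer
        residual_layer:7_token:last_indices
    4_answer_MCQA_Qwen2ForCausalLM_answer_pointer/
        ...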
src/display/utils.py CHANGED

@@ -192,6 +192,7 @@ AutoEvalColumn_mib_causalgraph = make_dataclass(
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
+    track_name = ColumnContent("track", "str", True)
     method_name = ColumnContent("method_name", "str", True)
     repo_id = ColumnContent("hf_repo", "markdown", True)
     revision = ColumnContent("revision", "str", True)
src/leaderboard/read_evals.py CHANGED

@@ -202,16 +202,6 @@ def get_raw_eval_results_mib_subgraph(results_path: str) -> List[EvalResult_MIB_
     return eval_results


-
-
-
-
-
-
-
-
-
-
 def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
     """
     Aggregates rows with the same base method name by taking the max value for each column.

@@ -462,7 +452,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram

     # Create the detailed DataFrame for highest accuracy
     highest_records = list(highest_method_groups.values())
-    detailed_df_highest = pd.DataFrame(highest_records)
+    detailed_df_highest = pd.DataFrame(highest_records).round(3).fillna("-")

     # Process mean accuracy results
     # Group results by method

@@ -482,7 +472,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram

     # Create the detailed DataFrame for mean accuracy
     mean_records = list(mean_method_groups.values())
-    detailed_df_mean = pd.DataFrame(mean_records)
+    detailed_df_mean = pd.DataFrame(mean_records).round(3).fillna("-")

     if detailed_df_highest.empty or detailed_df_mean.empty:
         return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

@@ -492,17 +482,39 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
     score_columns_mean = [col for col in detailed_df_mean.columns if col not in ["eval_name", "Method"]]

     if score_columns_highest:
-        detailed_df_highest["Average"] = detailed_df_highest[score_columns_highest].mean(axis=1).round(3)
+        # detailed_df_highest["Average"] = detailed_df_highest[score_columns_highest].mean(axis=1).round(3)
+        # Check if any cell in the row contains '-'
+        has_dash = detailed_df_highest[score_columns_highest].eq('-').any(axis=1)
+        detailed_df_highest['Average'] = '-'
+        mask = ~has_dash
+        if mask.any():
+            numeric_values = detailed_df_highest.loc[mask, score_columns_highest].apply(pd.to_numeric)
+            detailed_df_highest.loc[mask, 'Average'] = numeric_values.mean(axis=1).round(3)

     if score_columns_mean:
-        detailed_df_mean["Average"] = detailed_df_mean[score_columns_mean].mean(axis=1).round(3)
+        # detailed_df_mean["Average"] = detailed_df_mean[score_columns_mean].mean(axis=1).round(3)
+        has_dash = detailed_df_mean[score_columns_mean].eq('-').any(axis=1)
+        detailed_df_mean['Average'] = '-'
+        mask = ~has_dash
+        if mask.any():
+            numeric_values = detailed_df_mean.loc[mask, score_columns_mean].apply(pd.to_numeric)
+            detailed_df_mean.loc[mask, 'Average'] = numeric_values.mean(axis=1).round(3)

     # Sort by Average descending
     if "Average" in detailed_df_highest.columns:
-        detailed_df_highest = detailed_df_highest.sort_values("Average", ascending=False)
+        # Convert '-' to NaN for sorting purposes
+        detailed_df_highest['Average'] = pd.to_numeric(detailed_df_highest['Average'], errors='coerce')
+        detailed_df_highest = detailed_df_highest.sort_values(by=['Average'], ascending=False, na_position='last')
+        # Convert NaN back to '-'
+        detailed_df_highest['Average'] = detailed_df_highest['Average'].fillna('-')
+        # detailed_df_highest = detailed_df_highest.sort_values("Average", ascending=False).round(3)

     if "Average" in detailed_df_mean.columns:
-        detailed_df_mean = detailed_df_mean.sort_values("Average", ascending=False)
+        detailed_df_mean['Average'] = pd.to_numeric(detailed_df_mean['Average'], errors='coerce')
+        detailed_df_mean = detailed_df_mean.sort_values(by=['Average'], ascending=False, na_position='last')
+        # Convert NaN back to '-'
+        detailed_df_mean['Average'] = detailed_df_mean['Average'].fillna('-')
+        # detailed_df_mean = detailed_df_mean.sort_values("Average", ascending=False).round(3)

     # # Create intervention-averaged DataFrames for both metrics
     # intervention_averaged_highest_df = create_intervention_averaged_df(detailed_df_highest)
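
The dash-aware averaging above keeps methods with missing scores out of the numeric ranking. A self-contained sketch of the behavior on toy data (the column names are invented for the example):

import pandas as pd

df = pd.DataFrame({"Method": ["A", "B"],
                   "ioi_gpt2": [0.91, "-"],
                   "mcqa_llama3": [0.84, 0.77]})
score_cols = ["ioi_gpt2", "mcqa_llama3"]

has_dash = df[score_cols].eq("-").any(axis=1)   # True only for method B
df["Average"] = "-"
mask = ~has_dash
if mask.any():
    numeric = df.loc[mask, score_cols].apply(pd.to_numeric)
    df.loc[mask, "Average"] = numeric.mean(axis=1).round(3)
# Method A averages to 0.875; method B keeps "-" because one score is missing.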
src/populate.py CHANGED

@@ -34,7 +34,6 @@ def get_leaderboard_df_mib_subgraph(results_path: str, cols: list, benchmark_col



-
 def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
     """Aggregates rows with the same base method name by taking the max value for each column"""
     df_copy = df.copy()

@@ -139,11 +138,13 @@ def get_leaderboard_df_mib_causalgraph(results_path: str) -> Tuple[pd.DataFrame,
     return detailed_df, aggregated_df, intervention_averaged_df


-def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
+def get_evaluation_queue_df(save_path: str, cols: list, track: str) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requests"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []

+    print(track)
+
     for entry in entries:
         if ".json" in entry:
             file_path = os.path.join(save_path, entry)

@@ -175,7 +176,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL" or e["status"] == "FAILED"]
     for list in (pending_list, finished_list):
         for item in list:
-            item["track"] = "Circuit Localization"
+            item["track"] = track
             item["hf_repo"] = parse_huggingface_url(item["hf_repo"])[0]
     df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
     df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
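
With the new track parameter, the app-level call sites (see the app.py hunks above) label each queue before merging the two tracks into one table, roughly:

finished_circ, pending_circ = get_evaluation_queue_df(EVAL_REQUESTS_SUBGRAPH, EVAL_COLS, "Circuit")
finished_cv, pending_cv = get_evaluation_queue_df(EVAL_REQUESTS_CAUSALGRAPH, EVAL_COLS, "Causal Variable")
finished_eval_queue = pd.concat((finished_circ, finished_cv))
pending_eval_queue = pd.concat((pending_circ, pending_cv))

Every row then carries a "track" value that the new EvalQueueColumn.track_name entry in src/display/utils.py surfaces in the queue tables.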
src/submission/check_validity.py CHANGED

@@ -8,7 +8,7 @@ import gradio as gr
 from urllib.parse import urlparse
 from collections import defaultdict
 from datetime import datetime, timedelta, timezone
-from typing import Literal
+from typing import Literal, Tuple, Union

 from huggingface_hub import HfApi, HfFileSystem, hf_hub_url, get_hf_file_metadata
 from huggingface_hub import ModelCard

@@ -19,6 +19,162 @@ from transformers.models.auto.tokenization_auto import AutoTokenizer
 from src.display.utils import TEXT_TASKS, VISION_TASKS, NUM_EXPECTED_EXAMPLES
 from src.envs import EVAL_REQUESTS_SUBGRAPH, EVAL_REQUESTS_CAUSALGRAPH

+TASKS = ["ioi", "mcqa", "arithmetic-addition", "arithmetic-subtraction", "arc-easy", "arc-challenge"]
+MODELS = ["gpt2", "qwen2.5", "gemma2", "llama3", "interpbench"]
+
+class FeaturizerValidator:
+    def __init__(self, base_featurizer_class):
+        self.base_featurizer_class = base_featurizer_class
+        self.featurizer_class_name = None
+
+        # torch.nn.Module
+        self.module_value, self.module_attr = "torch", "Module"
+        self.featurizer_module_class_name_1 = None
+        self.featurizer_module_class_name_2 = None
+
+
+    def find_featurizer_subclass(self, module_path: str) -> Tuple[bool, Union[str, None], str]:
+        """
+        Finds the first class in the module that inherits from Featurizer.
+
+        Args:
+            module_path: Path to the uploaded Python file
+
+        Returns:
+            Tuple of (success, class_name, message)
+        """
+        # First try with AST for safety
+        try:
+            with open(module_path, 'r') as file:
+                tree = ast.parse(file.read(), filename=module_path)
+
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    for base in node.bases:
+                        if isinstance(base, ast.Name) and base.id == self.base_featurizer_class.__name__:
+                            return True, node.name, f"Found class '{node.name}' that inherits from {self.base_featurizer_class.__name__}"
+
+            return False, None, f"No class inheriting from {self.base_featurizer_class.__name__} found"
+
+        except Exception as e:
+            return False, None, f"Error during static analysis: {str(e)}"
+
+
+    def find_featurizer_module_classes(self, module_path: str) -> Tuple[bool, str]:
+        try:
+            with open(module_path, 'r') as file:
+                tree = ast.parse(file.read(), filename=module_path)
+
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    for base in node.bases:
+                        if (isinstance(base, ast.Attribute) and base.attr == self.module_attr):
+                            if self.featurizer_module_class_name_1 is None:
+                                self.featurizer_module_class_name_1 = node.name
+                            else:
+                                self.featurizer_module_class_name_2 = node.name
+                                return True, f"Found two featurizer modules: {self.featurizer_module_class_name_1}, {self.featurizer_module_class_name_2}"
+
+            if self.featurizer_module_class_name_1:
+                return True, f"Found one featurizer module: {self.featurizer_module_class_name_1}"
+            return False, "Found no featurizer modules."
+
+        except Exception as e:
+            return False, f"Error during static analysis: {e}"
+
+
+    def validate_uploaded_module(self, module_path: str) -> Tuple[bool, str]:
+        """
+        Validates an uploaded module to ensure it properly extends the Featurizer class.
+
+        Args:
+            module_path: Path to the uploaded Python file
+
+        Returns:
+            Tuple of (is_valid, message)
+        """
+        # First, find the name of the featurizer class we're verifying
+        found, class_name, message = self.find_featurizer_subclass(module_path)
+        if not found:
+            return False, message
+        else:
+            print("Verified featurizer subclass.")
+
+        # Second, find the name of the featurizer and inverse featurizer modules
+        modules_found, modules_message = self.find_featurizer_module_classes(module_path)
+        if not modules_found:
+            return False, modules_message
+        else:
+            print(f"Verified featurizer module(s): {modules_message}")
+
+        # Then, perform static code analysis on the featurizer class for basic safety
+        inheritance_check, ast_message = self._verify_inheritance_with_ast(module_path, class_name)
+        if not inheritance_check:
+            return False, ast_message
+
+        # Then, try to load and validate the featurizer class
+        return self._verify_inheritance_with_import(module_path, class_name)
+
+        # TODO: try directly loading featurizer module and inverse featurizer module?
+
+
+    def _verify_inheritance_with_ast(self, module_path: str, class_name: str) -> Tuple[bool, str]:
+        """Verify inheritance using AST without executing code"""
+        try:
+            with open(module_path, 'r') as file:
+                tree = ast.parse(file.read(), filename=module_path)
+
+            # Look for class definitions that match the target class name
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef) and node.name == class_name:
+                    # Check if any base class name matches 'Featurizer'
+                    for base in node.bases:
+                        if isinstance(base, ast.Name) and base.id == self.base_featurizer_class.__name__:
+                            return True, "Static analysis indicates proper inheritance"
+
+                    return False, f"Class '{class_name}' does not appear to inherit from {self.base_featurizer_class.__name__}"
+
+            return False, f"Class '{class_name}' not found in the uploaded module"
+
+        except Exception as e:
+            return False, f"Error during static analysis: {str(e)}"
+
+
+    def _verify_inheritance_with_import(self, module_path: str, class_name: str) -> Tuple[bool, str]:
+        """Safely import the module and verify inheritance using Python's introspection"""
+        try:
+            # Dynamically import the module
+            spec = importlib.util.spec_from_file_location("uploaded_module", module_path)
+            if spec is None or spec.loader is None:
+                return False, "Could not load the module specification"
+
+            uploaded_module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(uploaded_module)
+
+            # Get the class from the module
+            if not hasattr(uploaded_module, class_name):
+                return False, f"Class '{class_name}' not found in the uploaded module"
+
+            uploaded_class = getattr(uploaded_module, class_name)
+
+            # Check if it's a proper subclass
+            if not inspect.isclass(uploaded_class):
+                return False, f"'{class_name}' is not a class"
+
+            if not issubclass(uploaded_class, self.base_featurizer_class):
+                return False, f"'{class_name}' does not inherit from {self.base_featurizer_class.__name__}"
+
+            # Optional: Check method resolution order
+            mro = inspect.getmro(uploaded_class)
+            if self.base_featurizer_class not in mro:
+                return False, f"{self.base_featurizer_class.__name__} not in the method resolution order"
+
+            return True, f"Class '{class_name}' properly extends {self.base_featurizer_class.__name__}"
+
+        except Exception as e:
+            return False, f"Error during dynamic validation: {str(e)}"
+

 def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""

@@ -260,7 +416,7 @@ def parse_huggingface_url(url: str):
     return repo_id, folder_path, revision


-def validate_directory(fs: HfFileSystem, repo_id: str, dirname: str, curr_tm: str, circuit_level: Literal['edge', 'node', 'neuron'] = 'edge'):
+def validate_directory_circuit(fs: HfFileSystem, repo_id: str, dirname: str, curr_tm: str, circuit_level: Literal['edge', 'node', 'neuron'] = 'edge'):
     errors = []
     warnings = []

@@ -307,9 +463,6 @@ def verify_circuit_submission(hf_repo, level, progress=gr.Progress()):
         "arc-challenge_llama3"
     ]

-    TASKS = ["ioi", "mcqa", "arithmetic-addition", "arithmetic-subtraction", "arc-easy", "arc-challenge"]
-    MODELS = ["gpt2", "qwen2.5", "gemma2", "llama3", "interpbench"]
-
     errors = []
     warnings = []

@@ -321,10 +474,9 @@ def verify_circuit_submission(hf_repo, level, progress=gr.Progress()):
     path = hf_repo
     level = level

-    repo_id, folder_path, revision = parse_huggingface_url(hf_repo)
-
-    folder_path = repo_id + "/" + folder_path
     try:
+        repo_id, folder_path, revision = parse_huggingface_url(hf_repo)
+        folder_path = repo_id + "/" + folder_path
         files = fs.listdir(folder_path, revision=revision)
     except Exception as e:
         errors.append(f"Could not open Huggingface URL: {e}")

@@ -361,7 +513,7 @@ def verify_circuit_submission(hf_repo, level, progress=gr.Progress()):

             # Parse circuits directory
             print(f"validating {circuit_dir}")
-            vd_errors, vd_warnings = validate_directory(fs, repo_id, circuit_dir, curr_tm, level)
+            vd_errors, vd_warnings = validate_directory_circuit(fs, repo_id, circuit_dir, curr_tm, level)
             errors.extend(vd_errors)
             warnings.extend(vd_warnings)

@@ -390,5 +542,184 @@ def verify_circuit_submission(hf_repo, level, progress=gr.Progress()):
     return errors, warnings


-def verify_causal_variable_submission(hf_repo, layer, position, code_upload):
-    return
+def validate_directory_causalgraph(fs: HfFileSystem, repo_id: str, dirname: str):
+    errors = []
+    warnings = []
+
+    files = fs.ls(dirname)
+    files = [f["name"] for f in files if "_featurizer" in f["name"] or "_indices" in f["name"]]
+
+    valid_triplet = False
+
+    offset = 0
+    for idx, file in enumerate(files):
+        file_suffix = file.split(repo_id + "/")[1]
+        file_url = hf_hub_url(repo_id=repo_id, filename=file_suffix)
+        file_info = get_hf_file_metadata(file_url)
+        file_size_mb = file_info.size / (1024 * 1024)
+        if file_size_mb > 150:
+            warnings.append(f"Will skip file >150MB: {file}")
+            offset -= 1
+            continue
+
+        if idx + offset > 30:
+            warnings.append("Many files in directory; stopping at 30")
+            break
+
+        if file.endswith("_featurizer") or file.endswith("_indices"):
+            prefix = "_".join(file.split("_")[:-1])
+            this_suffix = "_" + file.split("_")[-1]
+            suffixes = ("_featurizer", "_inverse_featurizer", "_indices")
+            for suffix_idx, suffix in enumerate(suffixes):
+                if file.replace(this_suffix, suffix) not in files:
+                    warnings.append(f"For {prefix}, found a {this_suffix} file but no associated {suffix}")
+                    break
+                if suffix_idx == len(suffixes) - 1:
+                    valid_triplet = True
+            if valid_triplet:
+                found_submodule = False
+                found_layer = False
+                found_token = False
+                if "residual" in prefix.lower() or "attention" in prefix.lower():
+                    found_submodule = True
+                if "layer:" in prefix.lower():
+                    found_layer = True
+                if "token:" in prefix.lower():
+                    found_token = True
+                if not found_submodule or not found_layer or not found_token:
+                    errors.append("Could not derive where featurizer should be applied from featurizer filenames.")
+
+        if valid_triplet:
+            break
+
+    if not valid_triplet:
+        errors.append("No valid featurizer/inverse featurizer/indices triplets.")
+    return errors, warnings
+
+
+def verify_causal_variable_submission(hf_repo, progress=gr.Progress()):
+    CV_TASKS = set(["ioi_task", "4_answer_MCQA", "ARC_easy", "arithmetic", "ravel_task"])
+    CV_TASK_VARIABLES = {"ioi_task": ["output_token", "output_position"],
+                         "4_answer_MCQA": ["answer_pointer", "answer"],
+                         "ARC_easy": ["answer_pointer", "answer"],
+                         "arithmetic": ["ones_carry"],
+                         "ravel_task": ["Country", "Continent", "Language"]}
+    CV_MODELS = set(["GPT2LMHeadModel", "Qwen2ForCausalLM", "Gemma2ForCausalLM", "LlamaForCausalLM"])
+    # create pairs of valid task/model combinations
+    CV_VALID_TASK_MODELS = set([("ioi_task", "GPT2LMHeadModel"),
+                                ("ioi_task", "Qwen2ForCausalLM"),
+                                ("ioi_task", "Gemma2ForCausalLM"),
+                                ("ioi_task", "LlamaForCausalLM"),
+                                ("4_answer_MCQA", "Qwen2ForCausalLM"),
+                                ("4_answer_MCQA", "Gemma2ForCausalLM"),
+                                ("4_answer_MCQA", "LlamaForCausalLM"),
+                                ("ARC_easy", "Gemma2ForCausalLM"),
+                                ("ARC_easy", "LlamaForCausalLM"),
+                                ("arithmetic", "Gemma2ForCausalLM"),
+                                ("arithmetic", "LlamaForCausalLM"),
+                                ("ravel_task", "Gemma2ForCausalLM"),
+                                ("ravel_task", "LlamaForCausalLM")])
+
+    errors = []
+    warnings = []
+
+    num_py_files = 0
+    directories_present = {tm: False for tm in CV_VALID_TASK_MODELS}
+    directories_valid = {tm: False for tm in CV_VALID_TASK_MODELS}
+    variables_valid = {}
+
+    fs = HfFileSystem()
+
+    path = hf_repo
+
+    try:
+        repo_id, folder_path, revision = parse_huggingface_url(hf_repo)
+        folder_path = repo_id + "/" + folder_path
+        files = fs.listdir(folder_path, revision=revision)
+    except Exception as e:
+        errors.append(f"Could not open Huggingface URL: {e}")
+        return errors, warnings
+
+    file_counts = 0
+    for file in progress.tqdm(files, desc="Validating files in repo"):
+        filename = file["name"]
+        file_counts += 1
+        if file_counts >= 30:
+            warnings.append("Folder contains many files/directories; stopped at 30.")
+            break
+
+        if filename.endswith(".py"):
+            num_py_files += 1
+
+        causalgraph_dir = filename
+        dirname_proc = causalgraph_dir.lower().split("/")[-1]
+        if not fs.isdir(causalgraph_dir):
+            continue
+        curr_task = None
+        curr_model = None
+        curr_variable = None
+        # Look for task names in filename
+        for task in CV_TASKS:
+            if dirname_proc.startswith(task.lower()) or f"_{task.lower()}" in dirname_proc:
+                curr_task = task
+                if curr_task not in variables_valid:
+                    variables_valid[curr_task] = {v: False for v in CV_TASK_VARIABLES[curr_task]}
+                for variable in CV_TASK_VARIABLES[curr_task]:
+                    if dirname_proc.startswith(variable.lower()) or f"_{variable.lower()}" in dirname_proc or f"_{variable.lower().replace('_', '-')}" in dirname_proc:
+                        curr_variable = variable
+                        break
+        # Look for model names in filename
+        for model in CV_MODELS:
+            if dirname_proc.startswith(model.lower()) or f"_{model.lower()}" in dirname_proc:
+                curr_model = model
+        if curr_task is not None and curr_model is not None and curr_variable is not None:
+            curr_tm = (curr_task, curr_model)
+            if curr_tm in CV_VALID_TASK_MODELS:
+                directories_present[curr_tm] = True
+            else:
+                continue
+        else:
+            continue
+
+        print(f"validating {causalgraph_dir}")
+        vd_errors, vd_warnings = validate_directory_causalgraph(fs, repo_id, causalgraph_dir)
+        errors.extend(vd_errors)
+        warnings.extend(vd_warnings)
+
+        if len(vd_errors) == 0:
+            directories_valid[curr_tm] = True
+            variables_valid[curr_task][curr_variable] = True
+
+    if num_py_files == 0:
+        errors.append("No featurizer .py file detected in root of provided repo.")
+    elif num_py_files == 1:
+        errors.append("Found one .py script, but expected two: one for the featurizer, and another for the token position functions.")
+
+    task_set, model_set = set(), set()
+    for tm in directories_present:
+        if not directories_present[tm]:
+            continue
+        if not directories_valid[tm]:
+            warnings.append(f"Directory found for {tm[0]}/{tm[1]}, but contents not valid")
+            continue
+
+    for tm in directories_valid:
+        if directories_valid[tm]:
+            task, model = tm
+            task_set.add(task)
+            model_set.add(model)
+
+    if len(task_set) == 0 or len(model_set) == 0:
+        errors.append("No valid directories found for any task/model.")
+
+    # no_tm_display = [f"{tm[0]}/{tm[1]}" for tm in directories_valid if not directories_valid[tm]]
+    # if len(no_tm_display) > 0:
+    #     warnings.append(f"No valid submission found for the following tasks/models: {*no_tm_display,}")
+
+    for task in variables_valid:
+        found_variable_display = [v for v in variables_valid[task] if variables_valid[task][v]]
+        no_variable_display = [v for v in variables_valid[task] if not variables_valid[task][v]]
+        if no_variable_display:
+            warnings.append(f"For {task}, found variables {*found_variable_display,}, but not variables {*no_variable_display,}")
+
+    return errors, warnings
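
A sketch of how the backend might drive FeaturizerValidator on a downloaded submission file (the base Featurizer import and the local path are assumptions for illustration; nothing in this commit wires the class up yet):

from featurizer_base import Featurizer  # hypothetical import of the base class

validator = FeaturizerValidator(base_featurizer_class=Featurizer)
is_valid, message = validator.validate_uploaded_module("downloads/featurizer_impl.py")  # illustrative path
if not is_valid:
    errors.append(f"Featurizer validation failed: {message}")

Note that validate_uploaded_module ends by importing and executing the uploaded module, so it should only run in a sandboxed backend process, not in the Space's frontend.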
src/submission/submit.py CHANGED

@@ -17,7 +17,7 @@ import gradio as gr
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None

-def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email, _id):
+def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, method_name, contact_email, _id):
     errors = []
     hf_repo = hf_repo_circ if "Circuit" in track else hf_repo_cg
     repo_id, folder_path, revision = parse_huggingface_url(hf_repo)

@@ -53,9 +53,6 @@ def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_positio
         "hf_repo": hf_repo,
         "user_name": user_name,
         "revision": commit_hash,
-        "layer": layer,
-        "token_position": token_position,
-        "code_upload": code_upload,
         "method_name": method_name,
         "contact_email": contact_email.lower(),
         "submit_time": current_time,

@@ -65,7 +62,6 @@ def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_positio
         QUEUE_REPO = QUEUE_REPO_CAUSALGRAPH
         EVAL_REQUESTS = EVAL_REQUESTS_CAUSALGRAPH

-
     OUT_DIR = f"{EVAL_REQUESTS}/"
     os.makedirs(OUT_DIR, exist_ok=True)
     out_path = f"{OUT_DIR}/{method_name}_{_id}_{current_time}.json"

@@ -84,7 +80,7 @@ def upload_to_queue(track, hf_repo_circ, hf_repo_cg, level, layer, token_positio
         errors.append(f"Could not upload entry to eval queue: {e}")

     if errors:
-        status = gr.Textbox("\n".join(f"❌ {e}" for e in errors), visible=True)
+        status = gr.Textbox("\n\n".join(f"❌ {e}" for e in errors), visible=True)
     else:
         status = gr.Textbox(f"✅ Submission received! Your submission ID is \"{_id}\". Save this so that you can manage your submission on the queue.", visible=True)
     return [
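
For reference, the queue entry written by this function now omits the layer, token position, and code-upload fields. Restricted to the keys visible in these hunks, an entry looks roughly like this (all values invented; the full record contains additional keys assembled elsewhere in the function):

{
    "hf_repo": "https://huggingface.co/username/my-submission/tree/main",
    "user_name": "username",
    "revision": "abc123def456",
    "method_name": "DAS",
    "contact_email": "user@example.com",
    "submit_time": "2025-01-01T00-00-00"
}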