Spaces:

mib-bench
/

leaderboard

Restarting

App Files Files Community

Aaron Mueller committed on May 21

Commit

33ddef9

1 Parent(s): a5eab2c

format update to submission page

Browse files

Files changed (4) hide show

app.py +24 -23
env.yml +205 -0
src/display/css_html_js.py +6 -1
src/leaderboard/read_evals.py +2 -4

app.py CHANGED Viewed

@@ -702,30 +702,31 @@ with demo:
                 elem_id="track_selector"
             )
-            with gr.Group(visible=False) as circuit_ui:
-                with gr.Column():
-                    with gr.Row():
-                        gr.Markdown(EVALUATION_QUEUE_TEXT_SUBGRAPH, elem_classes="markdown-text")
-                    with gr.Row():
-                        hf_repo_circ = gr.Textbox(
-                            label="HuggingFace Repository URL",
-                            placeholder="https://huggingface.co/username/repo/path",
-                            info="Must be a valid HuggingFace URL pointing to folders containing either 1 importance score file per task/model, or " \
-                                "9 circuit files per task/model (.json or .pt)."
-                        )
-                        level = gr.Radio(
-                            choices=[
-                                "Edge",
-                                "Node (submodule)",
-                                "Node (neuron)"
-                            ],
-                            label="Level of granularity",
-                            info="Is your circuit defined by its inclusion/exclusion of certain edges (e.g., MLP1 to H10L12), of certain submodules (e.g., MLP1), or of neurons " \
-                                "within those submodules (e.g., MLP1 neuron 295)?"
-                        )
-            with gr.Group(visible=False) as causal_ui:
                 gr.Markdown(EVALUATION_QUEUE_TEXT_CAUSALVARIABLE, elem_classes="markdown-text")
                 with gr.Row():
                     layer = gr.Number(
@@ -768,7 +769,7 @@ with demo:
             track.change(toggle_ui, track, [circuit_ui, causal_ui])
             # Submission handling
-            status = gr.Textbox(label="Submission Status", visible=True)
             def handle_submission(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email):
                 errors = []

                 elem_id="track_selector"
             )
+            # with gr.Group(visible=False) as circuit_ui:
+            with gr.Column(visible=False, elem_id="bordered-column") as circuit_ui:
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT_SUBGRAPH, elem_classes="markdown-text")
+                with gr.Row():
+                    hf_repo_circ = gr.Textbox(
+                        label="HuggingFace Repository URL",
+                        placeholder="https://huggingface.co/username/repo/path",
+                        info="Must be a valid HuggingFace URL pointing to folders containing either 1 importance score file per task/model, or " \
+                            "9 circuit files per task/model (.json or .pt)."
+                    )
+                    level = gr.Radio(
+                        choices=[
+                            "Edge",
+                            "Node (submodule)",
+                            "Node (neuron)"
+                        ],
+                        label="Level of granularity",
+                        info="Is your circuit defined by its inclusion/exclusion of certain edges (e.g., MLP1 to H10L12), of certain submodules (e.g., MLP1), or of neurons " \
+                            "within those submodules (e.g., MLP1 neuron 295)?"
+                    )
+            # with gr.Group(visible=False) as causal_ui:
+            with gr.Column(visible=False, elem_id="bordered-column") as causal_ui:
                 gr.Markdown(EVALUATION_QUEUE_TEXT_CAUSALVARIABLE, elem_classes="markdown-text")
                 with gr.Row():
                     layer = gr.Number(
             track.change(toggle_ui, track, [circuit_ui, causal_ui])
             # Submission handling
+            status = gr.Textbox(label="Submission Status", visible=False)
             def handle_submission(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email):
                 errors = []

env.yml ADDED Viewed

	@@ -0,0 +1,205 @@

+name: concepts
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - appnope=0.1.4=pyhd8ed1ab_0
+  - asttokens=2.4.1=pyhd8ed1ab_0
+  - bzip2=1.0.8=h620ffc9_4
+  - ca-certificates=2024.2.2=hf0a4a13_0
+  - comm=0.2.2=pyhd8ed1ab_0
+  - debugpy=1.6.7=py310h313beb8_0
+  - decorator=5.1.1=pyhd8ed1ab_0
+  - exceptiongroup=1.2.0=pyhd8ed1ab_2
+  - executing=2.0.1=pyhd8ed1ab_0
+  - importlib-metadata=7.1.0=pyha770c72_0
+  - importlib_metadata=7.1.0=hd8ed1ab_0
+  - ipykernel=6.29.3=pyh3cd1d5f_0
+  - ipython=8.24.0=pyh707e725_0
+  - jedi=0.19.1=pyhd8ed1ab_0
+  - jupyter_client=8.6.1=pyhd8ed1ab_0
+  - jupyter_core=5.5.0=py310hca03da5_0
+  - krb5=1.21.2=h92f50d5_0
+  - libcxx=17.0.6=h5f092b4_0
+  - libedit=3.1.20191231=hc8eb9b7_2
+  - libffi=3.4.4=hca03da5_0
+  - libsodium=1.0.18=h27ca646_1
+  - matplotlib-inline=0.1.7=pyhd8ed1ab_0
+  - ncurses=6.4=h313beb8_0
+  - nest-asyncio=1.6.0=pyhd8ed1ab_0
+  - openssl=3.3.0=h0d3ecfb_0
+  - parso=0.8.4=pyhd8ed1ab_0
+  - pexpect=4.9.0=pyhd8ed1ab_0
+  - pickleshare=0.7.5=py_1003
+  - pip=23.3.1=py310hca03da5_0
+  - platformdirs=4.2.2=pyhd8ed1ab_0
+  - prompt-toolkit=3.0.42=pyha770c72_0
+  - psutil=5.9.0=py310h1a28f6b_0
+  - ptyprocess=0.7.0=pyhd3deb0d_0
+  - pure_eval=0.2.2=pyhd8ed1ab_0
+  - pygments=2.18.0=pyhd8ed1ab_0
+  - python=3.10.13=hb885b13_0
+  - pyzmq=25.1.2=py310h313beb8_0
+  - readline=8.2=h1a28f6b_0
+  - six=1.16.0=pyh6c4a22f_0
+  - sqlite=3.41.2=h80987f9_0
+  - stack_data=0.6.2=pyhd8ed1ab_0
+  - tk=8.6.12=hb8d0fd4_0
+  - tornado=6.3.3=py310h80987f9_0
+  - traitlets=5.14.3=pyhd8ed1ab_0
+  - wcwidth=0.2.13=pyhd8ed1ab_0
+  - wheel=0.41.2=py310hca03da5_0
+  - xz=5.4.5=h80987f9_0
+  - zeromq=4.3.5=hcc0f68c_4
+  - zipp=3.17.0=pyhd8ed1ab_0
+  - zlib=1.2.13=h5a0b063_0
+  - pip:
+    - absl-py==2.1.0
+    - aiofiles==23.2.1
+    - aiohttp==3.9.5
+    - aiosignal==1.3.1
+    - annotated-types==0.6.0
+    - anyio==4.3.0
+    - api==0.0.7
+    - apscheduler==3.11.0
+    - arxiv-latex-cleaner==1.0.8
+    - async-timeout==4.0.3
+    - attrs==23.2.0
+    - beautifulsoup4==4.12.3
+    - better-abc==0.0.3
+    - blis==1.3.0
+    - bs4==0.0.2
+    - catalogue==2.0.10
+    - certifi==2023.11.17
+    - chardet==3.0.4
+    - charset-normalizer==3.3.2
+    - click==8.1.7
+    - cloudpathlib==0.21.0
+    - confection==0.1.5
+    - contourpy==1.2.1
+    - cycler==0.12.1
+    - cymem==2.0.11
+    - datasets==2.20.0
+    - deprecated==1.2.14
+    - dill==0.3.8
+    - distro==1.9.0
+    - docker-pycreds==0.4.0
+    - einops==0.8.0
+    - en-core-web-sm==3.8.0
+    - fastapi==0.115.5
+    - ffmpy==0.4.0
+    - filelock==3.13.1
+    - fonttools==4.51.0
+    - frozenlist==1.4.1
+    - fsspec==2023.12.2
+    - future==1.0.0
+    - gitdb==4.0.12
+    - gitpython==3.1.44
+    - gradio==5.6.0
+    - gradio-client==1.4.3
+    - gradio-leaderboard==0.0.13
+    - h11==0.14.0
+    - httpcore==1.0.5
+    - httpx==0.27.0
+    - huggingface-hub==0.26.2
+    - idna==2.8
+    - inflect==6.2.0
+    - jaxtyping==0.2.36
+    - jinja2==3.0.2
+    - joblib==1.3.2
+    - jsonlines==4.0.0
+    - kiwisolver==1.4.5
+    - langcodes==3.5.0
+    - language-data==1.3.0
+    - languages==1.0.0
+    - lxml==5.2.1
+    - mailjet==1.4.1
+    - mailjet-rest==1.3.4
+    - marisa-trie==1.2.1
+    - markdown==3.7
+    - markdown-it-py==3.0.0
+    - markupsafe==2.1.5
+    - matplotlib==3.9.0
+    - mdurl==0.1.2
+    - minicons==0.2.18
+    - multidict==6.0.5
+    - multiprocess==0.70.16
+    - murmurhash==1.0.12
+    - nlopt==2.9.0
+    - nltk==3.8.1
+    - nose==1.3.7
+    - numpy==1.26.4
+    - openai==1.30.1
+    - openreview-py==1.44.3
+    - orjson==3.10.11
+    - packaging==23.2
+    - pandas==1.5.3
+    - pathlib==1.0.1
+    - pillow==10.3.0
+    - preshed==3.0.9
+    - protobuf==5.28.3
+    - pyarrow==16.1.0
+    - pyarrow-hotfix==0.6
+    - pycryptodome==3.21.0
+    - pydantic==2.10.5
+    - pydantic-core==2.27.2
+    - pydub==0.25.1
+    - pyjwt==2.9.0
+    - pylatexenc==2.10
+    - pyparsing==3.1.2
+    - pypdf2==1.26.0
+    - python-dateutil==2.8.2
+    - python-multipart==0.0.12
+    - pytz==2023.3.post1
+    - pyyaml==5.3.1
+    - regex==2023.12.25
+    - requests==2.32.3
+    - rich==13.9.4
+    - roman==3.3
+    - ruff==0.8.0
+    - safehttpx==0.1.1
+    - safetensors==0.4.1
+    - scikit-learn==1.5.2
+    - scipy==1.14.1
+    - seaborn==0.13.2
+    - semantic-memory==0.1.9
+    - semantic-version==2.10.0
+    - sentencepiece==0.2.0
+    - sentry-sdk==2.20.0
+    - setproctitle==1.3.4
+    - setuptools==65.5.1
+    - shellingham==1.5.4
+    - smart-open==7.1.0
+    - smmap==5.0.2
+    - sniffio==1.3.1
+    - soupsieve==2.5
+    - spacy==3.8.5
+    - spacy-legacy==3.0.12
+    - spacy-loggers==1.0.5
+    - srsly==2.5.1
+    - starlette==0.41.3
+    - termcolor==2.4.0
+    - thinc==8.3.6
+    - threadpoolctl==3.5.0
+    - tld==0.13
+    - tokenizers==0.20.3
+    - tomlkit==0.12.0
+    - torch==1.13.1
+    - tqdm==4.66.4
+    - transformer-lens==2.11.0
+    - transformers==4.46.3
+    - typeguard==4.4.1
+    - typer==0.13.1
+    - typing-extensions==4.12.2
+    - tzdata==2025.2
+    - tzlocal==5.2
+    - urllib3==2.3.0
+    - uvicorn==0.32.1
+    - wandb==0.19.4
+    - wasabi==1.1.3
+    - weasel==0.4.1
+    - websockets==12.0
+    - wrapt==1.16.0
+    - xxhash==3.4.1
+    - yarl==1.9.4
+prefix: /Users/aaronmueller/miniconda3/envs/concepts

src/display/css_html_js.py CHANGED Viewed

@@ -94,6 +94,11 @@ custom_css = """
 #box-filter > .form{
     border: 0
 }
 """
 get_window_url_params = """
@@ -102,4 +107,4 @@ get_window_url_params = """
         url_params = Object.fromEntries(params);
         return url_params;
     }
-    """

 #box-filter > .form{
     border: 0
 }
+#bordered-column {
+        border: 1px solid;
+        border-radius: 8px;
+        padding: 16px;
+    }
 """
 get_window_url_params = """
         url_params = Object.fromEntries(params);
         return url_params;
     }
+    """

src/leaderboard/read_evals.py CHANGED Viewed

@@ -22,7 +22,7 @@ def compute_area(edge_counts, faithfulnesses):
     # Return None if either list is empty
     if not edge_counts or not faithfulnesses:
         return None, None, None
     percentages = [e / max(edge_counts) for e in edge_counts]
     area_under = 0.
     area_from_100 = 0.
@@ -327,8 +327,7 @@ class EvalResult_MIB_CAUSALGRAPH:
         results = {}
         for task in ["IOI", "MCQA", "arithmetic", "ARC-easy"]:
             results[task] = {}
-        print(f"Processing file: {json_filepath}")
         # Process each model's results
         for result in data.get("results", []):
             model_id = result.get("model_id", "")
@@ -350,7 +349,6 @@ class EvalResult_MIB_CAUSALGRAPH:
                         intervention_key = '_'.join(intervention_data['intervention'])
                         intervention_scores[intervention_key].append(avg_cf_score)
-                        print(f"intervention_key is {intervention_key}, avg_cf_score is {avg_cf_score}")
                 # Average across layers for each intervention
                 results[task][model_name] = {

     # Return None if either list is empty
     if not edge_counts or not faithfulnesses:
         return None, None, None
     percentages = [e / max(edge_counts) for e in edge_counts]
     area_under = 0.
     area_from_100 = 0.
         results = {}
         for task in ["IOI", "MCQA", "arithmetic", "ARC-easy"]:
             results[task] = {}
         # Process each model's results
         for result in data.get("results", []):
             model_id = result.get("model_id", "")
                         intervention_key = '_'.join(intervention_data['intervention'])
                         intervention_scores[intervention_key].append(avg_cf_score)
                 # Average across layers for each intervention
                 results[task][model_name] = {