Spaces:
Restarting
Restarting
Aaron Mueller
committed on
Commit
·
33ddef9
1
Parent(s):
a5eab2c
format update to submission page
Browse files
- app.py +24 -23
- env.yml +205 -0
- src/display/css_html_js.py +6 -1
- src/leaderboard/read_evals.py +2 -4
app.py
CHANGED
@@ -702,30 +702,31 @@ with demo:
|
|
702 |
elem_id="track_selector"
|
703 |
)
|
704 |
|
705 |
-
with gr.Group(visible=False) as circuit_ui:
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
|
728 |
-
with gr.Group(visible=False) as causal_ui:
|
|
|
729 |
gr.Markdown(EVALUATION_QUEUE_TEXT_CAUSALVARIABLE, elem_classes="markdown-text")
|
730 |
with gr.Row():
|
731 |
layer = gr.Number(
|
@@ -768,7 +769,7 @@ with demo:
|
|
768 |
track.change(toggle_ui, track, [circuit_ui, causal_ui])
|
769 |
|
770 |
# Submission handling
|
771 |
-
status = gr.Textbox(label="Submission Status", visible=
|
772 |
|
773 |
def handle_submission(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email):
|
774 |
errors = []
|
|
|
702 |
elem_id="track_selector"
|
703 |
)
|
704 |
|
705 |
+
# with gr.Group(visible=False) as circuit_ui:
|
706 |
+
with gr.Column(visible=False, elem_id="bordered-column") as circuit_ui:
|
707 |
+
with gr.Row():
|
708 |
+
gr.Markdown(EVALUATION_QUEUE_TEXT_SUBGRAPH, elem_classes="markdown-text")
|
709 |
|
710 |
+
with gr.Row():
|
711 |
+
hf_repo_circ = gr.Textbox(
|
712 |
+
label="HuggingFace Repository URL",
|
713 |
+
placeholder="https://huggingface.co/username/repo/path",
|
714 |
+
info="Must be a valid HuggingFace URL pointing to folders containing either 1 importance score file per task/model, or " \
|
715 |
+
"9 circuit files per task/model (.json or .pt)."
|
716 |
+
)
|
717 |
+
level = gr.Radio(
|
718 |
+
choices=[
|
719 |
+
"Edge",
|
720 |
+
"Node (submodule)",
|
721 |
+
"Node (neuron)"
|
722 |
+
],
|
723 |
+
label="Level of granularity",
|
724 |
+
info="Is your circuit defined by its inclusion/exclusion of certain edges (e.g., MLP1 to H10L12), of certain submodules (e.g., MLP1), or of neurons " \
|
725 |
+
"within those submodules (e.g., MLP1 neuron 295)?"
|
726 |
+
)
|
727 |
|
728 |
+
# with gr.Group(visible=False) as causal_ui:
|
729 |
+
with gr.Column(visible=False, elem_id="bordered-column") as causal_ui:
|
730 |
gr.Markdown(EVALUATION_QUEUE_TEXT_CAUSALVARIABLE, elem_classes="markdown-text")
|
731 |
with gr.Row():
|
732 |
layer = gr.Number(
|
|
|
769 |
track.change(toggle_ui, track, [circuit_ui, causal_ui])
|
770 |
|
771 |
# Submission handling
|
772 |
+
status = gr.Textbox(label="Submission Status", visible=False)
|
773 |
|
774 |
def handle_submission(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email):
|
775 |
errors = []
|
env.yml
ADDED
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: concepts
|
2 |
+
channels:
|
3 |
+
- conda-forge
|
4 |
+
- defaults
|
5 |
+
dependencies:
|
6 |
+
- appnope=0.1.4=pyhd8ed1ab_0
|
7 |
+
- asttokens=2.4.1=pyhd8ed1ab_0
|
8 |
+
- bzip2=1.0.8=h620ffc9_4
|
9 |
+
- ca-certificates=2024.2.2=hf0a4a13_0
|
10 |
+
- comm=0.2.2=pyhd8ed1ab_0
|
11 |
+
- debugpy=1.6.7=py310h313beb8_0
|
12 |
+
- decorator=5.1.1=pyhd8ed1ab_0
|
13 |
+
- exceptiongroup=1.2.0=pyhd8ed1ab_2
|
14 |
+
- executing=2.0.1=pyhd8ed1ab_0
|
15 |
+
- importlib-metadata=7.1.0=pyha770c72_0
|
16 |
+
- importlib_metadata=7.1.0=hd8ed1ab_0
|
17 |
+
- ipykernel=6.29.3=pyh3cd1d5f_0
|
18 |
+
- ipython=8.24.0=pyh707e725_0
|
19 |
+
- jedi=0.19.1=pyhd8ed1ab_0
|
20 |
+
- jupyter_client=8.6.1=pyhd8ed1ab_0
|
21 |
+
- jupyter_core=5.5.0=py310hca03da5_0
|
22 |
+
- krb5=1.21.2=h92f50d5_0
|
23 |
+
- libcxx=17.0.6=h5f092b4_0
|
24 |
+
- libedit=3.1.20191231=hc8eb9b7_2
|
25 |
+
- libffi=3.4.4=hca03da5_0
|
26 |
+
- libsodium=1.0.18=h27ca646_1
|
27 |
+
- matplotlib-inline=0.1.7=pyhd8ed1ab_0
|
28 |
+
- ncurses=6.4=h313beb8_0
|
29 |
+
- nest-asyncio=1.6.0=pyhd8ed1ab_0
|
30 |
+
- openssl=3.3.0=h0d3ecfb_0
|
31 |
+
- parso=0.8.4=pyhd8ed1ab_0
|
32 |
+
- pexpect=4.9.0=pyhd8ed1ab_0
|
33 |
+
- pickleshare=0.7.5=py_1003
|
34 |
+
- pip=23.3.1=py310hca03da5_0
|
35 |
+
- platformdirs=4.2.2=pyhd8ed1ab_0
|
36 |
+
- prompt-toolkit=3.0.42=pyha770c72_0
|
37 |
+
- psutil=5.9.0=py310h1a28f6b_0
|
38 |
+
- ptyprocess=0.7.0=pyhd3deb0d_0
|
39 |
+
- pure_eval=0.2.2=pyhd8ed1ab_0
|
40 |
+
- pygments=2.18.0=pyhd8ed1ab_0
|
41 |
+
- python=3.10.13=hb885b13_0
|
42 |
+
- pyzmq=25.1.2=py310h313beb8_0
|
43 |
+
- readline=8.2=h1a28f6b_0
|
44 |
+
- six=1.16.0=pyh6c4a22f_0
|
45 |
+
- sqlite=3.41.2=h80987f9_0
|
46 |
+
- stack_data=0.6.2=pyhd8ed1ab_0
|
47 |
+
- tk=8.6.12=hb8d0fd4_0
|
48 |
+
- tornado=6.3.3=py310h80987f9_0
|
49 |
+
- traitlets=5.14.3=pyhd8ed1ab_0
|
50 |
+
- wcwidth=0.2.13=pyhd8ed1ab_0
|
51 |
+
- wheel=0.41.2=py310hca03da5_0
|
52 |
+
- xz=5.4.5=h80987f9_0
|
53 |
+
- zeromq=4.3.5=hcc0f68c_4
|
54 |
+
- zipp=3.17.0=pyhd8ed1ab_0
|
55 |
+
- zlib=1.2.13=h5a0b063_0
|
56 |
+
- pip:
|
57 |
+
- absl-py==2.1.0
|
58 |
+
- aiofiles==23.2.1
|
59 |
+
- aiohttp==3.9.5
|
60 |
+
- aiosignal==1.3.1
|
61 |
+
- annotated-types==0.6.0
|
62 |
+
- anyio==4.3.0
|
63 |
+
- api==0.0.7
|
64 |
+
- apscheduler==3.11.0
|
65 |
+
- arxiv-latex-cleaner==1.0.8
|
66 |
+
- async-timeout==4.0.3
|
67 |
+
- attrs==23.2.0
|
68 |
+
- beautifulsoup4==4.12.3
|
69 |
+
- better-abc==0.0.3
|
70 |
+
- blis==1.3.0
|
71 |
+
- bs4==0.0.2
|
72 |
+
- catalogue==2.0.10
|
73 |
+
- certifi==2023.11.17
|
74 |
+
- chardet==3.0.4
|
75 |
+
- charset-normalizer==3.3.2
|
76 |
+
- click==8.1.7
|
77 |
+
- cloudpathlib==0.21.0
|
78 |
+
- confection==0.1.5
|
79 |
+
- contourpy==1.2.1
|
80 |
+
- cycler==0.12.1
|
81 |
+
- cymem==2.0.11
|
82 |
+
- datasets==2.20.0
|
83 |
+
- deprecated==1.2.14
|
84 |
+
- dill==0.3.8
|
85 |
+
- distro==1.9.0
|
86 |
+
- docker-pycreds==0.4.0
|
87 |
+
- einops==0.8.0
|
88 |
+
- en-core-web-sm==3.8.0
|
89 |
+
- fastapi==0.115.5
|
90 |
+
- ffmpy==0.4.0
|
91 |
+
- filelock==3.13.1
|
92 |
+
- fonttools==4.51.0
|
93 |
+
- frozenlist==1.4.1
|
94 |
+
- fsspec==2023.12.2
|
95 |
+
- future==1.0.0
|
96 |
+
- gitdb==4.0.12
|
97 |
+
- gitpython==3.1.44
|
98 |
+
- gradio==5.6.0
|
99 |
+
- gradio-client==1.4.3
|
100 |
+
- gradio-leaderboard==0.0.13
|
101 |
+
- h11==0.14.0
|
102 |
+
- httpcore==1.0.5
|
103 |
+
- httpx==0.27.0
|
104 |
+
- huggingface-hub==0.26.2
|
105 |
+
- idna==2.8
|
106 |
+
- inflect==6.2.0
|
107 |
+
- jaxtyping==0.2.36
|
108 |
+
- jinja2==3.0.2
|
109 |
+
- joblib==1.3.2
|
110 |
+
- jsonlines==4.0.0
|
111 |
+
- kiwisolver==1.4.5
|
112 |
+
- langcodes==3.5.0
|
113 |
+
- language-data==1.3.0
|
114 |
+
- languages==1.0.0
|
115 |
+
- lxml==5.2.1
|
116 |
+
- mailjet==1.4.1
|
117 |
+
- mailjet-rest==1.3.4
|
118 |
+
- marisa-trie==1.2.1
|
119 |
+
- markdown==3.7
|
120 |
+
- markdown-it-py==3.0.0
|
121 |
+
- markupsafe==2.1.5
|
122 |
+
- matplotlib==3.9.0
|
123 |
+
- mdurl==0.1.2
|
124 |
+
- minicons==0.2.18
|
125 |
+
- multidict==6.0.5
|
126 |
+
- multiprocess==0.70.16
|
127 |
+
- murmurhash==1.0.12
|
128 |
+
- nlopt==2.9.0
|
129 |
+
- nltk==3.8.1
|
130 |
+
- nose==1.3.7
|
131 |
+
- numpy==1.26.4
|
132 |
+
- openai==1.30.1
|
133 |
+
- openreview-py==1.44.3
|
134 |
+
- orjson==3.10.11
|
135 |
+
- packaging==23.2
|
136 |
+
- pandas==1.5.3
|
137 |
+
- pathlib==1.0.1
|
138 |
+
- pillow==10.3.0
|
139 |
+
- preshed==3.0.9
|
140 |
+
- protobuf==5.28.3
|
141 |
+
- pyarrow==16.1.0
|
142 |
+
- pyarrow-hotfix==0.6
|
143 |
+
- pycryptodome==3.21.0
|
144 |
+
- pydantic==2.10.5
|
145 |
+
- pydantic-core==2.27.2
|
146 |
+
- pydub==0.25.1
|
147 |
+
- pyjwt==2.9.0
|
148 |
+
- pylatexenc==2.10
|
149 |
+
- pyparsing==3.1.2
|
150 |
+
- pypdf2==1.26.0
|
151 |
+
- python-dateutil==2.8.2
|
152 |
+
- python-multipart==0.0.12
|
153 |
+
- pytz==2023.3.post1
|
154 |
+
- pyyaml==5.3.1
|
155 |
+
- regex==2023.12.25
|
156 |
+
- requests==2.32.3
|
157 |
+
- rich==13.9.4
|
158 |
+
- roman==3.3
|
159 |
+
- ruff==0.8.0
|
160 |
+
- safehttpx==0.1.1
|
161 |
+
- safetensors==0.4.1
|
162 |
+
- scikit-learn==1.5.2
|
163 |
+
- scipy==1.14.1
|
164 |
+
- seaborn==0.13.2
|
165 |
+
- semantic-memory==0.1.9
|
166 |
+
- semantic-version==2.10.0
|
167 |
+
- sentencepiece==0.2.0
|
168 |
+
- sentry-sdk==2.20.0
|
169 |
+
- setproctitle==1.3.4
|
170 |
+
- setuptools==65.5.1
|
171 |
+
- shellingham==1.5.4
|
172 |
+
- smart-open==7.1.0
|
173 |
+
- smmap==5.0.2
|
174 |
+
- sniffio==1.3.1
|
175 |
+
- soupsieve==2.5
|
176 |
+
- spacy==3.8.5
|
177 |
+
- spacy-legacy==3.0.12
|
178 |
+
- spacy-loggers==1.0.5
|
179 |
+
- srsly==2.5.1
|
180 |
+
- starlette==0.41.3
|
181 |
+
- termcolor==2.4.0
|
182 |
+
- thinc==8.3.6
|
183 |
+
- threadpoolctl==3.5.0
|
184 |
+
- tld==0.13
|
185 |
+
- tokenizers==0.20.3
|
186 |
+
- tomlkit==0.12.0
|
187 |
+
- torch==1.13.1
|
188 |
+
- tqdm==4.66.4
|
189 |
+
- transformer-lens==2.11.0
|
190 |
+
- transformers==4.46.3
|
191 |
+
- typeguard==4.4.1
|
192 |
+
- typer==0.13.1
|
193 |
+
- typing-extensions==4.12.2
|
194 |
+
- tzdata==2025.2
|
195 |
+
- tzlocal==5.2
|
196 |
+
- urllib3==2.3.0
|
197 |
+
- uvicorn==0.32.1
|
198 |
+
- wandb==0.19.4
|
199 |
+
- wasabi==1.1.3
|
200 |
+
- weasel==0.4.1
|
201 |
+
- websockets==12.0
|
202 |
+
- wrapt==1.16.0
|
203 |
+
- xxhash==3.4.1
|
204 |
+
- yarl==1.9.4
|
205 |
+
prefix: /Users/aaronmueller/miniconda3/envs/concepts
|
src/display/css_html_js.py
CHANGED
@@ -94,6 +94,11 @@ custom_css = """
|
|
94 |
#box-filter > .form{
|
95 |
border: 0
|
96 |
}
|
|
|
|
|
|
|
|
|
|
|
97 |
"""
|
98 |
|
99 |
get_window_url_params = """
|
@@ -102,4 +107,4 @@ get_window_url_params = """
|
|
102 |
url_params = Object.fromEntries(params);
|
103 |
return url_params;
|
104 |
}
|
105 |
-
"""
|
|
|
94 |
#box-filter > .form{
|
95 |
border: 0
|
96 |
}
|
97 |
+
#bordered-column {
|
98 |
+
border: 1px solid;
|
99 |
+
border-radius: 8px;
|
100 |
+
padding: 16px;
|
101 |
+
}
|
102 |
"""
|
103 |
|
104 |
get_window_url_params = """
|
|
|
107 |
url_params = Object.fromEntries(params);
|
108 |
return url_params;
|
109 |
}
|
110 |
+
"""
|
src/leaderboard/read_evals.py
CHANGED
@@ -22,7 +22,7 @@ def compute_area(edge_counts, faithfulnesses):
|
|
22 |
# Return None if either list is empty
|
23 |
if not edge_counts or not faithfulnesses:
|
24 |
return None, None, None
|
25 |
-
|
26 |
percentages = [e / max(edge_counts) for e in edge_counts]
|
27 |
area_under = 0.
|
28 |
area_from_100 = 0.
|
@@ -327,8 +327,7 @@ class EvalResult_MIB_CAUSALGRAPH:
|
|
327 |
results = {}
|
328 |
for task in ["IOI", "MCQA", "arithmetic", "ARC-easy"]:
|
329 |
results[task] = {}
|
330 |
-
|
331 |
-
print(f"Processing file: {json_filepath}")
|
332 |
# Process each model's results
|
333 |
for result in data.get("results", []):
|
334 |
model_id = result.get("model_id", "")
|
@@ -350,7 +349,6 @@ class EvalResult_MIB_CAUSALGRAPH:
|
|
350 |
|
351 |
intervention_key = '_'.join(intervention_data['intervention'])
|
352 |
intervention_scores[intervention_key].append(avg_cf_score)
|
353 |
-
print(f"intervention_key is {intervention_key}, avg_cf_score is {avg_cf_score}")
|
354 |
|
355 |
# Average across layers for each intervention
|
356 |
results[task][model_name] = {
|
|
|
22 |
# Return None if either list is empty
|
23 |
if not edge_counts or not faithfulnesses:
|
24 |
return None, None, None
|
25 |
+
|
26 |
percentages = [e / max(edge_counts) for e in edge_counts]
|
27 |
area_under = 0.
|
28 |
area_from_100 = 0.
|
|
|
327 |
results = {}
|
328 |
for task in ["IOI", "MCQA", "arithmetic", "ARC-easy"]:
|
329 |
results[task] = {}
|
330 |
+
|
|
|
331 |
# Process each model's results
|
332 |
for result in data.get("results", []):
|
333 |
model_id = result.get("model_id", "")
|
|
|
349 |
|
350 |
intervention_key = '_'.join(intervention_data['intervention'])
|
351 |
intervention_scores[intervention_key].append(avg_cf_score)
|
|
|
352 |
|
353 |
# Average across layers for each intervention
|
354 |
results[task][model_name] = {
|