Aaron Mueller committed on
Commit
33ddef9
·
1 Parent(s): a5eab2c

format update to submission page

Browse files
Files changed (4) hide show
  1. app.py +24 -23
  2. env.yml +205 -0
  3. src/display/css_html_js.py +6 -1
  4. src/leaderboard/read_evals.py +2 -4
app.py CHANGED
@@ -702,30 +702,31 @@ with demo:
702
  elem_id="track_selector"
703
  )
704
 
705
- with gr.Group(visible=False) as circuit_ui:
706
- with gr.Column():
707
- with gr.Row():
708
- gr.Markdown(EVALUATION_QUEUE_TEXT_SUBGRAPH, elem_classes="markdown-text")
709
 
710
- with gr.Row():
711
- hf_repo_circ = gr.Textbox(
712
- label="HuggingFace Repository URL",
713
- placeholder="https://huggingface.co/username/repo/path",
714
- info="Must be a valid HuggingFace URL pointing to folders containing either 1 importance score file per task/model, or " \
715
- "9 circuit files per task/model (.json or .pt)."
716
- )
717
- level = gr.Radio(
718
- choices=[
719
- "Edge",
720
- "Node (submodule)",
721
- "Node (neuron)"
722
- ],
723
- label="Level of granularity",
724
- info="Is your circuit defined by its inclusion/exclusion of certain edges (e.g., MLP1 to H10L12), of certain submodules (e.g., MLP1), or of neurons " \
725
- "within those submodules (e.g., MLP1 neuron 295)?"
726
- )
727
 
728
- with gr.Group(visible=False) as causal_ui:
 
729
  gr.Markdown(EVALUATION_QUEUE_TEXT_CAUSALVARIABLE, elem_classes="markdown-text")
730
  with gr.Row():
731
  layer = gr.Number(
@@ -768,7 +769,7 @@ with demo:
768
  track.change(toggle_ui, track, [circuit_ui, causal_ui])
769
 
770
  # Submission handling
771
- status = gr.Textbox(label="Submission Status", visible=True)
772
 
773
  def handle_submission(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email):
774
  errors = []
 
702
  elem_id="track_selector"
703
  )
704
 
705
+ # with gr.Group(visible=False) as circuit_ui:
706
+ with gr.Column(visible=False, elem_id="bordered-column") as circuit_ui:
707
+ with gr.Row():
708
+ gr.Markdown(EVALUATION_QUEUE_TEXT_SUBGRAPH, elem_classes="markdown-text")
709
 
710
+ with gr.Row():
711
+ hf_repo_circ = gr.Textbox(
712
+ label="HuggingFace Repository URL",
713
+ placeholder="https://huggingface.co/username/repo/path",
714
+ info="Must be a valid HuggingFace URL pointing to folders containing either 1 importance score file per task/model, or " \
715
+ "9 circuit files per task/model (.json or .pt)."
716
+ )
717
+ level = gr.Radio(
718
+ choices=[
719
+ "Edge",
720
+ "Node (submodule)",
721
+ "Node (neuron)"
722
+ ],
723
+ label="Level of granularity",
724
+ info="Is your circuit defined by its inclusion/exclusion of certain edges (e.g., MLP1 to H10L12), of certain submodules (e.g., MLP1), or of neurons " \
725
+ "within those submodules (e.g., MLP1 neuron 295)?"
726
+ )
727
 
728
+ # with gr.Group(visible=False) as causal_ui:
729
+ with gr.Column(visible=False, elem_id="bordered-column") as causal_ui:
730
  gr.Markdown(EVALUATION_QUEUE_TEXT_CAUSALVARIABLE, elem_classes="markdown-text")
731
  with gr.Row():
732
  layer = gr.Number(
 
769
  track.change(toggle_ui, track, [circuit_ui, causal_ui])
770
 
771
  # Submission handling
772
+ status = gr.Textbox(label="Submission Status", visible=False)
773
 
774
  def handle_submission(track, hf_repo_circ, hf_repo_cg, level, layer, token_position, code_upload, method_name, contact_email):
775
  errors = []
env.yml ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: concepts
2
+ channels:
3
+ - conda-forge
4
+ - defaults
5
+ dependencies:
6
+ - appnope=0.1.4=pyhd8ed1ab_0
7
+ - asttokens=2.4.1=pyhd8ed1ab_0
8
+ - bzip2=1.0.8=h620ffc9_4
9
+ - ca-certificates=2024.2.2=hf0a4a13_0
10
+ - comm=0.2.2=pyhd8ed1ab_0
11
+ - debugpy=1.6.7=py310h313beb8_0
12
+ - decorator=5.1.1=pyhd8ed1ab_0
13
+ - exceptiongroup=1.2.0=pyhd8ed1ab_2
14
+ - executing=2.0.1=pyhd8ed1ab_0
15
+ - importlib-metadata=7.1.0=pyha770c72_0
16
+ - importlib_metadata=7.1.0=hd8ed1ab_0
17
+ - ipykernel=6.29.3=pyh3cd1d5f_0
18
+ - ipython=8.24.0=pyh707e725_0
19
+ - jedi=0.19.1=pyhd8ed1ab_0
20
+ - jupyter_client=8.6.1=pyhd8ed1ab_0
21
+ - jupyter_core=5.5.0=py310hca03da5_0
22
+ - krb5=1.21.2=h92f50d5_0
23
+ - libcxx=17.0.6=h5f092b4_0
24
+ - libedit=3.1.20191231=hc8eb9b7_2
25
+ - libffi=3.4.4=hca03da5_0
26
+ - libsodium=1.0.18=h27ca646_1
27
+ - matplotlib-inline=0.1.7=pyhd8ed1ab_0
28
+ - ncurses=6.4=h313beb8_0
29
+ - nest-asyncio=1.6.0=pyhd8ed1ab_0
30
+ - openssl=3.3.0=h0d3ecfb_0
31
+ - parso=0.8.4=pyhd8ed1ab_0
32
+ - pexpect=4.9.0=pyhd8ed1ab_0
33
+ - pickleshare=0.7.5=py_1003
34
+ - pip=23.3.1=py310hca03da5_0
35
+ - platformdirs=4.2.2=pyhd8ed1ab_0
36
+ - prompt-toolkit=3.0.42=pyha770c72_0
37
+ - psutil=5.9.0=py310h1a28f6b_0
38
+ - ptyprocess=0.7.0=pyhd3deb0d_0
39
+ - pure_eval=0.2.2=pyhd8ed1ab_0
40
+ - pygments=2.18.0=pyhd8ed1ab_0
41
+ - python=3.10.13=hb885b13_0
42
+ - pyzmq=25.1.2=py310h313beb8_0
43
+ - readline=8.2=h1a28f6b_0
44
+ - six=1.16.0=pyh6c4a22f_0
45
+ - sqlite=3.41.2=h80987f9_0
46
+ - stack_data=0.6.2=pyhd8ed1ab_0
47
+ - tk=8.6.12=hb8d0fd4_0
48
+ - tornado=6.3.3=py310h80987f9_0
49
+ - traitlets=5.14.3=pyhd8ed1ab_0
50
+ - wcwidth=0.2.13=pyhd8ed1ab_0
51
+ - wheel=0.41.2=py310hca03da5_0
52
+ - xz=5.4.5=h80987f9_0
53
+ - zeromq=4.3.5=hcc0f68c_4
54
+ - zipp=3.17.0=pyhd8ed1ab_0
55
+ - zlib=1.2.13=h5a0b063_0
56
+ - pip:
57
+ - absl-py==2.1.0
58
+ - aiofiles==23.2.1
59
+ - aiohttp==3.9.5
60
+ - aiosignal==1.3.1
61
+ - annotated-types==0.6.0
62
+ - anyio==4.3.0
63
+ - api==0.0.7
64
+ - apscheduler==3.11.0
65
+ - arxiv-latex-cleaner==1.0.8
66
+ - async-timeout==4.0.3
67
+ - attrs==23.2.0
68
+ - beautifulsoup4==4.12.3
69
+ - better-abc==0.0.3
70
+ - blis==1.3.0
71
+ - bs4==0.0.2
72
+ - catalogue==2.0.10
73
+ - certifi==2023.11.17
74
+ - chardet==3.0.4
75
+ - charset-normalizer==3.3.2
76
+ - click==8.1.7
77
+ - cloudpathlib==0.21.0
78
+ - confection==0.1.5
79
+ - contourpy==1.2.1
80
+ - cycler==0.12.1
81
+ - cymem==2.0.11
82
+ - datasets==2.20.0
83
+ - deprecated==1.2.14
84
+ - dill==0.3.8
85
+ - distro==1.9.0
86
+ - docker-pycreds==0.4.0
87
+ - einops==0.8.0
88
+ - en-core-web-sm==3.8.0
89
+ - fastapi==0.115.5
90
+ - ffmpy==0.4.0
91
+ - filelock==3.13.1
92
+ - fonttools==4.51.0
93
+ - frozenlist==1.4.1
94
+ - fsspec==2023.12.2
95
+ - future==1.0.0
96
+ - gitdb==4.0.12
97
+ - gitpython==3.1.44
98
+ - gradio==5.6.0
99
+ - gradio-client==1.4.3
100
+ - gradio-leaderboard==0.0.13
101
+ - h11==0.14.0
102
+ - httpcore==1.0.5
103
+ - httpx==0.27.0
104
+ - huggingface-hub==0.26.2
105
+ - idna==2.8
106
+ - inflect==6.2.0
107
+ - jaxtyping==0.2.36
108
+ - jinja2==3.0.2
109
+ - joblib==1.3.2
110
+ - jsonlines==4.0.0
111
+ - kiwisolver==1.4.5
112
+ - langcodes==3.5.0
113
+ - language-data==1.3.0
114
+ - languages==1.0.0
115
+ - lxml==5.2.1
116
+ - mailjet==1.4.1
117
+ - mailjet-rest==1.3.4
118
+ - marisa-trie==1.2.1
119
+ - markdown==3.7
120
+ - markdown-it-py==3.0.0
121
+ - markupsafe==2.1.5
122
+ - matplotlib==3.9.0
123
+ - mdurl==0.1.2
124
+ - minicons==0.2.18
125
+ - multidict==6.0.5
126
+ - multiprocess==0.70.16
127
+ - murmurhash==1.0.12
128
+ - nlopt==2.9.0
129
+ - nltk==3.8.1
130
+ - nose==1.3.7
131
+ - numpy==1.26.4
132
+ - openai==1.30.1
133
+ - openreview-py==1.44.3
134
+ - orjson==3.10.11
135
+ - packaging==23.2
136
+ - pandas==1.5.3
137
+ - pathlib==1.0.1
138
+ - pillow==10.3.0
139
+ - preshed==3.0.9
140
+ - protobuf==5.28.3
141
+ - pyarrow==16.1.0
142
+ - pyarrow-hotfix==0.6
143
+ - pycryptodome==3.21.0
144
+ - pydantic==2.10.5
145
+ - pydantic-core==2.27.2
146
+ - pydub==0.25.1
147
+ - pyjwt==2.9.0
148
+ - pylatexenc==2.10
149
+ - pyparsing==3.1.2
150
+ - pypdf2==1.26.0
151
+ - python-dateutil==2.8.2
152
+ - python-multipart==0.0.12
153
+ - pytz==2023.3.post1
154
+ - pyyaml==5.3.1
155
+ - regex==2023.12.25
156
+ - requests==2.32.3
157
+ - rich==13.9.4
158
+ - roman==3.3
159
+ - ruff==0.8.0
160
+ - safehttpx==0.1.1
161
+ - safetensors==0.4.1
162
+ - scikit-learn==1.5.2
163
+ - scipy==1.14.1
164
+ - seaborn==0.13.2
165
+ - semantic-memory==0.1.9
166
+ - semantic-version==2.10.0
167
+ - sentencepiece==0.2.0
168
+ - sentry-sdk==2.20.0
169
+ - setproctitle==1.3.4
170
+ - setuptools==65.5.1
171
+ - shellingham==1.5.4
172
+ - smart-open==7.1.0
173
+ - smmap==5.0.2
174
+ - sniffio==1.3.1
175
+ - soupsieve==2.5
176
+ - spacy==3.8.5
177
+ - spacy-legacy==3.0.12
178
+ - spacy-loggers==1.0.5
179
+ - srsly==2.5.1
180
+ - starlette==0.41.3
181
+ - termcolor==2.4.0
182
+ - thinc==8.3.6
183
+ - threadpoolctl==3.5.0
184
+ - tld==0.13
185
+ - tokenizers==0.20.3
186
+ - tomlkit==0.12.0
187
+ - torch==1.13.1
188
+ - tqdm==4.66.4
189
+ - transformer-lens==2.11.0
190
+ - transformers==4.46.3
191
+ - typeguard==4.4.1
192
+ - typer==0.13.1
193
+ - typing-extensions==4.12.2
194
+ - tzdata==2025.2
195
+ - tzlocal==5.2
196
+ - urllib3==2.3.0
197
+ - uvicorn==0.32.1
198
+ - wandb==0.19.4
199
+ - wasabi==1.1.3
200
+ - weasel==0.4.1
201
+ - websockets==12.0
202
+ - wrapt==1.16.0
203
+ - xxhash==3.4.1
204
+ - yarl==1.9.4
205
+ prefix: /Users/aaronmueller/miniconda3/envs/concepts
src/display/css_html_js.py CHANGED
@@ -94,6 +94,11 @@ custom_css = """
94
  #box-filter > .form{
95
  border: 0
96
  }
 
 
 
 
 
97
  """
98
 
99
  get_window_url_params = """
@@ -102,4 +107,4 @@ get_window_url_params = """
102
  url_params = Object.fromEntries(params);
103
  return url_params;
104
  }
105
- """
 
94
  #box-filter > .form{
95
  border: 0
96
  }
97
+ #bordered-column {
98
+ border: 1px solid;
99
+ border-radius: 8px;
100
+ padding: 16px;
101
+ }
102
  """
103
 
104
  get_window_url_params = """
 
107
  url_params = Object.fromEntries(params);
108
  return url_params;
109
  }
110
+ """
src/leaderboard/read_evals.py CHANGED
@@ -22,7 +22,7 @@ def compute_area(edge_counts, faithfulnesses):
22
  # Return None if either list is empty
23
  if not edge_counts or not faithfulnesses:
24
  return None, None, None
25
-
26
  percentages = [e / max(edge_counts) for e in edge_counts]
27
  area_under = 0.
28
  area_from_100 = 0.
@@ -327,8 +327,7 @@ class EvalResult_MIB_CAUSALGRAPH:
327
  results = {}
328
  for task in ["IOI", "MCQA", "arithmetic", "ARC-easy"]:
329
  results[task] = {}
330
-
331
- print(f"Processing file: {json_filepath}")
332
  # Process each model's results
333
  for result in data.get("results", []):
334
  model_id = result.get("model_id", "")
@@ -350,7 +349,6 @@ class EvalResult_MIB_CAUSALGRAPH:
350
 
351
  intervention_key = '_'.join(intervention_data['intervention'])
352
  intervention_scores[intervention_key].append(avg_cf_score)
353
- print(f"intervention_key is {intervention_key}, avg_cf_score is {avg_cf_score}")
354
 
355
  # Average across layers for each intervention
356
  results[task][model_name] = {
 
22
  # Return None if either list is empty
23
  if not edge_counts or not faithfulnesses:
24
  return None, None, None
25
+
26
  percentages = [e / max(edge_counts) for e in edge_counts]
27
  area_under = 0.
28
  area_from_100 = 0.
 
327
  results = {}
328
  for task in ["IOI", "MCQA", "arithmetic", "ARC-easy"]:
329
  results[task] = {}
330
+
 
331
  # Process each model's results
332
  for result in data.get("results", []):
333
  model_id = result.get("model_id", "")
 
349
 
350
  intervention_key = '_'.join(intervention_data['intervention'])
351
  intervention_scores[intervention_key].append(avg_cf_score)
 
352
 
353
  # Average across layers for each intervention
354
  results[task][model_name] = {