ggcristian committed on
Commit
c797bf2
·
1 Parent(s): 3f46491

Add 'Reasoning' tag for model names

Browse files
app.py CHANGED
@@ -291,14 +291,14 @@ with gr.Blocks(
291
  show_row_numbers=True,
292
  wrap=True,
293
  datatype=[
294
- "markdown",
295
  "html",
296
  ],
297
  interactive=False,
298
  column_widths=[
299
  "7%",
300
- "24%",
301
- "17%",
302
  "10%",
303
  "13%",
304
  "10%",
 
291
  show_row_numbers=True,
292
  wrap=True,
293
  datatype=[
294
+ "html",
295
  "html",
296
  ],
297
  interactive=False,
298
  column_widths=[
299
  "7%",
300
+ "28%",
301
+ "13%",
302
  "10%",
303
  "13%",
304
  "10%",
results/parse.py CHANGED
@@ -13,177 +13,224 @@ model_details = {
13
  685,
14
  "General",
15
  "V2",
 
16
  ),
17
  "DeepSeek R1": (
18
  "https://huggingface.co/deepseek-ai/DeepSeek-R1",
19
  685,
20
  "General",
21
  "V1",
 
22
  ),
23
  "Llama 3.1 405B": (
24
  "https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8",
25
  406,
26
  "General",
27
  "V1",
 
28
  ),
29
  "Qwen3 236B A22B": (
30
  "https://huggingface.co/Qwen/Qwen3-235B-A22B",
31
  235,
32
  "General",
33
  "V2",
 
34
  ),
35
  "Llama 3.(1-3) 70B": (
36
  "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
37
  70.6,
38
  "General",
39
  "V1",
 
40
  ),
41
  "Qwen2.5 72B": (
42
  "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
43
  72.7,
44
  "General",
45
  "V1",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  ),
47
- "QwQ 32B": ("https://huggingface.co/Qwen/QwQ-32B", 32.8, "General", "V2"),
48
- "Qwen2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-32B", 32.5, "General", "V1"),
49
  "StarChat2 15B v0.1": (
50
  "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
51
  16,
52
  "General",
53
  "V1",
 
54
  ),
55
  "DeepSeek R1 Distill Qwen 14B": (
56
  "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
57
  14.8,
58
  "General",
59
  "V1",
 
60
  ),
61
  "CodeLlama 70B": (
62
  "https://huggingface.co/codellama/CodeLlama-70b-hf",
63
  69,
64
  "Coding",
65
  "V1",
 
66
  ),
67
  "QwenCoder 2.5 32B": (
68
  "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
69
  32.5,
70
  "Coding",
71
  "V1",
 
72
  ),
73
  "DeepSeek Coder 33B": (
74
  "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
75
  33.3,
76
  "Coding",
77
  "V1",
 
78
  ),
79
  "QwenCoder 2.5 14B": (
80
  "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
81
  14.7,
82
  "Coding",
83
  "V1",
 
84
  ),
85
  "DeepCoder 14B": (
86
  "https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
87
  14.8,
88
  "Coding",
89
  "V2",
 
90
  ),
91
  "OpenCoder 8B": (
92
  "https://huggingface.co/infly/OpenCoder-8B-Instruct",
93
  7.77,
94
  "Coding",
95
  "V1",
 
96
  ),
97
  "SeedCoder 8B": (
98
  "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct",
99
  8.25,
100
  "Coding",
101
  "V2",
 
102
  ),
103
  "SeedCoder 8B Reasoning": (
104
  "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
105
  8.25,
106
  "Coding",
107
  "V2",
 
108
  ),
109
  "QwenCoder 2.5 7B": (
110
  "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
111
  7.61,
112
  "Coding",
113
  "V1",
 
114
  ),
115
  "DeepSeek Coder 6.7B": (
116
  "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
117
  6.74,
118
  "Coding",
119
  "V1",
 
120
  ),
121
  "HaVen-CodeQwen": (
122
  "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
123
  7.25,
124
  "RTL-Specific",
125
  "V1",
 
126
  ),
127
  "CodeV R1 Distill Qwen 7B": (
128
  "https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B",
129
  7.62,
130
  "RTL-Specific",
131
  "V2",
 
132
  ),
133
  "CodeV-CL-7B": (
134
  "https://huggingface.co/yang-z/CodeV-CL-7B",
135
  6.74,
136
  "RTL-Specific",
137
  "V1",
 
138
  ),
139
  "CodeV-QW-7B": (
140
  "https://huggingface.co/yang-z/CodeV-QW-7B",
141
  7.25,
142
  "RTL-Specific",
143
  "V1",
 
144
  ),
145
  "CodeV-DS-6.7B": (
146
  "https://huggingface.co/yang-z/CodeV-DS-6.7B",
147
  6.74,
148
  "RTL-Specific",
149
  "V1",
 
150
  ),
151
  "RTLCoder Mistral": (
152
  "https://huggingface.co/ishorn5/RTLCoder-v1.1",
153
  7.24,
154
  "RTL-Specific",
155
  "V1",
 
156
  ),
157
  "RTLCoder DeepSeek": (
158
  "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
159
  6.74,
160
  "RTL-Specific",
161
  "V1",
 
 
 
 
 
 
 
 
162
  ),
163
- "OriGen": ("https://huggingface.co/henryen/OriGen", 6.74, "RTL-Specific", "V1"),
164
  "Qwen3 Coder 480B A35B": (
165
  "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
166
  480,
167
  "Coding",
168
  "V2",
 
169
  ),
170
  "Magistral Small 2506": (
171
  "https://huggingface.co/mistralai/Magistral-Small-2506",
172
  23.6,
173
  "General",
174
  "V2",
 
175
  ),
176
  "gpt-oss-20b": (
177
  "https://huggingface.co/openai/gpt-oss-20b",
178
  21.5,
179
  "General",
180
  "V2",
 
181
  ),
182
  "gpt-oss-120b": (
183
  "https://huggingface.co/openai/gpt-oss-120b",
184
  120,
185
  "General",
186
  "V2",
 
187
  ),
188
  }
189
 
@@ -201,14 +248,15 @@ def get_headers(reader, agg=False) -> Union[list, list]:
201
  return metrics, benchs
202
 
203
 
204
- def get_model_params_and_url(model) -> Union[str, str, float, str]:
205
  if model not in model_details:
206
- return "-", 0.0, "Unknown", "Unknown"
207
  url = model_details[model][0]
208
  params = model_details[model][1]
209
  type = model_details[model][2]
210
  release = model_details[model][3]
211
- return url, params, type, release
 
212
 
213
 
214
  def parse_results(csv_path: str) -> list[dict]:
@@ -227,7 +275,7 @@ def parse_results(csv_path: str) -> list[dict]:
227
  model = row[0]
228
  if not model:
229
  continue
230
- url, params, type, release = get_model_params_and_url(model)
231
  models.append(model)
232
  row = row[1:]
233
  ctr = 0
@@ -243,6 +291,7 @@ def parse_results(csv_path: str) -> list[dict]:
243
  record["Model URL"] = url
244
  record["Params"] = params
245
  record["Release"] = release
 
246
  dataset.append(record)
247
  ctr += 1
248
  print(models)
 
13
  685,
14
  "General",
15
  "V2",
16
+ "Reasoning", # "Dense" or "Reasoning"
17
  ),
18
  "DeepSeek R1": (
19
  "https://huggingface.co/deepseek-ai/DeepSeek-R1",
20
  685,
21
  "General",
22
  "V1",
23
+ "Reasoning",
24
  ),
25
  "Llama 3.1 405B": (
26
  "https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8",
27
  406,
28
  "General",
29
  "V1",
30
+ "Dense",
31
  ),
32
  "Qwen3 236B A22B": (
33
  "https://huggingface.co/Qwen/Qwen3-235B-A22B",
34
  235,
35
  "General",
36
  "V2",
37
+ "Reasoning",
38
  ),
39
  "Llama 3.(1-3) 70B": (
40
  "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
41
  70.6,
42
  "General",
43
  "V1",
44
+ "Dense",
45
  ),
46
  "Qwen2.5 72B": (
47
  "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
48
  72.7,
49
  "General",
50
  "V1",
51
+ "Dense",
52
+ ),
53
+ "QwQ 32B": (
54
+ "https://huggingface.co/Qwen/QwQ-32B",
55
+ 32.8,
56
+ "General",
57
+ "V2",
58
+ "Reasoning",
59
+ ),
60
+ "Qwen2.5 32B": (
61
+ "https://huggingface.co/Qwen/Qwen2.5-32B",
62
+ 32.5,
63
+ "General",
64
+ "V1",
65
+ "Dense",
66
  ),
 
 
67
  "StarChat2 15B v0.1": (
68
  "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
69
  16,
70
  "General",
71
  "V1",
72
+ "Dense",
73
  ),
74
  "DeepSeek R1 Distill Qwen 14B": (
75
  "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
76
  14.8,
77
  "General",
78
  "V1",
79
+ "Reasoning",
80
  ),
81
  "CodeLlama 70B": (
82
  "https://huggingface.co/codellama/CodeLlama-70b-hf",
83
  69,
84
  "Coding",
85
  "V1",
86
+ "Dense",
87
  ),
88
  "QwenCoder 2.5 32B": (
89
  "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
90
  32.5,
91
  "Coding",
92
  "V1",
93
+ "Dense",
94
  ),
95
  "DeepSeek Coder 33B": (
96
  "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
97
  33.3,
98
  "Coding",
99
  "V1",
100
+ "Dense",
101
  ),
102
  "QwenCoder 2.5 14B": (
103
  "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
104
  14.7,
105
  "Coding",
106
  "V1",
107
+ "Dense",
108
  ),
109
  "DeepCoder 14B": (
110
  "https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
111
  14.8,
112
  "Coding",
113
  "V2",
114
+ "Reasoning",
115
  ),
116
  "OpenCoder 8B": (
117
  "https://huggingface.co/infly/OpenCoder-8B-Instruct",
118
  7.77,
119
  "Coding",
120
  "V1",
121
+ "Dense",
122
  ),
123
  "SeedCoder 8B": (
124
  "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct",
125
  8.25,
126
  "Coding",
127
  "V2",
128
+ "Dense",
129
  ),
130
  "SeedCoder 8B Reasoning": (
131
  "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
132
  8.25,
133
  "Coding",
134
  "V2",
135
+ "Reasoning",
136
  ),
137
  "QwenCoder 2.5 7B": (
138
  "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
139
  7.61,
140
  "Coding",
141
  "V1",
142
+ "Dense",
143
  ),
144
  "DeepSeek Coder 6.7B": (
145
  "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
146
  6.74,
147
  "Coding",
148
  "V1",
149
+ "Dense",
150
  ),
151
  "HaVen-CodeQwen": (
152
  "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
153
  7.25,
154
  "RTL-Specific",
155
  "V1",
156
+ "Dense",
157
  ),
158
  "CodeV R1 Distill Qwen 7B": (
159
  "https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B",
160
  7.62,
161
  "RTL-Specific",
162
  "V2",
163
+ "Reasoning",
164
  ),
165
  "CodeV-CL-7B": (
166
  "https://huggingface.co/yang-z/CodeV-CL-7B",
167
  6.74,
168
  "RTL-Specific",
169
  "V1",
170
+ "Dense",
171
  ),
172
  "CodeV-QW-7B": (
173
  "https://huggingface.co/yang-z/CodeV-QW-7B",
174
  7.25,
175
  "RTL-Specific",
176
  "V1",
177
+ "Dense",
178
  ),
179
  "CodeV-DS-6.7B": (
180
  "https://huggingface.co/yang-z/CodeV-DS-6.7B",
181
  6.74,
182
  "RTL-Specific",
183
  "V1",
184
+ "Dense",
185
  ),
186
  "RTLCoder Mistral": (
187
  "https://huggingface.co/ishorn5/RTLCoder-v1.1",
188
  7.24,
189
  "RTL-Specific",
190
  "V1",
191
+ "Dense",
192
  ),
193
  "RTLCoder DeepSeek": (
194
  "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
195
  6.74,
196
  "RTL-Specific",
197
  "V1",
198
+ "Dense",
199
+ ),
200
+ "OriGen": (
201
+ "https://huggingface.co/henryen/OriGen",
202
+ 6.74,
203
+ "RTL-Specific",
204
+ "V1",
205
+ "Dense",
206
  ),
 
207
  "Qwen3 Coder 480B A35B": (
208
  "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
209
  480,
210
  "Coding",
211
  "V2",
212
+ "Dense",
213
  ),
214
  "Magistral Small 2506": (
215
  "https://huggingface.co/mistralai/Magistral-Small-2506",
216
  23.6,
217
  "General",
218
  "V2",
219
+ "Reasoning",
220
  ),
221
  "gpt-oss-20b": (
222
  "https://huggingface.co/openai/gpt-oss-20b",
223
  21.5,
224
  "General",
225
  "V2",
226
+ "Reasoning",
227
  ),
228
  "gpt-oss-120b": (
229
  "https://huggingface.co/openai/gpt-oss-120b",
230
  120,
231
  "General",
232
  "V2",
233
+ "Reasoning",
234
  ),
235
  }
236
 
 
248
  return metrics, benchs
249
 
250
 
251
+ def get_model_params_and_url(model) -> Union[str, str, float, str, str]:
252
  if model not in model_details:
253
+ return "-", 0.0, "-", "-", "-"
254
  url = model_details[model][0]
255
  params = model_details[model][1]
256
  type = model_details[model][2]
257
  release = model_details[model][3]
258
+ reasoning = model_details[model][4]
259
+ return url, params, type, release, reasoning
260
 
261
 
262
  def parse_results(csv_path: str) -> list[dict]:
 
275
  model = row[0]
276
  if not model:
277
  continue
278
+ url, params, type, release, reasoning = get_model_params_and_url(model)
279
  models.append(model)
280
  row = row[1:]
281
  ctr = 0
 
291
  record["Model URL"] = url
292
  record["Params"] = params
293
  record["Release"] = release
294
+ record["Thinking"] = reasoning
295
  dataset.append(record)
296
  ctr += 1
297
  print(models)
results/results_icarus.json CHANGED
The diff for this file is too large to render. See raw diff
 
results/results_verilator.json CHANGED
The diff for this file is too large to render. See raw diff
 
utils.py CHANGED
@@ -15,11 +15,20 @@ type_emoji = {
15
  # fmt: on
16
 
17
 
18
- def model_hyperlink(link, model_name, release):
 
19
  if release == "V1":
20
- return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 
 
 
 
 
21
  else:
22
- return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a> <span style="opacity: 0.7; font-variant: all-small-caps; font-weight: 600">new</span>'
 
 
 
23
 
24
 
25
  def handle_special_cases(benchmark, metric):
@@ -33,14 +42,18 @@ def handle_special_cases(benchmark, metric):
33
  def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
34
  subset = subset.drop(subset[subset.Score < 0.0].index)
35
  details = subset[
36
- ["Model", "Model URL", "Model Type", "Params", "Release"]
37
  ].drop_duplicates("Model")
38
  filtered_df = subset[["Model", "Score"]].rename(
39
  columns={"Score": "Exact Matching (EM)"}
40
  )
41
  filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
42
  filtered_df["Model"] = filtered_df.apply(
43
- lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"]),
 
 
 
 
44
  axis=1,
45
  )
46
  filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
@@ -53,7 +66,7 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
53
 
54
  def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
55
  details = subset[
56
- ["Model", "Model URL", "Model Type", "Params", "Release"]
57
  ].drop_duplicates("Model")
58
  if "RTLLM" in subset["Benchmark"].unique():
59
  pivot_df = (
@@ -82,7 +95,9 @@ def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataF
82
 
83
  pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
84
  pivot_df["Model"] = pivot_df.apply(
85
- lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"]),
 
 
86
  axis=1,
87
  )
88
  pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
@@ -144,7 +159,7 @@ def filter_bench_all(
144
  subset: pd.DataFrame, df_agg=None, agg_column=None
145
  ) -> pd.DataFrame:
146
  details = subset[
147
- ["Model", "Model URL", "Model Type", "Params", "Release"]
148
  ].drop_duplicates("Model")
149
  if "RTLLM" in subset["Benchmark"].unique():
150
  pivot_df = (
@@ -164,8 +179,11 @@ def filter_bench_all(
164
  )
165
 
166
  pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
 
167
  pivot_df["Model"] = pivot_df.apply(
168
- lambda row: model_hyperlink(row["Model URL"], row["Model"], row["Release"]),
 
 
169
  axis=1,
170
  )
171
  pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
 
15
  # fmt: on
16
 
17
 
18
+ def model_hyperlink(link, model_name, release, thinking=False):
19
+ ret = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
20
  if release == "V1":
21
+ return ret
22
+ elif thinking == False:
23
+ return (
24
+ ret
25
+ + f' <span style="opacity: 0.7; font-variant: all-small-caps; font-weight: 600">new</span>'
26
+ )
27
  else:
28
+ return (
29
+ ret
30
+ + f' <span style="opacity: 0.7; font-variant: all-small-caps; font-weight: 600">new</span> <span style="opacity: 0.9; font-variant: all-small-caps; font-weight: 600; color: #5C6BC0">(reasoning)</span>'
31
+ )
32
 
33
 
34
  def handle_special_cases(benchmark, metric):
 
42
  def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
43
  subset = subset.drop(subset[subset.Score < 0.0].index)
44
  details = subset[
45
+ ["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
46
  ].drop_duplicates("Model")
47
  filtered_df = subset[["Model", "Score"]].rename(
48
  columns={"Score": "Exact Matching (EM)"}
49
  )
50
  filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
51
  filtered_df["Model"] = filtered_df.apply(
52
+ lambda row: model_hyperlink(
53
+ row["Model URL"],
54
+ row["Model"],
55
+ row["Release"],
56
+ ),
57
  axis=1,
58
  )
59
  filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
 
66
 
67
  def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
68
  details = subset[
69
+ ["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
70
  ].drop_duplicates("Model")
71
  if "RTLLM" in subset["Benchmark"].unique():
72
  pivot_df = (
 
95
 
96
  pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
97
  pivot_df["Model"] = pivot_df.apply(
98
+ lambda row: model_hyperlink(
99
+ row["Model URL"], row["Model"], row["Release"], row["Thinking"]
100
+ ),
101
  axis=1,
102
  )
103
  pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
 
159
  subset: pd.DataFrame, df_agg=None, agg_column=None
160
  ) -> pd.DataFrame:
161
  details = subset[
162
+ ["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
163
  ].drop_duplicates("Model")
164
  if "RTLLM" in subset["Benchmark"].unique():
165
  pivot_df = (
 
179
  )
180
 
181
  pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
182
+ print(pivot_df.columns)
183
  pivot_df["Model"] = pivot_df.apply(
184
+ lambda row: model_hyperlink(
185
+ row["Model URL"], row["Model"], row["Release"], row["Thinking"]
186
+ ),
187
  axis=1,
188
  )
189
  pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))