Spaces:
Running
Running
Commit
·
c797bf2
1
Parent(s):
3f46491
Add 'Reasoning' tag for model names
Browse files- app.py +3 -3
- results/parse.py +56 -7
- results/results_icarus.json +0 -0
- results/results_verilator.json +0 -0
- utils.py +27 -9
app.py
CHANGED
@@ -291,14 +291,14 @@ with gr.Blocks(
|
|
291 |
show_row_numbers=True,
|
292 |
wrap=True,
|
293 |
datatype=[
|
294 |
-
"
|
295 |
"html",
|
296 |
],
|
297 |
interactive=False,
|
298 |
column_widths=[
|
299 |
"7%",
|
300 |
-
"
|
301 |
-
"
|
302 |
"10%",
|
303 |
"13%",
|
304 |
"10%",
|
|
|
291 |
show_row_numbers=True,
|
292 |
wrap=True,
|
293 |
datatype=[
|
294 |
+
"html",
|
295 |
"html",
|
296 |
],
|
297 |
interactive=False,
|
298 |
column_widths=[
|
299 |
"7%",
|
300 |
+
"28%",
|
301 |
+
"13%",
|
302 |
"10%",
|
303 |
"13%",
|
304 |
"10%",
|
results/parse.py
CHANGED
@@ -13,177 +13,224 @@ model_details = {
|
|
13 |
685,
|
14 |
"General",
|
15 |
"V2",
|
|
|
16 |
),
|
17 |
"DeepSeek R1": (
|
18 |
"https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
19 |
685,
|
20 |
"General",
|
21 |
"V1",
|
|
|
22 |
),
|
23 |
"Llama 3.1 405B": (
|
24 |
"https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8",
|
25 |
406,
|
26 |
"General",
|
27 |
"V1",
|
|
|
28 |
),
|
29 |
"Qwen3 236B A22B": (
|
30 |
"https://huggingface.co/Qwen/Qwen3-235B-A22B",
|
31 |
235,
|
32 |
"General",
|
33 |
"V2",
|
|
|
34 |
),
|
35 |
"Llama 3.(1-3) 70B": (
|
36 |
"https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
37 |
70.6,
|
38 |
"General",
|
39 |
"V1",
|
|
|
40 |
),
|
41 |
"Qwen2.5 72B": (
|
42 |
"https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
43 |
72.7,
|
44 |
"General",
|
45 |
"V1",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
),
|
47 |
-
"QwQ 32B": ("https://huggingface.co/Qwen/QwQ-32B", 32.8, "General", "V2"),
|
48 |
-
"Qwen2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-32B", 32.5, "General", "V1"),
|
49 |
"StarChat2 15B v0.1": (
|
50 |
"https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
|
51 |
16,
|
52 |
"General",
|
53 |
"V1",
|
|
|
54 |
),
|
55 |
"DeepSeek R1 Distill Qwen 14B": (
|
56 |
"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
|
57 |
14.8,
|
58 |
"General",
|
59 |
"V1",
|
|
|
60 |
),
|
61 |
"CodeLlama 70B": (
|
62 |
"https://huggingface.co/codellama/CodeLlama-70b-hf",
|
63 |
69,
|
64 |
"Coding",
|
65 |
"V1",
|
|
|
66 |
),
|
67 |
"QwenCoder 2.5 32B": (
|
68 |
"https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
|
69 |
32.5,
|
70 |
"Coding",
|
71 |
"V1",
|
|
|
72 |
),
|
73 |
"DeepSeek Coder 33B": (
|
74 |
"https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
|
75 |
33.3,
|
76 |
"Coding",
|
77 |
"V1",
|
|
|
78 |
),
|
79 |
"QwenCoder 2.5 14B": (
|
80 |
"https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
|
81 |
14.7,
|
82 |
"Coding",
|
83 |
"V1",
|
|
|
84 |
),
|
85 |
"DeepCoder 14B": (
|
86 |
"https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
|
87 |
14.8,
|
88 |
"Coding",
|
89 |
"V2",
|
|
|
90 |
),
|
91 |
"OpenCoder 8B": (
|
92 |
"https://huggingface.co/infly/OpenCoder-8B-Instruct",
|
93 |
7.77,
|
94 |
"Coding",
|
95 |
"V1",
|
|
|
96 |
),
|
97 |
"SeedCoder 8B": (
|
98 |
"https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct",
|
99 |
8.25,
|
100 |
"Coding",
|
101 |
"V2",
|
|
|
102 |
),
|
103 |
"SeedCoder 8B Reasoning": (
|
104 |
"https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
|
105 |
8.25,
|
106 |
"Coding",
|
107 |
"V2",
|
|
|
108 |
),
|
109 |
"QwenCoder 2.5 7B": (
|
110 |
"https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
|
111 |
7.61,
|
112 |
"Coding",
|
113 |
"V1",
|
|
|
114 |
),
|
115 |
"DeepSeek Coder 6.7B": (
|
116 |
"https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
|
117 |
6.74,
|
118 |
"Coding",
|
119 |
"V1",
|
|
|
120 |
),
|
121 |
"HaVen-CodeQwen": (
|
122 |
"https://huggingface.co/yangyiyao/HaVen-CodeQwen",
|
123 |
7.25,
|
124 |
"RTL-Specific",
|
125 |
"V1",
|
|
|
126 |
),
|
127 |
"CodeV R1 Distill Qwen 7B": (
|
128 |
"https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B",
|
129 |
7.62,
|
130 |
"RTL-Specific",
|
131 |
"V2",
|
|
|
132 |
),
|
133 |
"CodeV-CL-7B": (
|
134 |
"https://huggingface.co/yang-z/CodeV-CL-7B",
|
135 |
6.74,
|
136 |
"RTL-Specific",
|
137 |
"V1",
|
|
|
138 |
),
|
139 |
"CodeV-QW-7B": (
|
140 |
"https://huggingface.co/yang-z/CodeV-QW-7B",
|
141 |
7.25,
|
142 |
"RTL-Specific",
|
143 |
"V1",
|
|
|
144 |
),
|
145 |
"CodeV-DS-6.7B": (
|
146 |
"https://huggingface.co/yang-z/CodeV-DS-6.7B",
|
147 |
6.74,
|
148 |
"RTL-Specific",
|
149 |
"V1",
|
|
|
150 |
),
|
151 |
"RTLCoder Mistral": (
|
152 |
"https://huggingface.co/ishorn5/RTLCoder-v1.1",
|
153 |
7.24,
|
154 |
"RTL-Specific",
|
155 |
"V1",
|
|
|
156 |
),
|
157 |
"RTLCoder DeepSeek": (
|
158 |
"https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
|
159 |
6.74,
|
160 |
"RTL-Specific",
|
161 |
"V1",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
),
|
163 |
-
"OriGen": ("https://huggingface.co/henryen/OriGen", 6.74, "RTL-Specific", "V1"),
|
164 |
"Qwen3 Coder 480B A35B": (
|
165 |
"https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
|
166 |
480,
|
167 |
"Coding",
|
168 |
"V2",
|
|
|
169 |
),
|
170 |
"Magistral Small 2506": (
|
171 |
"https://huggingface.co/mistralai/Magistral-Small-2506",
|
172 |
23.6,
|
173 |
"General",
|
174 |
"V2",
|
|
|
175 |
),
|
176 |
"gpt-oss-20b": (
|
177 |
"https://huggingface.co/openai/gpt-oss-20b",
|
178 |
21.5,
|
179 |
"General",
|
180 |
"V2",
|
|
|
181 |
),
|
182 |
"gpt-oss-120b": (
|
183 |
"https://huggingface.co/openai/gpt-oss-120b",
|
184 |
120,
|
185 |
"General",
|
186 |
"V2",
|
|
|
187 |
),
|
188 |
}
|
189 |
|
@@ -201,14 +248,15 @@ def get_headers(reader, agg=False) -> Union[list, list]:
|
|
201 |
return metrics, benchs
|
202 |
|
203 |
|
204 |
-
def get_model_params_and_url(model) -> Union[str, str, float, str]:
|
205 |
if model not in model_details:
|
206 |
-
return "-", 0.0, "
|
207 |
url = model_details[model][0]
|
208 |
params = model_details[model][1]
|
209 |
type = model_details[model][2]
|
210 |
release = model_details[model][3]
|
211 |
-
|
|
|
212 |
|
213 |
|
214 |
def parse_results(csv_path: str) -> list[dict]:
|
@@ -227,7 +275,7 @@ def parse_results(csv_path: str) -> list[dict]:
|
|
227 |
model = row[0]
|
228 |
if not model:
|
229 |
continue
|
230 |
-
url, params, type, release = get_model_params_and_url(model)
|
231 |
models.append(model)
|
232 |
row = row[1:]
|
233 |
ctr = 0
|
@@ -243,6 +291,7 @@ def parse_results(csv_path: str) -> list[dict]:
|
|
243 |
record["Model URL"] = url
|
244 |
record["Params"] = params
|
245 |
record["Release"] = release
|
|
|
246 |
dataset.append(record)
|
247 |
ctr += 1
|
248 |
print(models)
|
|
|
13 |
685,
|
14 |
"General",
|
15 |
"V2",
|
16 |
+
"Reasoning", # "Dense" or "Reasoning"
|
17 |
),
|
18 |
"DeepSeek R1": (
|
19 |
"https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
20 |
685,
|
21 |
"General",
|
22 |
"V1",
|
23 |
+
"Reasoning",
|
24 |
),
|
25 |
"Llama 3.1 405B": (
|
26 |
"https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8",
|
27 |
406,
|
28 |
"General",
|
29 |
"V1",
|
30 |
+
"Dense",
|
31 |
),
|
32 |
"Qwen3 236B A22B": (
|
33 |
"https://huggingface.co/Qwen/Qwen3-235B-A22B",
|
34 |
235,
|
35 |
"General",
|
36 |
"V2",
|
37 |
+
"Reasoning",
|
38 |
),
|
39 |
"Llama 3.(1-3) 70B": (
|
40 |
"https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
41 |
70.6,
|
42 |
"General",
|
43 |
"V1",
|
44 |
+
"Dense",
|
45 |
),
|
46 |
"Qwen2.5 72B": (
|
47 |
"https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
48 |
72.7,
|
49 |
"General",
|
50 |
"V1",
|
51 |
+
"Dense",
|
52 |
+
),
|
53 |
+
"QwQ 32B": (
|
54 |
+
"https://huggingface.co/Qwen/QwQ-32B",
|
55 |
+
32.8,
|
56 |
+
"General",
|
57 |
+
"V2",
|
58 |
+
"Reasoning",
|
59 |
+
),
|
60 |
+
"Qwen2.5 32B": (
|
61 |
+
"https://huggingface.co/Qwen/Qwen2.5-32B",
|
62 |
+
32.5,
|
63 |
+
"General",
|
64 |
+
"V1",
|
65 |
+
"Dense",
|
66 |
),
|
|
|
|
|
67 |
"StarChat2 15B v0.1": (
|
68 |
"https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
|
69 |
16,
|
70 |
"General",
|
71 |
"V1",
|
72 |
+
"Dense",
|
73 |
),
|
74 |
"DeepSeek R1 Distill Qwen 14B": (
|
75 |
"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
|
76 |
14.8,
|
77 |
"General",
|
78 |
"V1",
|
79 |
+
"Reasoning",
|
80 |
),
|
81 |
"CodeLlama 70B": (
|
82 |
"https://huggingface.co/codellama/CodeLlama-70b-hf",
|
83 |
69,
|
84 |
"Coding",
|
85 |
"V1",
|
86 |
+
"Dense",
|
87 |
),
|
88 |
"QwenCoder 2.5 32B": (
|
89 |
"https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
|
90 |
32.5,
|
91 |
"Coding",
|
92 |
"V1",
|
93 |
+
"Dense",
|
94 |
),
|
95 |
"DeepSeek Coder 33B": (
|
96 |
"https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
|
97 |
33.3,
|
98 |
"Coding",
|
99 |
"V1",
|
100 |
+
"Dense",
|
101 |
),
|
102 |
"QwenCoder 2.5 14B": (
|
103 |
"https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
|
104 |
14.7,
|
105 |
"Coding",
|
106 |
"V1",
|
107 |
+
"Dense",
|
108 |
),
|
109 |
"DeepCoder 14B": (
|
110 |
"https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
|
111 |
14.8,
|
112 |
"Coding",
|
113 |
"V2",
|
114 |
+
"Reasoning",
|
115 |
),
|
116 |
"OpenCoder 8B": (
|
117 |
"https://huggingface.co/infly/OpenCoder-8B-Instruct",
|
118 |
7.77,
|
119 |
"Coding",
|
120 |
"V1",
|
121 |
+
"Dense",
|
122 |
),
|
123 |
"SeedCoder 8B": (
|
124 |
"https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct",
|
125 |
8.25,
|
126 |
"Coding",
|
127 |
"V2",
|
128 |
+
"Dense",
|
129 |
),
|
130 |
"SeedCoder 8B Reasoning": (
|
131 |
"https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
|
132 |
8.25,
|
133 |
"Coding",
|
134 |
"V2",
|
135 |
+
"Reasoning",
|
136 |
),
|
137 |
"QwenCoder 2.5 7B": (
|
138 |
"https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
|
139 |
7.61,
|
140 |
"Coding",
|
141 |
"V1",
|
142 |
+
"Dense",
|
143 |
),
|
144 |
"DeepSeek Coder 6.7B": (
|
145 |
"https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
|
146 |
6.74,
|
147 |
"Coding",
|
148 |
"V1",
|
149 |
+
"Dense",
|
150 |
),
|
151 |
"HaVen-CodeQwen": (
|
152 |
"https://huggingface.co/yangyiyao/HaVen-CodeQwen",
|
153 |
7.25,
|
154 |
"RTL-Specific",
|
155 |
"V1",
|
156 |
+
"Dense",
|
157 |
),
|
158 |
"CodeV R1 Distill Qwen 7B": (
|
159 |
"https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B",
|
160 |
7.62,
|
161 |
"RTL-Specific",
|
162 |
"V2",
|
163 |
+
"Reasoning",
|
164 |
),
|
165 |
"CodeV-CL-7B": (
|
166 |
"https://huggingface.co/yang-z/CodeV-CL-7B",
|
167 |
6.74,
|
168 |
"RTL-Specific",
|
169 |
"V1",
|
170 |
+
"Dense",
|
171 |
),
|
172 |
"CodeV-QW-7B": (
|
173 |
"https://huggingface.co/yang-z/CodeV-QW-7B",
|
174 |
7.25,
|
175 |
"RTL-Specific",
|
176 |
"V1",
|
177 |
+
"Dense",
|
178 |
),
|
179 |
"CodeV-DS-6.7B": (
|
180 |
"https://huggingface.co/yang-z/CodeV-DS-6.7B",
|
181 |
6.74,
|
182 |
"RTL-Specific",
|
183 |
"V1",
|
184 |
+
"Dense",
|
185 |
),
|
186 |
"RTLCoder Mistral": (
|
187 |
"https://huggingface.co/ishorn5/RTLCoder-v1.1",
|
188 |
7.24,
|
189 |
"RTL-Specific",
|
190 |
"V1",
|
191 |
+
"Dense",
|
192 |
),
|
193 |
"RTLCoder DeepSeek": (
|
194 |
"https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
|
195 |
6.74,
|
196 |
"RTL-Specific",
|
197 |
"V1",
|
198 |
+
"Dense",
|
199 |
+
),
|
200 |
+
"OriGen": (
|
201 |
+
"https://huggingface.co/henryen/OriGen",
|
202 |
+
6.74,
|
203 |
+
"RTL-Specific",
|
204 |
+
"V1",
|
205 |
+
"Dense",
|
206 |
),
|
|
|
207 |
"Qwen3 Coder 480B A35B": (
|
208 |
"https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
|
209 |
480,
|
210 |
"Coding",
|
211 |
"V2",
|
212 |
+
"Dense",
|
213 |
),
|
214 |
"Magistral Small 2506": (
|
215 |
"https://huggingface.co/mistralai/Magistral-Small-2506",
|
216 |
23.6,
|
217 |
"General",
|
218 |
"V2",
|
219 |
+
"Reasoning",
|
220 |
),
|
221 |
"gpt-oss-20b": (
|
222 |
"https://huggingface.co/openai/gpt-oss-20b",
|
223 |
21.5,
|
224 |
"General",
|
225 |
"V2",
|
226 |
+
"Reasoning",
|
227 |
),
|
228 |
"gpt-oss-120b": (
|
229 |
"https://huggingface.co/openai/gpt-oss-120b",
|
230 |
120,
|
231 |
"General",
|
232 |
"V2",
|
233 |
+
"Reasoning",
|
234 |
),
|
235 |
}
|
236 |
|
|
|
248 |
return metrics, benchs
|
249 |
|
250 |
|
251 |
+
def get_model_params_and_url(model) -> tuple[str, float, str, str, str]:
    """Look up the static metadata recorded for *model* in ``model_details``.

    Args:
        model: Model display name, used as the key into ``model_details``.

    Returns:
        A 5-tuple ``(url, params, type, release, reasoning)`` taken from the
        first five fields of the model's ``model_details`` entry. When the
        model has no entry, the placeholder tuple ``("-", 0.0, "-", "-", "-")``
        is returned instead of raising.
    """
    if model not in model_details:
        # Unknown model: hand back placeholders so callers can still unpack
        # five values.
        return "-", 0.0, "-", "-", "-"

    # A single lookup instead of five. The local is named ``model_type`` so
    # the ``type`` builtin is not shadowed.
    details = model_details[model]
    url, params, model_type, release, reasoning = details[:5]
    return url, params, model_type, release, reasoning
|
260 |
|
261 |
|
262 |
def parse_results(csv_path: str) -> list[dict]:
|
|
|
275 |
model = row[0]
|
276 |
if not model:
|
277 |
continue
|
278 |
+
url, params, type, release, reasoning = get_model_params_and_url(model)
|
279 |
models.append(model)
|
280 |
row = row[1:]
|
281 |
ctr = 0
|
|
|
291 |
record["Model URL"] = url
|
292 |
record["Params"] = params
|
293 |
record["Release"] = release
|
294 |
+
record["Thinking"] = reasoning
|
295 |
dataset.append(record)
|
296 |
ctr += 1
|
297 |
print(models)
|
results/results_icarus.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
results/results_verilator.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
utils.py
CHANGED
@@ -15,11 +15,20 @@ type_emoji = {
|
|
15 |
# fmt: on
|
16 |
|
17 |
|
18 |
-
def model_hyperlink(link, model_name, release):
|
|
|
19 |
if release == "V1":
|
20 |
-
return
|
|
|
|
|
|
|
|
|
|
|
21 |
else:
|
22 |
-
return
|
|
|
|
|
|
|
23 |
|
24 |
|
25 |
def handle_special_cases(benchmark, metric):
|
@@ -33,14 +42,18 @@ def handle_special_cases(benchmark, metric):
|
|
33 |
def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
34 |
subset = subset.drop(subset[subset.Score < 0.0].index)
|
35 |
details = subset[
|
36 |
-
["Model", "Model URL", "Model Type", "Params", "Release"]
|
37 |
].drop_duplicates("Model")
|
38 |
filtered_df = subset[["Model", "Score"]].rename(
|
39 |
columns={"Score": "Exact Matching (EM)"}
|
40 |
)
|
41 |
filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
|
42 |
filtered_df["Model"] = filtered_df.apply(
|
43 |
-
lambda row: model_hyperlink(
|
|
|
|
|
|
|
|
|
44 |
axis=1,
|
45 |
)
|
46 |
filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
|
@@ -53,7 +66,7 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
|
53 |
|
54 |
def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
|
55 |
details = subset[
|
56 |
-
["Model", "Model URL", "Model Type", "Params", "Release"]
|
57 |
].drop_duplicates("Model")
|
58 |
if "RTLLM" in subset["Benchmark"].unique():
|
59 |
pivot_df = (
|
@@ -82,7 +95,9 @@ def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataF
|
|
82 |
|
83 |
pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
|
84 |
pivot_df["Model"] = pivot_df.apply(
|
85 |
-
lambda row: model_hyperlink(
|
|
|
|
|
86 |
axis=1,
|
87 |
)
|
88 |
pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
|
@@ -144,7 +159,7 @@ def filter_bench_all(
|
|
144 |
subset: pd.DataFrame, df_agg=None, agg_column=None
|
145 |
) -> pd.DataFrame:
|
146 |
details = subset[
|
147 |
-
["Model", "Model URL", "Model Type", "Params", "Release"]
|
148 |
].drop_duplicates("Model")
|
149 |
if "RTLLM" in subset["Benchmark"].unique():
|
150 |
pivot_df = (
|
@@ -164,8 +179,11 @@ def filter_bench_all(
|
|
164 |
)
|
165 |
|
166 |
pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
|
|
|
167 |
pivot_df["Model"] = pivot_df.apply(
|
168 |
-
lambda row: model_hyperlink(
|
|
|
|
|
169 |
axis=1,
|
170 |
)
|
171 |
pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
|
|
|
15 |
# fmt: on
|
16 |
|
17 |
|
18 |
+
def model_hyperlink(link, model_name, release, thinking=False):
    """Build the HTML shown in the leaderboard's "Model" cell.

    Args:
        link: URL the model name should point to.
        model_name: Text displayed inside the anchor.
        release: Release tag of the model. ``"V1"`` models get the bare link;
            any other value additionally gets a "new" badge.
        thinking: Whether the model is a reasoning model. Accepts a bool, or
            a label string where only ``"Reasoning"`` (case-insensitive)
            counts as reasoning; other strings such as ``"Dense"`` or the
            ``"-"`` placeholder do not.

    Returns:
        The anchor markup, followed by a "new" badge for non-V1 releases and
        a "(reasoning)" badge for non-V1 reasoning models.
    """
    anchor = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
    if release == "V1":
        # V1 entries are rendered without any badge, reasoning or not.
        return anchor

    # Comparing a label string to ``False`` is never true, which used to tag
    # every non-V1 model as reasoning. Interpret strings by value instead and
    # keep plain truthiness for bools.
    if isinstance(thinking, str):
        is_reasoning = thinking.strip().lower() == "reasoning"
    else:
        is_reasoning = bool(thinking)

    new_badge = ' <span style="opacity: 0.7; font-variant: all-small-caps; font-weight: 600">new</span>'
    if not is_reasoning:
        return anchor + new_badge

    reasoning_badge = ' <span style="opacity: 0.9; font-variant: all-small-caps; font-weight: 600; color: #5C6BC0">(reasoning)</span>'
    return anchor + new_badge + reasoning_badge
|
32 |
|
33 |
|
34 |
def handle_special_cases(benchmark, metric):
|
|
|
42 |
def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
43 |
subset = subset.drop(subset[subset.Score < 0.0].index)
|
44 |
details = subset[
|
45 |
+
["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
|
46 |
].drop_duplicates("Model")
|
47 |
filtered_df = subset[["Model", "Score"]].rename(
|
48 |
columns={"Score": "Exact Matching (EM)"}
|
49 |
)
|
50 |
filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
|
51 |
filtered_df["Model"] = filtered_df.apply(
|
52 |
+
lambda row: model_hyperlink(
|
53 |
+
row["Model URL"],
|
54 |
+
row["Model"],
|
55 |
+
row["Release"],
|
56 |
+
),
|
57 |
axis=1,
|
58 |
)
|
59 |
filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
|
|
|
66 |
|
67 |
def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
|
68 |
details = subset[
|
69 |
+
["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
|
70 |
].drop_duplicates("Model")
|
71 |
if "RTLLM" in subset["Benchmark"].unique():
|
72 |
pivot_df = (
|
|
|
95 |
|
96 |
pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
|
97 |
pivot_df["Model"] = pivot_df.apply(
|
98 |
+
lambda row: model_hyperlink(
|
99 |
+
row["Model URL"], row["Model"], row["Release"], row["Thinking"]
|
100 |
+
),
|
101 |
axis=1,
|
102 |
)
|
103 |
pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
|
|
|
159 |
subset: pd.DataFrame, df_agg=None, agg_column=None
|
160 |
) -> pd.DataFrame:
|
161 |
details = subset[
|
162 |
+
["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
|
163 |
].drop_duplicates("Model")
|
164 |
if "RTLLM" in subset["Benchmark"].unique():
|
165 |
pivot_df = (
|
|
|
179 |
)
|
180 |
|
181 |
pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
|
182 |
+
print(pivot_df.columns)
|
183 |
pivot_df["Model"] = pivot_df.apply(
|
184 |
+
lambda row: model_hyperlink(
|
185 |
+
row["Model URL"], row["Model"], row["Release"], row["Thinking"]
|
186 |
+
),
|
187 |
axis=1,
|
188 |
)
|
189 |
pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
|