Merge branch 'main' of https://huggingface.co/spaces/HPAI-BSC/TuRTLe-Leaderboard
Browse files- about.py +9 -8
- app.py +16 -11
- css_html_js.py +1 -1
about.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
2 |
-
CITATION_BUTTON_TEXT = r"""@misc{
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
}
|
9 |
-
|
|
|
|
1 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
2 |
+
CITATION_BUTTON_TEXT = r"""@misc{garciagasulla2025turtleunifiedevaluationllms,
|
3 |
+
title={TuRTLe: A Unified Evaluation of LLMs for RTL Generation},
|
4 |
+
author={Dario Garcia-Gasulla and Gokcen Kestor and Emanuele Parisi and Miquel Albert\'i-Binimelis and Cristian Gutierrez and Razine Moundir Ghorab and Orlando Montenegro and Bernat Homs and Miquel Moreto},
|
5 |
+
year={2025},
|
6 |
+
eprint={2504.01986},
|
7 |
+
archivePrefix={arXiv},
|
8 |
+
primaryClass={cs.AR},
|
9 |
+
url={https://arxiv.org/abs/2504.01986},
|
10 |
+
}"""
|
app.py
CHANGED
@@ -38,7 +38,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
|
|
38 |
|
39 |
if model_type != 'All':
|
40 |
# without emojis
|
41 |
-
subset = subset[subset['Model Type'] == model_type]
|
42 |
if search_query:
|
43 |
subset = subset[subset['Model'].str.contains(search_query, case=False, na=False)]
|
44 |
max_params = float(max_params)
|
@@ -149,7 +149,7 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
|
|
149 |
lc_benchs = ["RTL-Repo"]
|
150 |
non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
|
151 |
rtl_metrics = ["Exact Matching (EM)"]
|
152 |
-
model_types = ['All', 'General', 'Coding', 'RTL-Specific']
|
153 |
|
154 |
gr.HTML("""
|
155 |
<p align="center" style="margin-bottom: -10px;">
|
@@ -160,7 +160,9 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
|
|
160 |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
|
161 |
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
|
162 |
<div style="text-align: center; margin-bottom: 15px;">
|
163 |
-
<p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard!
|
|
|
|
|
164 |
<a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
|
165 |
<button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
|
166 |
GitHub Repo
|
@@ -192,21 +194,24 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
|
|
192 |
benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
|
193 |
|
194 |
with gr.Row(equal_height=True):
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
|
|
|
|
199 |
choices=model_types,
|
200 |
label="Select Model Type",
|
201 |
-
value='All'
|
|
|
202 |
)
|
203 |
-
with gr.Column():
|
204 |
params_slider = gr.Slider(
|
205 |
minimum=df['Params'].min(),
|
206 |
maximum=700,
|
207 |
value=700,
|
208 |
label="Max Params",
|
209 |
-
step=1
|
|
|
210 |
)
|
211 |
|
212 |
leaderboard = gr.DataFrame(
|
@@ -218,7 +223,7 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
|
|
218 |
interactive=False,
|
219 |
column_widths=["7%", "25%", "10%", "17%", "6%", "6%", "6%", "6%", "6%", "7%"])
|
220 |
|
221 |
-
with gr.Tab("
|
222 |
with gr.Row(equal_height=True):
|
223 |
default_benchmark = s2r_benchs[0]
|
224 |
bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
|
|
|
38 |
|
39 |
if model_type != 'All':
|
40 |
# without emojis
|
41 |
+
subset = subset[subset['Model Type'] == model_type.split(" ")[0]]
|
42 |
if search_query:
|
43 |
subset = subset[subset['Model'].str.contains(search_query, case=False, na=False)]
|
44 |
max_params = float(max_params)
|
|
|
149 |
lc_benchs = ["RTL-Repo"]
|
150 |
non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
|
151 |
rtl_metrics = ["Exact Matching (EM)"]
|
152 |
+
model_types = ['All', 'General 🟢', 'Coding 🔵', 'RTL-Specific 🔴']
|
153 |
|
154 |
gr.HTML("""
|
155 |
<p align="center" style="margin-bottom: -10px;">
|
|
|
160 |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
|
161 |
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
|
162 |
<div style="text-align: center; margin-bottom: 15px;">
|
163 |
+
<p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard! TuRTLe is a <b>unified evaluation framework designed to systematically assess Large Language Models (LLMs) in RTL (Register-Transfer Level) generation</b> for hardware design.
|
164 |
+
Evaluation criteria include <b>syntax correctness, functional accuracy, synthesizability, and post-synthesis quality</b> (PPA: Power, Performance, Area). TuRTLe integrates multiple benchmarks to highlight strengths and weaknesses of available LLMs.
|
165 |
+
Use the filters below to explore different RTL benchmarks and models.</p>
|
166 |
<a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
|
167 |
<button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
|
168 |
GitHub Repo
|
|
|
194 |
benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
|
195 |
|
196 |
with gr.Row(equal_height=True):
|
197 |
+
search_box = gr.Textbox(
|
198 |
+
label="Search Model",
|
199 |
+
placeholder="Type model name...",
|
200 |
+
scale=2,
|
201 |
+
)
|
202 |
+
model_type_dropdown = gr.Radio(
|
203 |
choices=model_types,
|
204 |
label="Select Model Type",
|
205 |
+
value='All',
|
206 |
+
scale=3,
|
207 |
)
|
|
|
208 |
params_slider = gr.Slider(
|
209 |
minimum=df['Params'].min(),
|
210 |
maximum=700,
|
211 |
value=700,
|
212 |
label="Max Params",
|
213 |
+
step=1,
|
214 |
+
scale=2,
|
215 |
)
|
216 |
|
217 |
leaderboard = gr.DataFrame(
|
|
|
223 |
interactive=False,
|
224 |
column_widths=["7%", "25%", "10%", "17%", "6%", "6%", "6%", "6%", "6%", "7%"])
|
225 |
|
226 |
+
with gr.Tab("Plot View"):
|
227 |
with gr.Row(equal_height=True):
|
228 |
default_benchmark = s2r_benchs[0]
|
229 |
bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
|
css_html_js.py
CHANGED
@@ -107,7 +107,7 @@ custom_css = """
|
|
107 |
border: 0;
|
108 |
}
|
109 |
.slider_input_container {
|
110 |
-
padding-top:
|
111 |
}
|
112 |
input[role="listbox"] {
|
113 |
cursor: pointer !important;
|
|
|
107 |
border: 0;
|
108 |
}
|
109 |
.slider_input_container {
|
110 |
+
padding-top: 2px;
|
111 |
}
|
112 |
input[role="listbox"] {
|
113 |
cursor: pointer !important;
|