Miquel Albertí committed on
Commit
9f92a32
·
2 Parent(s): f843e68 0d5d6e3

Merge branch 'main' of https://huggingface.co/spaces/HPAI-BSC/TuRTLe-Leaderboard

Browse files
Files changed (3) hide show
  1. about.py +9 -8
  2. app.py +16 -11
  3. css_html_js.py +1 -1
about.py CHANGED
@@ -1,9 +1,10 @@
1
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
2
- CITATION_BUTTON_TEXT = r"""@misc{turtle_2025,
3
- author = {Garcia-Gasulla, D. and Kestor, G. and Parisi, E. and Albertí-Binimelis, M. and Gutierrez, C. and Ghorab, R.M. and Montenegro, O. and Homs, B. and Moreto, Miquel},
4
- title = {TuRTLe: A Unified Evaluation of LLMs for RTL Generation},
5
- journal= {arXiv preprint arXiv:2504.01986},
6
- year = {2025},
7
- note = {Under review}
8
- }
9
- """
 
 
1
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
2
+ CITATION_BUTTON_TEXT = r"""@misc{garciagasulla2025turtleunifiedevaluationllms,
3
+ title={TuRTLe: A Unified Evaluation of LLMs for RTL Generation},
4
+ author={Dario Garcia-Gasulla and Gokcen Kestor and Emanuele Parisi and Miquel Albert\'i-Binimelis and Cristian Gutierrez and Razine Moundir Ghorab and Orlando Montenegro and Bernat Homs and Miquel Moreto},
5
+ year={2025},
6
+ eprint={2504.01986},
7
+ archivePrefix={arXiv},
8
+ primaryClass={cs.AR},
9
+ url={https://arxiv.org/abs/2504.01986},
10
+ }"""
app.py CHANGED
@@ -38,7 +38,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
38
 
39
  if model_type != 'All':
40
  # without emojis
41
- subset = subset[subset['Model Type'] == model_type]
42
  if search_query:
43
  subset = subset[subset['Model'].str.contains(search_query, case=False, na=False)]
44
  max_params = float(max_params)
@@ -149,7 +149,7 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
149
  lc_benchs = ["RTL-Repo"]
150
  non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
151
  rtl_metrics = ["Exact Matching (EM)"]
152
- model_types = ['All', 'General', 'Coding', 'RTL-Specific']
153
 
154
  gr.HTML("""
155
  <p align="center" style="margin-bottom: -10px;">
@@ -160,7 +160,9 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
160
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
161
  <script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
162
  <div style="text-align: center; margin-bottom: 15px;">
163
- <p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard! Use the filters below to explore different RTL benchmarks and models.</p>
 
 
164
  <a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
165
  <button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
166
  GitHub Repo
@@ -192,21 +194,24 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
192
  benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
193
 
194
  with gr.Row(equal_height=True):
195
- with gr.Column():
196
- search_box = gr.Textbox(label="Search Model", placeholder="Type model name...")
197
- with gr.Column():
198
- model_type_dropdown = gr.Dropdown(
 
 
199
  choices=model_types,
200
  label="Select Model Type",
201
- value='All'
 
202
  )
203
- with gr.Column():
204
  params_slider = gr.Slider(
205
  minimum=df['Params'].min(),
206
  maximum=700,
207
  value=700,
208
  label="Max Params",
209
- step=1
 
210
  )
211
 
212
  leaderboard = gr.DataFrame(
@@ -218,7 +223,7 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
218
  interactive=False,
219
  column_widths=["7%", "25%", "10%", "17%", "6%", "6%", "6%", "6%", "6%", "7%"])
220
 
221
- with gr.Tab("Graph View"):
222
  with gr.Row(equal_height=True):
223
  default_benchmark = s2r_benchs[0]
224
  bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
 
38
 
39
  if model_type != 'All':
40
  # without emojis
41
+ subset = subset[subset['Model Type'] == model_type.split(" ")[0]]
42
  if search_query:
43
  subset = subset[subset['Model'].str.contains(search_query, case=False, na=False)]
44
  max_params = float(max_params)
 
149
  lc_benchs = ["RTL-Repo"]
150
  non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
151
  rtl_metrics = ["Exact Matching (EM)"]
152
+ model_types = ['All', 'General 🟢', 'Coding 🔵', 'RTL-Specific 🔴']
153
 
154
  gr.HTML("""
155
  <p align="center" style="margin-bottom: -10px;">
 
160
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
161
  <script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
162
  <div style="text-align: center; margin-bottom: 15px;">
163
+ <p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard! TuRTLe is a <b>unified evaluation framework designed to systematically assess Large Language Models (LLMs) in RTL (Register-Transfer Level) generation</b> for hardware design.
164
+ Evaluation criteria include <b>syntax correctness, functional accuracy, synthesizability, and post-synthesis quality</b> (PPA: Power, Performance, Area). TuRTLe integrates multiple benchmarks to highlight strengths and weaknesses of available LLMs.
165
+ Use the filters below to explore different RTL benchmarks and models.</p>
166
  <a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
167
  <button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
168
  GitHub Repo
 
194
  benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
195
 
196
  with gr.Row(equal_height=True):
197
+ search_box = gr.Textbox(
198
+ label="Search Model",
199
+ placeholder="Type model name...",
200
+ scale=2,
201
+ )
202
+ model_type_dropdown = gr.Radio(
203
  choices=model_types,
204
  label="Select Model Type",
205
+ value='All',
206
+ scale=3,
207
  )
 
208
  params_slider = gr.Slider(
209
  minimum=df['Params'].min(),
210
  maximum=700,
211
  value=700,
212
  label="Max Params",
213
+ step=1,
214
+ scale=2,
215
  )
216
 
217
  leaderboard = gr.DataFrame(
 
223
  interactive=False,
224
  column_widths=["7%", "25%", "10%", "17%", "6%", "6%", "6%", "6%", "6%", "7%"])
225
 
226
+ with gr.Tab("Plot View"):
227
  with gr.Row(equal_height=True):
228
  default_benchmark = s2r_benchs[0]
229
  bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
css_html_js.py CHANGED
@@ -107,7 +107,7 @@ custom_css = """
107
  border: 0;
108
  }
109
  .slider_input_container {
110
- padding-top: 8px;
111
  }
112
  input[role="listbox"] {
113
  cursor: pointer !important;
 
107
  border: 0;
108
  }
109
  .slider_input_container {
110
+ padding-top: 2px;
111
  }
112
  input[role="listbox"] {
113
  cursor: pointer !important;