Miquel Albertí committed on
Commit
9f92a32
·
2 Parent(s): f843e68 0d5d6e3

Merge branch 'main' of https://huggingface.co/spaces/HPAI-BSC/TuRTLe-Leaderboard

Browse files
Files changed (3) hide show
  1. about.py +9 -8
  2. app.py +16 -11
  3. css_html_js.py +1 -1
about.py CHANGED
@@ -1,9 +1,10 @@
1
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
2
- CITATION_BUTTON_TEXT = r"""@misc{turtle_2025,
3
- author = {Garcia-Gasulla, D. and Kestor, G. and Parisi, E. and Albertí-Binimelis, M. and Gutierrez, C. and Ghorab, R.M. and Montenegro, O. and Homs, B. and Moreto, Miquel},
4
- title = {TuRTLe: A Unified Evaluation of LLMs for RTL Generation},
5
- journal= {arXiv preprint arXiv:2504.01986},
6
- year = {2025},
7
- note = {Under review}
8
- }
9
- """
 
 
1
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
2
+ CITATION_BUTTON_TEXT = r"""@misc{garciagasulla2025turtleunifiedevaluationllms,
3
+ title={TuRTLe: A Unified Evaluation of LLMs for RTL Generation},
4
+ author={Dario Garcia-Gasulla and Gokcen Kestor and Emanuele Parisi and Miquel Albert\'i-Binimelis and Cristian Gutierrez and Razine Moundir Ghorab and Orlando Montenegro and Bernat Homs and Miquel Moreto},
5
+ year={2025},
6
+ eprint={2504.01986},
7
+ archivePrefix={arXiv},
8
+ primaryClass={cs.AR},
9
+ url={https://arxiv.org/abs/2504.01986},
10
+ }"""
app.py CHANGED
@@ -38,7 +38,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params):
38
 
39
  if model_type != 'All':
40
  # without emojis
41
- subset = subset[subset['Model Type'] == model_type]
42
  if search_query:
43
  subset = subset[subset['Model'].str.contains(search_query, case=False, na=False)]
44
  max_params = float(max_params)
@@ -149,7 +149,7 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
149
  lc_benchs = ["RTL-Repo"]
150
  non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
151
  rtl_metrics = ["Exact Matching (EM)"]
152
- model_types = ['All', 'General', 'Coding', 'RTL-Specific']
153
 
154
  gr.HTML("""
155
  <p align="center" style="margin-bottom: -10px;">
@@ -160,7 +160,9 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
160
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
161
  <script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
162
  <div style="text-align: center; margin-bottom: 15px;">
163
- <p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard! Use the filters below to explore different RTL benchmarks and models.</p>
 
 
164
  <a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
165
  <button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
166
  GitHub Repo
@@ -192,21 +194,24 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
192
  benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
193
 
194
  with gr.Row(equal_height=True):
195
- with gr.Column():
196
- search_box = gr.Textbox(label="Search Model", placeholder="Type model name...")
197
- with gr.Column():
198
- model_type_dropdown = gr.Dropdown(
 
 
199
  choices=model_types,
200
  label="Select Model Type",
201
- value='All'
 
202
  )
203
- with gr.Column():
204
  params_slider = gr.Slider(
205
  minimum=df['Params'].min(),
206
  maximum=700,
207
  value=700,
208
  label="Max Params",
209
- step=1
 
210
  )
211
 
212
  leaderboard = gr.DataFrame(
@@ -218,7 +223,7 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
218
  interactive=False,
219
  column_widths=["7%", "25%", "10%", "17%", "6%", "6%", "6%", "6%", "6%", "7%"])
220
 
221
- with gr.Tab("Graph View"):
222
  with gr.Row(equal_height=True):
223
  default_benchmark = s2r_benchs[0]
224
  bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
 
38
 
39
  if model_type != 'All':
40
  # without emojis
41
+ subset = subset[subset['Model Type'] == model_type.split(" ")[0]]
42
  if search_query:
43
  subset = subset[subset['Model'].str.contains(search_query, case=False, na=False)]
44
  max_params = float(max_params)
 
149
  lc_benchs = ["RTL-Repo"]
150
  non_rtl_metrics = ["Syntax (STX)", "Functionality (FNC)", "Synthesis (SYN)", "Power", "Performance", "Area"]
151
  rtl_metrics = ["Exact Matching (EM)"]
152
+ model_types = ['All', 'General 🟢', 'Coding 🔵', 'RTL-Specific 🔴']
153
 
154
  gr.HTML("""
155
  <p align="center" style="margin-bottom: -10px;">
 
160
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
161
  <script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
162
  <div style="text-align: center; margin-bottom: 15px;">
163
+ <p style="margin-bottom: 15px;">Welcome to the TuRTLe Model Leaderboard! TuRTLe is a <b>unified evaluation framework designed to systematically assess Large Language Models (LLMs) in RTL (Register-Transfer Level) generation</b> for hardware design.
164
+ Evaluation criteria include <b>syntax correctness, functional accuracy, synthesizability, and post-synthesis quality</b> (PPA: Power, Performance, Area). TuRTLe integrates multiple benchmarks to highlight strengths and weaknesses of available LLMs.
165
+ Use the filters below to explore different RTL benchmarks and models.</p>
166
  <a href="https://github.com/HPAI-BSC/TuRTLe" target="_blank" style="text-decoration: none; margin-right: 10px;">
167
  <button style="background: #333; color: white; padding: 10px 14px; border-radius: 8px; border: none; font-size: 16px; cursor: pointer;">
168
  GitHub Repo
 
194
  benchmark_radio = gr.Radio(choices=["All"] + s2r_benchs, label="Select Benchmark", value='All')
195
 
196
  with gr.Row(equal_height=True):
197
+ search_box = gr.Textbox(
198
+ label="Search Model",
199
+ placeholder="Type model name...",
200
+ scale=2,
201
+ )
202
+ model_type_dropdown = gr.Radio(
203
  choices=model_types,
204
  label="Select Model Type",
205
+ value='All',
206
+ scale=3,
207
  )
 
208
  params_slider = gr.Slider(
209
  minimum=df['Params'].min(),
210
  maximum=700,
211
  value=700,
212
  label="Max Params",
213
+ step=1,
214
+ scale=2,
215
  )
216
 
217
  leaderboard = gr.DataFrame(
 
223
  interactive=False,
224
  column_widths=["7%", "25%", "10%", "17%", "6%", "6%", "6%", "6%", "6%", "7%"])
225
 
226
+ with gr.Tab("Plot View"):
227
  with gr.Row(equal_height=True):
228
  default_benchmark = s2r_benchs[0]
229
  bubble_benchmark = gr.Dropdown(choices=benchmarks, label="Select Benchmark", value=default_benchmark, elem_classes="gr-dropdown")
css_html_js.py CHANGED
@@ -107,7 +107,7 @@ custom_css = """
107
  border: 0;
108
  }
109
  .slider_input_container {
110
- padding-top: 8px;
111
  }
112
  input[role="listbox"] {
113
  cursor: pointer !important;
 
107
  border: 0;
108
  }
109
  .slider_input_container {
110
+ padding-top: 2px;
111
  }
112
  input[role="listbox"] {
113
  cursor: pointer !important;