Spaces:

sealical
/

PhysCodeBench-Leaderboard

Sleeping

App Files Files Community

Sealical committed on Mar 19

Commit

5bf1eab

1 Parent(s): 81b7fcb

Update space

Browse files

Files changed (2) hide show

app.py +329 -172
old_app.py +204 -0

app.py CHANGED Viewed

@@ -1,204 +1,361 @@
 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
-from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
-from src.about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
-    INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
-    TITLE,
-)
-from src.display.css_html_js import custom_css
-from src.display.utils import (
-    BENCHMARK_COLS,
-    COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    AutoEvalColumn,
-    ModelType,
-    fields,
-    WeightType,
-    Precision
-)
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
-def restart_space():
-    API.restart_space(repo_id=REPO_ID)
-### Space initialisation
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
         filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
             ColumnFilter(
-                AutoEvalColumn.params.name,
                 type="slider",
                 min=0.01,
-                max=150,
                 label="Select the number of parameters (B)",
             ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
         ],
-        bool_checkboxgroup_label="Hide models",
         interactive=False,
     )
-demo = gr.Blocks(css=custom_css)
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
                 with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
                         multiselect=False,
-                        value=None,
-                        interactive=True,
                     )
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()

 import gradio as gr
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
+import os
+import json
 from huggingface_hub import snapshot_download
+# Static text for the PhysicalCodeBench leaderboard page.
+# TITLE is raw HTML; create_demo() passes it to gr.HTML at the top of the page.
+TITLE = """
+<div style="text-align: center; max-width: 900px; margin: 0 auto;">
+    <div>
+        <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
+            PhysicalCodeBench Leaderboard
+        </h1>
+        <h3 style="margin-top: 0; margin-bottom: 10px; font-weight: 500;">
+            Evaluating LLMs on Physics-based Simulation Code Generation
+        </h3>
+    </div>
+</div>
+"""
+# Markdown shown directly below the title by create_demo().
+INTRODUCTION_TEXT = """
+PhysicalCodeBench evaluates the abilities of Large Language Models (LLMs) to generate code for physics-based simulations.
+The benchmark consists of user instructions that describe physical scenarios to be simulated, reference code implementations,
+and resulting simulation videos generated using the [Genesis](https://github.com/Genesis-Embodied-AI/Genesis) physics engine.
+This leaderboard showcases model performance on the PhysicalCodeBench-50 dataset, measuring both text-based execution success
+and visual quality of the generated simulations.
+"""
+# Markdown body of the "About" tab.
+ABOUT_TEXT = """
+## About PhysicalCodeBench
+PhysicalCodeBench evaluates an LLM's ability to:
+- Understand natural language descriptions of physical scenarios
+- Generate executable code that correctly implements the physics simulation
+- Produce visually accurate and physically plausible results
+The benchmark covers a variety of physical phenomena including:
+- Rigid body dynamics (collisions, rolling, bouncing, etc.)
+- Fluid and particle simulations
+- Soft body physics
+- Controlled environments (robotic arms, drones, etc.)
+- Chain reactions and complex interactions
+## Evaluation Metrics
+PhysicalCodeBench uses two main evaluation dimensions:
+1. **Text Score (50 points)**: Evaluates code execution success
+   - Code runs without errors (25 points)
+   - Generates proper output files (10 points)
+   - Output files meet required specifications (15 points)
+2. **Visual Score (50 points)**: Evaluates simulation quality
+   - CLIP Score: Measures text-video alignment (25 points)
+   - Motion Smoothness: Evaluates physics simulation quality (25 points)
+Total score is the sum of Text and Visual scores (maximum 100 points).
+"""
+# Markdown instructions shown at the top of the "Submit" tab.
+SUBMISSION_TEXT = """
+## How to Submit Your Model Results
+1. Fork the [PhysicalCodeBench repository](https://github.com/Sealical/PhysicalCodeBench)
+2. Generate code for all 50 tasks in the benchmark using your model
+3. Run the evaluation pipeline with your generated code
+4. Create a submission folder with the following structure:
+   ```
+   submission/
+   ├── model_info.json       # Contains model details (name, size, etc.)
+   ├── evaluation_results/   # Directory containing all result files
+   └── PhysCodeEval_results.json  # Main evaluation results file
+   ```
+5. Submit a pull request with your results
+Your submission will be verified and added to the leaderboard once approved.
+"""
+# BibTeX entry displayed in the "Citation" accordion.
+# TODO: the author list and arXiv identifier below are still placeholders.
+CITATION_TEXT = """
+@article{PhysicalCodeBench2025,
+  title={PhysicalCodeBench: Evaluating LLMs on Physics-based Simulation Code Generation},
+  author={Your Name and Co-authors},
+  journal={arXiv preprint arXiv:XXXX.XXXXX},
+  year={2025}
+}
+"""
+# Custom CSS for the interface; create_demo() hands it to gr.Blocks(css=...).
+# `.markdown-text` matches the elem_classes set on the gr.Markdown components.
+# NOTE(review): nothing in this file sets elem_classes="tab-button" (gr.Tabs()
+# is created without classes), so the second rule may never match - confirm.
+custom_css = """
+.markdown-text {
+    font-size: 16px !important;
+    text-align: left !important;
+}
+.tab-button {
+    font-size: 16px !important;
+}
+"""
+# Define column structure for the leaderboard
+class PhysCodeColumn:
+    """Metadata record describing one leaderboard column.
+
+    Attributes:
+        name: Column key, as used in the leaderboard DataFrame.
+        type: Datatype label forwarded to the Leaderboard component.
+        displayed_by_default: Whether the column is part of the initial selection.
+        never_hidden: Whether the column is listed as non-deselectable.
+        hidden: Whether the column is listed in ``hide_columns``.
+    """
+
+    def __init__(self, name, type, displayed_by_default=True, never_hidden=False, hidden=False):
+        # Identity of the column first, then its three visibility flags.
+        self.name, self.type = name, type
+        self.displayed_by_default, self.never_hidden, self.hidden = (
+            displayed_by_default,
+            never_hidden,
+            hidden,
+        )
+# Define the columns for our leaderboard
+COLUMNS = [
+    # Always visible: these two cannot be deselected.
+    PhysCodeColumn("rank", "number", never_hidden=True),
+    PhysCodeColumn("model", "str", never_hidden=True),
+    # Shown by default, but the user may hide them.
+    PhysCodeColumn("model_type", "str"),
+    PhysCodeColumn("params", "number"),
+    PhysCodeColumn("text_score", "number"),
+    PhysCodeColumn("visual_score", "number"),
+    PhysCodeColumn("total_score", "number"),
+    # Available in the column picker, not selected initially.
+    PhysCodeColumn("clip_score", "number", displayed_by_default=False),
+    PhysCodeColumn("motion_smooth_score", "number", displayed_by_default=False),
+    PhysCodeColumn("execution_success", "number", displayed_by_default=False),
+    PhysCodeColumn("file_generation", "number", displayed_by_default=False),
+    PhysCodeColumn("file_quality", "number", displayed_by_default=False),
+    PhysCodeColumn("submission_date", "date", displayed_by_default=False),
+    PhysCodeColumn("license", "str", displayed_by_default=False),
+]
+# Model-type labels for leaderboard rows.
+# NOTE: this is a plain class holding string constants, not an enum.Enum.
+class ModelType:
+    """Namespace of the model-type strings stored in the "model_type" column."""
+    Proprietary = "Proprietary"
+    OpenSource = "Open Source"
+    Unknown = "Unknown"
+    @staticmethod
+    def to_str(model_type):
+        """Return *model_type* unchanged (the constants are already strings)."""
+        return model_type
+# Load sample data (replace with your actual data loading logic)
+def get_leaderboard_df():
+    """Return the hard-coded sample leaderboard as a DataFrame.
+
+    Each row is turned into a dict keyed by ``field_names`` before being
+    handed to pandas, so the result has one column per field, in that order.
+    """
+    field_names = (
+        "rank", "model", "model_type", "params",
+        "text_score", "visual_score", "total_score",
+        "clip_score", "motion_smooth_score",
+        "execution_success", "file_generation", "file_quality",
+        "submission_date", "license",
+    )
+    proprietary = ModelType.Proprietary
+    open_source = ModelType.OpenSource
+    # One tuple per model, values in the same order as field_names.
+    sample_rows = [
+        (1, "GPT4o", proprietary, 1000,
+         16.0, 18.262, 34.262, 10.2, 8.062, 10.0, 3.0, 3.0,
+         "2025-01-15", "Proprietary"),
+        (2, "Gemini-2.0-flash", proprietary, 450,
+         15.0, 16.963, 31.963, 9.5, 7.463, 9.0, 3.0, 3.0,
+         "2025-01-20", "Proprietary"),
+        (3, "DS-R1", open_source, 32,
+         14.0, 15.815, 29.815, 8.9, 6.915, 8.5, 3.0, 2.5,
+         "2025-01-25", "Apache 2.0"),
+        (4, "DeepSeek-R1-Distill-Qwen-32B", open_source, 32,
+         12.2, 15.82, 28.02, 8.8, 7.02, 7.2, 2.5, 2.5,
+         "2025-01-28", "Apache 2.0"),
+        (5, "QwQ-32B", open_source, 32,
+         7.1, 8.964, 16.064, 4.964, 4.0, 4.1, 1.5, 1.5,
+         "2025-02-05", "Apache 2.0"),
+        (6, "Qwen-2.5-32B", open_source, 32,
+         0.7, 1.126, 1.826, 0.626, 0.5, 0.5, 0.1, 0.1,
+         "2025-02-10", "Apache 2.0"),
+    ]
+    records = [dict(zip(field_names, row)) for row in sample_rows]
+    return pd.DataFrame(records)
+# Function to load submission from JSON file
+def load_submissions_from_json(json_path):
+    """Load leaderboard submissions from a JSON file into a DataFrame.
+
+    Args:
+        json_path: Path of the JSON file to read.
+
+    Returns:
+        A ``pd.DataFrame`` built from the decoded JSON, or ``None`` when the
+        file does not exist.
+
+    Raises:
+        json.JSONDecodeError: If the file exists but is not valid JSON.
+    """
+    try:
+        # Open directly instead of checking os.path.exists() first: the file
+        # could vanish between the check and the open. UTF-8 is requested
+        # explicitly so decoding does not depend on the host locale.
+        with open(json_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except FileNotFoundError:
+        return None
+    return pd.DataFrame(data)
+# Initialize the leaderboard
 def init_leaderboard(dataframe):
+    """Build the ``Leaderboard`` component for *dataframe*.
+
+    Column datatypes, the default selection, the non-deselectable columns and
+    the hidden columns are all derived from the module-level ``COLUMNS`` list.
+
+    Raises:
+        ValueError: If *dataframe* is ``None`` or empty.
+    """
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+    column_types = [col.type for col in COLUMNS]
+    column_picker = SelectColumns(
+        default_selection=[col.name for col in COLUMNS if col.displayed_by_default],
+        cant_deselect=[col.name for col in COLUMNS if col.never_hidden],
+        label="Select Columns to Display:",
+    )
+    hidden_names = [col.name for col in COLUMNS if col.hidden]
+    model_type_filter = ColumnFilter("model_type", type="checkboxgroup", label="Model types")
+    # Slider bounds are expressed in billions of parameters (see the label).
+    params_filter = ColumnFilter(
+        "params",
+        type="slider",
+        min=0.01,
+        max=1500,
+        label="Select the number of parameters (B)",
+    )
+    return Leaderboard(
+        value=dataframe,
+        datatype=column_types,
+        select_columns=column_picker,
+        search_columns=["model", "license"],
+        hide_columns=hidden_names,
+        filter_columns=[model_type_filter, params_filter],
+        interactive=False,
+    )
+# Submission form handling
+def process_submission(model_name, model_type, params, license_type, submission_link):
+    """Acknowledge a submission made through the Submit tab.
+
+    This is a placeholder handler: it stores nothing and only builds the
+    message shown to the user. All arguments other than *model_name* are
+    accepted for the form wiring but are currently unused.
+
+    Args:
+        model_name: Name typed into the "Model Name" textbox.
+        model_type: Value of the "Model Type" dropdown.
+        params: Value of the "Parameters (billions)" field.
+        license_type: Value of the "License" textbox.
+        submission_link: Value of the "GitHub Pull Request URL" textbox.
+
+    Returns:
+        A message string: a thank-you note, or a prompt to fill in the model
+        name when it is missing or blank.
+    """
+    # An untouched form yields an empty (or None) name; do not thank the user
+    # for a submission that cannot be identified.
+    cleaned_name = "" if model_name is None else str(model_name).strip()
+    if not cleaned_name:
+        return "Please enter a model name before submitting."
+    return f"Thank you for submitting {cleaned_name}! Your submission will be reviewed and added to the leaderboard once verified."
+# Main application
+def create_demo():
+    """Build and return the ``gr.Blocks`` app for the leaderboard.
+
+    Layout, in creation order: title and introduction, four tabs
+    (Leaderboard, Visualizations, About, Submit), then a collapsed Citation
+    accordion. The app is returned without being launched.
+    """
+    # Load the leaderboard data
+    leaderboard_df = get_leaderboard_df()
+    # Create the Gradio interface
+    demo = gr.Blocks(css=custom_css)
+    with demo:
+        gr.HTML(TITLE)
+        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+        with gr.Tabs() as tabs:
+            with gr.TabItem("🏅 Leaderboard", id=0):
+                leaderboard = init_leaderboard(leaderboard_df)
+            with gr.TabItem("📊 Visualizations", id=1):
+                # This tab currently holds headings only; no plot is created.
+                gr.Markdown("## Performance Comparisons")
+                with gr.Row():
+                    with gr.Column():
+                        gr.Markdown("### Text vs. Visual Scores")
+                        # TODO: add a visualization component here (e.g., scatter plot)
+                    with gr.Column():
+                        gr.Markdown("### Score Breakdown by Task Type")
+                        # TODO: add a visualization component here (e.g., bar chart)
                 with gr.Row():
+                    # No event handler is attached to this dropdown in this function.
+                    model_selector = gr.Dropdown(
+                        choices=leaderboard_df["model"].tolist(),
+                        label="Select Model for Detailed Analysis",
                         multiselect=False,
                     )
+            with gr.TabItem("📝 About", id=2):
+                gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
+            with gr.TabItem("🚀 Submit", id=3):
+                gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")
+                with gr.Row():
+                    with gr.Column():
+                        model_name_input = gr.Textbox(label="Model Name")
+                        model_type_input = gr.Dropdown(
+                            choices=["Proprietary", "Open Source"],
+                            label="Model Type",
+                            multiselect=False,
+                        )
+                        params_input = gr.Number(label="Parameters (billions)")
+                    with gr.Column():
+                        license_input = gr.Textbox(label="License")
+                        submission_link_input = gr.Textbox(label="GitHub Pull Request URL")
+                submit_button = gr.Button("Submit")
+                submission_result = gr.Markdown()
+                # The input list order must match process_submission's parameter order.
+                submit_button.click(
+                    process_submission,
+                    [model_name_input, model_type_input, params_input, license_input, submission_link_input],
+                    submission_result,
+                )
+        with gr.Row():
+            with gr.Accordion("📙 Citation", open=False):
+                citation_button = gr.Textbox(
+                    value=CITATION_TEXT,
+                    label="Citation",
+                    lines=8,
+                    elem_id="citation-button",
+                    show_copy_button=True,
+                )
+    return demo
+# Launch the application only when run as a script; importing this module
+# builds nothing and starts no server.
+if __name__ == "__main__":
+    demo = create_demo()
+    # Unlike old_app.py, no queue settings or restart scheduler are configured here.
+    demo.launch()

old_app.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
+from src.about import (
+    CITATION_BUTTON_LABEL,
+    CITATION_BUTTON_TEXT,
+    EVALUATION_QUEUE_TEXT,
+    INTRODUCTION_TEXT,
+    LLM_BENCHMARKS_TEXT,
+    TITLE,
+)
+from src.display.css_html_js import custom_css
+from src.display.utils import (
+    BENCHMARK_COLS,
+    COLS,
+    EVAL_COLS,
+    EVAL_TYPES,
+    AutoEvalColumn,
+    ModelType,
+    fields,
+    WeightType,
+    Precision
+)
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.submission.submit import add_new_eval
+def restart_space():
+    """Ask the project's ``API`` client to restart the Space ``REPO_ID``."""
+    API.restart_space(repo_id=REPO_ID)
+### Space initialisation
+# Runs at import time: download the request-queue and results datasets into
+# their local directories, then build the DataFrames the UI displays.
+# Any failure while downloading triggers a restart of the Space.
+try:
+    print(EVAL_REQUESTS_PATH)
+    snapshot_download(
+        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+    )
+except Exception:
+    # NOTE(review): the exception is not logged, so the cause of a restart is lost.
+    restart_space()
+try:
+    print(EVAL_RESULTS_PATH)
+    snapshot_download(
+        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+    )
+except Exception:
+    restart_space()
+# Leaderboard table built from the downloaded results and requests.
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+# Three queue tables, unpacked in the order: finished, running, pending.
+(
+    finished_eval_queue_df,
+    running_eval_queue_df,
+    pending_eval_queue_df,
+) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+def init_leaderboard(dataframe):
+    """Build the ``Leaderboard`` component for *dataframe*.
+
+    Column datatypes and visibility settings are read from
+    ``fields(AutoEvalColumn)``.
+
+    Raises:
+        ValueError: If *dataframe* is ``None`` or empty.
+    """
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+    column_types = [col.type for col in fields(AutoEvalColumn)]
+    column_picker = SelectColumns(
+        default_selection=[col.name for col in fields(AutoEvalColumn) if col.displayed_by_default],
+        cant_deselect=[col.name for col in fields(AutoEvalColumn) if col.never_hidden],
+        label="Select Columns to Display:",
+    )
+    searchable = [AutoEvalColumn.model.name, AutoEvalColumn.license.name]
+    hidden_names = [col.name for col in fields(AutoEvalColumn) if col.hidden]
+    # Filters are created in the same order in which they are passed below.
+    model_type_filter = ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types")
+    precision_filter = ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision")
+    params_filter = ColumnFilter(
+        AutoEvalColumn.params.name,
+        type="slider",
+        min=0.01,
+        max=150,
+        label="Select the number of parameters (B)",
+    )
+    on_hub_filter = ColumnFilter(
+        AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+    )
+    return Leaderboard(
+        value=dataframe,
+        datatype=column_types,
+        select_columns=column_picker,
+        search_columns=searchable,
+        hide_columns=hidden_names,
+        filter_columns=[model_type_filter, precision_filter, params_filter, on_hub_filter],
+        bool_checkboxgroup_label="Hide models",
+        interactive=False,
+    )
+# Build the UI at import time: title and introduction, three tabs (benchmark
+# table, about text, submission form with queue tables), then a citation box.
+demo = gr.Blocks(css=custom_css)
+with demo:
+    gr.HTML(TITLE)
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        # NOTE(review): all three tabs share elem_id="llm-benchmark-tab-table",
+        # and the tab ids skip 1 (0, 2, 3) - confirm both are intentional.
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+            leaderboard = init_leaderboard(LEADERBOARD_DF)
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+                with gr.Column():
+                    # One collapsed accordion per queue state; each title shows
+                    # the row count of the DataFrame loaded at start-up.
+                    with gr.Accordion(
+                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            finished_eval_table = gr.components.Dataframe(
+                                value=finished_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            running_eval_table = gr.components.Dataframe(
+                                value=running_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            pending_eval_table = gr.components.Dataframe(
+                                value=pending_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+            # Submission form: two columns of inputs followed by the submit button.
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                    model_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                        label="Model type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=[i.value.name for i in WeightType],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            # The inputs are passed to add_new_eval in the order listed here,
+            # which differs from their on-screen order.
+            submit_button.click(
+                add_new_eval,
+                [
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    weight_type,
+                    model_type,
+                ],
+                submission_result,
+            )
+    with gr.Row():
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
+                value=CITATION_BUTTON_TEXT,
+                label=CITATION_BUTTON_LABEL,
+                lines=20,
+                elem_id="citation-button",
+                show_copy_button=True,
+            )
+# Restart the Space every 1800 seconds (30 minutes) from a background
+# scheduler, then start the app with its queue enabled.
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=1800)
+scheduler.start()
+demo.queue(default_concurrency_limit=40).launch()