# Hugging Face Space: kostis-init/CP-Bench-Leaderboard
# Status: Running
# File size: 3,776 Bytes

import json

import gradio as gr
from pathlib import Path

from src.hf_utils import load_leaderboard_data, upload_submission, check_name_exists
from src.eval import start_background_evaluation


def _normalize_submission_name(raw_name):
    """Return *raw_name* reduced to the identifier form used for submissions.

    The name is stripped, spaces become underscores, everything is
    lower-cased, characters other than alphanumerics and underscores are
    dropped, and the result is truncated to 30 characters. A missing name
    (``None``) is treated like an empty string, so the result may be ``""``.
    """
    lowered = (raw_name or "").strip().replace(" ", "_").lower()
    return "".join(c for c in lowered if c.isalnum() or c == "_")[:30]


def _validate_jsonl(path):
    """Check the JSONL file at *path* and return an error message or ``None``.

    Every non-blank line must parse as a JSON object that has both an 'id'
    and a 'model' key, and the file must contain at least one such line.
    """
    found_one = False
    with open(path, "r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, start=1):
            # Blank lines (e.g. a stray trailing newline) carry no record.
            if not line.strip():
                continue
            try:
                json_object = json.loads(line)
            except json.JSONDecodeError as e:
                return f"Invalid content. Line {line_number} is not valid JSON: {e.msg}."
            # Require a real JSON object: `key in value` would also succeed
            # on strings (substring match) and lists (element match).
            if not isinstance(json_object, dict) or not all(
                key in json_object for key in ("id", "model")
            ):
                return "Invalid content. Each line must contain 'id' and 'model' keys."
            found_one = True

    if not found_one:
        return "Empty file. Please upload a valid JSONL file."
    return None


def handle_upload(submission_name, uploaded_file, progress=gr.Progress()):
    """Handle file upload and start evaluation.

    Args:
        submission_name: Name typed by the user; it is normalized before use
            (see ``_normalize_submission_name``).
        uploaded_file: The uploaded file as delivered by the upload button.
            Its path is read from the ``name`` attribute when present,
            otherwise the value itself is used as the path.
        progress: Progress callback, called as ``progress(fraction, message)``.

    Returns:
        A status message for the user. Every failure is reported through
        this string; exceptions raised while validating, uploading or
        starting the evaluation are caught and turned into a message.
    """
    if not uploaded_file:
        return "No file uploaded. Please upload a valid submission file."

    submission_name = _normalize_submission_name(submission_name)
    if not submission_name:
        return "Submission name is required"

    if check_name_exists(submission_name):
        return f"Submission name '{submission_name}' already exists. Please choose a different name."

    try:
        progress(0.3, "Uploading to Hugging Face...")

        # Accept both file-like wrappers exposing `.name` and plain paths.
        file_path = getattr(uploaded_file, "name", uploaded_file)

        # Check if the file is a valid JSONL file
        if not str(file_path).endswith(".jsonl"):
            return "Invalid file format. Please upload a .jsonl file."

        # Check that the records in the JSONL file are well formed
        validation_error = _validate_jsonl(file_path)
        if validation_error:
            return validation_error

        success, result = upload_submission(uploaded_file, submission_name)
        if not success:
            return f"Upload failed: {result}"

        progress(0.7, "Starting evaluation...")

        # Start evaluation, handing over what the upload step returned
        start_background_evaluation(result)

        progress(1.0, "Process complete")
        return f"Upload complete. Evaluation started for: {submission_name}. Refresh the leaderboard to see results. Do not worry if the leaderboard does not update immediately; it may take some time for the results to appear."

    except Exception as e:
        # UI boundary: surface any failure as a status message instead of
        # letting the handler crash.
        return f"Error processing upload: {str(e)}"


def create_ui():
    """Build the leaderboard interface and return it.

    The layout is a single row with two columns: a submission form
    (name field, upload button, status box) at scale 1 and the results
    table with a refresh button at scale 3. Uploading a file runs
    ``handle_upload``; the refresh button reloads the table through
    ``load_leaderboard_data``.

    Returns:
        The assembled ``gr.Blocks`` object.
    """
    demo = gr.Blocks(title="CP-Bench Leaderboard")

    with demo:
        gr.Markdown("# CP-Bench Leaderboard")

        with gr.Row():
            # Left column: submission form.
            with gr.Column(scale=1):
                gr.Markdown("## 📤 Upload Submission")

                name_input = gr.Textbox(
                    label="Submission Name (required)",
                    placeholder="Enter a unique name for your submission",
                    interactive=True,
                    info="This name will appear on the leaderboard",
                )
                upload_btn = gr.UploadButton(
                    "Click to Upload Submission",
                    file_count="single",
                )
                status_output = gr.Textbox(label="Status", interactive=False)

            # Right column: results table.
            with gr.Column(scale=3):
                gr.Markdown("## 🏆 Results Leaderboard")
                # The loader is passed as a callable, not as a precomputed table.
                results_table = gr.DataFrame(
                    value=load_leaderboard_data,
                    label="Leaderboard",
                    interactive=False,
                )
                refresh_btn = gr.Button("🔄 Refresh Leaderboard")

        # Wire the events: the upload reports into the status box ...
        upload_btn.upload(
            fn=handle_upload,
            inputs=[name_input, upload_btn],
            outputs=[status_output],
            show_progress="full",
        )

        # ... and the refresh button replaces the table contents.
        refresh_btn.click(
            fn=load_leaderboard_data,
            inputs=None,
            outputs=[results_table],
        )

    return demo