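"""Gradio UI for the CP-Bench leaderboard: submission upload, validation, and results display."""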
import json
from pathlib import Path

import gradio as gr

from src.config import SUPPORTED_FRAMEWORKS
from src.eval import start_background_evaluation
from src.hf_utils import load_leaderboard_data, upload_submission, check_name_exists


def handle_upload(submission_name, uploaded_file, report_file, model_framework, base_llm, progress=gr.Progress()):
    """Handle file upload and start evaluation."""
|
    if model_framework not in SUPPORTED_FRAMEWORKS:
        return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"

    if not uploaded_file:
        return "No file uploaded. Please upload a valid submission file."

    if report_file and not report_file.name.endswith(".pdf"):
        return "Invalid report format. Please upload a PDF file."
|
    # Normalise the submission name: strip, lowercase, spaces to underscores, keep only alphanumerics and underscores, truncate to 30 characters.
    submission_name = submission_name.strip().replace(" ", "_").lower()
    submission_name = "".join(c for c in submission_name if c.isalnum() or c == "_")[:30]

    if not submission_name:
        return "Submission name is required."

    if not base_llm or base_llm.strip() == "":
        return "Base LLM is required. Please specify the base language model used for generating the models."

    if check_name_exists(submission_name):
        return f"Submission name '{submission_name}' already exists. Please choose a different name."
|
    try:
        progress(0.3, "Uploading to Hugging Face...")

        if not uploaded_file.name.endswith(".jsonl"):
            return "Invalid file format. Please upload a .jsonl file."

        # Basic structural check: every non-empty line must be valid JSON with 'id' and 'model' keys, e.g.
        # {"id": "csplib__csplib_001_car_sequencing", "model": "<runnable code as a string>"}
        with open(uploaded_file.name, "r") as file:
            found_one = False
            for line in file:
                if not line.strip():
                    continue  # skip blank lines rather than failing on json.loads("")
                found_one = True
                json_object = json.loads(line)
                if not all(key in json_object for key in ["id", "model"]):
                    return "Invalid content. Each line must contain 'id' and 'model' keys."
            if not found_one:
                return "Empty file. Please upload a valid JSONL file."
|
        success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework, base_llm)
        if not success:
            return f"Upload failed: {result}"

        progress(0.7, "Starting evaluation...")
        start_background_evaluation(result)

        progress(1.0, "Process complete")
        return (
            f"✅ Submission '{submission_name}' uploaded successfully!\n"
            "Do not worry if the leaderboard does not update immediately; "
            "it may take some time for the results to appear (around 5-10 minutes). "
            "Feel free to close the tab and check back later."
        )
    except Exception as e:
        return f"Error processing upload: {str(e)}"


def create_ui():
    """Create and return Gradio UI."""
|
    with gr.Blocks(title="Welcome to the CP-Bench leaderboard!") as demo:
        gr.Markdown("# CP-Bench Leaderboard")
        gr.Markdown(
            "This leaderboard is designed to evaluate LLM-generated constraint models for the problems "
            "in the [CP-Bench](https://huggingface.co/datasets/kostis-init/CP-Bench) dataset."
            "\n\n"
            "## How to Submit\n"
            "1. **Name your submission**: Choose a unique name for your submission (e.g., `my_cool_submission`). "
            "This name will be used to identify your submission on the leaderboard.\n"
            "2. **Select the modelling framework**: Indicate which modelling framework your submission uses (e.g., MiniZinc, CPMpy, OR-Tools).\n"
            "3. **Upload a PDF report**: This is optional, but we highly encourage you to upload a report "
            "(in PDF format) describing your approach. As this is an open competition, we want to avoid submissions "
            "that simply copy the models from the dataset. The report can be a short description of your approach, "
            "the models you generated, and any other relevant information.\n"
            "4. **Upload your submission**: Upload a **single** `.jsonl` file containing the generated models. "
            "**Each line in the file should be a JSON object with two keys: `id` and `model`.**\n"
            "   * `id`: The ID of the problem exactly as it appears in the original dataset (e.g., `csplib__csplib_001_car_sequencing`).\n"
            "   * `model`: The generated model for the problem, as a string of runnable code. Make sure it eventually prints the solution as JSON, with the key(s) described in the problem's `decision_variables` entry and values of the expected types. This is part of the evaluation: unexpected keys or value types are considered incorrect, because our automatic evaluation is based on the solution printed by the submitted model.\n"
            "   * An example submission file can be found [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/template_submission.jsonl).\n"
            "\n   To help you get started, we also provide a **template script [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/template.py)**. This script acts as a backbone, showing how to produce a simple, runnable submission for one of the problems. You can use it as a starting point for developing your own logic.\n"
            "5. **Check the leaderboard**: After uploading, it may take a few minutes for a submission to be evaluated and appear on the leaderboard.\n"
            "\n\n"
            "## Important Notes\n"
            "1. **Submission Name**: The submission name must be different from any existing submission names.\n"
            "2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `.pdf` file.\n"
            "3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/src/user_eval.py) to check your results before submission. You can run the script as follows:\n"
            "   ```bash\n"
            "   python user_eval.py --submission_file path/to/my/submission.jsonl --modelling_framework CPMpy\n"
            "   ```\n"
            "   This will evaluate your submission locally and print the results to the console.\n"
            "4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy, and OR-Tools. More frameworks can be added (feel free to submit pull requests).\n"
            "\n\n"
            "### If you have any questions or issues, feel free to reach out to us.\n"
            "---\n"
        )
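        # Two-column layout: the submission form on the left, the leaderboard on the right.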
|
|
|
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## 📤 Upload Submission")

                submission_name = gr.Textbox(
                    label="Submission Name (required)",
                    placeholder="Enter a unique name for your submission",
                    interactive=True,
                    info="This name will appear on the leaderboard. It is recommended that it represents the approach you used to generate the models (e.g. 'smart_prompting').",
                )
                model_framework = gr.Dropdown(
                    label="Modelling Framework (required)",
                    choices=SUPPORTED_FRAMEWORKS,
                    value=None,
                    multiselect=False,
                    interactive=True,
                    info="Select the modelling framework used for your submission.",
                    allow_custom_value=False,
                    filterable=False,
                )
                base_llm = gr.Textbox(
                    label="Base LLM (required)",
                    placeholder="Enter the base LLM used for generating the models (e.g., GPT-4, Llama-3.3)",
                    interactive=True,
                    info="The base LLM used for generating the models.",
                )

                with gr.Row():
                    report_file = gr.File(
                        label="Upload PDF Report (optional, but recommended)",
                        file_types=[".pdf"],
                        file_count="single",
                        interactive=True,
                    )
                    submission_file = gr.File(
                        label="Upload Submission File (required, .jsonl)",
                        file_types=[".jsonl"],
                        file_count="single",
                        interactive=True,
                    )
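                # The upload button below runs handle_upload; its return message is shown in the status box.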
|
                upload_button = gr.Button("Click to Upload Submission")
                status_box = gr.Textbox(label="Status", interactive=False)
|
            with gr.Column(scale=2):
                gr.Markdown("## 📊 Results Leaderboard")
|
                leaderboard = gr.DataFrame(value=load_leaderboard_data, interactive=False)
                refresh_button = gr.Button("🔄 Refresh Leaderboard")
|
        upload_button.click(
            fn=handle_upload,
            inputs=[submission_name, submission_file, report_file, model_framework, base_llm],
            outputs=[status_box],
            show_progress="full",
        )
|
        refresh_button.click(
            fn=load_leaderboard_data,
            inputs=None,
            outputs=[leaderboard],
        )
|
        gr.Markdown(
            "### If you found our work useful, please consider citing our paper and dataset as follows:\n"
            "```bibtex\n"
            "@dataset{michailidis_2025_15592407,\n"
            "  author    = {Michailidis, Kostis and Tsouros, Dimosthenis and Guns, Tias},\n"
            "  title     = {CP-Bench},\n"
            "  month     = jun,\n"
            "  year      = 2025,\n"
            "  publisher = {Zenodo},\n"
            "  version   = {1.0.0},\n"
            "  doi       = {10.5281/zenodo.15592407},\n"
            "  url       = {https://doi.org/10.5281/zenodo.15592407},\n"
            "}\n"
            "```"
        )
|
    return demo
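

if __name__ == "__main__":
    # Local-run sketch: the Space may use a separate entry point (e.g. an app.py that imports
    # create_ui()), so this guard is only a convenience for running this module directly.
    create_ui().launch()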
|