kostis-init committed on
Commit
823637b
·
1 Parent(s): 3852c4d
Files changed (2) hide show
  1. app.py +1 -1
  2. src/ui.py +158 -159
app.py CHANGED
@@ -5,5 +5,5 @@ print(f"[{time.time()}] app.py: Script execution has STARTED.")
5
  import os
6
  import subprocess
7
  import src.ui
8
- print(f"[{time.time()}] app.py: Imported all.")
9
  # DO NOT add any other imports or code yet.
 
5
  import os
6
  import subprocess
7
  import src.ui
8
+ print(f"[{time.time()}] app.py: Imported all.", flush=True)
9
  # DO NOT add any other imports or code yet.
src/ui.py CHANGED
@@ -2,166 +2,165 @@ import json
2
 
3
  import gradio as gr
4
  from pathlib import Path
5
- print("importedddd1 1", flush=True)
6
  from src.config import SUPPORTED_FRAMEWORKS
7
 
8
  from src.hf_utils import load_leaderboard_data, upload_submission, check_name_exists
9
  from src.eval import start_background_evaluation
10
- print("importedddd")
11
- #
12
- # def handle_upload(submission_name, uploaded_file, report_file, model_framework, progress=gr.Progress()):
13
- # """Handle file upload and start evaluation."""
14
- # if model_framework not in SUPPORTED_FRAMEWORKS:
15
- # return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"
16
- #
17
- # if not uploaded_file:
18
- # return "No file uploaded. Please upload a valid submission file."
19
- #
20
- # if report_file and not report_file.name.endswith(".pdf"):
21
- # return "Invalid report format. Please upload a PDF file."
22
- #
23
- # # normalize the submission name
24
- # submission_name = submission_name.strip().replace(" ", "_").lower()
25
- # # keep only alphanumeric characters and underscores, restrict to 30 characters
26
- # submission_name = "".join(
27
- # c for c in submission_name if c.isalnum() or c == "_"
28
- # )[:30]
29
- #
30
- # if not submission_name or submission_name.strip() == "":
31
- # return "Submission name is required"
32
- #
33
- # if check_name_exists(submission_name):
34
- # return f"Submission name '{submission_name}' already exists. Please choose a different name."
35
- #
36
- # try:
37
- # progress(0.3, "Uploading to Hugging Face...")
38
- #
39
- # # Check if the file is a valid JSONL file
40
- # if not uploaded_file.name.endswith(".jsonl"):
41
- # return "Invalid file format. Please upload a .jsonl file."
42
- #
43
- # # Check that the keys in the JSONL file are correct ('id' and 'model')
44
- # with open(uploaded_file.name, "r") as file:
45
- # found_one = False
46
- # for line in file:
47
- # found_one = True
48
- # json_object = json.loads(line)
49
- # if not all(key in json_object for key in ["id", "model"]):
50
- # return "Invalid content. Each line must contain 'id' and 'model' keys."
51
- # if not found_one:
52
- # return "Empty file. Please upload a valid JSONL file."
53
- #
54
- # success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework)
55
- # if not success:
56
- # return f"Upload failed: {result}"
57
- #
58
- # progress(0.7, "Starting evaluation...")
59
- #
60
- # # Start evaluation
61
- # start_background_evaluation(result)
62
- #
63
- # progress(1.0, "Process complete")
64
- # return (
65
- # f"✅ Submission '{submission_name}' uploaded successfully!\n"
66
- # f"Do not worry if the leaderboard does not update immediately; "
67
- # f"it may take some time for the results to appear (around 5-10 minutes). "
68
- # f"Feel free to close the tab and check back later.")
69
- #
70
- # except Exception as e:
71
- # return f"Error processing upload: {str(e)}"
72
- #
73
- #
74
- # def create_ui():
75
- # """Create and return Gradio UI."""
76
- # with gr.Blocks(title="Welcome to the CP-Bench leaderboard!") as demo:
77
- # gr.Markdown("# CP-Bench Leaderboard")
78
- # gr.Markdown(
79
- # "This leaderboard is designed to evaluate LLM-generated constraint models for the problems "
80
- # "in the [CP-Bench](https://huggingface.co/datasets/kostis-init/CP-Bench) dataset."
81
- # "\n\n"
82
- # "## How to Submit\n"
83
- # "1. **Name your submission**: Choose a unique name for your submission (e.g., `my_cool_submission`). "
84
- # "This name will be used to identify your submission on the leaderboard.\n"
85
- # "2. **Upload a PDF report**: This is optional, but we highly encourage you to upload a report "
86
- # " (in PDF format) describing your approach. As this is an open competition, we want to avoid submissions "
87
- # " that just copy the models from the dataset. The report can be a short description of your approach, "
88
- # " the models you generated, and any other relevant information.\n"
89
- # "3. **Upload your submission**: Upload a **single** `.jsonl` file containing the generated models. "
90
- # " **Each line in the file should be a JSON object with two keys: `id` and `model`.**\n"
91
- # " * `id`: The ID of the problem exactly as it appears in the original dataset (e.g., `csplib__csplib_001_car_sequencing`).\n"
92
- # " * `model`: The generated model for the problem (as a string representing runnable code). Make sure that it eventually outputs the solution as a json with key(s) as described in the `decision_variables` entry and values as would be expected in the problem. This is part of the evaluation as well: unexpected keys, or value types are considered incorrect. This is because our automatic evaluation is based on the solution printed by the submitted models.\n"
93
- # " * An example submission file can be found [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/sample_submission.jsonl).\n"
94
- # "3. **Check the leaderboard**: After uploading, you can check the leaderboard to see your results. "
95
- # "It may take a few minutes for a submission to be evaluated and appear on the leaderboard.\n"
96
- # "\n\n"
97
- # "## Important Notes\n"
98
- # "1. **Submission Name**: The submission name must be different from any existing submission names.\n"
99
- # "2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `pdf` file.\n"
100
- # "3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/user_eval.py) to check your results before submission. You can run the script as follows:\n"
101
- # " ```bash\n"
102
- # " python user_eval.py --submission_file path/to/my/submission.jsonl\n"
103
- # " ```\n"
104
- # " This will evaluate your submission locally and print the results to the console.\n"
105
- # "4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy and OR-Tools. More frameworks will be added.\n"
106
- # "\n\n"
107
- # "### If you have any questions or issues, please feel free to reach out to us TODO\n"
108
- # "---\n"
109
- # )
110
- #
111
- # with gr.Row():
112
- # with gr.Column(scale=1):
113
- # gr.Markdown("## 📤 Upload Submission")
114
- #
115
- # submission_name = gr.Textbox(
116
- # label="Submission Name (required)",
117
- # placeholder="Enter a unique name for your submission",
118
- # interactive=True,
119
- # info="This name will appear on the leaderboard"
120
- # )
121
- # model_framework = gr.Dropdown(
122
- # label="Modelling Framework (required)",
123
- # choices=SUPPORTED_FRAMEWORKS,
124
- # value=None,
125
- # multiselect=False,
126
- # interactive=True,
127
- # info="Select the modelling framework used for your submission.",
128
- # allow_custom_value=False,
129
- # filterable=False,
130
- # )
131
- #
132
- # with gr.Row():
133
- # report_file = gr.File(
134
- # label="Upload PDF Report (optional, but recommended)",
135
- # file_types=[".pdf"],
136
- # file_count="single",
137
- # interactive=True,
138
- # )
139
- # submission_file = gr.File(
140
- # label="Upload Submission File (required, .jsonl)",
141
- # file_types=[".jsonl"],
142
- # file_count="single",
143
- # interactive=True,
144
- # )
145
- # upload_button = gr.Button("Click to Upload Submission")
146
- # status_box = gr.Textbox(label="Status", interactive=False)
147
- #
148
- # with gr.Column(scale=2):
149
- # gr.Markdown("## πŸ† Results Leaderboard")
150
- # leaderboard = gr.DataFrame(value=load_leaderboard_data, interactive=False)
151
- # refresh_button = gr.Button("🔄 Refresh Leaderboard")
152
- #
153
- # # Event handlers
154
- # upload_button.click(
155
- # fn=handle_upload,
156
- # inputs=[submission_name, submission_file, report_file, model_framework],
157
- # outputs=[status_box],
158
- # show_progress="full",
159
- # )
160
- #
161
- # refresh_button.click(
162
- # fn=load_leaderboard_data,
163
- # inputs=None,
164
- # outputs=[leaderboard]
165
- # )
166
- #
167
- # return demo
 
2
 
3
  import gradio as gr
4
  from pathlib import Path
 
5
  from src.config import SUPPORTED_FRAMEWORKS
6
 
7
  from src.hf_utils import load_leaderboard_data, upload_submission, check_name_exists
8
  from src.eval import start_background_evaluation
9
+
10
+
11
+ def handle_upload(submission_name, uploaded_file, report_file, model_framework, progress=gr.Progress()):
12
+ """Handle file upload and start evaluation."""
13
+ if model_framework not in SUPPORTED_FRAMEWORKS:
14
+ return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"
15
+
16
+ if not uploaded_file:
17
+ return "No file uploaded. Please upload a valid submission file."
18
+
19
+ if report_file and not report_file.name.endswith(".pdf"):
20
+ return "Invalid report format. Please upload a PDF file."
21
+
22
+ # normalize the submission name
23
+ submission_name = submission_name.strip().replace(" ", "_").lower()
24
+ # keep only alphanumeric characters and underscores, restrict to 30 characters
25
+ submission_name = "".join(
26
+ c for c in submission_name if c.isalnum() or c == "_"
27
+ )[:30]
28
+
29
+ if not submission_name or submission_name.strip() == "":
30
+ return "Submission name is required"
31
+
32
+ if check_name_exists(submission_name):
33
+ return f"Submission name '{submission_name}' already exists. Please choose a different name."
34
+
35
+ try:
36
+ progress(0.3, "Uploading to Hugging Face...")
37
+
38
+ # Check if the file is a valid JSONL file
39
+ if not uploaded_file.name.endswith(".jsonl"):
40
+ return "Invalid file format. Please upload a .jsonl file."
41
+
42
+ # Check that the keys in the JSONL file are correct ('id' and 'model')
43
+ with open(uploaded_file.name, "r") as file:
44
+ found_one = False
45
+ for line in file:
46
+ found_one = True
47
+ json_object = json.loads(line)
48
+ if not all(key in json_object for key in ["id", "model"]):
49
+ return "Invalid content. Each line must contain 'id' and 'model' keys."
50
+ if not found_one:
51
+ return "Empty file. Please upload a valid JSONL file."
52
+
53
+ success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework)
54
+ if not success:
55
+ return f"Upload failed: {result}"
56
+
57
+ progress(0.7, "Starting evaluation...")
58
+
59
+ # Start evaluation
60
+ start_background_evaluation(result)
61
+
62
+ progress(1.0, "Process complete")
63
+ return (
64
+ f"✅ Submission '{submission_name}' uploaded successfully!\n"
65
+ f"Do not worry if the leaderboard does not update immediately; "
66
+ f"it may take some time for the results to appear (around 5-10 minutes). "
67
+ f"Feel free to close the tab and check back later.")
68
+
69
+ except Exception as e:
70
+ return f"Error processing upload: {str(e)}"
71
+
72
+
73
+ def create_ui():
74
+ """Create and return Gradio UI."""
75
+ with gr.Blocks(title="Welcome to the CP-Bench leaderboard!") as demo:
76
+ gr.Markdown("# CP-Bench Leaderboard")
77
+ gr.Markdown(
78
+ "This leaderboard is designed to evaluate LLM-generated constraint models for the problems "
79
+ "in the [CP-Bench](https://huggingface.co/datasets/kostis-init/CP-Bench) dataset."
80
+ "\n\n"
81
+ "## How to Submit\n"
82
+ "1. **Name your submission**: Choose a unique name for your submission (e.g., `my_cool_submission`). "
83
+ "This name will be used to identify your submission on the leaderboard.\n"
84
+ "2. **Upload a PDF report**: This is optional, but we highly encourage you to upload a report "
85
+ " (in PDF format) describing your approach. As this is an open competition, we want to avoid submissions "
86
+ " that just copy the models from the dataset. The report can be a short description of your approach, "
87
+ " the models you generated, and any other relevant information.\n"
88
+ "3. **Upload your submission**: Upload a **single** `.jsonl` file containing the generated models. "
89
+ " **Each line in the file should be a JSON object with two keys: `id` and `model`.**\n"
90
+ " * `id`: The ID of the problem exactly as it appears in the original dataset (e.g., `csplib__csplib_001_car_sequencing`).\n"
91
+ " * `model`: The generated model for the problem (as a string representing runnable code). Make sure that it eventually outputs the solution as a json with key(s) as described in the `decision_variables` entry and values as would be expected in the problem. This is part of the evaluation as well: unexpected keys, or value types are considered incorrect. This is because our automatic evaluation is based on the solution printed by the submitted models.\n"
92
+ " * An example submission file can be found [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/sample_submission.jsonl).\n"
93
+ "3. **Check the leaderboard**: After uploading, you can check the leaderboard to see your results. "
94
+ "It may take a few minutes for a submission to be evaluated and appear on the leaderboard.\n"
95
+ "\n\n"
96
+ "## Important Notes\n"
97
+ "1. **Submission Name**: The submission name must be different from any existing submission names.\n"
98
+ "2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `pdf` file.\n"
99
+ "3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/user_eval.py) to check your results before submission. You can run the script as follows:\n"
100
+ " ```bash\n"
101
+ " python user_eval.py --submission_file path/to/my/submission.jsonl\n"
102
+ " ```\n"
103
+ " This will evaluate your submission locally and print the results to the console.\n"
104
+ "4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy and OR-Tools. More frameworks will be added.\n"
105
+ "\n\n"
106
+ "### If you have any questions or issues, please feel free to reach out to us TODO\n"
107
+ "---\n"
108
+ )
109
+
110
+ with gr.Row():
111
+ with gr.Column(scale=1):
112
+ gr.Markdown("## 📤 Upload Submission")
113
+
114
+ submission_name = gr.Textbox(
115
+ label="Submission Name (required)",
116
+ placeholder="Enter a unique name for your submission",
117
+ interactive=True,
118
+ info="This name will appear on the leaderboard"
119
+ )
120
+ model_framework = gr.Dropdown(
121
+ label="Modelling Framework (required)",
122
+ choices=SUPPORTED_FRAMEWORKS,
123
+ value=None,
124
+ multiselect=False,
125
+ interactive=True,
126
+ info="Select the modelling framework used for your submission.",
127
+ allow_custom_value=False,
128
+ filterable=False,
129
+ )
130
+
131
+ with gr.Row():
132
+ report_file = gr.File(
133
+ label="Upload PDF Report (optional, but recommended)",
134
+ file_types=[".pdf"],
135
+ file_count="single",
136
+ interactive=True,
137
+ )
138
+ submission_file = gr.File(
139
+ label="Upload Submission File (required, .jsonl)",
140
+ file_types=[".jsonl"],
141
+ file_count="single",
142
+ interactive=True,
143
+ )
144
+ upload_button = gr.Button("Click to Upload Submission")
145
+ status_box = gr.Textbox(label="Status", interactive=False)
146
+
147
+ with gr.Column(scale=2):
148
+ gr.Markdown("## πŸ† Results Leaderboard")
149
+ leaderboard = gr.DataFrame(value=load_leaderboard_data, interactive=False)
150
+ refresh_button = gr.Button("🔄 Refresh Leaderboard")
151
+
152
+ # Event handlers
153
+ upload_button.click(
154
+ fn=handle_upload,
155
+ inputs=[submission_name, submission_file, report_file, model_framework],
156
+ outputs=[status_box],
157
+ show_progress="full",
158
+ )
159
+
160
+ refresh_button.click(
161
+ fn=load_leaderboard_data,
162
+ inputs=None,
163
+ outputs=[leaderboard]
164
+ )
165
+
166
+ return demo