kostis-init committed on
Commit
21ed616
·
1 Parent(s): 180f9fe

change submission from directory to a single jsonl file

Browse files
Files changed (4) hide show
  1. src/config.py +1 -1
  2. src/eval.py +88 -86
  3. src/hf_utils.py +10 -11
  4. src/ui.py +22 -6
src/config.py CHANGED
@@ -8,4 +8,4 @@ DS_SUBMISSIONS_PATH = "submissions"
8
  DS_RESULTS_PATH = "results"
9
 
10
  # leaderboard
11
- LDB_COLS = ["Submission Name", "Execution (%)", "Consistency (%)", "Final Solution Accuracy (%)", "# of Models submitted"]
 
8
  DS_RESULTS_PATH = "results"
9
 
10
  # leaderboard
11
+ LDB_COLS = ["Submission Name", "Solution Found (%)", "Consistency (%)", "Final Solution Accuracy (%)", "# of Models submitted"]
src/eval.py CHANGED
@@ -81,84 +81,64 @@ def start_background_evaluation(submission_path):
81
  thread.start()
82
  return True
83
 
 
 
 
 
 
 
 
 
 
 
84
 
85
- def extract_json_from_string(text_output: str):
86
  """
87
- Attempts to find and parse the first valid JSON object or array from a string.
88
- Handles cases where JSON is preceded or followed by non-JSON text.
 
 
 
 
89
  """
90
- idx = 0
91
- while idx < len(text_output):
92
- # Find the next potential start of a JSON structure
93
- start_brace = text_output.find('{', idx)
94
- start_bracket = text_output.find('[', idx)
95
-
96
- if start_brace == -1 and start_bracket == -1:
97
- # No more '{' or '[' found in the rest of the string
98
- return None
99
-
100
- # Determine the actual starting character for this attempt
101
- if start_brace != -1 and (start_bracket == -1 or start_brace < start_bracket):
102
- json_start_index = start_brace
103
- else:
104
- json_start_index = start_bracket
105
 
106
- potential_json_segment = text_output[json_start_index:]
 
 
 
 
 
 
 
 
 
107
 
108
- try:
109
- # Use raw_decode to parse the first valid JSON object from the segment
110
- decoder = json.JSONDecoder()
111
- json_obj, end_index_in_segment = decoder.raw_decode(potential_json_segment)
112
- # Successfully parsed a JSON object
113
- return json_obj
114
- except json.JSONDecodeError:
115
- # This segment (starting at json_start_index) wasn't a valid JSON.
116
- # Advance the search index past the character that caused the current attempt.
117
- idx = json_start_index + 1
118
-
119
- return None # No valid JSON found in the entire string
120
-
121
-
122
- def run_instance(instance_path_str: str,
123
- timeout: int = SCRIPT_EXECUTION_TIMEOUT): # SCRIPT_EXECUTION_TIMEOUT should be defined
124
- """Run the instance file and robustly capture the JSON output."""
125
- command = [sys.executable, instance_path_str]
126
- instance_name = Path(instance_path_str).name
127
  try:
128
- result = subprocess.run(command, capture_output=True, text=True, timeout=timeout, encoding='utf-8',
129
- errors='replace')
130
-
131
- # Check return code first
132
- if result.returncode != 0:
133
- # Log stderr for debugging if the script itself failed
134
- error_message = result.stderr[:500].strip() if result.stderr else "<No stderr>"
135
- print(f" ERROR: Running {instance_name} (Return Code: {result.returncode}): {error_message}", flush=True)
136
- return None
137
-
138
- # Attempt to extract JSON from stdout
139
- stdout_text = result.stdout
140
- if not stdout_text or not stdout_text.strip():
141
- print(f" ERROR: No stdout from {instance_name}.", flush=True)
142
- return None
143
-
144
- solution = extract_json_from_string(stdout_text)
145
-
146
- if solution is None:
147
- # Be more verbose if JSON extraction fails
148
- abbreviated_stdout = stdout_text.replace('\n', '\\n')[:300] # Show newlines as \n for brevity
149
- print(
150
- f" ERROR: Could not extract valid JSON from {instance_name}. Raw stdout (abbreviated): '{abbreviated_stdout}...'",
151
- flush=True)
152
- return None
153
-
154
- return solution
155
 
156
- except subprocess.TimeoutExpired:
157
- print(f" ERROR: Timeout running {instance_name} (>{timeout}s)", flush=True)
158
- return None
 
159
  except Exception as e:
160
- print(f" ERROR: Unexpected error running {instance_name}: {e}", flush=True)
161
- return None
 
 
 
 
 
162
 
163
 
164
  def add_constraints_as_string(solution):
@@ -238,14 +218,14 @@ def main(
238
  print(f" Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...",
239
  flush=True)
240
  try:
241
- # Download the relevant submission files
242
- snapshot_download(
243
  repo_id=user_dataset_repo_id,
244
  repo_type="dataset",
245
  local_dir=local_submission_dir,
246
- allow_patterns=[f"{submission_path_in_dataset}/*"],
247
  )
248
- print(f" Downloaded submission files successfully.", flush=True)
249
 
250
  except Exception as e_download:
251
  print(f" CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
@@ -269,6 +249,18 @@ def main(
269
  # (Attempt to upload error summary)
270
  return 1
271
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  # Statistics
273
  total_submitted_models = 0
274
  models_ran_successfully = 0
@@ -285,24 +277,35 @@ def main(
285
  summary_f.write("-" * 30 + "\n")
286
 
287
  # Iterate through downloaded submitted models
288
- submitted_model_files = list((local_submission_dir / submission_path_in_dataset).rglob('*.py'))
289
- if not submitted_model_files:
290
- summary_f.write("No .py model files found in downloaded submission.\n")
291
- print(" No .py model files found in downloaded submission.", flush=True)
292
 
293
- for model_file_path in submitted_model_files:
294
  total_submitted_models += 1
295
- problem_name = model_file_path.stem
296
- print(f"\n Processing downloaded model: {model_file_path.name}", flush=True)
297
- summary_f.write(f"\n--- Model: {model_file_path.name} ---\n")
298
 
299
  summary_f.write(" 1. Running submitted model...\n")
300
- generated_solution = run_instance(str(model_file_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  if generated_solution is None:
302
- summary_f.write(" - FAILED to run or get valid JSON solution from submitted model.\n")
303
  continue
 
304
  models_ran_successfully += 1
305
- summary_f.write(f" - SUCCESS: Got solution.\n")
306
 
307
  summary_f.write(f" 2. Checking against ground-truth for '{problem_name}'...\n")
308
  if problem_name not in ground_truth_models:
@@ -328,7 +331,6 @@ def main(
328
  os.unlink(tmp_file_path_str)
329
 
330
  gt_stdout = gt_check_result.stdout
331
- # ... (parse EVAL_OUTPUT tags for consistency and objective)
332
  if "SUCCESS: Model is consistent" in gt_stdout:
333
  summary_f.write(" - CONSISTENCY: PASSED\n")
334
  consistency_checks_passed += 1
 
81
  thread.start()
82
  return True
83
 
84
+ def extract_json_from_code_output(output: str):
85
+ try:
86
+ start_index = output.find('{')
87
+ end_index = output.rfind('}') + 1
88
+ # Extract the JSON part
89
+ json_part = output[start_index:end_index]
90
+ return json.loads(json_part)
91
+ except json.JSONDecodeError:
92
+ return None
93
+
94
 
95
+ def exec_code(code: str, timeout=10, modelling_language='cpmpy'):
96
  """
97
+ Execute the given code and return the output
98
+
99
+ :param code: The code to execute as a string
100
+ :param timeout: The maximum time to wait for the code to execute in seconds
101
+ :param modelling_language: The language to use for execution (cpmpy, minizinc, or-tools)
102
+ :return: A tuple of (success, output, timeout_occurred)
103
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
+ # create a temp directory to store the temporary file
106
+ temp_dir_name = "_temp_dir_for_exec_code"
107
+ temp_dir = os.path.join(os.getcwd(), temp_dir_name)
108
+ os.makedirs(temp_dir, exist_ok=True)
109
+
110
+ # write the code to a temporary file
111
+ suffix = '.__hidden_py__' if modelling_language == "cpmpy" or modelling_language == "or-tools" else '.mzn'
112
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=suffix, dir=temp_dir, encoding='utf-8') as temp_file:
113
+ temp_instance_path = temp_file.name
114
+ temp_file.write(code)
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  try:
117
+ # execute the code
118
+ if modelling_language == "cpmpy" or modelling_language == "or-tools":
119
+ command = [sys.executable, temp_instance_path]
120
+ result = subprocess.run(command, capture_output=True, text=True, timeout=timeout, encoding='utf-8')
121
+
122
+ successfully_executed = (result.returncode == 0)
123
+ output = result.stdout if successfully_executed else result.stderr
124
+ timeout_occurred = False
125
+ elif modelling_language == "minizinc":
126
+ successfully_executed, output, timeout_occurred = exec_code_minizinc(code, timeout)
127
+ else:
128
+ raise ValueError(f"MODELLING_LANGUAGE not supported: {modelling_language}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
+ except subprocess.TimeoutExpired as e:
131
+ successfully_executed = False
132
+ output = f"Timeout Error: Execution time exceeded {timeout} seconds"
133
+ timeout_occurred = True
134
  except Exception as e:
135
+ successfully_executed = False
136
+ output = f"Error: {e}"
137
+ timeout_occurred = False
138
+
139
+ os.remove(temp_instance_path)
140
+
141
+ return successfully_executed, output, timeout_occurred
142
 
143
 
144
  def add_constraints_as_string(solution):
 
218
  print(f" Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...",
219
  flush=True)
220
  try:
221
+ # Download the relevant submission file
222
+ hf_hub_download(
223
  repo_id=user_dataset_repo_id,
224
  repo_type="dataset",
225
  local_dir=local_submission_dir,
226
+ filename=f"{submission_path_in_dataset}/submission.jsonl",
227
  )
228
+ print(f" Downloaded submission file successfully.", flush=True)
229
 
230
  except Exception as e_download:
231
  print(f" CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
 
249
  # (Attempt to upload error summary)
250
  return 1
251
 
252
+ # load generated models from jsonl to memory
253
+ print(f" Loading generated models from '{local_submission_dir}'...", flush=True)
254
+ submitted_models = []
255
+ with open(os.path.join(local_submission_dir, submission_path_in_dataset, "submission.jsonl"), "r", encoding="utf-8") as f:
256
+ for line in f:
257
+ try:
258
+ json_obj = json.loads(line)
259
+ submitted_models.append(json_obj)
260
+ except json.JSONDecodeError as e:
261
+ print(f" ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)
262
+ print(f" Loaded {len(submitted_models)} generated models.", flush=True)
263
+
264
  # Statistics
265
  total_submitted_models = 0
266
  models_ran_successfully = 0
 
277
  summary_f.write("-" * 30 + "\n")
278
 
279
  # Iterate through downloaded submitted models
280
+ for submitted_model in submitted_models:
281
+ curr_model = submitted_model[GT_MODEL_CODE_COLUMN]
 
 
282
 
 
283
  total_submitted_models += 1
284
+ problem_name = submitted_model[GT_PROBLEM_NAME_COLUMN]
285
+ print(f"\n Processing downloaded model: {problem_name}", flush=True)
286
+ summary_f.write(f"\n--- Model: {problem_name} ---\n")
287
 
288
  summary_f.write(" 1. Running submitted model...\n")
289
+
290
+ succ_exec, output, timeout_occurred = exec_code(curr_model, timeout=SCRIPT_EXECUTION_TIMEOUT)
291
+
292
+ if timeout_occurred:
293
+ summary_f.write(f" - TIMEOUT: Execution time exceeded {SCRIPT_EXECUTION_TIMEOUT} seconds.\n")
294
+ continue
295
+ if not succ_exec:
296
+ summary_f.write(f" - FAILED: Execution failed with error: {output}\n")
297
+ continue
298
+ if output is None or not output.strip():
299
+ summary_f.write(f" - FAILED: No output from execution.\n")
300
+ continue
301
+ # Attempt to extract JSON from stdout
302
+ generated_solution = extract_json_from_code_output(output)
303
  if generated_solution is None:
304
+ summary_f.write(f" - FAILED: Could not extract JSON solution from output: {output}\n")
305
  continue
306
+
307
  models_ran_successfully += 1
308
+ summary_f.write(f" - SUCCESS: Got solution: {generated_solution}\n")
309
 
310
  summary_f.write(f" 2. Checking against ground-truth for '{problem_name}'...\n")
311
  if problem_name not in ground_truth_models:
 
331
  os.unlink(tmp_file_path_str)
332
 
333
  gt_stdout = gt_check_result.stdout
 
334
  if "SUCCESS: Model is consistent" in gt_stdout:
335
  summary_f.write(" - CONSISTENCY: PASSED\n")
336
  consistency_checks_passed += 1
src/hf_utils.py CHANGED
@@ -90,23 +90,22 @@ def load_leaderboard_data():
90
  return pd.DataFrame(leaderboard_entries)
91
 
92
 
93
- def upload_submission(uploaded_files, dir_name):
94
  """Upload submission to Hugging Face Dataset."""
95
  if not HF_API:
96
  return False, "Hugging Face API not initialized"
97
-
98
  try:
99
  submission_path = f"{DS_SUBMISSIONS_PATH}/{dir_name}"
100
 
101
- for file in uploaded_files:
102
- file_name = os.path.basename(file.name)
103
- HF_API.upload_file(
104
- path_or_fileobj=file,
105
- path_in_repo=f"{submission_path}/{file_name}",
106
- repo_id=DATASET_REPO_ID,
107
- repo_type="dataset",
108
- commit_message=f"Upload submission: {dir_name}"
109
- )
110
 
111
  return True, submission_path
112
  except Exception as e:
 
90
  return pd.DataFrame(leaderboard_entries)
91
 
92
 
93
+ def upload_submission(uploaded_file, dir_name):
94
  """Upload submission to Hugging Face Dataset."""
95
  if not HF_API:
96
  return False, "Hugging Face API not initialized"
97
+
98
  try:
99
  submission_path = f"{DS_SUBMISSIONS_PATH}/{dir_name}"
100
 
101
+ # file_name = os.path.basename(uploaded_file.name)
102
+ HF_API.upload_file(
103
+ path_or_fileobj=uploaded_file,
104
+ path_in_repo=f"{submission_path}/submission.jsonl",
105
+ repo_id=DATASET_REPO_ID,
106
+ repo_type="dataset",
107
+ commit_message=f"Upload submission: {dir_name}"
108
+ )
 
109
 
110
  return True, submission_path
111
  except Exception as e:
src/ui.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import gradio as gr
2
  from pathlib import Path
3
 
@@ -5,10 +7,10 @@ from src.hf_utils import load_leaderboard_data, upload_submission, check_name_ex
5
  from src.eval import start_background_evaluation
6
 
7
 
8
- def handle_upload(submission_name, uploaded_files, progress=gr.Progress()):
9
  """Handle file upload and start evaluation."""
10
- if not uploaded_files or len(uploaded_files) == 0:
11
- return "No directory uploaded or directory is empty, please try again."
12
 
13
  # normalize the submission name
14
  submission_name = submission_name.strip().replace(" ", "_").lower()
@@ -26,8 +28,22 @@ def handle_upload(submission_name, uploaded_files, progress=gr.Progress()):
26
  try:
27
  progress(0.3, "Uploading to Hugging Face...")
28
 
29
- # Upload the directory to Hugging Face
30
- success, result = upload_submission(uploaded_files, submission_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  if not success:
32
  return f"Upload failed: {result}"
33
 
@@ -58,7 +74,7 @@ def create_ui():
58
  interactive=True,
59
  info="This name will appear on the leaderboard"
60
  )
61
- upload_button = gr.UploadButton("Click to Upload Directory", file_count="directory")
62
  status_box = gr.Textbox(label="Status", interactive=False)
63
 
64
  with gr.Column(scale=3):
 
1
+ import json
2
+
3
  import gradio as gr
4
  from pathlib import Path
5
 
 
7
  from src.eval import start_background_evaluation
8
 
9
 
10
+ def handle_upload(submission_name, uploaded_file, progress=gr.Progress()):
11
  """Handle file upload and start evaluation."""
12
+ if not uploaded_file:
13
+ return "No file uploaded. Please upload a valid submission file."
14
 
15
  # normalize the submission name
16
  submission_name = submission_name.strip().replace(" ", "_").lower()
 
28
  try:
29
  progress(0.3, "Uploading to Hugging Face...")
30
 
31
+ # Check if the file is a valid JSONL file
32
+ if not uploaded_file.name.endswith(".jsonl"):
33
+ return "Invalid file format. Please upload a .jsonl file."
34
+
35
+ # Check that the keys in the JSONL file are correct ('id' and 'model')
36
+ with open(uploaded_file.name, "r") as file:
37
+ found_one = False
38
+ for line in file:
39
+ found_one = True
40
+ json_object = json.loads(line)
41
+ if not all(key in json_object for key in ["id", "model"]):
42
+ return "Invalid content. Each line must contain 'id' and 'model' keys."
43
+ if not found_one:
44
+ return "Empty file. Please upload a valid JSONL file."
45
+
46
+ success, result = upload_submission(uploaded_file, submission_name)
47
  if not success:
48
  return f"Upload failed: {result}"
49
 
 
74
  interactive=True,
75
  info="This name will appear on the leaderboard"
76
  )
77
+ upload_button = gr.UploadButton("Click to Upload Submission", file_count="single")
78
  status_box = gr.Textbox(label="Status", interactive=False)
79
 
80
  with gr.Column(scale=3):