kostis-init committed on
Commit
bf018b4
·
1 Parent(s): 133e122

clean up model counting

Browse files
Files changed (1) hide show
  1. src/user_eval.py +10 -16
src/user_eval.py CHANGED
@@ -245,7 +245,7 @@ def evaluate_submission(submitted_models, summary_file_path, modelling_framw, to
245
  return 1
246
 
247
  # Statistics
248
- total_submitted_models = 0
249
  total_submitted_models_that_also_exist_in_gt = 0
250
  models_ran_successfully = 0
251
  consistency_checks_passed = 0
@@ -260,15 +260,17 @@ def evaluate_submission(submitted_models, summary_file_path, modelling_framw, to
260
  curr_model = submitted_model[GT_MODEL_CODE_COLUMN]
261
  problem_name = submitted_model[GT_PROBLEM_NAME_COLUMN]
262
 
263
- total_submitted_models += 1
264
- if problem_name in ground_truth_models:
265
- total_submitted_models_that_also_exist_in_gt += 1
266
-
267
  print(f"\n Processing model: {problem_name}", flush=True)
268
  summary_f.write(f"\n--- Model: {problem_name} ---\n")
269
 
270
- summary_f.write(" 1. Running submitted model...\n")
 
 
 
 
 
271
 
 
272
  succ_exec, output, timeout_occurred = exec_code(curr_model, timeout=SCRIPT_EXECUTION_TIMEOUT,
273
  modelling_language=modelling_framw)
274
 
@@ -292,16 +294,8 @@ def evaluate_submission(submitted_models, summary_file_path, modelling_framw, to
292
  continue
293
  summary_f.write(f" - SUCCESS: Got solution: {generated_solution}\n")
294
 
295
- summary_f.write(f" 2. Checking against ground-truth for '{problem_name}'...\n")
296
- if problem_name not in ground_truth_models:
297
- summary_f.write(f" - FAILED: Ground-truth model for '{problem_name}' not found in dataset.\n")
298
- continue
299
- ground_truth_script_content = ground_truth_models[problem_name]
300
- summary_f.write(" - SUCCESS: Found ground-truth model.\n")
301
-
302
- summary_f.write(" 3. Performing self-consistency check on ground-truth model...\n")
303
- modified_gt_script = get_modified_script(ground_truth_script_content, generated_solution)
304
-
305
  try:
306
  with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8',
307
  dir=top_lvl_temp_dir) as tmp_file:
 
245
  return 1
246
 
247
  # Statistics
248
+ total_submitted_models = len(submitted_models)
249
  total_submitted_models_that_also_exist_in_gt = 0
250
  models_ran_successfully = 0
251
  consistency_checks_passed = 0
 
260
  curr_model = submitted_model[GT_MODEL_CODE_COLUMN]
261
  problem_name = submitted_model[GT_PROBLEM_NAME_COLUMN]
262
 
 
 
 
 
263
  print(f"\n Processing model: {problem_name}", flush=True)
264
  summary_f.write(f"\n--- Model: {problem_name} ---\n")
265
 
266
+ if problem_name in ground_truth_models:
267
+ total_submitted_models_that_also_exist_in_gt += 1
268
+ summary_f.write(f" 0. Found ground-truth model for '{problem_name}' in dataset.\n")
269
+ else:
270
+ summary_f.write(f" - SKIPPED: Ground-truth model for '{problem_name}' not found in dataset.\n")
271
+ continue
272
 
273
+ summary_f.write(" 1. Running submitted model...\n")
274
  succ_exec, output, timeout_occurred = exec_code(curr_model, timeout=SCRIPT_EXECUTION_TIMEOUT,
275
  modelling_language=modelling_framw)
276
 
 
294
  continue
295
  summary_f.write(f" - SUCCESS: Got solution: {generated_solution}\n")
296
 
297
+ summary_f.write(" 2. Performing self-consistency check on ground-truth model...\n")
298
+ modified_gt_script = get_modified_script(ground_truth_models[problem_name], generated_solution)
 
 
 
 
 
 
 
 
299
  try:
300
  with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8',
301
  dir=top_lvl_temp_dir) as tmp_file: