Commit
·
133e122
1
Parent(s):
15c4da2
Fix counting of submitted models that also exist in the ground truth
Browse files
- src/user_eval.py +4 -2
src/user_eval.py
CHANGED
@@ -258,9 +258,12 @@ def evaluate_submission(submitted_models, summary_file_path, modelling_framw, to
|
|
258 |
# Iterate through downloaded submitted models
|
259 |
for submitted_model in tqdm(submitted_models):
|
260 |
curr_model = submitted_model[GT_MODEL_CODE_COLUMN]
|
|
|
261 |
|
262 |
total_submitted_models += 1
|
263 |
-
problem_name
|
|
|
|
|
264 |
print(f"\n Processing model: {problem_name}", flush=True)
|
265 |
summary_f.write(f"\n--- Model: {problem_name} ---\n")
|
266 |
|
@@ -299,7 +302,6 @@ def evaluate_submission(submitted_models, summary_file_path, modelling_framw, to
|
|
299 |
summary_f.write(" 3. Performing self-consistency check on ground-truth model...\n")
|
300 |
modified_gt_script = get_modified_script(ground_truth_script_content, generated_solution)
|
301 |
|
302 |
-
total_submitted_models_that_also_exist_in_gt += 1
|
303 |
try:
|
304 |
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8',
|
305 |
dir=top_lvl_temp_dir) as tmp_file:
|
|
|
258 |
# Iterate through downloaded submitted models
|
259 |
for submitted_model in tqdm(submitted_models):
|
260 |
curr_model = submitted_model[GT_MODEL_CODE_COLUMN]
|
261 |
+
problem_name = submitted_model[GT_PROBLEM_NAME_COLUMN]
|
262 |
|
263 |
total_submitted_models += 1
|
264 |
+
if problem_name in ground_truth_models:
|
265 |
+
total_submitted_models_that_also_exist_in_gt += 1
|
266 |
+
|
267 |
print(f"\n Processing model: {problem_name}", flush=True)
|
268 |
summary_f.write(f"\n--- Model: {problem_name} ---\n")
|
269 |
|
|
|
302 |
summary_f.write(" 3. Performing self-consistency check on ground-truth model...\n")
|
303 |
modified_gt_script = get_modified_script(ground_truth_script_content, generated_solution)
|
304 |
|
|
|
305 |
try:
|
306 |
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8',
|
307 |
dir=top_lvl_temp_dir) as tmp_file:
|