Spaces:
Sleeping
Sleeping
Commit
·
0b8f0ae
1
Parent(s):
bf018b4
update eval logging
Browse files
- src/user_eval.py +3 -4
src/user_eval.py
CHANGED
@@ -245,7 +245,6 @@ def evaluate_submission(submitted_models, summary_file_path, modelling_framw, to
|
|
245 |
return 1
|
246 |
|
247 |
# Statistics
|
248 |
-
total_submitted_models = len(submitted_models)
|
249 |
total_submitted_models_that_also_exist_in_gt = 0
|
250 |
models_ran_successfully = 0
|
251 |
consistency_checks_passed = 0
|
@@ -326,10 +325,10 @@ def evaluate_submission(submitted_models, summary_file_path, modelling_framw, to
|
|
326 |
# Final statistics (write to summary_f)
|
327 |
summary_f.write("\n" + "=" * 30 + "\n")
|
328 |
summary_f.write("Overall Evaluation Statistics:\n")
|
329 |
-
summary_f.write(f" Total Submitted Models
|
330 |
-
summary_f.write(f" Models That Ran Successfully (out of
|
331 |
summary_f.write(f" Submission coverage perc: {float(total_submitted_models_that_also_exist_in_gt) / len(ground_truth_models) * 100:.2f}%\n")
|
332 |
-
summary_f.write(f" Error perc: {float(
|
333 |
summary_f.write(f" Consistency perc: {consistency_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
334 |
summary_f.write(f" Final Solution Accuracy perc: {all_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
335 |
summary_f.write("-" * 30 + "\n")
|
|
|
245 |
return 1
|
246 |
|
247 |
# Statistics
|
|
|
248 |
total_submitted_models_that_also_exist_in_gt = 0
|
249 |
models_ran_successfully = 0
|
250 |
consistency_checks_passed = 0
|
|
|
325 |
# Final statistics (write to summary_f)
|
326 |
summary_f.write("\n" + "=" * 30 + "\n")
|
327 |
summary_f.write("Overall Evaluation Statistics:\n")
|
328 |
+
summary_f.write(f" Total Submitted Models that also exist in the dataset: {total_submitted_models_that_also_exist_in_gt}\n")
|
329 |
+
summary_f.write(f" Models That Ran Successfully (out of submitted models): {models_ran_successfully}/{total_submitted_models_that_also_exist_in_gt}\n")
|
330 |
summary_f.write(f" Submission coverage perc: {float(total_submitted_models_that_also_exist_in_gt) / len(ground_truth_models) * 100:.2f}%\n")
|
331 |
+
summary_f.write(f" Error perc: {float(total_submitted_models_that_also_exist_in_gt - models_ran_successfully) / float(total_submitted_models_that_also_exist_in_gt) * 100:.2f}%\n")
|
332 |
summary_f.write(f" Consistency perc: {consistency_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
333 |
summary_f.write(f" Final Solution Accuracy perc: {all_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
334 |
summary_f.write("-" * 30 + "\n")
|