Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -106,6 +106,7 @@ def evaluate(
|
|
| 106 |
max_as_limit: int = 30 * 1024,
|
| 107 |
max_data_limit: int = 30 * 1024,
|
| 108 |
max_stack_limit: int = 10,
|
|
|
|
| 109 |
check_gt_only: bool = False,
|
| 110 |
no_gt: bool = False,
|
| 111 |
):
|
|
@@ -156,7 +157,7 @@ def evaluate(
|
|
| 156 |
if "solution" in sample
|
| 157 |
else problems[task_id]["complete_prompt"] + sample["completion"]
|
| 158 |
)
|
| 159 |
-
if … [remainder of this removed line was lost in extraction]
|
| 160 |
solution = problems[task_id]["code_prompt"] + "\n pass\n" + solution
|
| 161 |
remainings.add(sample["_identifier"])
|
| 162 |
args = (
|
|
@@ -223,7 +224,7 @@ def evaluate(
|
|
| 223 |
pass_at_k["model"] = os.path.basename(samples).split("--bigcodebench-")[0]
|
| 224 |
pass_at_k["split"] = split
|
| 225 |
pass_at_k["subset"] = subset
|
| 226 |
-
pass_at_k["calibrated"] = … [removed right-hand side was lost in extraction]
|
| 227 |
pass_at_k["gt_pass_rate"] = gt_pass_rate
|
| 228 |
pass_at_k["failed_tasks"] = failed_tasks
|
| 229 |
|
|
|
|
| 106 |
max_as_limit: int = 30 * 1024,
|
| 107 |
max_data_limit: int = 30 * 1024,
|
| 108 |
max_stack_limit: int = 10,
|
| 109 |
+
calibrated: bool = True,
|
| 110 |
check_gt_only: bool = False,
|
| 111 |
no_gt: bool = False,
|
| 112 |
):
|
|
|
|
| 157 |
if "solution" in sample
|
| 158 |
else problems[task_id]["complete_prompt"] + sample["completion"]
|
| 159 |
)
|
| 160 |
+
if calibrated:
|
| 161 |
solution = problems[task_id]["code_prompt"] + "\n pass\n" + solution
|
| 162 |
remainings.add(sample["_identifier"])
|
| 163 |
args = (
|
|
|
|
| 224 |
pass_at_k["model"] = os.path.basename(samples).split("--bigcodebench-")[0]
|
| 225 |
pass_at_k["split"] = split
|
| 226 |
pass_at_k["subset"] = subset
|
| 227 |
+
pass_at_k["calibrated"] = calibrated
|
| 228 |
pass_at_k["gt_pass_rate"] = gt_pass_rate
|
| 229 |
pass_at_k["failed_tasks"] = failed_tasks
|
| 230 |
|