Spaces:
Sleeping
Sleeping
Allowing capturing and returning output
Browse files- README.md +4 -0
- restrictedpython_code_eval.py +17 -8
README.md
CHANGED
|
@@ -64,6 +64,10 @@ In addition, this metric supports three additional arguments, specifying which i
|
|
| 64 |
|
| 65 |
**`allow_underscore_variable_names`**: (`bool`): Whether or not to allow the use of variable names starting with an underscore. Defaults to False, as it's considered [harmful](https://stackoverflow.com/questions/1301346/what-is-the-meaning-of-a-single-and-a-double-underscore-before-an-object-name).
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
| 68 |
|
| 69 |
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
|
|
|
|
| 64 |
|
| 65 |
**`allow_underscore_variable_names`**: (`bool`): Whether or not to allow the use of variable names starting with an underscore. Defaults to False, as it's considered [harmful](https://stackoverflow.com/questions/1301346/what-is-the-meaning-of-a-single-and-a-double-underscore-before-an-object-name).
|
| 66 |
|
| 67 |
+
**`return_output`**: (`bool`): Whether or not to return the output of the code, i.e. the value of the variable named by `output_variable` once the code has run. Defaults to False.
|
| 68 |
+
|
| 69 |
+
**`output_variable`**: (`str`): The name of the variable whose value is returned as the output when `return_output` is True. Defaults to `'output'`.
|
| 70 |
+
|
| 71 |
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
| 72 |
|
| 73 |
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
|
restrictedpython_code_eval.py
CHANGED
|
@@ -263,6 +263,8 @@ Args:
|
|
| 263 |
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
| 264 |
allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
|
| 265 |
allow_underscore_variable_names: a bool indicating whether to allow the use of underscore variable names in the tested code
|
|
|
|
|
|
|
| 266 |
|
| 267 |
Returns:
|
| 268 |
pass_at_k: dict with pass rates for each k
|
|
@@ -350,7 +352,7 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
| 350 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
| 351 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
| 352 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
| 353 |
-
allow_underscore_variable_names: bool = False):
|
| 354 |
"""Returns the scores"""
|
| 355 |
|
| 356 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
@@ -372,7 +374,8 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
| 372 |
test_program, timeout, task_id, completion_id[task_id],
|
| 373 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
| 374 |
additional_globals, additional_locals,
|
| 375 |
-
allowed_imports, allow_str_format, allow_underscore_variable_names
|
|
|
|
| 376 |
)
|
| 377 |
future = executor.submit(_check_correctness, *args)
|
| 378 |
futures.append(future)
|
|
@@ -421,7 +424,7 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
|
|
| 421 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
| 422 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
| 423 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
| 424 |
-
allow_underscore_variable_names: bool = False):
|
| 425 |
"""
|
| 426 |
Evaluates the functional correctness of a completion by running the test
|
| 427 |
suite provided in the problem.
|
|
@@ -437,6 +440,7 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
|
|
| 437 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
| 438 |
additional_globals, additional_locals,
|
| 439 |
allowed_imports, allow_str_format, allow_underscore_variable_names,
|
|
|
|
| 440 |
)
|
| 441 |
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
| 442 |
p.start()
|
|
@@ -515,12 +519,11 @@ class DefaultPrinter:
|
|
| 515 |
print(*objects, **kwargs)
|
| 516 |
|
| 517 |
|
| 518 |
-
|
| 519 |
def _unsafe_execute(check_program, result, timeout,
|
| 520 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
| 521 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
| 522 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
| 523 |
-
allow_underscore_variable_names: bool = False):
|
| 524 |
|
| 525 |
with create_tempdir():
|
| 526 |
|
|
@@ -535,6 +538,9 @@ def _unsafe_execute(check_program, result, timeout,
|
|
| 535 |
# Disable functionalities that can make destructive changes to the test.
|
| 536 |
reliability_guard()
|
| 537 |
|
|
|
|
|
|
|
|
|
|
| 538 |
# Run program.
|
| 539 |
try:
|
| 540 |
builtins = {}
|
|
@@ -604,12 +610,15 @@ def _unsafe_execute(check_program, result, timeout,
|
|
| 604 |
with swallow_io():
|
| 605 |
policy_class = AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer if allow_underscore_variable_names else AllowAugmentedAssignRestrictingTransformer
|
| 606 |
|
| 607 |
-
|
| 608 |
with time_limit(timeout):
|
| 609 |
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=policy_class)
|
| 610 |
exec(byte_code, exec_globals, additional_locals)
|
| 611 |
-
|
| 612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
except EOFError:
|
| 614 |
result.append("EOF error")
|
| 615 |
except TimeoutException:
|
|
|
|
| 263 |
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
| 264 |
allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
|
| 265 |
allow_underscore_variable_names: a bool indicating whether to allow the use of underscore variable names in the tested code
|
| 266 |
+
return_output: a bool indicating whether to return the output of the tested code
|
| 267 |
+
output_variable: a string indicating the name of the variable to return if return_output is True
|
| 268 |
|
| 269 |
Returns:
|
| 270 |
pass_at_k: dict with pass rates for each k
|
|
|
|
| 352 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
| 353 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
| 354 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
| 355 |
+
allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
|
| 356 |
"""Returns the scores"""
|
| 357 |
|
| 358 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
|
|
| 374 |
test_program, timeout, task_id, completion_id[task_id],
|
| 375 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
| 376 |
additional_globals, additional_locals,
|
| 377 |
+
allowed_imports, allow_str_format, allow_underscore_variable_names,
|
| 378 |
+
return_output, output_variable,
|
| 379 |
)
|
| 380 |
future = executor.submit(_check_correctness, *args)
|
| 381 |
futures.append(future)
|
|
|
|
| 424 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
| 425 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
| 426 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
| 427 |
+
allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
|
| 428 |
"""
|
| 429 |
Evaluates the functional correctness of a completion by running the test
|
| 430 |
suite provided in the problem.
|
|
|
|
| 440 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
| 441 |
additional_globals, additional_locals,
|
| 442 |
allowed_imports, allow_str_format, allow_underscore_variable_names,
|
| 443 |
+
return_output, output_variable
|
| 444 |
)
|
| 445 |
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
| 446 |
p.start()
|
|
|
|
| 519 |
print(*objects, **kwargs)
|
| 520 |
|
| 521 |
|
|
|
|
| 522 |
def _unsafe_execute(check_program, result, timeout,
|
| 523 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
| 524 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
| 525 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
| 526 |
+
allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
|
| 527 |
|
| 528 |
with create_tempdir():
|
| 529 |
|
|
|
|
| 538 |
# Disable functionalities that can make destructive changes to the test.
|
| 539 |
reliability_guard()
|
| 540 |
|
| 541 |
+
if return_output and additional_locals is None:
|
| 542 |
+
additional_locals = {}
|
| 543 |
+
|
| 544 |
# Run program.
|
| 545 |
try:
|
| 546 |
builtins = {}
|
|
|
|
| 610 |
with swallow_io():
|
| 611 |
policy_class = AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer if allow_underscore_variable_names else AllowAugmentedAssignRestrictingTransformer
|
| 612 |
|
|
|
|
| 613 |
with time_limit(timeout):
|
| 614 |
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=policy_class)
|
| 615 |
exec(byte_code, exec_globals, additional_locals)
|
| 616 |
+
|
| 617 |
+
if return_output:
|
| 618 |
+
result.append(additional_locals[output_variable])
|
| 619 |
+
else:
|
| 620 |
+
result.append("passed")
|
| 621 |
+
|
| 622 |
except EOFError:
|
| 623 |
result.append("EOF error")
|
| 624 |
except TimeoutException:
|