File size: 3,692 Bytes
6bd9122 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import contextlib
import multiprocessing
import os
import subprocess
import tempfile
def check_correctness(candidate, reference, task_id, completion_id):
    """
    Evaluates the functional correctness of a completion by running the test
    suite provided in the problem.

    Each of the three candidate variants ("base", "sfinae", "concepts") is
    handed, together with the reference tests, to ``process_case`` with
    ``unsafe_execute_cpp`` as the worker, one variant after the other.

    :param candidate: mapping with the keys "base", "sfinae" and "concepts",
        each holding the source text of one variant.
    :param reference: mapping whose "tests" entry holds the test source that
        is run against every variant.
    :param task_id: identifier of the task, copied into the returned dict.
    :param completion_id: an optional completion ID so we can match
        the results later even if execution finishes asynchronously.
    :return: dict with ``task_id``, ``completion_id`` and, for every variant,
        ``<variant>_run_passed`` (bool) and ``<variant>_run_result`` (the
        first entry the worker reported).
    """
    outcome = dict(task_id=task_id, completion_id=completion_id)
    # The manager starts a helper server process. The ``with`` block shuts it
    # down again, so every value is copied out of the proxy lists before the
    # block ends (the proxies are unusable afterwards).
    with multiprocessing.Manager() as manager:
        for variant in ("base", "sfinae", "concepts"):
            run_result = manager.list()
            process_case(
                unsafe_execute_cpp,
                candidate[variant],
                reference["tests"],
                run_result,
            )
            status = run_result[0]
            outcome[f"{variant}_run_passed"] = status == "passed"
            outcome[f"{variant}_run_result"] = status
    return outcome
def process_case(target, candidate, reference, result, timeout=60):
    """
    Run ``target`` in a separate process and make sure ``result`` is filled.

    :param target: callable executed in the child process as
        ``target(candidate, reference, result, timeout)``.
    :param candidate: candidate source forwarded unchanged to ``target``.
    :param reference: reference/test source forwarded unchanged to ``target``.
    :param result: list-like object ``target`` appends its outcome to. It has
        to be shared between processes (e.g. a manager list) for the child's
        entries to be visible here.
    :param timeout: seconds forwarded to ``target``; the parent waits
        ``timeout + 5`` seconds for the child before killing it.
    :return: None. If ``result`` is still empty once the child is gone,
        "timed out" is appended to it.
    """
    worker = multiprocessing.Process(
        target=target,
        args=(candidate, reference, result, timeout),
    )
    worker.start()
    # Wait slightly longer than the timeout given to the target so the target
    # gets the chance to report its own timeout first.
    worker.join(timeout=timeout + 5)
    if worker.is_alive():
        worker.kill()
        # Reap the killed child so it does not linger as a zombie process.
        worker.join()
    if not result:
        result.append("timed out")
def unsafe_execute_cpp(candidate, reference, result, timeout):
    """
    Compile and run ``candidate`` + ``reference`` as one C++20 program.

    The program text is written to ``test.cpp`` inside a fresh temporary
    working directory, compiled with the compiler named by the
    ``GENERICIFY_CLANG`` environment variable and, if compilation succeeds,
    the produced ``./a.out`` is executed. The code is run as-is, without any
    sandboxing.

    :param candidate: C++ source text placed after the ``bits/stdc++.h``
        include.
    :param reference: C++ source text appended after ``candidate``.
    :param result: list-like object that receives exactly one entry:
        "passed", "timed out", or a string starting with "failed:".
    :param timeout: seconds allowed for the compilation and, separately, for
        the execution of the compiled program.
    :return: None; the outcome is reported through ``result``.
    """
    cpp_compiler = os.getenv("GENERICIFY_CLANG")
    if not cpp_compiler:
        # Without this guard ``None`` would reach subprocess.run and crash
        # the worker, which callers would then misreport as a timeout.
        result.append("failed: GENERICIFY_CLANG environment variable is not set")
        return

    with create_tempdir():
        code = "#include <bits/stdc++.h>\n" + candidate + reference
        # Close the file before compiling so the whole source is on disk.
        with open("test.cpp", "w") as source_file:
            source_file.write(code)

        try:
            compilation_result = subprocess.run(
                [cpp_compiler, "-std=c++20", "test.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return
        except OSError as exc:
            # The compiler could not be launched (e.g. wrong path).
            result.append(f"failed: compilation error: {exc}")
            return

        if compilation_result.returncode != 0:
            err = _captured_output(compilation_result)
            result.append(f"failed: compilation error: {err}")
            return

        try:
            exec_result = subprocess.run(
                ["./a.out"], timeout=timeout, capture_output=True
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if exec_result.returncode == 0:
            result.append("passed")
        else:
            result.append(f"failed: {_captured_output(exec_result)}")


def _captured_output(completed):
    """Return stderr (or stdout when stderr is empty) of a finished process.

    The bytes are decoded to text; if they are not valid in the default
    encoding the raw bytes are returned instead.
    """
    raw = completed.stderr if completed.stderr else completed.stdout
    try:
        return raw.decode()
    except UnicodeDecodeError:
        return raw
@contextlib.contextmanager
def create_tempdir():
    """
    Context manager that creates a temporary directory, makes it the current
    working directory and yields its path.

    On exit the previous working directory is restored by ``chdir`` and the
    temporary directory is then cleaned up by ``tempfile.TemporaryDirectory``.
    """
    with tempfile.TemporaryDirectory() as scratch_dir, chdir(scratch_dir):
        yield scratch_dir
@contextlib.contextmanager
def chdir(root):
    """
    Context manager that temporarily switches the working directory.

    :param root: directory to change into. When it is exactly ".", the
        working directory is left untouched.
    :return: yields nothing; on exit, whether normal or through an exception,
        the working directory that was current on entry is restored.
    """
    if root == ".":
        yield
        return
    previous_dir = os.getcwd()
    os.chdir(root)
    try:
        yield
    finally:
        # Exceptions from the managed body propagate unchanged; only the
        # directory needs restoring, so no except clause is required.
        os.chdir(previous_dir)
|