# Yago Bolivar
# fix: enhance output handling and add comprehensive tests for code execution tool
# ada4787
import os
import sys
import tempfile
import unittest
from pathlib import Path

# Add the parent directory to sys.path to find the src module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from src.python_tool import CodeExecutionTool
class TestCodeExecutionTool(unittest.TestCase):
    """Unit tests for ``CodeExecutionTool``: safety analysis and execution.

    Every test runs against a fresh ``CodeExecutionTool`` created in
    ``setUp``. Results are inspected only through the dictionary keys the
    assertions read: ``safe``, ``reason``, ``success``, ``raw_output``,
    ``has_numeric_result``, ``numeric_value`` and ``error``.
    """

    def setUp(self):
        """Create a new tool instance for each test."""
        self.code_tool = CodeExecutionTool()

    # ------------------------------------------------------------------
    # Static safety analysis
    # ------------------------------------------------------------------

    def test_analyze_code_safety_imports(self):
        """Test that the tool detects banned imports."""
        verdict = self.code_tool._analyze_code_safety("import os")
        self.assertFalse(verdict["safe"])
        # The rejection reason is expected to name the offending module.
        self.assertIn("os", verdict["reason"])

    def test_analyze_code_safety_exec_eval(self):
        """Test that the tool detects exec and eval usage."""
        verdict = self.code_tool._analyze_code_safety("exec('print(1)')")
        self.assertFalse(verdict["safe"])
        self.assertIn("exec()", verdict["reason"])

    def test_analyze_code_safety_valid_code(self):
        """Test that the tool allows safe code."""
        verdict = self.code_tool._analyze_code_safety("print(1 + 1)")
        self.assertTrue(verdict["safe"])

    # ------------------------------------------------------------------
    # Fixture sanity check
    # ------------------------------------------------------------------

    def test_common_question_reverse_word(self):
        """Test the reverse word question from common_questions.json.

        NOTE(review): this test does not call ``self.code_tool``; it only
        verifies that the reversed question text decodes to the expected
        English sentence (to which the expected answer is "Right").
        """
        question = (
            ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw "
            ",ecnetnes siht dnatsrednu uoy fI"
        )
        decoded = (
            "If you understand this sentence, write the opposite of the "
            "word \"left\" as the answer."
        )
        self.assertEqual(question[::-1], decoded)

    # ------------------------------------------------------------------
    # execute_code
    # ------------------------------------------------------------------

    def test_execute_code_success(self):
        """Test successful execution of safe Python code."""
        outcome = self.code_tool.execute_code("print(42)")
        self.assertTrue(outcome["success"])
        self.assertEqual(outcome["raw_output"].strip(), "42")

    def test_execute_code_numeric_extraction(self):
        """Test numeric value extraction from code output."""
        outcome = self.code_tool.execute_code("print(3.14)")
        self.assertTrue(outcome["success"])
        self.assertTrue(outcome["has_numeric_result"])
        # Compare floats with a tolerance instead of exact equality.
        self.assertAlmostEqual(outcome["numeric_value"], 3.14)

    def test_execute_code_timeout(self):
        """Test that code execution times out as expected."""
        # An infinite loop; relies on the timeout the tool applies by
        # default (value not visible here -- verify it keeps the suite fast).
        outcome = self.code_tool.execute_code("while True: pass")
        self.assertFalse(outcome["success"])
        self.assertIn("timed out", outcome["error"].lower())

    def test_execute_code_error_handling(self):
        """Test error handling for code that raises exceptions."""
        outcome = self.code_tool.execute_code("raise ValueError('Test error')")
        self.assertFalse(outcome["success"])
        self.assertIn("ValueError", outcome["error"])

    def test_execute_code_output_size_limit(self):
        """Test that output is truncated if it exceeds max_output_size."""
        outcome = self.code_tool.execute_code("print('A' * 20000)")
        self.assertTrue(outcome["success"])
        self.assertLessEqual(
            len(outcome["raw_output"]), self.code_tool.max_output_size
        )

    # ------------------------------------------------------------------
    # execute_file
    # ------------------------------------------------------------------

    def test_execute_file_success(self):
        """Test successful execution of a Python file."""
        # Write the script into a private temporary directory instead of the
        # current working directory: no existing file can be overwritten and
        # the script is removed even if the tool raises or an assertion fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            script_path = Path(tmp_dir) / "test_script.py"
            script_path.write_text(
                "print('File executed successfully')", encoding="utf-8"
            )
            outcome = self.code_tool.execute_file(str(script_path))
        self.assertTrue(outcome["success"])
        self.assertEqual(
            outcome["raw_output"].strip(), "File executed successfully"
        )

    def test_execute_file_not_found(self):
        """Test handling of file not found error."""
        outcome = self.code_tool.execute_file("non_existent_file.py")
        self.assertFalse(outcome["success"])
        self.assertIn("File not found", outcome["error"])
# Run the suite only when this file is executed directly, not when imported.
if __name__ == "__main__":
    unittest.main()