# Final_Assignment_Template3 / tests / test_remote_executors.py
# Duibonduil's picture
# Upload 21 files
# 9c31777 verified
# raw
# history blame
# 12.6 kB
import io
from textwrap import dedent
from unittest.mock import MagicMock, patch
import docker
import PIL.Image
import pytest
from rich.console import Console
from smolagents.default_tools import FinalAnswerTool, WikipediaSearchTool
from smolagents.monitoring import AgentLogger, LogLevel
from smolagents.remote_executors import DockerExecutor, E2BExecutor, RemotePythonExecutor
from smolagents.utils import AgentError
from .utils.markers import require_run_all
class TestRemotePythonExecutor:
    """Unit tests for ``RemotePythonExecutor.send_tools`` with the code runner replaced by a mock."""

    def test_send_tools_empty_tools(self):
        """An empty tool mapping triggers exactly one code run and no ``pip install``."""
        remote = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
        remote.run_code_raise_errors = MagicMock()

        remote.send_tools({})

        runner = remote.run_code_raise_errors
        assert runner.call_count == 1
        # No new packages should be installed
        sent_code = runner.call_args.args[0]
        assert "!pip install" not in sent_code

    @require_run_all
    def test_send_tools_with_default_wikipedia_search_tool(self):
        """Sending the Wikipedia tool first installs ``wikipedia-api``, then ships the tool class."""
        wikipedia_tool = WikipediaSearchTool()
        remote = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
        runner = MagicMock(return_value=(None, "", False))
        remote.run_code_raise_errors = runner

        remote.send_tools({"wikipedia_search": wikipedia_tool})

        assert runner.call_count == 2
        # First call is the package installation, second carries the tool definition.
        install_call, definition_call = runner.call_args_list
        assert install_call.args[0] == "!pip install wikipedia-api"
        assert "class WikipediaSearchTool(Tool)" in definition_call.args[0]
class TestE2BExecutorUnit:
    """Unit tests for ``E2BExecutor`` with ``e2b_code_interpreter.Sandbox`` patched out."""

    def test_e2b_executor_instantiation(self):
        """The constructor creates one sandbox with the supplied settings and stores it."""
        fake_logger = MagicMock()
        sandbox_settings = {
            "api_key": "dummy-api-key",
            "template": "dummy-template-id",
            "timeout": 60,
        }
        with patch("e2b_code_interpreter.Sandbox") as sandbox_cls:
            sandbox = sandbox_cls.return_value
            # Results of ``commands.run`` and ``run_code`` carry no error.
            sandbox.commands.run.return_value.error = None
            sandbox.run_code.return_value.error = None
            executor = E2BExecutor(additional_imports=[], logger=fake_logger, **sandbox_settings)

        assert isinstance(executor, E2BExecutor)
        assert executor.logger == fake_logger
        assert executor.sandbox == sandbox_cls.return_value
        assert sandbox_cls.call_count == 1
        assert sandbox_cls.call_args.kwargs == sandbox_settings

    def test_cleanup(self):
        """Test that the cleanup method properly shuts down the sandbox"""
        fake_logger = MagicMock()
        with patch("e2b_code_interpreter.Sandbox") as sandbox_cls:
            # Replace the sandbox's ``kill`` with a dedicated mock so calls can be counted
            kill_mock = MagicMock()
            sandbox_cls.return_value.kill = kill_mock

            executor = E2BExecutor(additional_imports=[], logger=fake_logger, api_key="dummy-api-key")
            executor.cleanup()

            # The sandbox must have been killed exactly once
            sandbox_cls.return_value.kill.assert_called_once()
            # Should log start and completion messages
            assert fake_logger.log.call_count >= 2
@pytest.fixture
def e2b_executor():
    """Yield an ``E2BExecutor`` with pillow and numpy as additional imports, then clean it up."""
    # The rich console writes into an in-memory buffer instead of a terminal.
    log_sink = io.StringIO()
    console = Console(force_terminal=False, file=log_sink)
    agent_logger = AgentLogger(LogLevel.INFO, console)
    remote = E2BExecutor(additional_imports=["pillow", "numpy"], logger=agent_logger)
    yield remote
    # Teardown: runs once the test using the fixture has finished.
    remote.cleanup()
@require_run_all
class TestE2BExecutorIntegration:
    """Integration tests that run code snippets through the ``e2b_executor`` fixture.

    Each test sends a ``final_answer`` tool to the executor, runs a snippet and
    checks the ``(result, logs, is_final_answer)`` triple returned by calling
    the executor.
    """

    @pytest.fixture(autouse=True)
    def set_executor(self, e2b_executor):
        """Expose the fixture-provided executor as ``self.executor`` for every test."""
        self.executor = e2b_executor

    @pytest.mark.parametrize(
        "code_action, expected_result",
        [
            # Triple-quoted multiline string passed directly to final_answer.
            (
                dedent('''
                    final_answer("""This is
                    a multiline
                    final answer""")
                '''),
                "This is\na multiline\nfinal answer",
            ),
            # The text "final_answer(5)" inside a string literal must not be treated as a call.
            (
                dedent("""
                    text = '''Text containing
                    final_answer(5)
                    '''
                    final_answer(text)
                """),
                "Text containing\nfinal_answer(5)\n",
            ),
            # final_answer called from a conditional branch. The branch bodies must be
            # indented relative to ``if``/``elif``: ``dedent`` strips only the common
            # margin, so this nesting survives in the code that is executed.
            (
                dedent("""
                    num = 2
                    if num == 1:
                        final_answer("One")
                    elif num == 2:
                        final_answer("Two")
                """),
                "Two",
            ),
        ],
    )
    def test_final_answer_patterns(self, code_action, expected_result):
        """Each snippet must end the run with ``expected_result`` as the final answer."""
        self.executor.send_tools({"final_answer": FinalAnswerTool()})
        result, _logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == expected_result

    def test_custom_final_answer(self):
        """A ``FinalAnswerTool`` subclass overriding ``forward`` shapes the returned result."""

        class CustomFinalAnswerTool(FinalAnswerTool):
            def forward(self, answer: str) -> str:
                return "CUSTOM" + answer

        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
        code_action = dedent("""
            final_answer(answer="_answer")
        """)
        result, _logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == "CUSTOM_answer"

    def test_custom_final_answer_with_custom_inputs(self):
        """A final-answer tool declaring its own ``inputs`` accepts them as keyword arguments."""

        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
            inputs = {
                "answer1": {"type": "string", "description": "First part of the answer."},
                "answer2": {"type": "string", "description": "Second part of the answer."},
            }

            def forward(self, answer1: str, answer2: str) -> str:
                return answer1 + "CUSTOM" + answer2

        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
        code_action = dedent("""
            final_answer(
                answer1="answer1_",
                answer2="_answer2"
            )
        """)
        result, _logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == "answer1_CUSTOM_answer2"
@pytest.fixture
def docker_executor():
    """Yield a ``DockerExecutor`` with pillow and numpy as additional imports, then delete it."""
    # The rich console writes into an in-memory buffer instead of a terminal.
    log_sink = io.StringIO()
    console = Console(force_terminal=False, file=log_sink)
    agent_logger = AgentLogger(LogLevel.INFO, console)
    remote = DockerExecutor(additional_imports=["pillow", "numpy"], logger=agent_logger)
    yield remote
    # Teardown: runs once the test using the fixture has finished.
    remote.delete()
@require_run_all
class TestDockerExecutorIntegration:
    """Integration tests that run code snippets through the ``docker_executor`` fixture.

    Calling the executor with a code string returns a
    ``(result, logs, is_final_answer)`` triple, which the tests inspect.
    """

    @pytest.fixture(autouse=True)
    def set_executor(self, docker_executor):
        """Expose the fixture-provided executor as ``self.executor`` for every test."""
        self.executor = docker_executor

    def test_initialization(self):
        """Check if DockerExecutor initializes without errors"""
        assert self.executor.container is not None, "Container should be initialized"

    def test_state_persistence(self):
        """Test that variables and imports from one snippet persist in the next"""
        self.executor("import numpy as np; a = 2")
        # Second snippet relies on both ``np`` and ``a`` from the first one.
        _result, logs, _final_answer = self.executor("print(np.sqrt(a))")
        assert "1.41421" in logs

    def test_execute_output(self):
        """Test execution that returns a string"""
        code_action = 'final_answer("This is the final answer")'
        result, _logs, _final_answer = self.executor(code_action)
        assert result == "This is the final answer", "Result should be 'This is the final answer'"

    def test_execute_multiline_output(self):
        """Test a two-line snippet that passes a variable to final_answer"""
        code_action = 'result = "This is the final answer"\nfinal_answer(result)'
        result, _logs, _final_answer = self.executor(code_action)
        assert result == "This is the final answer", "Result should be 'This is the final answer'"

    def test_execute_image_output(self):
        """Test that a PIL image given to final_answer comes back as a PIL image"""
        code_action = dedent("""
            import base64
            from PIL import Image
            from io import BytesIO
            image = Image.new("RGB", (10, 10), (255, 0, 0))
            final_answer(image)
        """)
        result, _logs, _final_answer = self.executor(code_action)
        assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image"

    def test_syntax_error_handling(self):
        """Test handling of syntax errors"""
        code_action = 'print("Missing Parenthesis'  # Syntax error
        with pytest.raises(AgentError) as exception_info:
            self.executor(code_action)
        # Must sit outside the ``with`` block: statements after the raising call
        # inside it would never run.
        assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error"

    def test_cleanup_on_deletion(self):
        """Test if Docker container stops and removes on deletion"""
        container_id = self.executor.container.id
        self.executor.delete()  # Trigger cleanup

        client = docker.from_env()
        try:
            # ``all=True`` includes stopped containers, not only running ones.
            remaining_ids = [container.id for container in client.containers.list(all=True)]
        finally:
            # Release the client's connections instead of leaking them.
            client.close()
        assert container_id not in remaining_ids, "Container should be removed"

    @pytest.mark.parametrize(
        "code_action, expected_result",
        [
            # Triple-quoted multiline string passed directly to final_answer.
            (
                dedent('''
                    final_answer("""This is
                    a multiline
                    final answer""")
                '''),
                "This is\na multiline\nfinal answer",
            ),
            # The text "final_answer(5)" inside a string literal must not be treated as a call.
            (
                dedent("""
                    text = '''Text containing
                    final_answer(5)
                    '''
                    final_answer(text)
                """),
                "Text containing\nfinal_answer(5)\n",
            ),
            # final_answer called from a conditional branch. The branch bodies must be
            # indented relative to ``if``/``elif``: ``dedent`` strips only the common
            # margin, so this nesting survives in the code that is executed.
            (
                dedent("""
                    num = 2
                    if num == 1:
                        final_answer("One")
                    elif num == 2:
                        final_answer("Two")
                """),
                "Two",
            ),
        ],
    )
    def test_final_answer_patterns(self, code_action, expected_result):
        """Each snippet must end the run with ``expected_result`` as the final answer."""
        self.executor.send_tools({"final_answer": FinalAnswerTool()})
        result, _logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == expected_result

    def test_custom_final_answer(self):
        """A ``FinalAnswerTool`` subclass overriding ``forward`` shapes the returned result."""

        class CustomFinalAnswerTool(FinalAnswerTool):
            def forward(self, answer: str) -> str:
                return "CUSTOM" + answer

        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
        code_action = dedent("""
            final_answer(answer="_answer")
        """)
        result, _logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == "CUSTOM_answer"

    def test_custom_final_answer_with_custom_inputs(self):
        """A final-answer tool declaring its own ``inputs`` accepts them as keyword arguments."""

        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
            inputs = {
                "answer1": {"type": "string", "description": "First part of the answer."},
                "answer2": {"type": "string", "description": "Second part of the answer."},
            }

            def forward(self, answer1: str, answer2: str) -> str:
                return answer1 + "CUSTOM" + answer2

        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
        code_action = dedent("""
            final_answer(
                answer1="answer1_",
                answer2="_answer2"
            )
        """)
        result, _logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == "answer1_CUSTOM_answer2"
class TestDockerExecutorUnit:
    """Unit tests for ``DockerExecutor`` with Docker, HTTP and websocket access patched out."""

    def test_cleanup(self):
        """Test that cleanup properly stops and removes the container"""
        fake_logger = MagicMock()
        with (
            patch("docker.from_env") as from_env,
            patch("requests.post") as http_post,
            patch("websocket.create_connection"),
        ):
            # Fake container handed back by ``containers.run``
            container = MagicMock(status="running", short_id="test123")
            docker_client = from_env.return_value
            docker_client.containers.run.return_value = container
            docker_client.images.get.return_value = MagicMock()

            # Fake response for the POST request: status 201 and a JSON body with an id
            response = http_post.return_value
            response.status_code = 201
            response.json.return_value = {"id": "test-kernel-id"}

            executor = DockerExecutor(additional_imports=[], logger=fake_logger, build_new_image=False)
            executor.cleanup()

            # The container must have been stopped and removed exactly once each
            container.stop.assert_called_once()
            container.remove.assert_called_once()