Final_Assignment_Template3

Running

App Files Files Community

Final_Assignment_Template3 / tests /test_remote_executors.py

Duibonduil

Upload 21 files

9c31777 verified about 13 hours ago

raw

history blame contribute delete

12.6 kB

	import io
	from textwrap import dedent
	from unittest.mock import MagicMock, patch

	import docker
	import PIL.Image
	import pytest
	from rich.console import Console

	from smolagents.default_tools import FinalAnswerTool, WikipediaSearchTool
	from smolagents.monitoring import AgentLogger, LogLevel
	from smolagents.remote_executors import DockerExecutor, E2BExecutor, RemotePythonExecutor
	from smolagents.utils import AgentError

	from .utils.markers import require_run_all


	class TestRemotePythonExecutor:
	    """Unit tests for ``RemotePythonExecutor.send_tools`` with the code-running call mocked."""

	    def test_send_tools_empty_tools(self):
	        """An empty tool mapping triggers exactly one code run and no ``pip install``."""
	        run_code = MagicMock()
	        executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
	        executor.run_code_raise_errors = run_code

	        executor.send_tools({})

	        assert run_code.call_count == 1
	        # No new packages should be installed
	        sent_code = run_code.call_args.args[0]
	        assert "!pip install" not in sent_code

	    @require_run_all
	    def test_send_tools_with_default_wikipedia_search_tool(self):
	        """The Wikipedia tool triggers an install call followed by a call carrying its class source."""
	        tool = WikipediaSearchTool()
	        executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
	        # The mocked runner returns a 3-tuple, as set up for this test.
	        run_code = MagicMock(return_value=(None, "", False))
	        executor.run_code_raise_errors = run_code

	        executor.send_tools({"wikipedia_search": tool})

	        assert run_code.call_count == 2
	        install_call, definition_call = run_code.call_args_list
	        assert install_call.args[0] == "!pip install wikipedia-api"
	        assert "class WikipediaSearchTool(Tool)" in definition_call.args[0]


	class TestE2BExecutorUnit:
	    """Unit tests for ``E2BExecutor`` with ``e2b_code_interpreter.Sandbox`` patched out."""

	    def test_e2b_executor_instantiation(self):
	        """The constructor keeps the logger, stores the sandbox and forwards extra kwargs to ``Sandbox``."""
	        logger = MagicMock()
	        sandbox_kwargs = {
	            "api_key": "dummy-api-key",
	            "template": "dummy-template-id",
	            "timeout": 60,
	        }
	        with patch("e2b_code_interpreter.Sandbox") as sandbox_cls:
	            sandbox = sandbox_cls.return_value
	            # Make both sandbox entry points report "no error"
	            sandbox.commands.run.return_value.error = None
	            sandbox.run_code.return_value.error = None
	            executor = E2BExecutor(additional_imports=[], logger=logger, **sandbox_kwargs)

	        assert isinstance(executor, E2BExecutor)
	        assert executor.logger == logger
	        assert executor.sandbox == sandbox_cls.return_value
	        assert sandbox_cls.call_count == 1
	        assert sandbox_cls.call_args.kwargs == sandbox_kwargs

	    def test_cleanup(self):
	        """Test that the cleanup method properly shuts down the sandbox"""
	        logger = MagicMock()
	        with patch("e2b_code_interpreter.Sandbox") as sandbox_cls:
	            # Dedicated mock so the kill call can be counted
	            kill = MagicMock()
	            sandbox_cls.return_value.kill = kill

	            executor = E2BExecutor(additional_imports=[], logger=logger, api_key="dummy-api-key")
	            executor.cleanup()

	            # Verify sandbox was killed
	            kill.assert_called_once()
	            assert logger.log.call_count >= 2  # Should log start and completion messages


	@pytest.fixture
	def e2b_executor():
	    """Yield an ``E2BExecutor`` created with pillow and numpy; call ``cleanup()`` on teardown."""
	    # The Rich console writes into an in-memory buffer instead of a terminal.
	    console = Console(force_terminal=False, file=io.StringIO())
	    executor = E2BExecutor(
	        additional_imports=["pillow", "numpy"],
	        logger=AgentLogger(LogLevel.INFO, console),
	    )
	    yield executor
	    executor.cleanup()


	@require_run_all
	class TestE2BExecutorIntegration:
	    """Integration tests that run ``final_answer`` snippets through the ``e2b_executor`` fixture.

	    The whole class is gated by ``require_run_all`` (imported from ``.utils.markers``).
	    """

	    @pytest.fixture(autouse=True)
	    def set_executor(self, e2b_executor):
	        """Expose the ``e2b_executor`` fixture as ``self.executor`` for every test."""
	        self.executor = e2b_executor

	    # Every line of a snippet shares one margin so ``dedent`` strips it completely;
	    # only the bodies of the if/elif branches carry extra (meaningful) indentation.
	    @pytest.mark.parametrize(
	        "code_action, expected_result",
	        [
	            (
	                dedent('''
	                    final_answer("""This is
	                    a multiline
	                    final answer""")
	                '''),
	                "This is\na multiline\nfinal answer",
	            ),
	            (
	                dedent("""
	                    text = '''Text containing
	                    final_answer(5)
	                    '''
	                    final_answer(text)
	                """),
	                "Text containing\nfinal_answer(5)\n",
	            ),
	            (
	                dedent("""
	                    num = 2
	                    if num == 1:
	                        final_answer("One")
	                    elif num == 2:
	                        final_answer("Two")
	                """),
	                "Two",
	            ),
	        ],
	    )
	    def test_final_answer_patterns(self, code_action, expected_result):
	        """Each snippet must be reported as a final answer carrying ``expected_result``."""
	        self.executor.send_tools({"final_answer": FinalAnswerTool()})
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == expected_result

	    def test_custom_final_answer(self):
	        """A ``FinalAnswerTool`` subclass with its own ``forward`` shapes the returned result."""

	        class CustomFinalAnswerTool(FinalAnswerTool):
	            def forward(self, answer: str) -> str:
	                return "CUSTOM" + answer

	        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
	        code_action = dedent("""
	            final_answer(answer="_answer")
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == "CUSTOM_answer"

	    def test_custom_final_answer_with_custom_inputs(self):
	        """A ``FinalAnswerTool`` subclass may declare its own ``inputs`` and take two keyword arguments."""

	        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
	            inputs = {
	                "answer1": {"type": "string", "description": "First part of the answer."},
	                "answer2": {"type": "string", "description": "Second part of the answer."},
	            }

	            def forward(self, answer1: str, answer2: str) -> str:
	                return answer1 + "CUSTOM" + answer2

	        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
	        code_action = dedent("""
	            final_answer(
	                answer1="answer1_",
	                answer2="_answer2"
	            )
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == "answer1_CUSTOM_answer2"


	@pytest.fixture
	def docker_executor():
	    """Yield a ``DockerExecutor`` created with pillow and numpy; call ``delete()`` on teardown."""
	    # The Rich console writes into an in-memory buffer instead of a terminal.
	    console = Console(force_terminal=False, file=io.StringIO())
	    executor = DockerExecutor(
	        additional_imports=["pillow", "numpy"],
	        logger=AgentLogger(LogLevel.INFO, console),
	    )
	    yield executor
	    executor.delete()


	@require_run_all
	class TestDockerExecutorIntegration:
	    """Integration tests that run code through the ``docker_executor`` fixture.

	    The whole class is gated by ``require_run_all`` (imported from ``.utils.markers``).
	    """

	    @pytest.fixture(autouse=True)
	    def set_executor(self, docker_executor):
	        """Expose the ``docker_executor`` fixture as ``self.executor`` for every test."""
	        self.executor = docker_executor

	    def test_initialization(self):
	        """Check if DockerExecutor initializes without errors"""
	        assert self.executor.container is not None, "Container should be initialized"

	    def test_state_persistence(self):
	        """Test that variables and imports from one snippet persist in the next"""
	        code_action = "import numpy as np; a = 2"
	        self.executor(code_action)

	        # Relies on both ``np`` and ``a`` from the previous call: sqrt(2) ~= 1.41421
	        code_action = "print(np.sqrt(a))"
	        result, logs, final_answer = self.executor(code_action)
	        assert "1.41421" in logs

	    def test_execute_output(self):
	        """Test execution that returns a string"""
	        code_action = 'final_answer("This is the final answer")'
	        result, logs, final_answer = self.executor(code_action)
	        assert result == "This is the final answer", "Result should be 'This is the final answer'"

	    def test_execute_multiline_output(self):
	        """Test execution of a two-statement snippet that returns a string"""
	        code_action = 'result = "This is the final answer"\nfinal_answer(result)'
	        result, logs, final_answer = self.executor(code_action)
	        assert result == "This is the final answer", "Result should be 'This is the final answer'"

	    def test_execute_image_output(self):
	        """Test that an image passed to ``final_answer`` comes back as a PIL image"""
	        code_action = dedent("""
	            import base64
	            from PIL import Image
	            from io import BytesIO
	            image = Image.new("RGB", (10, 10), (255, 0, 0))
	            final_answer(image)
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image"

	    def test_syntax_error_handling(self):
	        """Test handling of syntax errors"""
	        code_action = 'print("Missing Parenthesis'  # Syntax error
	        with pytest.raises(AgentError) as exception_info:
	            self.executor(code_action)
	        # Checked after the ``with`` block: code following the raising call inside it would never run.
	        assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error"

	    def test_cleanup_on_deletion(self):
	        """Test if Docker container stops and removes on deletion"""
	        container_id = self.executor.container.id
	        self.executor.delete()  # Trigger cleanup

	        client = docker.from_env()
	        try:
	            # ``all=True`` also lists stopped containers, so a merely stopped one would still be found.
	            containers = [c.id for c in client.containers.list(all=True)]
	        finally:
	            # Close the client so the test does not leave its connections open.
	            client.close()
	        assert container_id not in containers, "Container should be removed"

	    # Every line of a snippet shares one margin so ``dedent`` strips it completely;
	    # only the bodies of the if/elif branches carry extra (meaningful) indentation.
	    @pytest.mark.parametrize(
	        "code_action, expected_result",
	        [
	            (
	                dedent('''
	                    final_answer("""This is
	                    a multiline
	                    final answer""")
	                '''),
	                "This is\na multiline\nfinal answer",
	            ),
	            (
	                dedent("""
	                    text = '''Text containing
	                    final_answer(5)
	                    '''
	                    final_answer(text)
	                """),
	                "Text containing\nfinal_answer(5)\n",
	            ),
	            (
	                dedent("""
	                    num = 2
	                    if num == 1:
	                        final_answer("One")
	                    elif num == 2:
	                        final_answer("Two")
	                """),
	                "Two",
	            ),
	        ],
	    )
	    def test_final_answer_patterns(self, code_action, expected_result):
	        """Each snippet must be reported as a final answer carrying ``expected_result``."""
	        self.executor.send_tools({"final_answer": FinalAnswerTool()})
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == expected_result

	    def test_custom_final_answer(self):
	        """A ``FinalAnswerTool`` subclass with its own ``forward`` shapes the returned result."""

	        class CustomFinalAnswerTool(FinalAnswerTool):
	            def forward(self, answer: str) -> str:
	                return "CUSTOM" + answer

	        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
	        code_action = dedent("""
	            final_answer(answer="_answer")
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == "CUSTOM_answer"

	    def test_custom_final_answer_with_custom_inputs(self):
	        """A ``FinalAnswerTool`` subclass may declare its own ``inputs`` and take two keyword arguments."""

	        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
	            inputs = {
	                "answer1": {"type": "string", "description": "First part of the answer."},
	                "answer2": {"type": "string", "description": "Second part of the answer."},
	            }

	            def forward(self, answer1: str, answer2: str) -> str:
	                return answer1 + "CUSTOM" + answer2

	        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
	        code_action = dedent("""
	            final_answer(
	                answer1="answer1_",
	                answer2="_answer2"
	            )
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == "answer1_CUSTOM_answer2"


	class TestDockerExecutorUnit:
	    """Unit tests for ``DockerExecutor`` with the Docker client, ``requests.post`` and websocket patched."""

	    def test_cleanup(self):
	        """Test that cleanup properly stops and removes the container"""
	        logger = MagicMock()
	        with (
	            patch("docker.from_env") as from_env,
	            patch("requests.post") as http_post,
	            patch("websocket.create_connection"),
	        ):
	            # Fake container handed back by the patched client's ``containers.run``
	            container = MagicMock(status="running", short_id="test123")
	            docker_client = from_env.return_value
	            docker_client.containers.run.return_value = container
	            docker_client.images.get.return_value = MagicMock()

	            # Canned response for the patched ``requests.post``
	            response = http_post.return_value
	            response.status_code = 201
	            response.json.return_value = {"id": "test-kernel-id"}

	            executor = DockerExecutor(additional_imports=[], logger=logger, build_new_image=False)
	            executor.cleanup()

	            # Verify container was stopped and removed
	            container.stop.assert_called_once()
	            container.remove.assert_called_once()