Final_Assignment_Template3

Running

App Files Files Community

Final_Assignment_Template3 / tests /test_remote_executors.py

Duibonduil

Upload 21 files

9c31777 verified 6 days ago

raw

history blame

12.6 kB

	import io
	from textwrap import dedent
	from unittest.mock import MagicMock, patch

	import docker
	import PIL.Image
	import pytest
	from rich.console import Console

	from smolagents.default_tools import FinalAnswerTool, WikipediaSearchTool
	from smolagents.monitoring import AgentLogger, LogLevel
	from smolagents.remote_executors import DockerExecutor, E2BExecutor, RemotePythonExecutor
	from smolagents.utils import AgentError

	from .utils.markers import require_run_all


	class TestRemotePythonExecutor:
	    """Unit tests for ``RemotePythonExecutor.send_tools`` with code execution mocked out."""

	    def test_send_tools_empty_tools(self):
	        """An empty tool mapping triggers exactly one code run and no pip install."""
	        run_mock = MagicMock()
	        remote = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
	        remote.run_code_raise_errors = run_mock

	        remote.send_tools({})

	        assert remote.run_code_raise_errors.call_count == 1
	        # No new packages should be installed
	        sent_code = remote.run_code_raise_errors.call_args.args[0]
	        assert "!pip install" not in sent_code

	    @require_run_all
	    def test_send_tools_with_default_wikipedia_search_tool(self):
	        """The Wikipedia tool causes a pip install call followed by the tool definition."""
	        remote = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
	        # Every mocked run reports the 3-tuple (None, "", False)
	        remote.run_code_raise_errors = MagicMock(return_value=(None, "", False))

	        remote.send_tools({"wikipedia_search": WikipediaSearchTool()})

	        recorded_calls = remote.run_code_raise_errors.call_args_list
	        assert remote.run_code_raise_errors.call_count == 2
	        install_code = recorded_calls[0].args[0]
	        definition_code = recorded_calls[1].args[0]
	        assert "!pip install wikipedia-api" == install_code
	        assert "class WikipediaSearchTool(Tool)" in definition_code


	class TestE2BExecutorUnit:
	    """Unit tests for ``E2BExecutor`` with ``e2b_code_interpreter.Sandbox`` patched."""

	    def test_e2b_executor_instantiation(self):
	        """The constructor creates one sandbox and passes it the connection settings."""
	        logger = MagicMock()
	        sandbox_settings = {
	            "api_key": "dummy-api-key",
	            "template": "dummy-template-id",
	            "timeout": 60,
	        }
	        with patch("e2b_code_interpreter.Sandbox") as sandbox_cls:
	            sandbox = sandbox_cls.return_value
	            # Make both shell commands and code runs report no error
	            sandbox.commands.run.return_value.error = None
	            sandbox.run_code.return_value.error = None

	            executor = E2BExecutor(additional_imports=[], logger=logger, **sandbox_settings)

	            assert isinstance(executor, E2BExecutor)
	            assert executor.logger == logger
	            assert executor.sandbox == sandbox_cls.return_value
	            assert sandbox_cls.call_count == 1
	            assert sandbox_cls.call_args.kwargs == sandbox_settings

	    def test_cleanup(self):
	        """Test that the cleanup method properly shuts down the sandbox"""
	        logger = MagicMock()
	        with patch("e2b_code_interpreter.Sandbox") as sandbox_cls:
	            # Setup mock
	            kill_mock = MagicMock()
	            sandbox_cls.return_value.kill = kill_mock

	            # Create the executor, then ask it to clean up
	            executor = E2BExecutor(additional_imports=[], logger=logger, api_key="dummy-api-key")
	            executor.cleanup()

	            # Verify sandbox was killed
	            sandbox_cls.return_value.kill.assert_called_once()
	            # Should log start and completion messages
	            assert logger.log.call_count >= 2


	@pytest.fixture
	def e2b_executor():
	    """Yield an ``E2BExecutor`` with pillow and numpy as additional imports, then clean it up."""
	    # Rich console output goes to an in-memory buffer
	    buffer_console = Console(force_terminal=False, file=io.StringIO())
	    agent_logger = AgentLogger(LogLevel.INFO, buffer_console)
	    sandbox_executor = E2BExecutor(additional_imports=["pillow", "numpy"], logger=agent_logger)
	    yield sandbox_executor
	    sandbox_executor.cleanup()


	@require_run_all
	class TestE2BExecutorIntegration:
	    """Integration tests that run code through an unmocked ``E2BExecutor``.

	    The whole class is wrapped in ``require_run_all``. Each test receives its
	    executor from the ``e2b_executor`` fixture via ``set_executor``.
	    """

	    @pytest.fixture(autouse=True)
	    def set_executor(self, e2b_executor):
	        """Expose the fixture-provided executor as ``self.executor``."""
	        self.executor = e2b_executor

	    @pytest.mark.parametrize(
	        "code_action, expected_result",
	        [
	            # final_answer called with a multiline string literal
	            (
	                dedent('''
	                    final_answer("""This is
	                    a multiline
	                    final answer""")
	                '''),
	                "This is\na multiline\nfinal answer",
	            ),
	            # The text "final_answer(5)" inside a string must not be taken as the answer
	            (
	                dedent("""
	                    text = '''Text containing
	                    final_answer(5)
	                    '''
	                    final_answer(text)
	                """),
	                "Text containing\nfinal_answer(5)\n",
	            ),
	            # final_answer called from a conditional branch; the branch bodies
	            # must be indented for the snippet to be valid Python
	            (
	                dedent("""
	                    num = 2
	                    if num == 1:
	                        final_answer("One")
	                    elif num == 2:
	                        final_answer("Two")
	                """),
	                "Two",
	            ),
	        ],
	    )
	    def test_final_answer_patterns(self, code_action, expected_result):
	        """Each snippet must end the run with ``expected_result`` as the final answer."""
	        self.executor.send_tools({"final_answer": FinalAnswerTool()})
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == expected_result

	    def test_custom_final_answer(self):
	        """A ``FinalAnswerTool`` subclass with its own ``forward`` shapes the result."""

	        class CustomFinalAnswerTool(FinalAnswerTool):
	            def forward(self, answer: str) -> str:
	                return "CUSTOM" + answer

	        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
	        code_action = dedent("""
	            final_answer(answer="_answer")
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == "CUSTOM_answer"

	    def test_custom_final_answer_with_custom_inputs(self):
	        """A final-answer tool may declare several named inputs of its own."""

	        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
	            inputs = {
	                "answer1": {"type": "string", "description": "First part of the answer."},
	                "answer2": {"type": "string", "description": "Second part of the answer."},
	            }

	            def forward(self, answer1: str, answer2: str) -> str:
	                return answer1 + "CUSTOM" + answer2

	        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
	        code_action = dedent("""
	            final_answer(
	            answer1="answer1_",
	            answer2="_answer2"
	            )
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == "answer1_CUSTOM_answer2"


	@pytest.fixture
	def docker_executor():
	    """Yield a ``DockerExecutor`` with pillow and numpy as additional imports, then delete it."""
	    # Rich console output goes to an in-memory buffer
	    buffer_console = Console(force_terminal=False, file=io.StringIO())
	    agent_logger = AgentLogger(LogLevel.INFO, buffer_console)
	    container_executor = DockerExecutor(additional_imports=["pillow", "numpy"], logger=agent_logger)
	    yield container_executor
	    container_executor.delete()


	@require_run_all
	class TestDockerExecutorIntegration:
	    """Integration tests that run code through an unmocked ``DockerExecutor``.

	    The whole class is wrapped in ``require_run_all``. Each test receives its
	    executor from the ``docker_executor`` fixture via ``set_executor``.
	    """

	    @pytest.fixture(autouse=True)
	    def set_executor(self, docker_executor):
	        """Expose the fixture-provided executor as ``self.executor``."""
	        self.executor = docker_executor

	    def test_initialization(self):
	        """Check if DockerExecutor initializes without errors"""
	        assert self.executor.container is not None, "Container should be initialized"

	    def test_state_persistence(self):
	        """Test that variables and imports from one snippet persist in the next"""
	        code_action = "import numpy as np; a = 2"
	        self.executor(code_action)

	        code_action = "print(np.sqrt(a))"
	        result, logs, final_answer = self.executor(code_action)
	        assert "1.41421" in logs

	    def test_execute_output(self):
	        """Test execution that returns a string"""
	        code_action = 'final_answer("This is the final answer")'
	        result, logs, final_answer = self.executor(code_action)
	        assert result == "This is the final answer", "Result should be 'This is the final answer'"

	    def test_execute_multiline_output(self):
	        """Test a two-line snippet that passes a variable to final_answer"""
	        code_action = 'result = "This is the final answer"\nfinal_answer(result)'
	        result, logs, final_answer = self.executor(code_action)
	        assert result == "This is the final answer", "Result should be 'This is the final answer'"

	    def test_execute_image_output(self):
	        """Test execution whose final answer is an image, received as a PIL Image"""
	        code_action = dedent("""
	            import base64
	            from PIL import Image
	            from io import BytesIO
	            image = Image.new("RGB", (10, 10), (255, 0, 0))
	            final_answer(image)
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image"

	    def test_syntax_error_handling(self):
	        """Test handling of syntax errors"""
	        code_action = 'print("Missing Parenthesis'  # Syntax error
	        with pytest.raises(AgentError) as exception_info:
	            self.executor(code_action)
	        assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error"

	    def test_cleanup_on_deletion(self):
	        """Test if Docker container stops and removes on deletion"""
	        container_id = self.executor.container.id
	        self.executor.delete()  # Trigger cleanup

	        client = docker.from_env()
	        # all=True also lists stopped containers, so a merely stopped one still fails
	        containers = [c.id for c in client.containers.list(all=True)]
	        assert container_id not in containers, "Container should be removed"

	    @pytest.mark.parametrize(
	        "code_action, expected_result",
	        [
	            # final_answer called with a multiline string literal
	            (
	                dedent('''
	                    final_answer("""This is
	                    a multiline
	                    final answer""")
	                '''),
	                "This is\na multiline\nfinal answer",
	            ),
	            # The text "final_answer(5)" inside a string must not be taken as the answer
	            (
	                dedent("""
	                    text = '''Text containing
	                    final_answer(5)
	                    '''
	                    final_answer(text)
	                """),
	                "Text containing\nfinal_answer(5)\n",
	            ),
	            # final_answer called from a conditional branch; the branch bodies
	            # must be indented for the snippet to be valid Python
	            (
	                dedent("""
	                    num = 2
	                    if num == 1:
	                        final_answer("One")
	                    elif num == 2:
	                        final_answer("Two")
	                """),
	                "Two",
	            ),
	        ],
	    )
	    def test_final_answer_patterns(self, code_action, expected_result):
	        """Each snippet must end the run with ``expected_result`` as the final answer."""
	        self.executor.send_tools({"final_answer": FinalAnswerTool()})
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == expected_result

	    def test_custom_final_answer(self):
	        """A ``FinalAnswerTool`` subclass with its own ``forward`` shapes the result."""

	        class CustomFinalAnswerTool(FinalAnswerTool):
	            def forward(self, answer: str) -> str:
	                return "CUSTOM" + answer

	        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
	        code_action = dedent("""
	            final_answer(answer="_answer")
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == "CUSTOM_answer"

	    def test_custom_final_answer_with_custom_inputs(self):
	        """A final-answer tool may declare several named inputs of its own."""

	        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
	            inputs = {
	                "answer1": {"type": "string", "description": "First part of the answer."},
	                "answer2": {"type": "string", "description": "Second part of the answer."},
	            }

	            def forward(self, answer1: str, answer2: str) -> str:
	                return answer1 + "CUSTOM" + answer2

	        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
	        code_action = dedent("""
	            final_answer(
	            answer1="answer1_",
	            answer2="_answer2"
	            )
	        """)
	        result, logs, final_answer = self.executor(code_action)
	        assert final_answer is True
	        assert result == "answer1_CUSTOM_answer2"


	class TestDockerExecutorUnit:
	    """Unit tests for ``DockerExecutor`` with Docker, HTTP and websocket calls patched."""

	    def test_cleanup(self):
	        """Test that cleanup properly stops and removes the container"""
	        logger = MagicMock()
	        with (
	            patch("docker.from_env") as from_env_mock,
	            patch("requests.post") as post_mock,
	            patch("websocket.create_connection"),
	        ):
	            # Fake container returned by the patched Docker client
	            fake_container = MagicMock(status="running", short_id="test123")
	            docker_client = from_env_mock.return_value
	            docker_client.images.get.return_value = MagicMock()
	            docker_client.containers.run.return_value = fake_container

	            # Fake response returned by the patched requests.post
	            post_response = post_mock.return_value
	            post_response.json.return_value = {"id": "test-kernel-id"}
	            post_response.status_code = 201

	            # Build the executor against the mocks, then clean it up
	            executor = DockerExecutor(additional_imports=[], logger=logger, build_new_image=False)
	            executor.cleanup()

	            # Verify container was stopped and removed
	            fake_container.stop.assert_called_once()
	            fake_container.remove.assert_called_once()