"""Unit tests for ``WebBrowser.browse`` with ``requests.get`` mocked out.

No network access is performed: every test patches ``requests.get`` as it is
looked up from ``src.web_browsing_tool`` and feeds the browser a canned
response or exception.
"""

import os
import sys
import unittest
from unittest.mock import MagicMock, patch

import requests  # Imported for its exception types.

# Add the parent directory to sys.path so the ``src`` package can be found.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Adjust the import path based on your project structure.
# If web_browsing_tool.py is in a 'src' directory:
from src.web_browsing_tool import WebBrowser  # noqa: E402
# If web_browser.py is in the same directory as app.py (and tools are in a 'tools' subdir):
# from tools.web_browser import WebBrowser

# Patch target: ``requests.get`` as referenced from the module under test.
_REQUESTS_GET = 'src.web_browsing_tool.requests.get'

# User agent handed to the browser in setUp, and the request arguments the
# tests expect the browser to forward to ``requests.get``.
_USER_AGENT = "TestAgent/1.0"
_EXPECTED_HEADERS = {"User-Agent": _USER_AGENT}
_EXPECTED_TIMEOUT = 15


def _ok_response(content):
    """Return a mock of a successful HTTP response whose body is *content* (bytes)."""
    response = MagicMock()
    response.status_code = 200
    response.content = content
    response.raise_for_status = MagicMock()  # Ensure this doesn't raise an error.
    return response


class TestWebBrowser(unittest.TestCase):
    """Exercises ``WebBrowser.browse`` for success, error and edge-case paths."""

    def setUp(self):
        """Create a fresh browser with a recognizable user agent for each test."""
        self.browser = WebBrowser(user_agent=_USER_AGENT)

    @patch(_REQUESTS_GET)
    def test_browse_successful_fetch_and_parse(self, mock_get):
        """A 200 response is fetched with the expected arguments and reduced to text."""
        # NOTE(review): fixture markup reconstructed so that the title and the
        # paragraph are the only two text nodes; confirm against the parser
        # used by WebBrowser.
        mock_get.return_value = _ok_response(
            b"<html><head><title>Test Page</title></head>"
            b"<body><p>Hello World!</p></body></html>"
        )

        url = "http://example.com/testpage"
        result = self.browser.browse(url)

        mock_get.assert_called_once_with(
            url, headers=_EXPECTED_HEADERS, timeout=_EXPECTED_TIMEOUT
        )
        self.assertEqual(result, "Test Page\nHello World!")

    @patch(_REQUESTS_GET)
    def test_browse_http_error(self, mock_get):
        """An HTTPError raised by ``requests.get`` is reported as an error string."""
        mock_get.side_effect = requests.exceptions.HTTPError(
            "404 Client Error: Not Found for url"
        )

        url = "http://example.com/notfound"
        result = self.browser.browse(url)

        mock_get.assert_called_once_with(
            url, headers=_EXPECTED_HEADERS, timeout=_EXPECTED_TIMEOUT
        )
        self.assertTrue(result.startswith("Error: HTTP error occurred"))
        self.assertIn("404 Client Error", result)

    @patch(_REQUESTS_GET)
    def test_browse_connection_error(self, mock_get):
        """A ConnectionError is reported as an error string carrying its message."""
        mock_get.side_effect = requests.exceptions.ConnectionError("Connection refused")

        url = "http://example.com/unreachable"
        result = self.browser.browse(url)

        self.assertTrue(result.startswith("Error: Connection error occurred"))
        self.assertIn("Connection refused", result)

    @patch(_REQUESTS_GET)
    def test_browse_timeout_error(self, mock_get):
        """A Timeout is reported as an error string carrying its message."""
        mock_get.side_effect = requests.exceptions.Timeout("Request timed out")

        url = "http://example.com/slowresponse"
        result = self.browser.browse(url)

        self.assertTrue(result.startswith("Error: Timeout occurred"))
        self.assertIn("Request timed out", result)

    @patch(_REQUESTS_GET)
    def test_browse_generic_request_exception(self, mock_get):
        """Any other RequestException falls through to the generic fetch error."""
        mock_get.side_effect = requests.exceptions.RequestException(
            "Some other request error"
        )

        url = "http://example.com/othererror"
        result = self.browser.browse(url)

        self.assertTrue(
            result.startswith("Error: An unexpected error occurred while fetching")
        )
        self.assertIn("Some other request error", result)

    def test_browse_invalid_url_format(self):
        """A URL without an http:// or https:// scheme is rejected."""
        url = "www.example.com"  # Missing http:// or https://
        result = self.browser.browse(url)

        self.assertEqual(
            result,
            "Error: Invalid URL format. URL must start with http:// or https://. "
            "Received: www.example.com",
        )

    @patch(_REQUESTS_GET)
    def test_browse_no_text_content(self, mock_get):
        """A successful response with no extractable text yields a specific error."""
        mock_get.return_value = _ok_response(b"")

        url = "http://example.com/notext"
        result = self.browser.browse(url)

        self.assertEqual(result, f"Error: No text content found at {url}.")

    @patch(_REQUESTS_GET)
    def test_browse_strips_extra_whitespace_and_newlines(self, mock_get):
        """Whitespace-only text between elements is dropped from the output."""
        # NOTE(review): fixture markup reconstructed from the expected text.
        # The "\n\n" between paragraphs is a whitespace-only node that must
        # vanish; the "\n" inside "Line\n3" is expected to survive.
        mock_get.return_value = _ok_response(
            b"<html><body>"
            b"<p>Line 1</p>"
            b"<p>Line 2</p>"
            b"\n\n"
            b"<p>Line\n3</p>"
            b"<div>Text</div>"
            b"</body></html>"
        )

        url = "http://example.com/whitespace"
        result = self.browser.browse(url)

        expected_text = "Line 1\nLine 2\nLine\n3\nText"
        self.assertEqual(result, expected_text)

    @patch(_REQUESTS_GET)
    def test_browse_for_question_answering_scenario_mercedes_sosa(self, mock_get):
        """
        Tests if the browser can extract relevant text for a question similar to
        the Mercedes Sosa studio albums count.
        """
        # Use a regular string for HTML content (it contains non-ASCII
        # characters) and encode it to bytes below.
        # NOTE(review): the list items are reconstructed from the assertions;
        # each entry is a single text node so "<year> - <title>" stays on one
        # line in the extracted text.
        mock_html_content_str = """
        <html>
        <head><title>Mercedes Sosa Discography</title></head>
        <body>
            <h1>Mercedes Sosa</h1>
            <h2>Studio Albums</h2>
            <ul>
                <li>1999 - Misa Criolla</li>
                <li>2002 - Acústico</li>
                <li>2005 - Corazón libre</li>
                <li>2009 - Cantora 1</li>
                <li>2011 - Canto para caminar</li>
            </ul>
            <h2>Live Albums</h2>
            <ul>
                <li>2000 - Live in Concert</li>
            </ul>
        </body>
        </html>
        """
        mock_get.return_value = _ok_response(mock_html_content_str.encode('utf-8'))

        url = "http://example.com/mercedes_sosa_discography"
        result = self.browser.browse(url)

        # Assert that key information is present in the extracted text.
        self.assertIn("Mercedes Sosa Discography", result)  # From title
        self.assertIn("Studio Albums", result)
        self.assertIn("1999 - Misa Criolla", result)
        self.assertIn("2002 - Acústico", result)
        self.assertIn("2005 - Corazón libre", result)
        self.assertIn("2009 - Cantora 1", result)
        self.assertIn("2011 - Canto para caminar", result)

        # Ensure it doesn't just grab everything indiscriminately or miss sections.
        self.assertIn("Live Albums", result)
        self.assertIn("2000 - Live in Concert", result)

        # A further step (outside this tool's direct responsibility but for agent
        # context) would be to pass this 'result' to an LLM with the question:
        # "How many studio albums were published by Mercedes Sosa between 2000
        # and 2009 (included)?"
        # The LLM should be able to parse the structured list and count
        # "Acústico", "Corazón libre", "Cantora 1" -> 3.


if __name__ == '__main__':
    unittest.main()