File size: 7,258 Bytes
c467d81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import unittest
from unittest.mock import patch, MagicMock
import requests # Import requests for its exception types

import os
import sys

# Add the parent directory to sys.path to find the src module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Adjust the import path based on your project structure.
# If web_browsing_tool.py is in a 'src' directory:
from src.web_browsing_tool import WebBrowser

# If web_browser.py is in the same directory as app.py (and tools are in a 'tools' subdir):
# from tools.web_browser import WebBrowser

class TestWebBrowser(unittest.TestCase):
    """Unit tests for ``WebBrowser.browse``.

    Every test that expects a fetch patches ``requests.get`` as it is looked
    up from ``src.web_browsing_tool``, so the outcome is driven entirely by
    the canned response (or exception) configured in the test.
    """

    def setUp(self):
        """Create a fresh browser with a recognisable User-Agent per test."""
        self.browser = WebBrowser(user_agent="TestAgent/1.0")

    @staticmethod
    def _html_response(content):
        """Return a mock HTTP 200 response carrying ``content`` as its body.

        Args:
            content: Raw response body, as ``bytes``.

        Returns:
            A ``MagicMock`` with ``status_code`` 200, the given ``content``
            and a ``raise_for_status`` that does nothing.
        """
        response = MagicMock()
        response.status_code = 200
        response.content = content
        response.raise_for_status = MagicMock()  # Ensure this doesn't raise an error
        return response

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_successful_fetch_and_parse(self, mock_get):
        """A 200 response yields title and body text, without script content."""
        mock_get.return_value = self._html_response(
            b"<html><head><title>Test Page</title></head><body><p>Hello World!</p><script>alert('test');</script></body></html>"
        )

        url = "http://example.com/testpage"
        result = self.browser.browse(url)

        mock_get.assert_called_once_with(url, headers={"User-Agent": "TestAgent/1.0"}, timeout=15)
        self.assertEqual(result, "Test Page\nHello World!")

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_http_error(self, mock_get):
        """An ``HTTPError`` from the fetch is reported as an error string."""
        mock_get.side_effect = requests.exceptions.HTTPError("404 Client Error: Not Found for url")

        url = "http://example.com/notfound"
        result = self.browser.browse(url)

        mock_get.assert_called_once_with(url, headers={"User-Agent": "TestAgent/1.0"}, timeout=15)
        self.assertTrue(result.startswith("Error: HTTP error occurred"))
        self.assertIn("404 Client Error", result)

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_connection_error(self, mock_get):
        """A ``ConnectionError`` from the fetch is reported as an error string."""
        mock_get.side_effect = requests.exceptions.ConnectionError("Connection refused")

        url = "http://example.com/unreachable"
        result = self.browser.browse(url)
        self.assertTrue(result.startswith("Error: Connection error occurred"))
        self.assertIn("Connection refused", result)

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_timeout_error(self, mock_get):
        """A ``Timeout`` from the fetch is reported as an error string."""
        mock_get.side_effect = requests.exceptions.Timeout("Request timed out")

        url = "http://example.com/slowresponse"
        result = self.browser.browse(url)
        self.assertTrue(result.startswith("Error: Timeout occurred"))
        self.assertIn("Request timed out", result)

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_generic_request_exception(self, mock_get):
        """A bare ``RequestException`` is reported via the generic message."""
        mock_get.side_effect = requests.exceptions.RequestException("Some other request error")

        url = "http://example.com/othererror"
        result = self.browser.browse(url)
        self.assertTrue(result.startswith("Error: An unexpected error occurred while fetching"))
        self.assertIn("Some other request error", result)

    def test_browse_invalid_url_format(self):
        """A URL without an http(s) scheme produces the invalid-format error."""
        url = "www.example.com"  # Missing http:// or https://
        result = self.browser.browse(url)
        self.assertEqual(result, "Error: Invalid URL format. URL must start with http:// or https://. Received: www.example.com")

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_no_text_content(self, mock_get):
        """A page holding only script/style content reports that no text was found."""
        mock_get.return_value = self._html_response(
            b"<html><head><script>var x=1;</script></head><body><style>.body {color:red;}</style></body></html>"
        )

        url = "http://example.com/notext"
        result = self.browser.browse(url)
        self.assertEqual(result, f"Error: No text content found at {url}.")

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_strips_extra_whitespace_and_newlines(self, mock_get):
        """Whitespace between and inside elements is normalised in the output."""
        mock_get.return_value = self._html_response(
            b"<html><body><p>Line 1</p>  <p>Line  2</p>\n\n<p>Line\n3</p><div><span>Text</span></div></body></html>"
        )

        url = "http://example.com/whitespace"
        result = self.browser.browse(url)
        expected_text = "Line 1\nLine 2\nLine\n3\nText"
        self.assertEqual(result, expected_text)

    @patch('src.web_browsing_tool.requests.get')
    def test_browse_for_question_answering_scenario_mercedes_sosa(self, mock_get):
        """
        Tests if the browser can extract relevant text for a question
        similar to the Mercedes Sosa studio albums count.
        """
        # Use a regular string for HTML content; the accented titles make the
        # UTF-8 encoded body contain non-ASCII bytes.
        mock_html_content_str = """
        <html>
            <head><title>Mercedes Sosa Discography</title></head>
            <body>
                <h1>Mercedes Sosa</h1>
                <h2>Studio Albums</h2>
                <ul>
                    <li>1999 - Misa Criolla</li>
                    <li>2002 - Acústico</li>
                    <li>2005 - Corazón libre</li>
                    <li>2009 - Cantora 1</li>
                    <li>2011 - Canto para caminar</li>
                </ul>
                <h2>Live Albums</h2>
                <ul>
                    <li>2000 - Live in Concert</li>
                </ul>
            </body>
        </html>
        """
        # Encode the string to bytes for the content
        mock_get.return_value = self._html_response(mock_html_content_str.encode('utf-8'))

        url = "http://example.com/mercedes_sosa_discography"
        result = self.browser.browse(url)

        # Assert that key information is present in the extracted text
        self.assertIn("Mercedes Sosa Discography", result)  # From title
        self.assertIn("Studio Albums", result)
        self.assertIn("1999 - Misa Criolla", result)
        self.assertIn("2002 - Acústico", result)
        self.assertIn("2005 - Corazón libre", result)
        self.assertIn("2009 - Cantora 1", result)
        self.assertIn("2011 - Canto para caminar", result)

        # Ensure it doesn't just grab everything indiscriminately or miss sections
        self.assertIn("Live Albums", result)
        self.assertIn("2000 - Live in Concert", result)

        # A further step (outside this tool's direct responsibility but for agent context)
        # would be to pass this 'result' to an LLM with the question:
        # "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)?"
        # The LLM should be able to parse the structured list and count "Acústico", "Corazón libre", "Cantora 1" -> 3.

# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()