Final_Assignment_Template

Running

App Files Community

mjschock committed on Apr 29

Commit

50aafe2

unverified ·

1 Parent(s): ea0c151

Update requirements.txt to add new dependencies for enhanced functionality, including kagglehub, langchain, and llama-index packages. Refactor SmartSearchTool in tool.py to replace Wikipedia search with a webpage visiting approach, improving content retrieval from web search results. Update tool description and logging for better clarity and usability.

Browse files

Files changed (2) hide show

requirements.txt +9 -5
tools/smart_search/tool.py +23 -34

requirements.txt CHANGED Viewed

@@ -1,17 +1,21 @@
-black>=25.1.0
 duckduckgo-search>=8.0.1
 gradio[oauth]>=5.26.0
 isort>=6.0.1
 langgraph>=0.3.34
 litellm>=1.10.0
 pytest>=8.3.5
 pytest-cov>=6.1.1
 python-dotenv>=1.0.0
 requests>=2.32.3
 smolagents[litellm,telemetry]>=1.14.0
 typing-extensions>=4.5.0
 wikipedia-api>=0.8.1
-langchain>=0.1.0
-langchain-community>=0.0.10
-pandas>=2.0.0
-Wikipedia-API>=0.8.1

 duckduckgo-search>=8.0.1
 gradio[oauth]>=5.26.0
 isort>=6.0.1
+kagglehub>=0.3.12
+langchain>=0.1.0
+langchain-community>=0.0.10
 langgraph>=0.3.34
 litellm>=1.10.0
+llama-index>=0.12.33
+llama-index-embeddings-huggingface>=0.5.3
+llama-index-readers-wikipedia>=0.3.0
+mlcroissant>=1.0.17
+pandas>=2.0.0
 pytest>=8.3.5
 pytest-cov>=6.1.1
 python-dotenv>=1.0.0
 requests>=2.32.3
 smolagents[litellm,telemetry]>=1.14.0
 typing-extensions>=4.5.0
+wikipedia>=1.4.0
 wikipedia-api>=0.8.1

tools/smart_search/tool.py CHANGED Viewed

@@ -1,60 +1,49 @@
 import logging
 import re
 from smolagents import Tool
-from smolagents.default_tools import DuckDuckGoSearchTool, WikipediaSearchTool
 logger = logging.getLogger(__name__)
 class SmartSearchTool(Tool):
     name = "smart_search"
-    description = """A smart search tool that first performs a web search and then, if a Wikipedia article is found,
-    uses Wikipedia search for more reliable information."""
     inputs = {"query": {"type": "string", "description": "The search query to find information"}}
     output_type = "string"
     def __init__(self):
         super().__init__()
         self.web_search_tool = DuckDuckGoSearchTool(max_results=1)
-        self.wiki_tool = WikipediaSearchTool(
-            user_agent="SmartSearchTool ([email protected])",
-            language="en",
-            # content_type="summary",
-            content_type="text",
-            extract_format="WIKI"
-        )
     def forward(self, query: str) -> str:
         logger.info(f"Starting smart search for query: {query}")
-        # First perform a web search with a single result
         web_result = self.web_search_tool.forward(query)
         logger.info(f"Web search result: {web_result[:100]}...")
-        # Check if the result contains a Wikipedia link
-        if "wikipedia.org" in web_result.lower():
-            logger.info("Wikipedia link found in web search results")
-            # Extract the Wikipedia page title from the URL using regex
-            wiki_match = re.search(r'wikipedia\.org/wiki/([^)\s]+)', web_result)
-            if wiki_match:
-                wiki_title = wiki_match.group(1)
-                logger.info(f"Extracted Wikipedia title: {wiki_title}")
-                # Use Wikipedia search for more reliable information
-                wiki_result = self.wiki_tool.forward(wiki_title)
-                logger.info(f"Wikipedia search result: {wiki_result[:100]}...")
-                if wiki_result and "No Wikipedia page found" not in wiki_result:
-                    logger.info("Successfully retrieved Wikipedia content")
-                    return f"Web search result:\n{web_result}\n\nWikipedia result:\n{wiki_result}"
-                else:
-                    logger.warning("Wikipedia search failed or returned no results")
-            else:
-                logger.warning("Could not extract Wikipedia title from URL")
-        # If no Wikipedia link was found or Wikipedia search failed, return the web search result
-        logger.info("Returning web search result only")
-        return f"Web search result:\n{web_result}"
 def main(query: str) -> str:

 import logging
 import re
 from smolagents import Tool
+from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool
 logger = logging.getLogger(__name__)
 class SmartSearchTool(Tool):
     name = "smart_search"
+    description = """A smart search tool that first performs a web search and then visits each URL to get its content."""
     inputs = {"query": {"type": "string", "description": "The search query to find information"}}
     output_type = "string"
     def __init__(self):
         super().__init__()
         self.web_search_tool = DuckDuckGoSearchTool(max_results=1)
+        self.visit_webpage_tool = VisitWebpageTool(max_output_length=-1)
     def forward(self, query: str) -> str:
         logger.info(f"Starting smart search for query: {query}")
+        # Get web search results
         web_result = self.web_search_tool.forward(query)
         logger.info(f"Web search result: {web_result[:100]}...")
+        # Extract URLs from the web search result
+        urls = re.findall(r'https?://[^\s)]+', web_result)
+        if not urls:
+            logger.info("No URLs found in web search result")
+            return f"Web search result:\n{web_result}"
+        # Visit each URL and get its content
+        contents = []
+        for url in urls:
+            logger.info(f"Visiting URL: {url}")
+            try:
+                content = self.visit_webpage_tool.forward(url)
+                if content:
+                    contents.append(f"\nContent from {url}:\n{content}")
+            except Exception as e:
+                logger.warning(f"Error visiting {url}: {e}")
+                contents.append(f"\nError visiting {url}: {e}")
+        # Combine all results
+        return f"Web search result:\n{web_result}\n" + "\n".join(contents)
 def main(query: str) -> str: