Spaces:
Build error
Build error
Update requirements.txt to add new dependencies for enhanced functionality, including the kagglehub, langchain, and llama-index packages. Refactor SmartSearchTool in tool.py to replace the Wikipedia search with a webpage-visiting approach, improving content retrieval from web search results. Update the tool description and logging for better clarity and usability.
Browse files
- requirements.txt +9 -5
- tools/smart_search/tool.py +23 -34
requirements.txt
CHANGED
@@ -1,17 +1,21 @@
|
|
1 |
-
black>=25.1.0
|
2 |
duckduckgo-search>=8.0.1
|
3 |
gradio[oauth]>=5.26.0
|
4 |
isort>=6.0.1
|
|
|
|
|
|
|
5 |
langgraph>=0.3.34
|
6 |
litellm>=1.10.0
|
|
|
|
|
|
|
|
|
|
|
7 |
pytest>=8.3.5
|
8 |
pytest-cov>=6.1.1
|
9 |
python-dotenv>=1.0.0
|
10 |
requests>=2.32.3
|
11 |
smolagents[litellm,telemetry]>=1.14.0
|
12 |
typing-extensions>=4.5.0
|
|
|
13 |
wikipedia-api>=0.8.1
|
14 |
-
langchain>=0.1.0
|
15 |
-
langchain-community>=0.0.10
|
16 |
-
pandas>=2.0.0
|
17 |
-
Wikipedia-API>=0.8.1
|
|
|
|
|
1 |
duckduckgo-search>=8.0.1
|
2 |
gradio[oauth]>=5.26.0
|
3 |
isort>=6.0.1
|
4 |
+
kagglehub>=0.3.12
|
5 |
+
langchain>=0.1.0
|
6 |
+
langchain-community>=0.0.10
|
7 |
langgraph>=0.3.34
|
8 |
litellm>=1.10.0
|
9 |
+
llama-index>=0.12.33
|
10 |
+
llama-index-embeddings-huggingface>=0.5.3
|
11 |
+
llama-index-readers-wikipedia>=0.3.0
|
12 |
+
mlcroissant>=1.0.17
|
13 |
+
pandas>=2.0.0
|
14 |
pytest>=8.3.5
|
15 |
pytest-cov>=6.1.1
|
16 |
python-dotenv>=1.0.0
|
17 |
requests>=2.32.3
|
18 |
smolagents[litellm,telemetry]>=1.14.0
|
19 |
typing-extensions>=4.5.0
|
20 |
+
wikipedia>=1.4.0
|
21 |
wikipedia-api>=0.8.1
|
|
|
|
|
|
|
|
tools/smart_search/tool.py
CHANGED
@@ -1,60 +1,49 @@
|
|
1 |
import logging
|
2 |
import re
|
3 |
from smolagents import Tool
|
4 |
-
from smolagents.default_tools import DuckDuckGoSearchTool,
|
5 |
|
6 |
logger = logging.getLogger(__name__)
|
7 |
|
8 |
|
9 |
class SmartSearchTool(Tool):
|
10 |
name = "smart_search"
|
11 |
-
description = """A smart search tool that first performs a web search and then
|
12 |
-
uses Wikipedia search for more reliable information."""
|
13 |
inputs = {"query": {"type": "string", "description": "The search query to find information"}}
|
14 |
output_type = "string"
|
15 |
|
16 |
def __init__(self):
|
17 |
super().__init__()
|
18 |
self.web_search_tool = DuckDuckGoSearchTool(max_results=1)
|
19 |
-
self.
|
20 |
-
user_agent="SmartSearchTool ([email protected])",
|
21 |
-
language="en",
|
22 |
-
# content_type="summary",
|
23 |
-
content_type="text",
|
24 |
-
extract_format="WIKI"
|
25 |
-
)
|
26 |
|
27 |
def forward(self, query: str) -> str:
|
28 |
logger.info(f"Starting smart search for query: {query}")
|
29 |
|
30 |
-
#
|
31 |
web_result = self.web_search_tool.forward(query)
|
32 |
logger.info(f"Web search result: {web_result[:100]}...")
|
33 |
|
34 |
-
#
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
if wiki_match:
|
40 |
-
wiki_title = wiki_match.group(1)
|
41 |
-
logger.info(f"Extracted Wikipedia title: {wiki_title}")
|
42 |
-
|
43 |
-
# Use Wikipedia search for more reliable information
|
44 |
-
wiki_result = self.wiki_tool.forward(wiki_title)
|
45 |
-
logger.info(f"Wikipedia search result: {wiki_result[:100]}...")
|
46 |
-
|
47 |
-
if wiki_result and "No Wikipedia page found" not in wiki_result:
|
48 |
-
logger.info("Successfully retrieved Wikipedia content")
|
49 |
-
return f"Web search result:\n{web_result}\n\nWikipedia result:\n{wiki_result}"
|
50 |
-
else:
|
51 |
-
logger.warning("Wikipedia search failed or returned no results")
|
52 |
-
else:
|
53 |
-
logger.warning("Could not extract Wikipedia title from URL")
|
54 |
|
55 |
-
#
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
|
60 |
def main(query: str) -> str:
|
|
|
1 |
import logging
|
2 |
import re
|
3 |
from smolagents import Tool
|
4 |
+
from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool
|
5 |
|
6 |
logger = logging.getLogger(__name__)
|
7 |
|
8 |
|
9 |
class SmartSearchTool(Tool):
|
10 |
name = "smart_search"
|
11 |
+
description = """A smart search tool that first performs a web search and then visits each URL to get its content."""
|
|
|
12 |
inputs = {"query": {"type": "string", "description": "The search query to find information"}}
|
13 |
output_type = "string"
|
14 |
|
15 |
def __init__(self):
|
16 |
super().__init__()
|
17 |
self.web_search_tool = DuckDuckGoSearchTool(max_results=1)
|
18 |
+
self.visit_webpage_tool = VisitWebpageTool(max_output_length=-1)
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def forward(self, query: str) -> str:
|
21 |
logger.info(f"Starting smart search for query: {query}")
|
22 |
|
23 |
+
# Get web search results
|
24 |
web_result = self.web_search_tool.forward(query)
|
25 |
logger.info(f"Web search result: {web_result[:100]}...")
|
26 |
|
27 |
+
# Extract URLs from the web search result
|
28 |
+
urls = re.findall(r'https?://[^\s)]+', web_result)
|
29 |
+
if not urls:
|
30 |
+
logger.info("No URLs found in web search result")
|
31 |
+
return f"Web search result:\n{web_result}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
+
# Visit each URL and get its content
|
34 |
+
contents = []
|
35 |
+
for url in urls:
|
36 |
+
logger.info(f"Visiting URL: {url}")
|
37 |
+
try:
|
38 |
+
content = self.visit_webpage_tool.forward(url)
|
39 |
+
if content:
|
40 |
+
contents.append(f"\nContent from {url}:\n{content}")
|
41 |
+
except Exception as e:
|
42 |
+
logger.warning(f"Error visiting {url}: {e}")
|
43 |
+
contents.append(f"\nError visiting {url}: {e}")
|
44 |
+
|
45 |
+
# Combine all results
|
46 |
+
return f"Web search result:\n{web_result}\n" + "\n".join(contents)
|
47 |
|
48 |
|
49 |
def main(query: str) -> str:
|