# Hugging Face page header captured along with this file (not Python source):
# riokorb — "Updated agent.py to accommodate errors when trying to use LlamaIndex WikipediaReader"
# commit f1280b9 (verified); raw / history / blame; 6.27 kB
"""Tool definitions and utility functions for the agent"""
from typing import List, Dict, Any
import os
from dotenv import load_dotenv
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.readers.wikipedia import WikipediaReader
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core.schema import Document
import wikipedia
# Load environment variables
load_dotenv()
# --- Text Processing Tools ---
def text_reverser(text: str) -> str:
    """
    Reverse the given text. Useful for answering questions that are written backwards.
    Args:
        text: The text to reverse
    Returns:
        The reversed text
    """
    # Walk the characters back-to-front and stitch them together.
    return "".join(reversed(text))
# --- Math Tools ---
def simple_calculator(operation: str, a: float, b: float) -> float:
    """
    Perform a simple calculation.
    Args:
        operation: One of 'add', 'subtract', 'multiply', 'divide'
        a: First number
        b: Second number
    Returns:
        The result of the calculation
    Raises:
        ValueError: If the operation is unknown, or on division by zero.
    """
    # Dispatch table replaces the if/elif chain; lookup doubles as validation.
    operations = {
        "add": lambda x, y: x + y,
        "subtract": lambda x, y: x - y,
        "multiply": lambda x, y: x * y,
        "divide": lambda x, y: x / y,
    }
    if operation not in operations:
        raise ValueError(f"Unknown operation: {operation}")
    # Guard division explicitly so callers get a clear message, not ZeroDivisionError.
    if operation == "divide" and b == 0:
        raise ValueError("Cannot divide by zero")
    return operations[operation](a, b)
# --- Information Retrieval Tools ---
def wikipedia_search(query: str, num_results: int = 2) -> str:
    """
    Search Wikipedia for information.

    Attempts the LlamaIndex WikipediaReader first; if it raises or returns
    nothing, falls back to the direct ``wikipedia`` package.

    Args:
        query: The search query
        num_results: Number of results to return (default: 2)
    Returns:
        A formatted string with the search results
    """
    try:
        # Primary path: LlamaIndex WikipediaReader.
        docs = WikipediaReader().load_data(query=query, max_docs=num_results)
        if docs:
            formatted = []
            for idx, doc in enumerate(docs, 1):
                page_title = doc.metadata.get("title", "Unknown Title")
                body = doc.text
                if len(body) > 1000:
                    body = body[:1000] + "..."
                formatted.append(f"Result {idx}: {page_title}\n{body}\n")
            return "\n".join(formatted)
        # No results from the primary path; try the fallback (still inside the
        # try so any fallback failure is handled below just like a reader error).
        print(f"No results from LlamaIndex WikipediaReader for '{query}', trying direct Wikipedia package...")
        return _fallback_wikipedia_search(query, num_results)
    except Exception as e:
        print(f"Error with LlamaIndex WikipediaReader: {str(e)}")
        # Fall back to direct Wikipedia package
        print("Falling back to direct Wikipedia package...")
        try:
            return _fallback_wikipedia_search(query, num_results)
        except Exception as fallback_error:
            print(f"Fallback also failed: {fallback_error}")
            return f"Error searching Wikipedia: Unable to retrieve information about '{query}'. Please try a different search term or approach."
def _fallback_wikipedia_search(query: str, num_results: int = 2) -> str:
    """
    Fallback implementation using the direct Wikipedia package.
    """
    # Search first to get candidate page titles.
    titles = wikipedia.search(query, results=num_results)
    if not titles:
        return f"No Wikipedia results found for '{query}'."
    sections = []
    for idx, page_title in enumerate(titles, 1):
        try:
            page = wikipedia.page(page_title)
            # Summary only — full page content would be far too long.
            summary = page.summary
            if len(summary) > 1000:
                summary = summary[:1000] + "..."
            sections.append(f"Result {idx}: {page.title}\n{summary}\n")
        except wikipedia.exceptions.DisambiguationError as e:
            # Disambiguation page: list up to 5 of the possible targets.
            choices = "\n".join(f"- {opt}" for opt in e.options[:5])
            sections.append(
                f"Result {idx}: Multiple options found for '{page_title}':\n" + choices
            )
        except wikipedia.exceptions.PageError:
            # Title from search did not resolve to a page — skip it.
            continue
        except Exception as e:
            sections.append(f"Result {idx}: Error retrieving information for '{page_title}': {str(e)}")
    if not sections:
        return f"Could not retrieve valid information for '{query}'."
    return "\n".join(sections)
def web_search(url: str) -> str:
    """
    Fetch and extract content from a specific web page.
    Args:
        url: The URL of the web page to search
    Returns:
        The extracted content from the web page
    """
    try:
        documents = SimpleWebPageReader().load_data(urls=[url])
        if not documents:
            return f"No content found for URL: {url}"
        # Only the first document's text is returned.
        return documents[0].text
    except Exception as e:
        return f"Error retrieving web page: {str(e)}"
# --- Tool Selection and Routing ---
def get_tools() -> List[BaseTool]:
    """Create and return a list of tools for the agent."""
    # (function, tool name, description) — one row per tool, built uniformly below.
    tool_specs = [
        (
            text_reverser,
            "text_reverser",
            "Reverses the given text. Useful for processing reversed questions or text.",
        ),
        (
            simple_calculator,
            "calculator",
            "Performs simple calculations: add, subtract, multiply, divide.",
        ),
        (
            wikipedia_search,
            "wikipedia_search",
            "Searches Wikipedia for information on a topic.",
        ),
        (
            web_search,
            "web_search",
            "Fetches and extracts content from a specific web page.",
        ),
    ]
    return [
        FunctionTool.from_defaults(fn=fn, name=name, description=description)
        for fn, name, description in tool_specs
    ]
if __name__ == "__main__":
print("This module defines tools for the agent. Run app.py or standalone_debug.py to test the agent.")