Spaces:
Sleeping
Sleeping
Updated agent.py to handle errors when using the LlamaIndex WikipediaReader
Browse files
agent.py
CHANGED
@@ -8,6 +8,8 @@ from llama_index.readers.wikipedia import WikipediaReader
|
|
8 |
from llama_index.readers.web import SimpleWebPageReader
|
9 |
from llama_index.core.schema import Document
|
10 |
|
|
|
|
|
11 |
# Load environment variables
|
12 |
load_dotenv()
|
13 |
|
@@ -63,20 +65,66 @@ def wikipedia_search(query: str, num_results: int = 2) -> str:
|
|
63 |
A formatted string with the search results
|
64 |
"""
|
65 |
try:
|
|
|
66 |
reader = WikipediaReader()
|
67 |
docs = reader.load_data(query=query, max_docs=num_results)
|
68 |
-
if
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
78 |
except Exception as e:
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
def web_search(url: str) -> str:
|
82 |
"""
|
|
|
8 |
from llama_index.readers.web import SimpleWebPageReader
|
9 |
from llama_index.core.schema import Document
|
10 |
|
11 |
+
import wikipedia
|
12 |
+
|
13 |
# Load environment variables
|
14 |
load_dotenv()
|
15 |
|
|
|
65 |
A formatted string with the search results
|
66 |
"""
|
67 |
try:
|
68 |
+
# First try with LlamaIndex WikipediaReader
|
69 |
reader = WikipediaReader()
|
70 |
docs = reader.load_data(query=query, max_docs=num_results)
|
71 |
+
if docs:
|
72 |
+
results = []
|
73 |
+
for i, doc in enumerate(docs, 1):
|
74 |
+
title = doc.metadata.get("title", "Unknown Title")
|
75 |
+
content = doc.text[:1000] + "..." if len(doc.text) > 1000 else doc.text
|
76 |
+
results.append(f"Result {i}: {title}\n{content}\n")
|
77 |
+
|
78 |
+
return "\n".join(results)
|
79 |
+
else:
|
80 |
+
# If no results from LlamaIndex, try with direct Wikipedia package
|
81 |
+
print(f"No results from LlamaIndex WikipediaReader for '{query}', trying direct Wikipedia package...")
|
82 |
+
return _fallback_wikipedia_search(query, num_results)
|
83 |
+
|
84 |
except Exception as e:
|
85 |
+
print(f"Error with LlamaIndex WikipediaReader: {str(e)}")
|
86 |
+
# Fall back to direct Wikipedia package
|
87 |
+
print(f"Falling back to direct Wikipedia package...")
|
88 |
+
try:
|
89 |
+
return _fallback_wikipedia_search(query, num_results)
|
90 |
+
except Exception as fallback_error:
|
91 |
+
print(f"Fallback also failed: {fallback_error}")
|
92 |
+
return f"Error searching Wikipedia: Unable to retrieve information about '{query}'. Please try a different search term or approach."
|
93 |
+
|
94 |
+
def _fallback_wikipedia_search(query: str, num_results: int = 2) -> str:
    """
    Fallback implementation using the direct `wikipedia` package.

    Args:
        query: The search term to look up on Wikipedia.
        num_results: Maximum number of pages to retrieve.

    Returns:
        A formatted string with one summary per result, or an error
        message when nothing could be retrieved.
    """
    # First search for matching page titles
    search_results = wikipedia.search(query, results=num_results)

    if not search_results:
        return f"No Wikipedia results found for '{query}'."

    results = []
    for i, page_title in enumerate(search_results, 1):
        try:
            # Get the page content. auto_suggest=False: the title came
            # straight from wikipedia.search(), so letting the library
            # "re-guess" it can silently fetch the wrong article.
            page = wikipedia.page(page_title, auto_suggest=False)
            title = page.title
            # Get a summary instead of full content, truncated for readability.
            content = page.summary[:1000] + "..." if len(page.summary) > 1000 else page.summary
            results.append(f"Result {i}: {title}\n{content}\n")
        except wikipedia.exceptions.DisambiguationError as e:
            # Handle disambiguation pages by listing a few candidate titles.
            options = e.options[:5]  # Limit to 5 options
            results.append(f"Result {i}: Multiple options found for '{page_title}':\n" +
                           "\n".join([f"- {opt}" for opt in options]))
        except wikipedia.exceptions.PageError:
            # Skip pages that don't exist
            continue
        except Exception as e:
            results.append(f"Result {i}: Error retrieving information for '{page_title}': {str(e)}")

    if not results:
        return f"Could not retrieve valid information for '{query}'."

    return "\n".join(results)
128 |
|
129 |
def web_search(url: str) -> str:
|
130 |
"""
|