riokorb committed on
Commit
f1280b9
·
verified ·
1 Parent(s): 882871c

Updated agent.py to accommodate errors when trying to use LlamaIndex WikipediaReader

Browse files
Files changed (1) hide show
  1. agent.py +59 -11
agent.py CHANGED
@@ -8,6 +8,8 @@ from llama_index.readers.wikipedia import WikipediaReader
8
  from llama_index.readers.web import SimpleWebPageReader
9
  from llama_index.core.schema import Document
10
 
 
 
11
  # Load environment variables
12
  load_dotenv()
13
 
@@ -63,20 +65,66 @@ def wikipedia_search(query: str, num_results: int = 2) -> str:
63
  A formatted string with the search results
64
  """
65
  try:
 
66
  reader = WikipediaReader()
67
  docs = reader.load_data(query=query, max_docs=num_results)
68
- if not docs:
69
- return f"No Wikipedia results found for '{query}'."
70
-
71
- results = []
72
- for i, doc in enumerate(docs, 1):
73
- title = doc.metadata.get("title", "Unknown Title")
74
- content = doc.text[:1000] + "..." if len(doc.text) > 1000 else doc.text
75
- results.append(f"Result {i}: {title}\n{content}\n")
76
-
77
- return "\n".join(results)
 
 
 
78
  except Exception as e:
79
- return f"Error searching Wikipedia: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  def web_search(url: str) -> str:
82
  """
 
8
  from llama_index.readers.web import SimpleWebPageReader
9
  from llama_index.core.schema import Document
10
 
11
+ import wikipedia
12
+
13
  # Load environment variables
14
  load_dotenv()
15
 
 
65
  A formatted string with the search results
66
  """
67
  try:
68
+ # First try with LlamaIndex WikipediaReader
69
  reader = WikipediaReader()
70
  docs = reader.load_data(query=query, max_docs=num_results)
71
+ if docs:
72
+ results = []
73
+ for i, doc in enumerate(docs, 1):
74
+ title = doc.metadata.get("title", "Unknown Title")
75
+ content = doc.text[:1000] + "..." if len(doc.text) > 1000 else doc.text
76
+ results.append(f"Result {i}: {title}\n{content}\n")
77
+
78
+ return "\n".join(results)
79
+ else:
80
+ # If no results from LlamaIndex, try with direct Wikipedia package
81
+ print(f"No results from LlamaIndex WikipediaReader for '{query}', trying direct Wikipedia package...")
82
+ return _fallback_wikipedia_search(query, num_results)
83
+
84
  except Exception as e:
85
+ print(f"Error with LlamaIndex WikipediaReader: {str(e)}")
86
+ # Fall back to direct Wikipedia package
87
+ print(f"Falling back to direct Wikipedia package...")
88
+ try:
89
+ return _fallback_wikipedia_search(query, num_results)
90
+ except Exception as fallback_error:
91
+ print(f"Fallback also failed: {fallback_error}")
92
+ return f"Error searching Wikipedia: Unable to retrieve information about '{query}'. Please try a different search term or approach."
93
+
94
def _fallback_wikipedia_search(query: str, num_results: int = 2) -> str:
    """
    Fallback Wikipedia lookup using the ``wikipedia`` package directly.

    Used when the LlamaIndex WikipediaReader returns nothing or raises.

    Args:
        query: Search term passed to ``wikipedia.search()``.
        num_results: Maximum number of page titles to look up.

    Returns:
        A formatted string with one "Result i: ..." section per page, or a
        human-readable message when nothing could be retrieved.
    """
    # First search for candidate page titles.
    search_results = wikipedia.search(query, results=num_results)

    if not search_results:
        return f"No Wikipedia results found for '{query}'."

    results = []
    for i, page_title in enumerate(search_results, 1):
        try:
            # auto_suggest=False: the title came straight from search(), so
            # letting the library fuzzy-"suggest" can silently fetch an
            # unrelated article for exact titles.
            page = wikipedia.page(page_title, auto_suggest=False)
            summary = page.summary
            # Keep output bounded: use the summary only, truncated to ~1000 chars.
            content = summary[:1000] + "..." if len(summary) > 1000 else summary
            results.append(f"Result {i}: {page.title}\n{content}\n")
        except wikipedia.exceptions.DisambiguationError as e:
            # Disambiguation page: list a few candidate titles instead of failing.
            options = e.options[:5]  # Limit to 5 options
            results.append(f"Result {i}: Multiple options found for '{page_title}':\n" +
                           "\n".join([f"- {opt}" for opt in options]))
        except wikipedia.exceptions.PageError:
            # A title returned by search() that no longer resolves; skip it.
            continue
        except Exception as e:
            # Best-effort: report the per-page failure without aborting the
            # whole search.
            results.append(f"Result {i}: Error retrieving information for '{page_title}': {str(e)}")

    if not results:
        return f"Could not retrieve valid information for '{query}'."

    return "\n".join(results)
128
 
129
  def web_search(url: str) -> str:
130
  """