naman1102 committed on
Commit
a1dc7ba
·
1 Parent(s): 9ea2377

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +55 -52
tools.py CHANGED
@@ -9,7 +9,7 @@ import time
9
  import os
10
  from duckduckgo_search import DDGS
11
  from langchain_core.tools import tool
12
- from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
13
  import arxiv
14
  import fitz # PyMuPDF
15
  import tempfile
@@ -211,63 +211,66 @@ def wikipedia_search_tool(wiki_query: str) -> str:
211
  """
212
  TOOL NAME: Wikipedia Search Tool
213
 
214
- Purpose: When the user asks for historical, biographical, scientific, or factual information, use this tool.
 
 
215
 
216
- Input: A string describing a topic to search on Wikipedia.
217
- Tips: If you dont get enough information, try again with a different querry
218
  Example usage:
219
- - "Who was Marie Curie?"
220
- - "Explain quantum entanglement"
221
- - "Tell me about the French Revolution"
 
222
  """
223
- print("Reached Wikipedia tool, with query = ", wiki_query)
 
 
 
 
224
  try:
225
- docs = WikipediaLoader(query=wiki_query, load_max_docs=3).load() # Reduced from 5 to 3
226
-
227
- result = ""
228
- counter = 1
229
- for doc in docs:
230
- # Handle different metadata structures
231
- title = "Unknown Title"
232
- if hasattr(doc, 'metadata') and doc.metadata:
233
- # Try different possible title keys
234
- if 'title' in doc.metadata:
235
- title = doc.metadata['title']
236
- elif 'Title' in doc.metadata:
237
- title = doc.metadata['Title']
238
- elif 'source' in doc.metadata:
239
- title = doc.metadata['source']
240
- else:
241
- # Use first available key as title
242
- if doc.metadata:
243
- first_key = list(doc.metadata.keys())[0]
244
- title = f"Wikipedia: {doc.metadata[first_key]}"
245
-
246
- # Trim content to key information only (reduced from 2000 to 800 characters)
247
- content = doc.page_content[:800] if len(doc.page_content) > 800 else doc.page_content
248
-
249
- # Add document but keep it concise
250
- result += f"\n\nWikipedia Result {counter}: {title}\nSummary: {content}..."
251
- counter += 1
252
-
253
- # Stop after 2 documents to keep response manageable
254
- if counter > 2:
255
- break
256
-
257
- if not result.strip():
258
- print("No wiki result found")
259
- return "No Wikipedia results found for the given query. [END_OF_SEARCH]"
260
-
261
- # Add clear end marker
262
- result += "\n\n[END_OF_SEARCH] - Wikipedia search complete. Use this information to answer the question."
263
-
264
- print("Wikipedia search completed successfully")
265
- return result
266
 
 
 
 
267
  except Exception as e:
268
- error_msg = f"Error during Wikipedia search: {str(e)} [END_OF_SEARCH]"
269
- return error_msg
270
-
271
  @tool
272
  def arxiv_search_tool(query: str) -> str:
273
  """
 
9
  import os
10
  from duckduckgo_search import DDGS
11
  from langchain_core.tools import tool
12
+ from langchain_community.document_loaders import ArxivLoader
13
  import arxiv
14
  import fitz # PyMuPDF
15
  import tempfile
 
211
  """
212
  TOOL NAME: Wikipedia Search Tool
213
 
214
+ Purpose: When the user asks about general knowledge, facts, or wants to know about a specific topic, use this tool.
215
+
216
+ Input: A string describing the topic to search for on Wikipedia.
217
 
 
 
218
  Example usage:
219
+ - "What is the capital of France?"
220
+ - "Find information about quantum computing"
221
+ - "What is the history of the internet?"
222
+ If no valid wiki_query is provided, returns {}.
223
  """
224
+ print("reached wikipedia search tool")
225
+ query = wiki_query
226
+ if not query:
227
+ return {}
228
+
229
  try:
230
+ # 1) Use the MediaWiki API to search for page titles matching the query
231
+ search_params = {
232
+ "action": "query",
233
+ "list": "search",
234
+ "srsearch": query,
235
+ "format": "json",
236
+ "utf8": 1
237
+ }
238
+ search_resp = requests.get("https://en.wikipedia.org/w/api.php", params=search_params, timeout=10)
239
+ search_resp.raise_for_status()
240
+ search_data = search_resp.json()
241
+
242
+ search_results = search_data.get("query", {}).get("search", [])
243
+ # print("wikipedia: search_results",search_results)
244
+ if not search_results:
245
+ print(f"No Wikipedia page found for '{query}'.")
246
+ return f"No Wikipedia page found for '{query}'."
247
+
248
+ # 2) Take the first search result's title
249
+ first_title = search_results[0].get("title", "")
250
+ if not first_title:
251
+ print("Unexpected format from Wikipedia search.")
252
+ return "Unexpected format from Wikipedia search."
253
+
254
+ # 3) Fetch the page summary for that title via the REST summary endpoint
255
+ title_for_url = requests.utils.requote_uri(first_title)
256
+ summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title_for_url}"
257
+ summary_resp = requests.get(summary_url, timeout=10)
258
+ summary_resp.raise_for_status()
259
+ summary_data = summary_resp.json()
260
+
261
+ # 4) Extract either the "extract" field or a fallback message
262
+ summary_text = summary_data.get("extract")
263
+ if not summary_text:
264
+ summary_text = summary_data.get("description", "No summary available.")
265
+ print(f"Title: {first_title}\n\n{summary_text}")
266
+ return f"Title: {first_title}\n\n{summary_text}"
 
 
 
 
267
 
268
+
269
+ except requests.exceptions.RequestException as e:
270
+ return f"Wikipedia search error: {e}"
271
  except Exception as e:
272
+ return f"Unexpected error in wikipedia_search_tool: {e}"
273
+
 
274
  @tool
275
  def arxiv_search_tool(query: str) -> str:
276
  """