New_Final_Assignment

Sleeping

App Files Community

naman1102 committed on Jun 11

Commit

a1dc7ba

1 Parent(s): 9ea2377

Update tools.py

Browse files

Files changed (1) hide show

tools.py +55 -52

tools.py CHANGED Viewed

@@ -9,7 +9,7 @@ import time
 import os
 from duckduckgo_search import DDGS
 from langchain_core.tools import tool
-from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
 import arxiv
 import fitz  # PyMuPDF
 import tempfile
@@ -211,63 +211,66 @@ def wikipedia_search_tool(wiki_query: str) -> str:
     """
     TOOL NAME: Wikipedia Search Tool
-    Purpose: When the user asks for historical, biographical, scientific, or factual information, use this tool.
-    Input: A string describing a topic to search on Wikipedia.
-    Tips: If you dont get enough information, try again with a different querry
     Example usage:
-    - "Who was Marie Curie?"
-    - "Explain quantum entanglement"
-    - "Tell me about the French Revolution"
     """
-    print("Reached Wikipedia tool, with query = ", wiki_query)
     try:
-        docs = WikipediaLoader(query=wiki_query, load_max_docs=3).load()  # Reduced from 5 to 3
-        result = ""
-        counter = 1
-        for doc in docs:
-            # Handle different metadata structures
-            title = "Unknown Title"
-            if hasattr(doc, 'metadata') and doc.metadata:
-                # Try different possible title keys
-                if 'title' in doc.metadata:
-                    title = doc.metadata['title']
-                elif 'Title' in doc.metadata:
-                    title = doc.metadata['Title']
-                elif 'source' in doc.metadata:
-                    title = doc.metadata['source']
-                else:
-                    # Use first available key as title
-                    if doc.metadata:
-                        first_key = list(doc.metadata.keys())[0]
-                        title = f"Wikipedia: {doc.metadata[first_key]}"
-            # Trim content to key information only (reduced from 2000 to 800 characters)
-            content = doc.page_content[:800] if len(doc.page_content) > 800 else doc.page_content
-            # Add document but keep it concise
-            result += f"\n\nWikipedia Result {counter}: {title}\nSummary: {content}..."
-            counter += 1
-            # Stop after 2 documents to keep response manageable
-            if counter > 2:
-                break
-        if not result.strip():
-            print("No wiki result found")
-            return "No Wikipedia results found for the given query. [END_OF_SEARCH]"
-        # Add clear end marker
-        result += "\n\n[END_OF_SEARCH] - Wikipedia search complete. Use this information to answer the question."
-        print("Wikipedia search completed successfully")
-        return result
     except Exception as e:
-        error_msg = f"Error during Wikipedia search: {str(e)} [END_OF_SEARCH]"
-        return error_msg
 @tool
 def arxiv_search_tool(query: str) -> str:
     """

 import os
 from duckduckgo_search import DDGS
 from langchain_core.tools import tool
+from langchain_community.document_loaders import ArxivLoader
 import arxiv
 import fitz  # PyMuPDF
 import tempfile
     """
     TOOL NAME: Wikipedia Search Tool
+    Purpose: When the user asks about general knowledge, facts, or wants to know about a specific topic, use this tool.
+    Input: A string describing the topic to search for on Wikipedia.
     Example usage:
+    - "What is the capital of France?"
+    - "Find information about quantum computing"
+    - "What is the history of the internet?"
+    If no valid wiki_query is provided, returns {}.
     """
+    print("reached wikipedia search tool")
+    query = wiki_query
+    if not query:
+        return {}
     try:
+        # 1) Use the MediaWiki API to search for page titles matching the query
+        search_params = {
+            "action": "query",
+            "list": "search",
+            "srsearch": query,
+            "format": "json",
+            "utf8": 1
+        }
+        search_resp = requests.get("https://en.wikipedia.org/w/api.php", params=search_params, timeout=10)
+        search_resp.raise_for_status()
+        search_data = search_resp.json()
+        search_results = search_data.get("query", {}).get("search", [])
+        # print("wikipedia: search_results",search_results)
+        if not search_results:
+            print(f"No Wikipedia page found for '{query}'.")
+            return f"No Wikipedia page found for '{query}'."
+        # 2) Take the first search result's title
+        first_title = search_results[0].get("title", "")
+        if not first_title:
+            print("Unexpected format from Wikipedia search.")
+            return "Unexpected format from Wikipedia search."
+        # 3) Fetch the page summary for that title via the REST summary endpoint
+        title_for_url = requests.utils.requote_uri(first_title)
+        summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title_for_url}"
+        summary_resp = requests.get(summary_url, timeout=10)
+        summary_resp.raise_for_status()
+        summary_data = summary_resp.json()
+        # 4) Extract either the "extract" field or a fallback message
+        summary_text = summary_data.get("extract")
+        if not summary_text:
+            summary_text = summary_data.get("description", "No summary available.")
+        print(f"Title: {first_title}\n\n{summary_text}")
+        return f"Title: {first_title}\n\n{summary_text}"
+    except requests.exceptions.RequestException as e:
+        return f"Wikipedia search error: {e}"
     except Exception as e:
+        return f"Unexpected error in wikipedia_search_tool: {e}"
 @tool
 def arxiv_search_tool(query: str) -> str:
     """