File size: 1,900 Bytes
24062eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""
Wikipedia tool based on LlamaIndex's WikipediaToolSpec
https://docs.llamaindex.ai/en/stable/api_reference/tools/wikipedia/
"""
from typing import Dict, Any

import requests
from llama_index.core.tools import FunctionTool
from llama_index.core.tools.tool_spec.base import BaseToolSpec

class CustomWikipediaToolSpec(BaseToolSpec):
    """
    Tool spec for querying information from Wikipedia.

    Only ``search_data`` is exported to the LLM; ``load_data`` is the helper
    it uses to fetch the raw wikitext of the best-matching page.
    """

    # Define the functions that we export to the LLM
    spec_functions = ["search_data"]

    # Upper bound (in seconds) on the wikitext download so that a stalled
    # connection cannot block the calling agent indefinitely.
    _REQUEST_TIMEOUT_SECONDS = 10

    def load_data(
        self, page: str, lang: str = "en", **load_kwargs: Any
    ) -> str:
        """
        Retrieve a Wikipedia page. Useful for learning about a particular concept that isn't private information.

        Args:
            page (str): Title of the page to read.
            lang (str): Language of Wikipedia to read. (default: English)
            **load_kwargs: Extra keyword arguments forwarded to
                ``wikipedia.page`` (``auto_suggest`` is always forced to False).

        Returns:
            str: The raw wikitext of the page, or a short human-readable
            message when the page cannot be resolved or downloaded.
        """
        # Imported lazily so the module can be imported without the optional
        # ``wikipedia`` dependency being installed.
        import wikipedia

        wikipedia.set_lang(lang)
        try:
            wikipedia_page = wikipedia.page(page, **load_kwargs, auto_suggest=False)
        except wikipedia.PageError:
            return "Unable to load page. Try searching instead."
        except wikipedia.DisambiguationError as err:
            # The title points at a disambiguation page; surface a few
            # candidate titles instead of letting the exception escape.
            candidates = ", ".join(err.options[:10])
            return f"Ambiguous page title. Try one of these instead: {candidates}"

        # Fetch the wikitext rather than using the library's parsed content,
        # because the parsed content does not contain tables.
        raw_url = wikipedia_page.url + "?action=raw"
        try:
            response = requests.get(raw_url, timeout=self._REQUEST_TIMEOUT_SECONDS)
            # Treat HTTP error statuses as failures instead of returning the
            # error page body as if it were wikitext.
            response.raise_for_status()
        except requests.RequestException:
            return "Unable to load page."
        # ``.text`` is the decoded body (str); ``.content`` would be bytes,
        # contradicting the declared return type.
        return response.text

    def search_data(self, query: str, lang: str = "en") -> str:
        """
        Search Wikipedia for a page related to the given query.

        Args:
            query (str): the string to search for
            lang (str): Language of Wikipedia to search. (default: English)

        Returns:
            str: The raw wikitext of the top search hit, or a short
            human-readable message when nothing is found or loading fails.
        """
        import wikipedia

        # The language must be selected before searching; otherwise the search
        # runs against whichever language was configured last.
        wikipedia.set_lang(lang)
        pages = wikipedia.search(query)
        if not pages:
            return "No search results. Try changing your query and use keywords instead of full sentences."
        return self.load_data(pages[0], lang)