File size: 4,172 Bytes
73152bb
c02d868
ae12037
a9e9116
667e88a
ae12037
c02d868
ae12037
a9e9116
73152bb
a9e9116
 
ae12037
f51a39b
 
 
 
ae12037
 
a9e9116
 
 
 
 
f51a39b
73152bb
a9e9116
 
73152bb
 
a9e9116
f51a39b
a9e9116
 
73152bb
a9e9116
 
 
f51a39b
 
 
a9e9116
 
 
 
 
 
 
 
 
 
 
 
 
 
f51a39b
73152bb
a9e9116
f51a39b
73152bb
 
 
f51a39b
 
 
73152bb
 
 
 
f51a39b
73152bb
 
 
 
 
 
 
 
 
 
 
 
 
 
a9e9116
 
73152bb
 
 
 
 
 
 
ae12037
f51a39b
a9e9116
f51a39b
 
427cae4
ae12037
f51a39b
 
 
 
 
73152bb
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import requests
import urllib.parse
from bs4 import BeautifulSoup

class DuckDuckGoAgent:
    """Answer a question using DuckDuckGo, with an LLM as the last resort.

    Sources are tried in order, each one only if the previous produced nothing:

    1. the DuckDuckGo Instant Answer JSON API (``AbstractText`` field),
    2. the first non-empty ``.result__snippet`` of the DuckDuckGo HTML results,
    3. a Hugging Face hosted text-generation model (needs ``HF_API_TOKEN``).

    Every answer is truncated to ``MAX_ANSWER_CHARS`` characters. The public
    methods never raise on network/parse failures: they log with ``print`` and
    move on to the next source (or return an error string at the end).
    """

    API_URL = "https://api.duckduckgo.com/"
    HTML_URL = "https://html.duckduckgo.com/html/"
    HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"
    MAX_ANSWER_CHARS = 200

    def __init__(self):
        """Set up the HTTP headers sent with DuckDuckGo requests."""
        print("DuckDuckGoAgent initialized.")
        self.headers = {
            "User-Agent": "Mozilla/5.0"
        }

    def get_duckduckgo_answer(self, query: str) -> str:
        """Return an answer for *query*, starting with the Instant Answer API.

        If the API yields no abstract (empty field, non-200 status, or any
        error), fall back to :meth:`scrape_duckduckgo`.
        """
        abstract = self._fetch_abstract(query)
        if abstract:
            return abstract[:self.MAX_ANSWER_CHARS]
        return self.scrape_duckduckgo(query)

    def _fetch_abstract(self, query: str) -> str:
        """Return the API's ``AbstractText`` for *query*, or ``""`` if unavailable."""
        # Let requests build and escape the query string instead of hand-quoting.
        params = {
            "q": query,
            "format": "json",
            "no_html": 1,
            "skip_disambig": 1,
        }
        try:
            response = requests.get(
                self.API_URL,
                params=params,
                headers=self.headers,
                timeout=10,
            )
            if response.status_code != 200:
                print(f"DuckDuckGo API failed with status: {response.status_code}")
                return ""
            data = response.json()
        except Exception as e:
            # Best-effort boundary: any failure here just means "try the next source".
            print(f"Error contacting DuckDuckGo API: {e}")
            return ""

        abstract = data.get("AbstractText") if isinstance(data, dict) else None
        if not abstract or not isinstance(abstract, str):
            print("No abstract found, falling back to scraping.")
            return ""
        return abstract

    def scrape_duckduckgo(self, query: str) -> str:
        """Return the first non-empty result snippet from DuckDuckGo's HTML page.

        Falls back to :meth:`call_huggingface_llm` when the request or parsing
        fails, or when no snippet contains text.
        """
        print("Using fallback: scraping HTML results.")
        try:
            response = requests.post(
                self.HTML_URL,
                data={"q": query},
                headers=self.headers,
                timeout=10,
            )
            soup = BeautifulSoup(response.text, "html.parser")
            for node in soup.select(".result__snippet"):
                text = node.get_text().strip()
                if text:
                    return text[:self.MAX_ANSWER_CHARS]
        except Exception as e:
            # Best-effort boundary: scraping problems must not crash the agent.
            print(f"Error scraping DuckDuckGo: {e}")
            return self.call_huggingface_llm(query)

        print("No useful snippets found, falling back to Hugging Face LLM.")
        return self.call_huggingface_llm(query)

    def call_huggingface_llm(self, prompt: str) -> str:
        """Ask the Hugging Face hosted model to answer *prompt*.

        Reads the API token from the ``HF_API_TOKEN`` environment variable.

        Returns:
            The generated text (prompt echo removed, truncated to
            ``MAX_ANSWER_CHARS``), or a human-readable error string when the
            token is missing, the API reports an error, the response has no
            generated text, or the request fails.
        """
        hf_api_key = os.getenv("HF_API_TOKEN")
        if not hf_api_key:
            return "Error: Hugging Face API Token is not configured."

        url = f"https://api-inference.huggingface.co/models/{self.HF_MODEL}"
        headers = {
            "Authorization": f"Bearer {hf_api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 200,
                "temperature": 0.7,
                # Without this the output starts with the prompt itself, so
                # the truncated answer could be nothing but the question.
                "return_full_text": False,
            },
        }

        try:
            response = requests.post(url, headers=headers, json=payload, timeout=30)
            try:
                output = response.json()
            except ValueError:
                # Body is not JSON: report the HTTP error if there is one,
                # otherwise surface the decoding failure itself.
                response.raise_for_status()
                raise
            # Inspect the body BEFORE raise_for_status(): API errors come with
            # a non-2xx status, and their message is more useful than the code.
            if isinstance(output, dict) and "error" in output:
                return f"HF LLM error: {output['error']}"
            response.raise_for_status()
        except Exception as e:
            print(f"Error contacting Hugging Face LLM: {e}")
            return "Error contacting Hugging Face model."

        # Guard against an empty list or unexpected element shape.
        first = output[0] if isinstance(output, list) and output else None
        generated = first.get("generated_text") if isinstance(first, dict) else None
        if not isinstance(generated, str):
            return "No response generated from Hugging Face LLM."

        # Defensive: drop an echoed prompt in case the full text is returned anyway.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        return generated.strip()[:self.MAX_ANSWER_CHARS]

    def __call__(self, question: str) -> str:
        """Process *question* and return the agent's answer.

        Delegates to :meth:`get_duckduckgo_answer`, which walks the fallback
        chain (API, then HTML scraping, then Hugging Face LLM).
        """
        print(f"Agent received question: {question[:50]}...")
        answer = self.get_duckduckgo_answer(question)
        print(f"Agent returning answer: {answer}")
        return answer