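# NOTE: "Spaces: Sleeping" appears to be a status banner captured from the
# hosting page along with this source; it is not part of the program.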
import os
import urllib.parse

import requests
from bs4 import BeautifulSoup
class DuckDuckGoAgent:
    """Question-answering agent with a three-step fallback chain.

    Calling the agent tries, in order:

    1. the DuckDuckGo JSON API (``AbstractText`` field),
    2. the first non-empty ``.result__snippet`` scraped from DuckDuckGo's
       HTML search page,
    3. a Hugging Face hosted LLM (needs the ``HF_API_TOKEN`` environment
       variable).

    Answers are truncated to ``MAX_ANSWER_CHARS`` characters. Every method
    returns a string; failures are reported as printed diagnostics and, at
    the end of the chain, as an error-message string rather than an
    exception.
    """

    # Maximum number of characters of answer text returned to the caller.
    MAX_ANSWER_CHARS = 200
    # Model queried through the Hugging Face inference endpoint.
    HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"

    def __init__(self):
        """Set up the HTTP headers sent with DuckDuckGo requests."""
        print("DuckDuckGoAgent initialized.")
        self.headers = {
            "User-Agent": "Mozilla/5.0",
        }

    def get_duckduckgo_answer(self, query: str) -> str:
        """Return an answer for *query*, starting with the DuckDuckGo API.

        If the API responds with a non-empty ``AbstractText`` it is returned
        (truncated). On a non-200 status, an empty abstract, or any error,
        the result of :meth:`scrape_duckduckgo` is returned instead.
        """
        search_query = urllib.parse.quote(query)
        url = (
            "https://api.duckduckgo.com/"
            f"?q={search_query}&format=json&no_html=1&skip_disambig=1"
        )
        try:
            # Send the same headers as the scraper so both DuckDuckGo
            # requests identify themselves consistently.
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code == 200:
                data = response.json()
                abstract = (
                    data.get("AbstractText") if isinstance(data, dict) else None
                )
                if abstract:
                    return abstract[: self.MAX_ANSWER_CHARS]
                print("No abstract found, falling back to scraping.")
            else:
                print(
                    f"DuckDuckGo API failed with status: {response.status_code}"
                )
        except Exception as e:
            # Deliberately broad: this is a best-effort step and any failure
            # must lead to the next fallback, not to a crash.
            print(f"Error contacting DuckDuckGo API: {e}")
        # Single fallback call site, kept outside the try so an error in the
        # fallback is never mistaken for an API error.
        return self.scrape_duckduckgo(query)

    def scrape_duckduckgo(self, query: str) -> str:
        """Return the first non-empty result snippet from DuckDuckGo's HTML page.

        If the request fails or no snippet with text is found, the result of
        :meth:`call_huggingface_llm` is returned instead.
        """
        print("Using fallback: scraping HTML results.")
        try:
            response = requests.post(
                "https://html.duckduckgo.com/html/",
                data={"q": query},
                headers=self.headers,
                timeout=10,
            )
            # Report HTTP errors explicitly instead of parsing an error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            for snippet in soup.select(".result__snippet"):
                text = snippet.get_text().strip()
                if text:
                    return text[: self.MAX_ANSWER_CHARS]
            print("No useful snippets found, falling back to Hugging Face LLM.")
        except Exception as e:
            # Deliberately broad: network and parsing errors alike must fall
            # through to the last fallback.
            print(f"Error scraping DuckDuckGo: {e}")
        return self.call_huggingface_llm(query)

    def call_huggingface_llm(self, prompt: str) -> str:
        """Ask the Hugging Face hosted model to answer *prompt*.

        Returns the generated text (stripped and truncated), or one of these
        message strings:

        * ``"Error: Hugging Face API Token is not configured."`` when the
          ``HF_API_TOKEN`` environment variable is unset or empty,
        * ``"HF LLM error: ..."`` when the response body carries an
          ``error`` field,
        * ``"No response generated from Hugging Face LLM."`` when the
          response holds no usable generated text,
        * ``"Error contacting Hugging Face model."`` on any other failure.
        """
        hf_api_key = os.getenv("HF_API_TOKEN")
        if not hf_api_key:
            return "Error: Hugging Face API Token is not configured."

        url = f"https://api-inference.huggingface.co/models/{self.HF_MODEL}"
        headers = {
            "Authorization": f"Bearer {hf_api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 200,
                "temperature": 0.7,
                # Without this the generated text starts with the prompt,
                # so the truncated answer could be mostly the echoed question.
                "return_full_text": False,
            },
        }
        try:
            response = requests.post(
                url, headers=headers, json=payload, timeout=30
            )
            try:
                output = response.json()
            except ValueError:
                output = None
            # Check for an API-level error message *before* raising on the
            # HTTP status; otherwise the error body of a 4xx/5xx reply is
            # never surfaced.
            if isinstance(output, dict) and "error" in output:
                return f"HF LLM error: {output['error']}"
            response.raise_for_status()
            # Guard against an empty list or non-dict first element.
            if isinstance(output, list) and output and isinstance(output[0], dict):
                generated = output[0].get("generated_text")
                if isinstance(generated, str) and generated.strip():
                    return generated.strip()[: self.MAX_ANSWER_CHARS]
            return "No response generated from Hugging Face LLM."
        except Exception as e:
            # Last step of the chain: always hand back a string.
            print(f"Error contacting Hugging Face LLM: {e}")
            return "Error contacting Hugging Face model."

    def __call__(self, question: str) -> str:
        """Answer *question* by running the full fallback chain.

        Delegates to :meth:`get_duckduckgo_answer`, which falls back to
        scraping and then to the Hugging Face LLM as needed.
        """
        print(f"Agent received question: {question[:50]}...")
        answer = self.get_duckduckgo_answer(question)
        print(f"Agent returning answer: {answer}")
        return answer