import os
import requests
import urllib.parse
from bs4 import BeautifulSoup


class DuckDuckGoAgent:
    def __init__(self):
        print("DuckDuckGoAgent initialized.")
        # Browser-like User-Agent, used when scraping the HTML search endpoint.
        self.headers = {
            "User-Agent": "Mozilla/5.0"
        }

    def get_duckduckgo_answer(self, query: str) -> str:
        """
        Attempt to get an answer from the DuckDuckGo API.
        If no abstract text is found, fall back to scraping.
        """
        search_query = urllib.parse.quote(query)
        url = f"https://api.duckduckgo.com/?q={search_query}&format=json&no_html=1&skip_disambig=1"
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                data = response.json()
                # If AbstractText exists and is non-empty, return it.
                if 'AbstractText' in data and data['AbstractText']:
                    return data['AbstractText'][:200]
                else:
                    print("No abstract found, falling back to scraping.")
                    return self.scrape_duckduckgo(query)
            else:
                print(f"DuckDuckGo API failed with status: {response.status_code}")
                return self.scrape_duckduckgo(query)
        except Exception as e:
            print(f"Error contacting DuckDuckGo API: {e}")
            return self.scrape_duckduckgo(query)

    def scrape_duckduckgo(self, query: str) -> str:
        """
        Fall back to scraping DuckDuckGo search results if the API fails or no abstract is found.
        """
        print("Using fallback: scraping HTML results.")
        try:
            response = requests.post(
                "https://html.duckduckgo.com/html/",
                data={"q": query},
                headers=self.headers,
                timeout=10
            )
            soup = BeautifulSoup(response.text, "html.parser")
            snippets = soup.select(".result__snippet")
            # Return the first non-empty result snippet.
            for s in snippets:
                text = s.get_text().strip()
                if text:
                    return text[:200]
            print("No useful snippets found, falling back to Hugging Face LLM.")
            return self.call_huggingface_llm(query)
        except Exception as e:
            print(f"Error scraping DuckDuckGo: {e}")
            return self.call_huggingface_llm(query)

    def call_huggingface_llm(self, prompt: str) -> str:
        """
        Fall back to a Hugging Face LLM if the DuckDuckGo API and scraping both fail.
        """
        hf_api_key = os.getenv("HF_API_TOKEN")
        model = "mistralai/Mistral-7B-Instruct-v0.1"
        if not hf_api_key:
            return "Error: Hugging Face API Token is not configured."

        url = f"https://api-inference.huggingface.co/models/{model}"
        headers = {
            "Authorization": f"Bearer {hf_api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 200,
                "temperature": 0.7
            }
        }
        try:
            response = requests.post(url, headers=headers, json=payload, timeout=30)
            response.raise_for_status()
            output = response.json()
            # Guard against an empty list before indexing into the response.
            if isinstance(output, list) and output and "generated_text" in output[0]:
                return output[0]["generated_text"].strip()[:200]
            elif isinstance(output, dict) and "error" in output:
                return f"HF LLM error: {output['error']}"
            else:
                return "No response generated from Hugging Face LLM."
        except Exception as e:
            print(f"Error contacting Hugging Face LLM: {e}")
            return "Error contacting Hugging Face model."

    def __call__(self, question: str) -> str:
        """
        Main entry point for the agent to process a question.
        It first attempts the DuckDuckGo API, then falls back to scraping or the Hugging Face LLM.
        """
        print(f"Agent received question: {question[:50]}...")
        answer = self.get_duckduckgo_answer(question)
        print(f"Agent returning answer: {answer}")
        return answer
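

# A minimal usage sketch, not part of the original file: running the module directly
# exercises the full fallback chain (API -> HTML scrape -> Hugging Face LLM). The
# HF_API_TOKEN environment variable only needs to be set if the LLM fallback should
# be reachable; the example question is arbitrary.
if __name__ == "__main__":
    agent = DuckDuckGoAgent()
    print(agent("What is the capital of France?"))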