# Source: arina-hf-spaces-api/app/core/search_utils.py
# Uploaded by: adsurkasur
# Commit 68964c2: "clone from arina-hf-spaces"
from duckduckgo_search import DDGS
from app.core.db_setup import web_search_keywords_collection
from app.core.logging_setup import logger
def needs_web_search(query):
    """Determine if a query requires web search based on stored keywords.

    The keywords are read from ``web_search_keywords_collection`` and matched
    case-insensitively as substrings of the query.

    Args:
        query (str): The user query to inspect.

    Returns:
        bool: True if any stored keyword occurs in the query. False if the
        query is empty or not a string, if no usable keywords are stored, or
        if the keywords cannot be retrieved (the error is logged).
    """
    # An empty or non-string query cannot match anything; returning early also
    # avoids a pointless database round trip.
    if not isinstance(query, str) or not query.strip():
        return False

    try:
        # find() hands back a cursor object rather than a list, so its
        # truthiness says nothing about whether documents exist. Materialize
        # it before testing for emptiness.
        documents = list(
            web_search_keywords_collection.find({}, {"_id": 0, "keyword": 1})
        )
        # Lowercase the keywords so they can match the lowercased query, and
        # skip missing/blank ones: an empty string is a substring of every
        # query and would force a web search every time.
        web_search_keywords = [
            keyword.lower()
            for keyword in (doc.get("keyword") for doc in documents)
            if isinstance(keyword, str) and keyword.strip()
        ]
    except Exception as e:
        logger.error(f"Error retrieving web search keywords: {e}")
        return False

    if not web_search_keywords:
        logger.warning("No web search keywords found in the database.")
        return False

    query_lower = query.lower()
    return any(keyword in query_lower for keyword in web_search_keywords)
def search_duckduckgo(query, max_results=3):
    """
    Perform a DuckDuckGo search and return summarized results.

    The summary is the ``body`` of the first result; the links are the
    ``href`` values of every returned result, including the first one.

    Args:
        query (str): The search query.
        max_results (int): The maximum number of results to fetch.

    Returns:
        tuple: A summary (str) and a list of links (list of str), or
        (None, []) if the query is empty, no results are found, or the
        search fails (the error is logged).
    """
    # Nothing to search for; skip the network call entirely.
    if not isinstance(query, str) or not query.strip():
        return None, []

    try:
        with DDGS() as ddgs:
            # Materialize the results. Depending on the installed
            # duckduckgo_search release, text() yields lazily or returns a
            # list, and next() on a list raises TypeError. Keeping a list
            # also leaves the first result available for the links below.
            results = list(ddgs.text(query, max_results=max_results) or [])

        first_result = results[0] if results else None
        if not first_result:
            return None, []

        summary = first_result.get("body") or "No summary available"
        links = [r["href"] for r in results if r.get("href")]
        return summary, links
    except Exception as e:
        logger.error(f"[search_duckduckgo] Error during search: {e}", extra={"query": query, "max_results": max_results})
        return None, []
def add_web_search_keyword(keyword):
    """Add a new web search keyword to the database.

    The keyword is trimmed and lowercased before it is stored. Queries are
    lowercased before keyword matching, so a keyword stored with uppercase
    characters could never match, and case variants of the same word would
    otherwise be stored as separate entries.

    Args:
        keyword (str): The keyword to add. Empty, whitespace-only, or
            non-string values are ignored with a warning.

    Returns:
        None. Database errors are logged rather than raised.
    """
    if not isinstance(keyword, str) or not keyword.strip():
        logger.warning(f"Ignoring invalid web search keyword: {keyword!r}")
        return

    normalized = keyword.strip().lower()
    try:
        if web_search_keywords_collection.find_one({"keyword": normalized}):
            logger.info(f"Keyword '{normalized}' already exists.")
            return
        web_search_keywords_collection.insert_one({"keyword": normalized})
        logger.info(f"Added new web search keyword: {normalized}")
    except Exception as e:
        logger.error(f"Error adding web search keyword: {e}")