# File: main.py
"""Tiny OPDS catalog that turns DuckDuckGo search results into FB2 downloads.

Routes:
    GET /opds            -- root Atom feed advertising the search endpoint.
    GET /opds/search?q=  -- Atom feed with one acquisition entry per result.
    GET /download?url=   -- fetches the page at ``url`` and returns it as FB2.
"""
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from typing import Iterable, List, Tuple
from urllib.parse import parse_qs, quote, urlparse
from xml.sax.saxutils import escape

import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import Response

app = FastAPI()

ATOM_NS = "http://www.w3.org/2005/Atom"
ATOM_MEDIA_TYPE = "application/atom+xml"
FB2_MEDIA_TYPE = "application/fb2+xml"
HTTP_HEADERS = {"User-Agent": "Mozilla/5.0"}
HTTP_TIMEOUT_SECONDS = 10
MAX_RESULTS = 10
MAX_FILENAME_CHARS = 30

# Characters that may not appear in an XML 1.0 document, even escaped.
_XML_ILLEGAL_CHARS = re.compile(
    "[^\t\n\r\x20-\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]"
)


# ========== FB2 Generator ==========
def _xml_text(value: str) -> str:
    """Return *value* made safe for use as XML character data."""
    return escape(_XML_ILLEGAL_CHARS.sub("", value))


def html_to_fb2(title: str, body: str) -> str:
    """Convert an HTML fragment into a minimal FictionBook (FB2) document.

    Args:
        title: Book title; used for ``<book-title>`` and the body title.
        body: HTML markup whose visible text becomes the book content.

    Returns:
        The FB2 document as a string. Every non-blank line of the extracted
        text becomes one ``<p>`` paragraph.
    """
    clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
    stripped_lines = (line.strip() for line in clean_text.splitlines())
    paragraphs = "\n".join(
        f"<p>{_xml_text(line)}</p>" for line in stripped_lines if line
    )
    if not paragraphs:
        # Keep the section non-empty so readers still get a valid-looking page.
        paragraphs = "<empty-line/>"
    # Title and text come from arbitrary web pages, so they must be escaped
    # before being interpolated into the XML template.
    safe_title = _xml_text(title)
    return f"""<?xml version="1.0" encoding="utf-8"?>
<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0">
<description>
<title-info>
<genre>nonfiction</genre>
<author><nickname>OPDSDuckScraper</nickname></author>
<book-title>{safe_title}</book-title>
<lang>en</lang>
</title-info>
</description>
<body>
<title>
<p>{safe_title}</p>
</title>
<section>
{paragraphs}
</section>
</body>
</FictionBook>
"""


# ========== DuckDuckGo Search ==========
def _unwrap_ddg_redirect(href: str) -> str:
    """Return the real target of a DuckDuckGo result link.

    NOTE(review): the HTML endpoint has been observed to return
    scheme-relative redirect links of the form
    ``//duckduckgo.com/l/?uddg=<encoded target>``; confirm against live
    responses. Links that do not match that shape are returned unchanged,
    apart from scheme-relative links getting an ``https:`` scheme so that
    ``requests`` can fetch them.
    """
    parsed = urlparse(href)
    host = parsed.hostname or ""
    is_ddg_host = host == "duckduckgo.com" or host.endswith(".duckduckgo.com")
    if is_ddg_host and parsed.path.startswith("/l/"):
        targets = parse_qs(parsed.query).get("uddg")
        if targets:
            return targets[0]
    if href.startswith("//"):
        return f"https:{href}"
    return href


def duckduckgo_search(query: str) -> List[Tuple[str, str]]:
    """Search DuckDuckGo's HTML endpoint and return ``(title, url)`` pairs.

    Args:
        query: Free-text search query.

    Returns:
        At most ``MAX_RESULTS`` results, in page order.

    Raises:
        requests.RequestException: On network failure or a non-2xx response.
    """
    res = requests.post(
        "https://html.duckduckgo.com/html/",
        data={"q": query},
        headers=HTTP_HEADERS,
        timeout=HTTP_TIMEOUT_SECONDS,
    )
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    results: List[Tuple[str, str]] = []
    for anchor in soup.select("a.result__a"):
        href = anchor.get("href")
        title = anchor.get_text().strip()
        if not href or not title:
            continue
        results.append((title, _unwrap_ddg_redirect(href)))
        if len(results) >= MAX_RESULTS:
            break
    return results


# ========== OPDS Feed Generators ==========
def _utc_timestamp() -> str:
    """Return the current UTC time formatted for an Atom ``<updated>`` element."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _new_feed(title: str, updated: str) -> ET.Element:
    """Create an Atom ``<feed>`` element carrying *title* and *updated*."""
    ET.register_namespace("", ATOM_NS)
    # Elements are created unqualified; the literal xmlns attribute is what
    # puts the serialized feed into the Atom namespace.
    feed = ET.Element("feed", xmlns=ATOM_NS)
    ET.SubElement(feed, "title").text = title
    ET.SubElement(feed, "updated").text = updated
    return feed


def _serialize(feed: ET.Element) -> bytes:
    """Serialize *feed* to UTF-8 bytes with an XML declaration."""
    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)


def generate_root_feed() -> bytes:
    """Build the catalog root feed, which only advertises the search link."""
    feed = _new_feed("DuckDuckGo OPDS Catalog", _utc_timestamp())
    # Relative search link (OpenSearch template)
    ET.SubElement(feed, "link", {
        "rel": "search",
        "type": ATOM_MEDIA_TYPE,
        "href": "/opds/search?q={searchTerms}",
        "templated": "true",
    })
    return _serialize(feed)


def generate_search_feed(query: str, results: Iterable[Tuple[str, str]]) -> bytes:
    """Build an acquisition feed with one entry per search result.

    Args:
        query: The search query, echoed in the feed title.
        results: ``(title, url)`` pairs; each becomes an entry whose
            acquisition link points at ``/download`` with the URL encoded
            as the ``url`` query parameter.
    """
    updated = _utc_timestamp()
    feed = _new_feed(f"Search results for '{query}'", updated)
    for title, url in results:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "id").text = url
        ET.SubElement(entry, "updated").text = updated
        ET.SubElement(entry, "link", {
            "rel": "http://opds-spec.org/acquisition",
            "href": f"/download?url={quote(url, safe='')}",
            "type": FB2_MEDIA_TYPE,
        })
    return _serialize(feed)


# ========== Routes ==========
def _content_disposition(title: str) -> str:
    """Build an ``attachment`` Content-Disposition header value for *title*.

    The title is truncated before percent-encoding so that an escape sequence
    is never cut in half. An ASCII-only ``filename`` is sent as a fallback
    next to the UTF-8 ``filename*`` form.
    """
    stem = title[:MAX_FILENAME_CHARS].strip().replace(" ", "_") or "article"
    ascii_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("_") or "article"
    encoded_stem = quote(stem, safe="")
    return (
        f'attachment; filename="{ascii_stem}.fb2"; '
        f"filename*=UTF-8''{encoded_stem}.fb2"
    )


@app.get("/opds", include_in_schema=False)
def opds_root() -> Response:
    """Serve the catalog root feed."""
    return Response(content=generate_root_feed(), media_type=ATOM_MEDIA_TYPE)


@app.get("/opds/search")
def opds_search(q: str = Query(..., description="Search query")) -> Response:
    """Run a search and serve the results as an OPDS acquisition feed.

    Raises:
        HTTPException: 502 when the search request to DuckDuckGo fails.
    """
    try:
        results = duckduckgo_search(q)
    except requests.RequestException as err:
        raise HTTPException(status_code=502, detail="Search backend request failed") from err
    return Response(content=generate_search_feed(q, results), media_type=ATOM_MEDIA_TYPE)


@app.get("/download")
def download_fb2(url: str) -> Response:
    """Fetch the page at *url* and return it converted to FB2.

    Raises:
        HTTPException: 400 when *url* is not an http(s) URL; 502 when the
            page cannot be fetched.
    """
    # NOTE(security): this endpoint fetches caller-supplied URLs server-side.
    # Only the scheme is validated here; private/internal addresses are NOT
    # blocked, so restrict network access if the service is exposed publicly.
    if urlparse(url).scheme not in ("http", "https"):
        raise HTTPException(status_code=400, detail="Only http(s) URLs can be downloaded")

    try:
        res = requests.get(url, headers=HTTP_HEADERS, timeout=HTTP_TIMEOUT_SECONDS)
        res.raise_for_status()
    except requests.RequestException as err:
        raise HTTPException(status_code=502, detail=f"Could not fetch {url}") from err

    # Parse the raw bytes so BeautifulSoup detects the page encoding itself;
    # res.text falls back to ISO-8859-1 when the server omits a charset.
    soup = BeautifulSoup(res.content, "html.parser")

    title = ""
    if soup.title is not None and soup.title.string:
        title = soup.title.string.strip()
    title = title or "article"

    # Pages without a <body> element are converted from the whole document.
    source = soup.body if soup.body is not None else soup
    fb2 = html_to_fb2(title, str(source))

    return Response(
        content=fb2,
        media_type=FB2_MEDIA_TYPE,
        headers={"Content-Disposition": _content_disposition(title)},
    )