|
|
|
|
|
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from urllib.parse import quote
from xml.sax.saxutils import escape

import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Query
from fastapi.responses import Response
|
|
|
app = FastAPI() |
|
|
|
|
|
def html_to_fb2(title: str, body: str) -> str:
    """Convert an HTML fragment into a minimal FB2 (FictionBook 2) document.

    Args:
        title: Document title (raw text; XML-escaped before interpolation).
        body: HTML markup whose visible text becomes the book body.

    Returns:
        A complete FB2 XML document as a single string.
    """
    # Flatten the markup to plain text, keeping line breaks between nodes.
    clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
    # Escape XML metacharacters: interpolating raw scraped text containing
    # '&' or '<' into the template would produce malformed FB2 XML.
    safe_title = escape(title)
    safe_text = escape(clean_text)
    fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
<FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
<description>
<title-info>
<genre>nonfiction</genre>
<author><first-name>OPDS</first-name><last-name>DuckScraper</last-name></author>
<book-title>{safe_title}</book-title>
<lang>en</lang>
</title-info>
</description>
<body>
<section>
<title><p>{safe_title}</p></title>
<p>{safe_text}</p>
</section>
</body>
</FictionBook>"""
    return fb2
|
|
|
|
|
def duckduckgo_search(query: str):
    """Scrape the DuckDuckGo HTML endpoint for *query*.

    Returns:
        Up to 10 ``(title, url)`` tuples, in result order.

    Raises:
        requests.HTTPError: If DuckDuckGo responds with a non-2xx status.
    """
    response = requests.post(
        "https://html.duckduckgo.com/html/",
        data={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,
    )
    response.raise_for_status()

    page = BeautifulSoup(response.text, "html.parser")
    hits = []
    for anchor in page.select("a.result__a"):
        link = anchor.get("href")
        text = anchor.get_text()
        # Skip anchors missing either a target or visible text.
        if not (link and text):
            continue
        hits.append((text.strip(), link))
        if len(hits) >= 10:
            break
    return hits
|
|
|
|
|
|
|
def generate_root_feed():
    """Build the OPDS catalog root feed.

    Returns:
        The serialized Atom feed as UTF-8 bytes (with XML declaration).
    """
    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    feed = ET.Element("feed", xmlns=ns)
    ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
    # Timezone-aware clock: datetime.utcnow() is deprecated since Python 3.12
    # and returns naive datetimes; the rendered string is identical.
    ET.SubElement(feed, "updated").text = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # OpenSearch-style templated link; clients substitute {searchTerms}.
    feed.append(ET.Element("link", {
        "rel": "search",
        "type": "application/atom+xml",
        "href": "/opds/search?q={searchTerms}",
        "templated": "true",
    }))

    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
|
|
|
|
|
def generate_search_feed(query: str, results):
    """Build an OPDS acquisition feed for a set of search results.

    Args:
        query: The search terms (used only in the feed title).
        results: Iterable of ``(title, url)`` pairs.

    Returns:
        The serialized Atom feed as UTF-8 bytes (with XML declaration).
    """
    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    feed = ET.Element("feed", xmlns=ns)
    ET.SubElement(feed, "title").text = f"Search results for '{query}'"
    # Timezone-aware clock: datetime.utcnow() is deprecated since Python 3.12.
    # Computed once and reused so every entry shares the feed's timestamp.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    ET.SubElement(feed, "updated").text = now

    for title, url in results:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "id").text = url
        ET.SubElement(entry, "updated").text = now
        # Acquisition link points at our /download proxy, which converts the
        # page to FB2; the URL is percent-encoded (safe='') so it survives
        # intact as a query-string value.
        ET.SubElement(entry, "link", {
            "rel": "http://opds-spec.org/acquisition",
            "href": f"/download?url={quote(url, safe='')}",
            "type": "application/fb2+xml",
        })
    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
|
|
|
|
|
|
|
@app.get("/opds", include_in_schema=False)
def opds_root() -> Response:
    """Serve the OPDS catalog root as an Atom feed."""
    return Response(content=generate_root_feed(), media_type="application/atom+xml")
|
|
|
@app.get("/opds/search")
def opds_search(q: str = Query(..., description="Search query")) -> Response:
    """Run a DuckDuckGo search and return the hits as an OPDS Atom feed."""
    feed_bytes = generate_search_feed(q, duckduckgo_search(q))
    return Response(content=feed_bytes, media_type="application/atom+xml")
|
|
|
@app.get("/download")
def download_fb2(url: str) -> Response:
    """Fetch a web page and return it converted to an FB2 attachment.

    Args:
        url: Absolute URL of the page to fetch (sent percent-encoded by the
            search feed's acquisition links).

    Raises:
        requests.HTTPError: If the upstream page responds with a non-2xx status.
    """
    res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
    # Fall back to a generic title when the page has none.
    title = soup.title.string.strip() if soup.title and soup.title.string else "article"
    body = str(soup.body)
    fb2 = html_to_fb2(title, body)
    # Percent-encode the title so the header stays ASCII-safe, then truncate.
    filename = f"{quote(title, safe='').replace('%20', '_')[:30]}.fb2"
    return Response(
        content=fb2,
        media_type="application/fb2+xml",
        # Bug fix: the header previously contained the literal placeholder
        # text "(unknown)" and the computed `filename` was never used.
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
|
|
|