|
|
|
|
|
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from urllib.parse import quote, urlencode

import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Query, Response
# NOTE(review): fastapi.responses provides no XMLResponse (the FastAPI docs
# show it only as an example of a user-defined response class), so the
# original `from fastapi.responses import XMLResponse` raised ImportError at
# startup. Alias Response under the old name so existing references keep
# working.
from fastapi.responses import Response as XMLResponse
|
|
|
# ASGI application exposing the /opds catalog and /download endpoints
# (presumably served by uvicorn/hypercorn — not shown in this file).
app = FastAPI()
|
|
|
|
|
def html_to_fb2(title: str, body: str) -> str:
    """Render an HTML page as a minimal FB2 (FictionBook 2) document.

    Parameters:
        title: document title, inserted into the metadata and the section.
        body: raw HTML; tags are stripped and only the visible text is kept.

    Returns:
        A complete FB2 XML document as a string.
    """
    # Local import keeps this fix self-contained within the function.
    from xml.sax.saxutils import escape

    clean_text = BeautifulSoup(body, "html.parser").get_text()
    # Bug fix: the original interpolated raw text straight into the XML
    # template, producing malformed FB2 (or allowing markup injection)
    # whenever the scraped page contained '&', '<' or '>'.
    safe_title = escape(title)
    safe_text = escape(clean_text)
    fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
<FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
<description>
<title-info>
<genre>nonfiction</genre>
<author><first-name>OPDS</first-name><last-name>DuckScraper</last-name></author>
<book-title>{safe_title}</book-title>
<lang>en</lang>
</title-info>
</description>
<body>
<section><title><p>{safe_title}</p></title><p>{safe_text}</p></section>
</body>
</FictionBook>
"""
    return fb2
|
|
|
|
|
def duckduckgo_search(query: str, max_results: int = 10):
    """Scrape the DuckDuckGo HTML endpoint for search results.

    Parameters:
        query: the search term.
        max_results: cap on the number of results returned (default 10,
            matching the original hard-coded limit).

    Returns:
        A list of (title, url) tuples, at most ``max_results`` long; empty
        when the page yields no parseable results.
    """
    # Bug fix: the original passed the query as a request *body* on a GET
    # (data=...), which the endpoint ignores; it belongs in the query string.
    # A timeout is added for consistency with download_fb2 so a slow
    # upstream cannot hang the endpoint indefinitely.
    res = requests.get(
        "https://html.duckduckgo.com/html/",
        params={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,
    )
    soup = BeautifulSoup(res.text, "html.parser")
    results = []
    for anchor in soup.select("a.result__a"):
        href = anchor.get("href")
        title = anchor.get_text().strip()
        # Skip anchors with missing hrefs or whitespace-only titles.
        if href and title:
            results.append((title, href))
        if len(results) >= max_results:
            break
    return results
|
|
|
|
|
def generate_opds(query: str, results) -> bytes:
    """Build an OPDS (Atom) acquisition feed from search results.

    Parameters:
        query: the search term, echoed in the feed title and id.
        results: iterable of (title, url) pairs.

    Returns:
        The serialized feed as UTF-8 bytes, including the XML declaration.
    """
    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    feed = ET.Element("feed", xmlns=ns)
    # Bug fix: the original hard-coded a stale timestamp; Atom requires an
    # RFC 3339 <updated> value, so use the real generation time.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    ET.SubElement(feed, "title").text = f"Search results for '{query}'"
    # Atom requires <id> and <updated> on the feed itself; the original
    # omitted both.
    ET.SubElement(feed, "id").text = f"urn:opds:search:{quote(query)}"
    ET.SubElement(feed, "updated").text = now
    for title, url in results:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "id").text = url
        ET.SubElement(entry, "updated").text = now
        # OPDS acquisition link pointing back at our /download endpoint;
        # the target URL is percent-encoded to survive as a query value.
        ET.SubElement(entry, "link", {
            "rel": "http://opds-spec.org/acquisition",
            "href": f"/download?url={quote(url)}",
            "type": "application/fb2+xml",
        })
    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
|
|
|
# NOTE(review): the original passed response_class=XMLResponse, but FastAPI
# ships no XMLResponse, so the reference is dropped. The explicit Response
# return below already carries the correct Atom media type (response_class
# would only have affected the generated OpenAPI docs).
@app.get("/opds")
def opds_catalog(q: str = Query(..., description="Search query")):
    """Search DuckDuckGo for *q* and return the results as an OPDS feed."""
    results = duckduckgo_search(q)
    xml_data = generate_opds(q, results)
    return Response(content=xml_data, media_type="application/atom+xml")
|
|
|
@app.get("/download")
def download_fb2(url: str):
    """Fetch a web page and return it converted to FB2 as an attachment.

    Parameters:
        url: page to fetch — comes from the OPDS feed's acquisition links.

    Returns:
        An FB2 attachment Response on success, or a JSON error payload
        (``{"error": ...}``) when fetching/conversion fails.
    """
    try:
        res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        # Bug fix: without this, HTTP error pages (404/500) were silently
        # converted into "books"; surface them via the except branch instead.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        # Bug fix: soup.title, soup.title.string and soup.body can all be
        # None on sparse or non-HTML pages. The original crashed in re.sub
        # on a None title and serialized a missing body as the string "None".
        title = soup.title.string if soup.title and soup.title.string else "No Title"
        body = str(soup.body) if soup.body else res.text
        fb2 = html_to_fb2(title, body)
        # Sanitize the title into a safe filename; fall back to a constant
        # when nothing survives sanitization (e.g. an all-symbol title).
        filename = re.sub(r"[^a-zA-Z0-9]+", "_", title)[:30].strip("_") or "book"
        return Response(
            content=fb2,
            media_type="application/fb2+xml",
            headers={"Content-Disposition": f"attachment; filename={filename}.fb2"},
        )
    except Exception as e:
        # Best-effort endpoint: report the failure to the client rather
        # than letting it surface as an unhandled 500.
        return {"error": str(e)}
|
|