# File size: 4,386 Bytes
# (Per-line commit hashes and the line-number gutter left over from the web
# file viewer were removed here; they are not part of the program.)
# File: main.py
from fastapi import FastAPI, Query
from fastapi.responses import Response
import requests
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import Optional
# Application instance; the route handlers in this file register on it via @app.get.
app = FastAPI()
# ========== FB2 Generator ==========
from bs4 import BeautifulSoup
from urllib.parse import quote
def html_to_fb2(title: str, body: str) -> str:
    """Wrap the visible text of an HTML fragment in a minimal FB2 document.

    Args:
        title: Book title; used for ``<book-title>`` and the section heading.
        body: HTML markup whose text content becomes the book's single
            paragraph.

    Returns:
        The FB2 document as a string whose XML declaration states UTF-8.
    """
    # Imported locally so the function is self-contained.
    from xml.sax.saxutils import escape

    # Control characters other than tab, LF and CR are not allowed in XML 1.0
    # documents, even when escaped, so they are dropped before interpolation.
    forbidden = dict.fromkeys(c for c in range(32) if c not in (9, 10, 13))

    clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
    # Titles and page text routinely contain ``&``, ``<`` and ``>``; inserting
    # them verbatim would produce a malformed document.
    safe_title = escape(title.translate(forbidden))
    safe_text = escape(clean_text.translate(forbidden))
    return f"""<?xml version='1.0' encoding='utf-8'?>
<FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
<description>
<title-info>
<genre>nonfiction</genre>
<author><first-name>OPDS</first-name><last-name>DuckScraper</last-name></author>
<book-title>{safe_title}</book-title>
<lang>en</lang>
</title-info>
</description>
<body>
<section>
<title><p>{safe_title}</p></title>
<p>{safe_text}</p>
</section>
</body>
</FictionBook>"""
# ========== DuckDuckGo JSON Search ==========
def duckduckgo_search(query: str):
    """Look up ``query`` through the DuckDuckGo JSON API.

    The ``RelatedTopics`` list of the response is walked depth-first; any item
    carrying both ``Text`` and ``FirstURL`` contributes one result, and items
    that instead carry a nested ``Topics`` list are descended into.

    Args:
        query: Search terms sent as the ``q`` parameter.

    Returns:
        Up to ten ``(text, url)`` tuples in response order.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    response = requests.get(
        "https://api.duckduckgo.com/",
        params={
            "q": query,
            "format": "json",
            "no_html": 1,
            "skip_disambig": 1,
        },
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,
    )
    response.raise_for_status()
    payload = response.json()

    def walk(nodes):
        # Yield (text, url) pairs, recursing into grouped topic lists.
        for node in nodes:
            if "FirstURL" in node and "Text" in node:
                yield node["Text"], node["FirstURL"]
            elif "Topics" in node:
                yield from walk(node["Topics"])

    # The whole tree is traversed before trimming to the first ten results.
    found = list(walk(payload.get("RelatedTopics", [])))
    return found[:10]
# ========== OPDS Feed Generator ==========
def create_feed(entries: list, q: Optional[str]) -> bytes:
    """Serialize catalog entries as an Atom feed.

    Args:
        entries: Dicts with ``id``, ``title`` and ``updated`` strings plus a
            ``link`` dict holding the attributes of the entry's ``<link>``.
        q: The active search query. When falsy, a templated search link is
            added to the feed.

    Returns:
        UTF-8 encoded XML including the XML declaration.
    """
    # Imported locally so the function is self-contained.
    from datetime import timezone

    ns = "http://www.w3.org/2005/Atom"
    opds_ns = "http://opds-spec.org/2010/catalog"
    ET.register_namespace("", ns)
    ET.register_namespace("opds", opds_ns)

    # The Atom namespace is declared through a literal ``xmlns`` attribute on
    # the root, so child elements are created with unqualified tag names.
    feed = ET.Element("feed", xmlns=ns)
    ET.SubElement(feed, "id").text = "urn:uuid:duckopds-catalog"
    ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
    # Timezone-aware replacement for the deprecated ``datetime.utcnow()``;
    # the formatted value is unchanged.
    ET.SubElement(feed, "updated").text = datetime.now(timezone.utc).strftime(
        "%Y-%m-%dT%H:%M:%SZ"
    )

    if not q:
        ET.SubElement(feed, "link", {
            "rel": "search",
            "type": "application/atom+xml",
            "href": "/opds?q={searchTerms}",
            "templated": "true",
        })

    for entry_info in entries:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "id").text = entry_info['id']
        ET.SubElement(entry, "title").text = entry_info['title']
        ET.SubElement(entry, "updated").text = entry_info['updated']
        ET.SubElement(entry, "link", entry_info['link'])
        ET.SubElement(entry, "content", {"type": "text"}).text = entry_info['title']
        author = ET.SubElement(entry, "author")
        # Keyword arguments to ``ET.Element`` become XML attributes, so the
        # author name has to be assigned via ``.text`` to be element content.
        ET.SubElement(author, "name").text = "DuckOPDS"

    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
# ========== Routes ==========
@app.get("/opds")
def opds(q: Optional[str] = Query(None, description="Search query")) -> Response:
    """Serve the OPDS catalog, optionally populated with search results.

    Args:
        q: Search terms. When omitted or empty, the feed has no entries and
            ``create_feed`` adds a search link instead.

    Returns:
        An Atom response whose entries each link to ``/download`` for the
        matching result URL.
    """
    # Imported locally so the handler is self-contained.
    from datetime import timezone

    entries = []
    if q:
        # All entries are produced by the same request, so one timestamp is
        # computed up front instead of once per result.
        updated = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        entries = [
            {
                'id': url,
                'title': title,
                'updated': updated,
                'link': {
                    'rel': 'http://opds-spec.org/acquisition',
                    # The target URL is fully percent-encoded so it survives
                    # as a single query-string value.
                    'href': f"/download?url={quote(url, safe='')}",
                    'type': 'application/fb2+xml',
                },
            }
            for title, url in duckduckgo_search(q)
        ]

    xml_data = create_feed(entries, q)
    return Response(content=xml_data,
                    media_type="application/atom+xml;charset=utf-8")
@app.get("/download")
def download_fb2(url: str) -> Response:
    """Fetch a web page and return it converted to an FB2 attachment.

    Args:
        url: Absolute http(s) URL of the page to convert.

    Returns:
        A response carrying the FB2 document, with a ``Content-Disposition``
        header naming the file after the page title.

    Raises:
        HTTPException: 400 when ``url`` is not an absolute http(s) URL, 502
            when the page cannot be fetched.
    """
    # Imported locally so the handler is self-contained.
    from urllib.parse import urlsplit

    from fastapi import HTTPException

    # NOTE(security): ``url`` is caller-controlled, so this endpoint fetches
    # arbitrary hosts on the caller's behalf (SSRF). Only the scheme and the
    # presence of a host are validated here; restricting reachable hosts
    # needs a deployment-specific policy.
    try:
        target = urlsplit(url)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Malformed url") from exc
    if target.scheme not in ("http", "https") or not target.netloc:
        raise HTTPException(
            status_code=400, detail="url must be an absolute http(s) URL"
        )

    try:
        res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        res.raise_for_status()
    except requests.RequestException as exc:
        # An upstream failure is reported as a gateway error rather than
        # surfacing as an unhandled 500.
        raise HTTPException(status_code=502, detail=f"Could not fetch {url}") from exc

    # Hand over the raw bytes so BeautifulSoup can detect the page encoding
    # itself; ``res.text`` relies on the HTTP headers alone and can mis-decode
    # pages that only declare their charset in the markup.
    soup = BeautifulSoup(res.content, "html.parser")
    raw_title = soup.title.string.strip() if soup.title and soup.title.string else ""
    title = raw_title or "article"

    # Documents without a <body> would otherwise be rendered as the literal
    # text "None"; fall back to the whole parsed document.
    content_root = soup.body if soup.body is not None else soup
    fb2 = html_to_fb2(title, str(content_root))

    # Truncate before percent-encoding so an escape sequence is never cut in
    # half, which the previous encode-then-slice order allowed.
    stem = quote(title[:30].replace(" ", "_"), safe="")
    filename = f"{stem}.fb2"
    return Response(
        content=fb2,
        media_type="application/fb2+xml",
        headers={
            # ``filename*`` lets clients decode non-ASCII titles; the plain
            # ``filename`` stays as an ASCII-only fallback.
            "Content-Disposition": (
                f"attachment; filename={filename}; filename*=UTF-8''{filename}"
            )
        },
    )
|