File size: 5,250 Bytes
5be97e9 638e4b7 7e4c8eb 638e4b7 5be97e9 638e4b7 41829e6 638e4b7 5be97e9 638e4b7 7e4c8eb 5be97e9 638e4b7 5be97e9 638e4b7 5be97e9 638e4b7 41829e6 c7c27df 41829e6 7ada18c 41829e6 33f6f2c 41829e6 638e4b7 7ada18c 638e4b7 41829e6 638e4b7 33f6f2c 41829e6 638e4b7 5be97e9 638e4b7 41829e6 c7c27df 7ada18c 41829e6 e219f4f 33f6f2c 638e4b7 41829e6 7ada18c 638e4b7 7e4c8eb 5be97e9 eae8220 33f6f2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File: main.py
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from typing import Optional
from urllib.parse import parse_qs, quote, urlparse
from xml.sax.saxutils import escape

import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import Response
# Application object; the route decorators further down register their handlers on it.
app = FastAPI()
# ========== FB2 Generator ==========
def html_to_fb2(title: str, body: str) -> str:
    """Wrap the visible text of an HTML fragment in a minimal FB2 document.

    Args:
        title: Book title; used both in the metadata and as the section heading.
        body: HTML markup whose text content becomes the single paragraph.

    Returns:
        The FB2 document as a string (declared as UTF-8 in its XML prolog).
    """
    plain_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
    # Page titles and article text routinely contain '&', '<' and '>'.
    # Interpolating them raw would produce malformed XML, so escape both.
    safe_title = escape(title)
    safe_text = escape(plain_text)
    fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
<FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
<description>
<title-info>
<genre>nonfiction</genre>
<author><first-name>OPDS</first-name><last-name>DuckScraper</last-name></author>
<book-title>{safe_title}</book-title>
<lang>en</lang>
</title-info>
</description>
<body>
<section>
<title><p>{safe_title}</p></title>
<p>{safe_text}</p>
</section>
</body>
</FictionBook>"""
    return fb2
# ========== DuckDuckGo Search ==========
def _unwrap_ddg_redirect(href: str) -> str:
    """Return the target of a DuckDuckGo ``/l/?uddg=...`` redirect, else *href* unchanged."""
    parts = urlparse(href)
    host = parts.hostname or ""
    is_ddg_host = host == "duckduckgo.com" or host.endswith(".duckduckgo.com")
    if is_ddg_host and parts.path.rstrip("/") == "/l":
        # parse_qs already percent-decodes the embedded target URL.
        targets = parse_qs(parts.query).get("uddg")
        if targets:
            return targets[0]
    return href


def duckduckgo_search(query: str):
    """Query DuckDuckGo's HTML endpoint and return up to ten results.

    Args:
        query: Free-text search string.

    Returns:
        A list of ``(title, url)`` tuples in page order, at most ten long.
        Redirect links pointing at DuckDuckGo's ``/l/`` endpoint are replaced
        by the URL carried in their ``uddg`` parameter so that the result can
        be fetched directly.

    Raises:
        requests.RequestException: If the request fails or returns an error
            status (via ``raise_for_status``).
    """
    max_results = 10
    res = requests.post(
        "https://html.duckduckgo.com/html/",
        data={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,
    )
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    results = []
    for anchor in soup.select("a.result__a"):
        href = anchor.get("href")
        title = anchor.get_text().strip()
        # Skip anchors without a target or with a blank (whitespace-only) title.
        if not href or not title:
            continue
        results.append((title, _unwrap_ddg_redirect(href)))
        if len(results) >= max_results:
            break
    return results
# ========== OPDS Feed Generators ==========
def generate_root_feed():
    """Build the root OPDS catalog feed listing the top-level sections.

    Returns:
        The serialized Atom document as UTF-8 bytes, including the XML
        declaration. It contains two entries, "Search" and "Cached", each with
        a single ``subsection`` link.
    """
    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    # Take one timezone-aware timestamp for the whole document so the feed and
    # its entries agree (datetime.utcnow() is deprecated and was called per node).
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    feed = ET.Element("feed", xmlns=ns)
    ET.SubElement(feed, "id").text = "urn:uuid:duckopds-catalog"
    ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
    ET.SubElement(feed, "updated").text = now

    # (entry id, title, link target, OPDS feed kind); "Cached" is a placeholder.
    sections = (
        ("urn:uuid:duckopds-search-section", "Search", "/opds/search", "acquisition"),
        ("urn:uuid:duckopds-cached-section", "Cached", "/opds/cached", "navigation"),
    )
    for entry_id, title, href, kind in sections:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "id").text = entry_id
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "updated").text = now
        ET.SubElement(entry, "link", {
            "rel": "subsection",
            "href": href,
            "type": f"application/atom+xml;profile=opds-catalog;kind={kind}",
        })
    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
def generate_search_feed(query: str, results):
    """Build an OPDS acquisition feed for a set of search results.

    Args:
        query: The search string; it appears in the feed id (percent-encoded)
            and in the feed title.
        results: Iterable of ``(title, url)`` pairs, one per entry.

    Returns:
        The serialized Atom document as UTF-8 bytes, including the XML
        declaration. Each entry links to ``/download?url=<encoded url>``.
    """
    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    # Take one timezone-aware timestamp for the whole document so the feed and
    # its entries agree (datetime.utcnow() is deprecated and was called per node).
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    feed = ET.Element("feed", xmlns=ns)
    ET.SubElement(feed, "id").text = f"urn:uuid:duckopds-search-{quote(query)}"
    ET.SubElement(feed, "title").text = f"Search results for '{query}'"
    ET.SubElement(feed, "updated").text = now

    for title, url in results:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "id").text = url
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "updated").text = now
        ET.SubElement(entry, "link", {
            "rel": "http://opds-spec.org/acquisition",
            # safe='' so that '/', '?', '&' in the target survive as one query value.
            "href": f"/download?url={quote(url, safe='')}",
            "type": "application/fb2+xml",
        })
    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
# ========== Routes ==========
@app.get("/opds", include_in_schema=False)
def opds_root() -> Response:
    """Serve the root OPDS catalog produced by ``generate_root_feed``."""
    navigation_type = "application/atom+xml;profile=opds-catalog;kind=navigation"
    return Response(content=generate_root_feed(), media_type=navigation_type)
@app.get("/opds/search")
def opds_search(
    q: str = Query(..., description="Search query"),
    searchType: Optional[str] = Query(None, alias="searchType"),
) -> Response:
    """Run a DuckDuckGo search and return the hits as an OPDS acquisition feed.

    Args:
        q: Required search query.
        searchType: Accepted but ignored.

    Returns:
        An Atom/OPDS response built by ``generate_search_feed``.

    Raises:
        HTTPException: 502 when the request to the search backend fails.
    """
    try:
        results = duckduckgo_search(q)
    except requests.RequestException as exc:
        # The failure is upstream, not ours: report a gateway error instead of
        # letting the exception surface as an opaque 500.
        raise HTTPException(
            status_code=502, detail="Search backend request failed"
        ) from exc
    xml_data = generate_search_feed(q, results)
    return Response(
        content=xml_data,
        media_type="application/atom+xml;profile=opds-catalog;kind=acquisition",
    )
@app.get("/download")
def download_fb2(url: str) -> Response:
    """Fetch a web page and return it converted to an FB2 attachment.

    Args:
        url: Address of the page to convert. Protocol-relative values
            (``//host/path``) are treated as ``https``.

    Returns:
        A response carrying the FB2 document with a ``Content-Disposition``
        header derived from the page title ("article" when there is none).

    Raises:
        HTTPException: 400 when *url* is not an absolute http(s) URL;
            502 when the page cannot be fetched.
    """
    if url.startswith("//"):
        url = "https:" + url
    parts = urlparse(url)
    # NOTE(review): this endpoint fetches caller-supplied URLs. Only the scheme
    # is checked here; internal/private hosts are not blocked. Confirm whether
    # the deployment needs an allow-list.
    if parts.scheme not in ("http", "https") or not parts.netloc:
        raise HTTPException(
            status_code=400, detail="url must be an absolute http(s) URL"
        )

    try:
        res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        res.raise_for_status()
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502, detail="Could not fetch the requested page"
        ) from exc

    # Pass raw bytes so BeautifulSoup can detect the charset itself; res.text
    # relies on the HTTP header alone and can mis-decode pages that declare
    # their encoding only in a <meta> tag.
    soup = BeautifulSoup(res.content, "html.parser")
    raw_title = soup.title.string if soup.title else None
    title = (raw_title or "").strip() or "article"
    # Documents without a <body> would otherwise be rendered as the text "None".
    body = str(soup.body) if soup.body is not None else str(soup)
    fb2 = html_to_fb2(title, body)

    # Truncate before percent-encoding so an escape sequence is never cut in half.
    stem = quote(title[:30], safe="").replace("%20", "_")
    filename = f"{stem}.fb2"
    return Response(
        content=fb2,
        media_type="application/fb2+xml",
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )
|