Spaces:

Ivan000
/

OPDS

Sleeping

File size: 5,250 Bytes

5be97e9
638e4b7
7e4c8eb
 
638e4b7
 
5be97e9
638e4b7
41829e6
638e4b7
 
 
 
 
5be97e9
638e4b7
 
 
 
 
 
 
 
 
 
 
7e4c8eb
 
5be97e9
 
638e4b7
5be97e9
638e4b7
 
 
 
5be97e9
 
 
 
 
 
 
638e4b7
 
 
 
 
 
 
 
 
 
 
41829e6
 
c7c27df
41829e6
 
 
7ada18c
41829e6
 
 
33f6f2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41829e6
 
 
 
 
638e4b7
 
 
7ada18c
638e4b7
41829e6
 
638e4b7
 
 
33f6f2c
41829e6
638e4b7
 
5be97e9
638e4b7
 
 
 
41829e6
 
 
 
c7c27df
7ada18c
 
 
 
41829e6
 
e219f4f
 
 
 
33f6f2c
638e4b7
41829e6
7ada18c
 
 
 
638e4b7
 
7e4c8eb
5be97e9
 
 
 
 
 
 
 
 
 
 
eae8220
33f6f2c

# File: main.py

import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from urllib.parse import parse_qs, quote, urlparse
from xml.sax.saxutils import escape

import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import Response

app = FastAPI()

# ========== FB2 Generator ==========
def html_to_fb2(title: str, body: str) -> str:
    """Wrap the text content of an HTML fragment in a minimal FB2 document.

    Args:
        title: Book title; used for both ``<book-title>`` and the section
            heading.
        body: HTML markup whose text content becomes the book body.

    Returns:
        The FB2 document as a string; its XML declaration states UTF-8.
    """
    # Drop the markup, joining the individual text pieces with newlines.
    plain_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
    # Title and text are arbitrary page content: an unescaped "&", "<" or ">"
    # would make the generated document ill-formed XML.
    safe_title = escape(title)
    safe_text = escape(plain_text)
    fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
<FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
  <description>
    <title-info>
      <genre>nonfiction</genre>
      <author><first-name>OPDS</first-name><last-name>DuckScraper</last-name></author>
      <book-title>{safe_title}</book-title>
      <lang>en</lang>
    </title-info>
  </description>
  <body>
    <section>
      <title><p>{safe_title}</p></title>
      <p>{safe_text}</p>
    </section>
  </body>
</FictionBook>"""
    return fb2

# ========== DuckDuckGo Search ==========
def _resolve_result_href(href: str) -> str:
    """Return a directly fetchable URL for a search-result link."""
    parts = urlparse(href)
    host = parts.hostname or ""
    # NOTE(review): result anchors from the HTML endpoint have been observed
    # as redirect links of the form //duckduckgo.com/l/?uddg=<encoded target>;
    # confirm against a live response.
    is_ddg_host = host == "duckduckgo.com" or host.endswith(".duckduckgo.com")
    if is_ddg_host and parts.path.rstrip("/") == "/l":
        target = parse_qs(parts.query).get("uddg")
        if target:
            return target[0]
    # A protocol-relative link has no scheme, which requests refuses to fetch.
    if href.startswith("//"):
        return f"https:{href}"
    return href


def duckduckgo_search(query: str, max_results: int = 10):
    """Search DuckDuckGo's HTML endpoint and scrape the result links.

    Args:
        query: Search terms, sent as the ``q`` form field.
        max_results: Upper bound on the number of results returned.

    Returns:
        A list of ``(title, url)`` tuples taken from the ``a.result__a``
        anchors, in page order.

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an error status.
    """
    response = requests.post(
        "https://html.duckduckgo.com/html/",
        data={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    results = []
    for anchor in soup.select("a.result__a"):
        if len(results) >= max_results:
            break
        href = anchor.get("href")
        # Strip before testing so whitespace-only titles are skipped too.
        title = anchor.get_text().strip()
        if href and title:
            results.append((title, _resolve_result_href(href)))
    return results

# ========== OPDS Feed Generators ==========

def generate_root_feed():
    """Build the OPDS root navigation feed.

    The feed holds two ``subsection`` entries: "Search" (``/opds/search``)
    and "Cached" (``/opds/cached``, a placeholder).

    Returns:
        The serialized Atom document as UTF-8 bytes, XML declaration included.
    """
    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    # One timezone-aware timestamp for the whole document: datetime.utcnow()
    # is deprecated since Python 3.12, and taking the time once keeps every
    # <updated> element identical.
    updated = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    feed = ET.Element("feed", xmlns=ns)
    ET.SubElement(feed, "id").text = "urn:uuid:duckopds-catalog"
    ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
    ET.SubElement(feed, "updated").text = updated

    # (entry id, title, href, OPDS feed kind of the link target)
    sections = (
        ("urn:uuid:duckopds-search-section", "Search", "/opds/search", "acquisition"),
        ("urn:uuid:duckopds-cached-section", "Cached", "/opds/cached", "navigation"),
    )
    for entry_id, title, href, kind in sections:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "id").text = entry_id
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "updated").text = updated
        ET.SubElement(entry, "link", {
            "rel": "subsection",
            "href": href,
            "type": f"application/atom+xml;profile=opds-catalog;kind={kind}",
        })

    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)


def generate_search_feed(query: str, results):
    """Build the OPDS acquisition feed for one search.

    Args:
        query: The search terms; shown in the feed title and percent-encoded
            into the feed id.
        results: Iterable of ``(title, url)`` pairs, one per feed entry.

    Returns:
        The serialized Atom document as UTF-8 bytes, XML declaration included.
        Each entry links to ``/download?url=<percent-encoded url>``.
    """
    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    # One timezone-aware timestamp for the whole document: datetime.utcnow()
    # is deprecated since Python 3.12, and there is no reason to re-read the
    # clock for every entry.
    updated = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    feed = ET.Element("feed", xmlns=ns)
    ET.SubElement(feed, "id").text = f"urn:uuid:duckopds-search-{quote(query)}"
    ET.SubElement(feed, "title").text = f"Search results for '{query}'"
    ET.SubElement(feed, "updated").text = updated

    for title, url in results:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "id").text = url
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "updated").text = updated
        ET.SubElement(entry, "link", {
            "rel": "http://opds-spec.org/acquisition",
            # safe='' so that "/", "?", "&" and "=" inside the target URL
            # cannot be mistaken for parts of the download URL itself.
            "href": f"/download?url={quote(url, safe='')}",
            "type": "application/fb2+xml",
        })
    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)

# ========== Routes ==========

@app.get("/opds", include_in_schema=False)
def opds_root() -> Response:
    """Serve the root catalog feed as an OPDS navigation document."""
    return Response(
        media_type="application/atom+xml;profile=opds-catalog;kind=navigation",
        content=generate_root_feed(),
    )

@app.get("/opds/search")
def opds_search(
    q: str = Query(..., description="Search query"),
    searchType: str = Query(None, alias="searchType")
) -> Response:
    """Run a DuckDuckGo search and return the hits as an OPDS feed.

    Args:
        q: Search terms (required).
        searchType: Accepted but ignored.

    Returns:
        An OPDS acquisition feed with one entry per search result.

    Raises:
        HTTPException: 502 when the request to the search backend fails.
    """
    try:
        results = duckduckgo_search(q)
    except requests.RequestException as exc:
        # An upstream failure is not a bug in this service; report it as a
        # bad-gateway error instead of an opaque 500.
        raise HTTPException(
            status_code=502, detail="Search backend request failed"
        ) from exc
    xml_data = generate_search_feed(q, results)
    return Response(
        content=xml_data,
        media_type="application/atom+xml;profile=opds-catalog;kind=acquisition"
    )

@app.get("/download")
def download_fb2(url: str) -> Response:
    """Fetch a web page and return its text as an FB2 attachment.

    Args:
        url: Absolute ``http``/``https`` URL of the page to convert.

    Returns:
        The FB2 document, with a ``Content-Disposition`` filename derived
        from the page title.

    Raises:
        HTTPException: 400 when the URL scheme is not http(s); 502 when the
            page cannot be fetched.
    """
    # The URL comes straight from the client, so only plain web schemes are
    # accepted. NOTE(review): this does not stop requests to internal hosts
    # (SSRF); add host filtering if the service is reachable by untrusted
    # clients.
    if urlparse(url).scheme not in ("http", "https"):
        raise HTTPException(
            status_code=400, detail="Only http(s) URLs can be downloaded"
        )
    try:
        res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        res.raise_for_status()
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502, detail="Could not fetch the requested page"
        ) from exc

    # Parse the raw bytes. When the Content-Type header carries no charset,
    # requests decodes text/* as ISO-8859-1, which garbles UTF-8 pages; in
    # that case Beautiful Soup detects the encoding from the document itself.
    content_type = res.headers.get("Content-Type", "")
    declared_encoding = res.encoding if "charset" in content_type.lower() else None
    soup = BeautifulSoup(res.content, "html.parser", from_encoding=declared_encoding)

    raw_title = soup.title.string if soup.title and soup.title.string else ""
    # A whitespace-only <title> must fall back as well.
    title = raw_title.strip() or "article"
    # Without a <body>, str(soup.body) would be the literal text "None".
    source = soup.body if soup.body is not None else soup
    fb2 = html_to_fb2(title, str(source))

    # Truncate before percent-encoding so an escape sequence is never cut in
    # half.
    stem = quote(title.replace(" ", "_")[:30], safe="")
    filename = f"{stem}.fb2"
    return Response(
        content=fb2,
        media_type="application/fb2+xml",
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )