Update main.py
Browse files
main.py
CHANGED
@@ -1,18 +1,17 @@
|
|
1 |
-
#
|
2 |
|
3 |
from fastapi import FastAPI, Query, Response
|
4 |
from fastapi.responses import XMLResponse
|
5 |
import requests
|
6 |
from bs4 import BeautifulSoup
|
7 |
-
from urllib.parse import
|
8 |
import xml.etree.ElementTree as ET
|
9 |
-
import re
|
10 |
|
11 |
app = FastAPI()
|
12 |
|
13 |
# ========== FB2 Generator ==========
|
14 |
def html_to_fb2(title: str, body: str) -> str:
|
15 |
-
clean_text = BeautifulSoup(body, "html.parser").get_text()
|
16 |
fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
|
17 |
<FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
|
18 |
<description>
|
@@ -24,15 +23,22 @@ def html_to_fb2(title: str, body: str) -> str:
|
|
24 |
</title-info>
|
25 |
</description>
|
26 |
<body>
|
27 |
-
<section><title><p>{title}</p></title
|
|
|
|
|
28 |
</body>
|
29 |
-
</FictionBook>
|
30 |
-
"""
|
31 |
return fb2
|
32 |
|
33 |
# ========== DuckDuckGo Search ==========
|
34 |
def duckduckgo_search(query: str):
|
35 |
-
res = requests.
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
soup = BeautifulSoup(res.text, "html.parser")
|
37 |
results = []
|
38 |
for a in soup.select("a.result__a"):
|
@@ -44,12 +50,13 @@ def duckduckgo_search(query: str):
|
|
44 |
break
|
45 |
return results
|
46 |
|
47 |
-
# ========== OPDS Feed ==========
|
48 |
def generate_opds(query: str, results):
|
49 |
ns = "http://www.w3.org/2005/Atom"
|
50 |
ET.register_namespace("", ns)
|
51 |
feed = ET.Element("feed", xmlns=ns)
|
52 |
ET.SubElement(feed, "title").text = f"Search results for '{query}'"
|
|
|
53 |
for title, url in results:
|
54 |
entry = ET.SubElement(feed, "entry")
|
55 |
ET.SubElement(entry, "title").text = title
|
@@ -57,7 +64,7 @@ def generate_opds(query: str, results):
|
|
57 |
ET.SubElement(entry, "updated").text = "2025-07-31T12:00:00Z"
|
58 |
ET.SubElement(entry, "link", {
|
59 |
"rel": "http://opds-spec.org/acquisition",
|
60 |
-
"href": f"/download?url={quote(url)}",
|
61 |
"type": "application/fb2+xml"
|
62 |
})
|
63 |
return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
|
@@ -70,12 +77,16 @@ def opds_catalog(q: str = Query(..., description="Search query")):
|
|
70 |
|
71 |
@app.get("/download")
|
72 |
def download_fb2(url: str):
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
# File: main.py
|
2 |
|
3 |
from fastapi import FastAPI, Query, Response
|
4 |
from fastapi.responses import XMLResponse
|
5 |
import requests
|
6 |
from bs4 import BeautifulSoup
|
7 |
+
from urllib.parse import quote
|
8 |
import xml.etree.ElementTree as ET
|
|
|
9 |
|
10 |
app = FastAPI()
|
11 |
|
12 |
# ========== FB2 Generator ==========
|
13 |
def html_to_fb2(title: str, body: str) -> str:
|
14 |
+
clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
|
15 |
fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
|
16 |
<FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
|
17 |
<description>
|
|
|
23 |
</title-info>
|
24 |
</description>
|
25 |
<body>
|
26 |
+
<section><title><p>{title}</p></title>
|
27 |
+
<p>{clean_text}</p>
|
28 |
+
</section>
|
29 |
</body>
|
30 |
+
</FictionBook>"""
|
|
|
31 |
return fb2
|
32 |
|
33 |
# ========== DuckDuckGo Search ==========
|
34 |
def duckduckgo_search(query: str):
|
35 |
+
res = requests.post(
|
36 |
+
"https://html.duckduckgo.com/html/",
|
37 |
+
data={"q": query},
|
38 |
+
headers={"User-Agent": "Mozilla/5.0"},
|
39 |
+
timeout=10
|
40 |
+
)
|
41 |
+
res.raise_for_status()
|
42 |
soup = BeautifulSoup(res.text, "html.parser")
|
43 |
results = []
|
44 |
for a in soup.select("a.result__a"):
|
|
|
50 |
break
|
51 |
return results
|
52 |
|
53 |
+
# ========== OPDS Feed Generator ==========
|
54 |
def generate_opds(query: str, results):
|
55 |
ns = "http://www.w3.org/2005/Atom"
|
56 |
ET.register_namespace("", ns)
|
57 |
feed = ET.Element("feed", xmlns=ns)
|
58 |
ET.SubElement(feed, "title").text = f"Search results for '{query}'"
|
59 |
+
ET.SubElement(feed, "updated").text = "2025-07-31T12:00:00Z"
|
60 |
for title, url in results:
|
61 |
entry = ET.SubElement(feed, "entry")
|
62 |
ET.SubElement(entry, "title").text = title
|
|
|
64 |
ET.SubElement(entry, "updated").text = "2025-07-31T12:00:00Z"
|
65 |
ET.SubElement(entry, "link", {
|
66 |
"rel": "http://opds-spec.org/acquisition",
|
67 |
+
"href": f"/download?url={quote(url, safe='')}",
|
68 |
"type": "application/fb2+xml"
|
69 |
})
|
70 |
return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
|
|
|
77 |
|
78 |
@app.get("/download")
def download_fb2(url: str):
    """Fetch an arbitrary web page and return it as a downloadable FB2 file.

    Args:
        url: Page URL to fetch (arrives percent-encoded via the OPDS feed's
            acquisition link and is decoded by FastAPI's query parsing).

    Returns:
        Response: the generated FB2 document with media type
        ``application/fb2+xml`` and a ``Content-Disposition`` header so
        e-reader clients save it as a file.

    Raises:
        requests.HTTPError: if the upstream page responds with an error status.
    """
    res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
    # Fall back to a generic title when the page has no usable <title>.
    title = soup.title.string.strip() if soup.title and soup.title.string else "article"
    # html_to_fb2 strips markup itself, so pass the raw <body> HTML through.
    # Guard against pages with no <body>: str(None) would embed "None".
    body = str(soup.body) if soup.body is not None else ""
    fb2 = html_to_fb2(title, body)
    # Percent-encode the title so the header value stays ASCII-safe, turn
    # encoded spaces into underscores, and cap the length at 30 chars.
    filename = f"{quote(title, safe='').replace('%20', '_')[:30]}.fb2"
    return Response(
        content=fb2,
        media_type="application/fb2+xml",
        # BUG FIX: the computed `filename` was previously unused — the header
        # carried a literal placeholder instead of the actual file name.
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
92 |
+
|