Ivan000 commited on
Commit
87c1bc6
·
verified ·
1 Parent(s): b93b826

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +64 -67
main.py CHANGED
@@ -3,19 +3,17 @@
3
  from fastapi import FastAPI, Query
4
  from fastapi.responses import Response
5
  import requests
 
 
6
  import xml.etree.ElementTree as ET
7
  from datetime import datetime
8
- from typing import Optional
9
 
10
  app = FastAPI()
11
 
12
  # ========== FB2 Generator ==========
13
- from bs4 import BeautifulSoup
14
- from urllib.parse import quote
15
-
16
  def html_to_fb2(title: str, body: str) -> str:
17
  clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
18
- return f"""<?xml version='1.0' encoding='utf-8'?>
19
  <FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
20
  <description>
21
  <title-info>
@@ -32,81 +30,79 @@ def html_to_fb2(title: str, body: str) -> str:
32
  </section>
33
  </body>
34
  </FictionBook>"""
 
35
 
36
- # ========== DuckDuckGo JSON Search ==========
37
  def duckduckgo_search(query: str):
38
- api_url = "https://api.duckduckgo.com/"
39
- params = {
40
- "q": query,
41
- "format": "json",
42
- "no_html": 1,
43
- "skip_disambig": 1
44
- }
45
- res = requests.get(api_url, params=params, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
46
  res.raise_for_status()
47
- data = res.json()
48
  results = []
49
- def extract_topics(topics):
50
- for item in topics:
51
- if "FirstURL" in item and "Text" in item:
52
- results.append((item["Text"], item["FirstURL"]))
53
- elif "Topics" in item:
54
- extract_topics(item["Topics"])
55
- extract_topics(data.get("RelatedTopics", []))
56
- return results[:10]
57
-
58
- # ========== OPDS Feed Generator ==========
59
- def create_feed(entries: list, q: Optional[str]) -> bytes:
 
60
  ns = "http://www.w3.org/2005/Atom"
61
- opds_ns = "http://opds-spec.org/2010/catalog"
62
  ET.register_namespace("", ns)
63
- ET.register_namespace("opds", opds_ns)
64
-
65
  feed = ET.Element("feed", xmlns=ns)
66
- ET.SubElement(feed, "id").text = "urn:uuid:duckopds-catalog"
67
  ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
68
  ET.SubElement(feed, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
69
 
70
- if not q:
71
- ET.SubElement(feed, "link", {
72
- "rel": "search",
73
- "type": "application/atom+xml",
74
- "href": "/opds?q={searchTerms}",
75
- "templated": "true"
76
- })
77
-
78
- for entry_info in entries:
79
- entry = ET.SubElement(feed, "entry")
80
- ET.SubElement(entry, "id").text = entry_info['id']
81
- ET.SubElement(entry, "title").text = entry_info['title']
82
- ET.SubElement(entry, "updated").text = entry_info['updated']
83
- ET.SubElement(entry, "link", entry_info['link'])
84
- ET.SubElement(entry, "content", {"type": "text"}).text = entry_info['title']
85
- ET.SubElement(entry, "author").append(ET.Element("name", text="DuckOPDS"))
86
 
87
  return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # ========== Routes ==========
90
- @app.get("/opds")
91
- def opds(q: Optional[str] = Query(None, description="Search query")) -> Response:
92
- entries = []
93
- kind = "acquisition"
94
- if q:
95
- results = duckduckgo_search(q)
96
- for title, url in results:
97
- entries.append({
98
- 'id': url,
99
- 'title': title,
100
- 'updated': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
101
- 'link': {
102
- 'rel': 'http://opds-spec.org/acquisition',
103
- 'href': f"/download?url={quote(url, safe='')}",
104
- 'type': 'application/fb2+xml'
105
- }
106
- })
107
- xml_data = create_feed(entries, q)
108
- return Response(content=xml_data,
109
- media_type="application/atom+xml;charset=utf-8")
110
 
111
  @app.get("/download")
112
  def download_fb2(url: str) -> Response:
@@ -114,7 +110,8 @@ def download_fb2(url: str) -> Response:
114
  res.raise_for_status()
115
  soup = BeautifulSoup(res.text, "html.parser")
116
  title = soup.title.string.strip() if soup.title and soup.title.string else "article"
117
- fb2 = html_to_fb2(title, str(soup.body))
 
118
  filename = f"{quote(title, safe='').replace('%20','_')[:30]}.fb2"
119
  return Response(
120
  content=fb2,
 
3
  from fastapi import FastAPI, Query
4
  from fastapi.responses import Response
5
  import requests
6
+ from bs4 import BeautifulSoup
7
+ from urllib.parse import quote
8
  import xml.etree.ElementTree as ET
9
  from datetime import datetime
 
10
 
11
  app = FastAPI()
12
 
13
  # ========== FB2 Generator ==========
 
 
 
14
  def html_to_fb2(title: str, body: str) -> str:
15
  clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
16
+ fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
17
  <FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
18
  <description>
19
  <title-info>
 
30
  </section>
31
  </body>
32
  </FictionBook>"""
33
+ return fb2
34
 
35
+ # ========== DuckDuckGo Search ==========
36
def duckduckgo_search(query: str):
    """Search DuckDuckGo's JS-free HTML endpoint; return up to 10 (title, url) pairs.

    Args:
        query: the search string, sent as form data to html.duckduckgo.com.

    Returns:
        List of (title, url) tuples, at most 10.

    Raises:
        requests.HTTPError: if the search request fails (raise_for_status).

    Note: DuckDuckGo's HTML results wrap each target in a scheme-relative
    redirect of the form //duckduckgo.com/l/?uddg=<encoded-url>. Those hrefs
    are not directly fetchable (no scheme), so we unwrap the uddg parameter
    to the real destination URL before returning it.
    """
    from urllib.parse import urlparse, parse_qs

    res = requests.post(
        "https://html.duckduckgo.com/html/",
        data={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},  # a browser-ish UA avoids bot blocking
        timeout=10,
    )
    res.raise_for_status()

    soup = BeautifulSoup(res.text, "html.parser")
    results = []
    for a in soup.select("a.result__a"):
        href = a.get("href")
        title = a.get_text()
        if not (href and title):
            continue
        # Unwrap DuckDuckGo's redirect link: .../l/?uddg=<percent-encoded-url>
        parsed = urlparse(href)
        qs = parse_qs(parsed.query)
        if parsed.path.rstrip("/").endswith("/l") and "uddg" in qs:
            href = qs["uddg"][0]
        results.append((title.strip(), href))
        if len(results) >= 10:
            break
    return results
54
+
55
+ # ========== OPDS Feed Generators ==========
56
+
57
def generate_root_feed():
    """Build the OPDS root catalog as UTF-8 Atom XML bytes.

    The root feed carries no entries; it only advertises the search
    endpoint via an OpenSearch "templated" link, so OPDS clients know
    to query /opds/search?q=... for actual content.

    Returns:
        bytes: serialized Atom feed including the XML declaration.
    """
    from datetime import timezone

    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    feed = ET.Element("feed", xmlns=ns)

    # Atom (RFC 4287) requires every feed to carry an <id> element.
    ET.SubElement(feed, "id").text = "urn:duckopds:root"
    ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
    # timestamp formatted the same way yields an identical string.
    ET.SubElement(feed, "updated").text = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Relative search link (OpenSearch template)
    feed.append(ET.Element("link", {
        "rel": "search",
        "type": "application/atom+xml",
        "href": "/opds/search?q={searchTerms}",
        "templated": "true"
    }))

    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
73
 
74
+
75
def generate_search_feed(query: str, results):
    """Build an Atom/OPDS acquisition feed for a set of search results.

    Args:
        query: the user's search string (echoed in the feed title/id).
        results: iterable of (title, url) pairs.

    Returns:
        bytes: UTF-8 Atom XML; each entry links to /download with the
        target URL percent-encoded, typed as application/fb2+xml.
    """
    from datetime import timezone

    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    feed = ET.Element("feed", xmlns=ns)

    # Atom (RFC 4287) requires a feed-level <id>.
    ET.SubElement(feed, "id").text = f"urn:duckopds:search:{query}"
    ET.SubElement(feed, "title").text = f"Search results for '{query}'"
    # One aware-UTC timestamp (utcnow() is deprecated since 3.12),
    # computed once and reused for the feed and every entry.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    ET.SubElement(feed, "updated").text = now

    for title, url in results:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "id").text = url
        ET.SubElement(entry, "updated").text = now
        ET.SubElement(entry, "link", {
            "rel": "http://opds-spec.org/acquisition",
            "href": f"/download?url={quote(url, safe='')}",
            "type": "application/fb2+xml"
        })
    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
93
+
94
  # ========== Routes ==========
95
+
96
@app.get("/opds", include_in_schema=False)
def opds_root() -> Response:
    """Serve the OPDS root catalog, which advertises the search endpoint."""
    return Response(content=generate_root_feed(), media_type="application/atom+xml")
100
+
101
@app.get("/opds/search")
def opds_search(q: str = Query(..., description="Search query")) -> Response:
    """Run a DuckDuckGo search for *q* and return the hits as an OPDS feed."""
    feed_xml = generate_search_feed(q, duckduckgo_search(q))
    return Response(content=feed_xml, media_type="application/atom+xml")
 
 
 
 
 
 
 
 
 
106
 
107
  @app.get("/download")
108
  def download_fb2(url: str) -> Response:
 
110
  res.raise_for_status()
111
  soup = BeautifulSoup(res.text, "html.parser")
112
  title = soup.title.string.strip() if soup.title and soup.title.string else "article"
113
+ body = str(soup.body)
114
+ fb2 = html_to_fb2(title, body)
115
  filename = f"{quote(title, safe='').replace('%20','_')[:30]}.fb2"
116
  return Response(
117
  content=fb2,