Ivan000 commited on
Commit
87c1bc6
·
verified ·
1 Parent(s): b93b826

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +64 -67
main.py CHANGED
@@ -3,19 +3,17 @@
3
  from fastapi import FastAPI, Query
4
  from fastapi.responses import Response
5
  import requests
 
 
6
  import xml.etree.ElementTree as ET
7
  from datetime import datetime
8
- from typing import Optional
9
 
10
  app = FastAPI()
11
 
12
  # ========== FB2 Generator ==========
13
- from bs4 import BeautifulSoup
14
- from urllib.parse import quote
15
-
16
  def html_to_fb2(title: str, body: str) -> str:
17
  clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
18
- return f"""<?xml version='1.0' encoding='utf-8'?>
19
  <FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
20
  <description>
21
  <title-info>
@@ -32,81 +30,79 @@ def html_to_fb2(title: str, body: str) -> str:
32
  </section>
33
  </body>
34
  </FictionBook>"""
 
35
 
36
- # ========== DuckDuckGo JSON Search ==========
37
  def duckduckgo_search(query: str):
38
- api_url = "https://api.duckduckgo.com/"
39
- params = {
40
- "q": query,
41
- "format": "json",
42
- "no_html": 1,
43
- "skip_disambig": 1
44
- }
45
- res = requests.get(api_url, params=params, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
46
  res.raise_for_status()
47
- data = res.json()
48
  results = []
49
- def extract_topics(topics):
50
- for item in topics:
51
- if "FirstURL" in item and "Text" in item:
52
- results.append((item["Text"], item["FirstURL"]))
53
- elif "Topics" in item:
54
- extract_topics(item["Topics"])
55
- extract_topics(data.get("RelatedTopics", []))
56
- return results[:10]
57
-
58
- # ========== OPDS Feed Generator ==========
59
- def create_feed(entries: list, q: Optional[str]) -> bytes:
 
60
  ns = "http://www.w3.org/2005/Atom"
61
- opds_ns = "http://opds-spec.org/2010/catalog"
62
  ET.register_namespace("", ns)
63
- ET.register_namespace("opds", opds_ns)
64
-
65
  feed = ET.Element("feed", xmlns=ns)
66
- ET.SubElement(feed, "id").text = "urn:uuid:duckopds-catalog"
67
  ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
68
  ET.SubElement(feed, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
69
 
70
- if not q:
71
- ET.SubElement(feed, "link", {
72
- "rel": "search",
73
- "type": "application/atom+xml",
74
- "href": "/opds?q={searchTerms}",
75
- "templated": "true"
76
- })
77
-
78
- for entry_info in entries:
79
- entry = ET.SubElement(feed, "entry")
80
- ET.SubElement(entry, "id").text = entry_info['id']
81
- ET.SubElement(entry, "title").text = entry_info['title']
82
- ET.SubElement(entry, "updated").text = entry_info['updated']
83
- ET.SubElement(entry, "link", entry_info['link'])
84
- ET.SubElement(entry, "content", {"type": "text"}).text = entry_info['title']
85
- ET.SubElement(entry, "author").append(ET.Element("name", text="DuckOPDS"))
86
 
87
  return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # ========== Routes ==========
90
- @app.get("/opds")
91
- def opds(q: Optional[str] = Query(None, description="Search query")) -> Response:
92
- entries = []
93
- kind = "acquisition"
94
- if q:
95
- results = duckduckgo_search(q)
96
- for title, url in results:
97
- entries.append({
98
- 'id': url,
99
- 'title': title,
100
- 'updated': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
101
- 'link': {
102
- 'rel': 'http://opds-spec.org/acquisition',
103
- 'href': f"/download?url={quote(url, safe='')}",
104
- 'type': 'application/fb2+xml'
105
- }
106
- })
107
- xml_data = create_feed(entries, q)
108
- return Response(content=xml_data,
109
- media_type="application/atom+xml;charset=utf-8")
110
 
111
  @app.get("/download")
112
  def download_fb2(url: str) -> Response:
@@ -114,7 +110,8 @@ def download_fb2(url: str) -> Response:
114
  res.raise_for_status()
115
  soup = BeautifulSoup(res.text, "html.parser")
116
  title = soup.title.string.strip() if soup.title and soup.title.string else "article"
117
- fb2 = html_to_fb2(title, str(soup.body))
 
118
  filename = f"{quote(title, safe='').replace('%20','_')[:30]}.fb2"
119
  return Response(
120
  content=fb2,
 
3
  from fastapi import FastAPI, Query
4
  from fastapi.responses import Response
5
  import requests
6
+ from bs4 import BeautifulSoup
7
+ from urllib.parse import quote
8
  import xml.etree.ElementTree as ET
9
  from datetime import datetime
 
10
 
11
  app = FastAPI()
12
 
13
  # ========== FB2 Generator ==========
 
 
 
14
  def html_to_fb2(title: str, body: str) -> str:
15
  clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
16
+ fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
17
  <FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
18
  <description>
19
  <title-info>
 
30
  </section>
31
  </body>
32
  </FictionBook>"""
33
+ return fb2
34
 
35
+ # ========== DuckDuckGo Search ==========
36
def duckduckgo_search(query: str):
    """Search DuckDuckGo's JS-free HTML endpoint; return up to 10 (title, url) pairs.

    Args:
        query: the search string, sent as form data to html.duckduckgo.com.

    Returns:
        List of (title, url) tuples, at most 10.

    Raises:
        requests.HTTPError: if the search request fails (raise_for_status).

    Note: DuckDuckGo's HTML results wrap each target in a scheme-relative
    redirect of the form //duckduckgo.com/l/?uddg=<encoded-url>. Those hrefs
    are not directly fetchable (no scheme), so we unwrap the uddg parameter
    to the real destination URL before returning it.
    """
    from urllib.parse import urlparse, parse_qs

    res = requests.post(
        "https://html.duckduckgo.com/html/",
        data={"q": query},
        headers={"User-Agent": "Mozilla/5.0"},  # a browser-ish UA avoids bot blocking
        timeout=10,
    )
    res.raise_for_status()

    soup = BeautifulSoup(res.text, "html.parser")
    results = []
    for a in soup.select("a.result__a"):
        href = a.get("href")
        title = a.get_text()
        if not (href and title):
            continue
        # Unwrap DuckDuckGo's redirect link: .../l/?uddg=<percent-encoded-url>
        parsed = urlparse(href)
        qs = parse_qs(parsed.query)
        if parsed.path.rstrip("/").endswith("/l") and "uddg" in qs:
            href = qs["uddg"][0]
        results.append((title.strip(), href))
        if len(results) >= 10:
            break
    return results
54
+
55
+ # ========== OPDS Feed Generators ==========
56
+
57
def generate_root_feed():
    """Build the OPDS root catalog as UTF-8 Atom XML bytes.

    The root feed carries no entries; it only advertises the search
    endpoint via an OpenSearch "templated" link, so OPDS clients know
    to query /opds/search?q=... for actual content.

    Returns:
        bytes: serialized Atom feed including the XML declaration.
    """
    from datetime import timezone

    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    feed = ET.Element("feed", xmlns=ns)

    # Atom (RFC 4287) requires every feed to carry an <id> element.
    ET.SubElement(feed, "id").text = "urn:duckopds:root"
    ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
    # timestamp formatted the same way yields an identical string.
    ET.SubElement(feed, "updated").text = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Relative search link (OpenSearch template)
    feed.append(ET.Element("link", {
        "rel": "search",
        "type": "application/atom+xml",
        "href": "/opds/search?q={searchTerms}",
        "templated": "true"
    }))

    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
73
 
74
+
75
def generate_search_feed(query: str, results):
    """Build an Atom/OPDS acquisition feed for a set of search results.

    Args:
        query: the user's search string (echoed in the feed title/id).
        results: iterable of (title, url) pairs.

    Returns:
        bytes: UTF-8 Atom XML; each entry links to /download with the
        target URL percent-encoded, typed as application/fb2+xml.
    """
    from datetime import timezone

    ns = "http://www.w3.org/2005/Atom"
    ET.register_namespace("", ns)
    feed = ET.Element("feed", xmlns=ns)

    # Atom (RFC 4287) requires a feed-level <id>.
    ET.SubElement(feed, "id").text = f"urn:duckopds:search:{query}"
    ET.SubElement(feed, "title").text = f"Search results for '{query}'"
    # One aware-UTC timestamp (utcnow() is deprecated since 3.12),
    # computed once and reused for the feed and every entry.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    ET.SubElement(feed, "updated").text = now

    for title, url in results:
        entry = ET.SubElement(feed, "entry")
        ET.SubElement(entry, "title").text = title
        ET.SubElement(entry, "id").text = url
        ET.SubElement(entry, "updated").text = now
        ET.SubElement(entry, "link", {
            "rel": "http://opds-spec.org/acquisition",
            "href": f"/download?url={quote(url, safe='')}",
            "type": "application/fb2+xml"
        })
    return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
93
+
94
  # ========== Routes ==========
95
+
96
@app.get("/opds", include_in_schema=False)
def opds_root() -> Response:
    """Serve the OPDS root catalog, which advertises the search endpoint."""
    return Response(content=generate_root_feed(), media_type="application/atom+xml")
100
+
101
@app.get("/opds/search")
def opds_search(q: str = Query(..., description="Search query")) -> Response:
    """Run a DuckDuckGo search for *q* and return the hits as an OPDS feed."""
    feed_xml = generate_search_feed(q, duckduckgo_search(q))
    return Response(content=feed_xml, media_type="application/atom+xml")
 
 
 
 
 
 
 
 
 
106
 
107
  @app.get("/download")
108
  def download_fb2(url: str) -> Response:
 
110
  res.raise_for_status()
111
  soup = BeautifulSoup(res.text, "html.parser")
112
  title = soup.title.string.strip() if soup.title and soup.title.string else "article"
113
+ body = str(soup.body)
114
+ fb2 = html_to_fb2(title, body)
115
  filename = f"{quote(title, safe='').replace('%20','_')[:30]}.fb2"
116
  return Response(
117
  content=fb2,