Ivan000 committed
Commit 528d174 · verified · 1 Parent(s): 89e8c7a

Update main.py

Files changed (1)
  1. main.py +57 -102
main.py CHANGED
@@ -14,7 +14,7 @@ app = FastAPI()
  # ========== FB2 Generator ==========
  def html_to_fb2(title: str, body: str) -> str:
      clean_text = BeautifulSoup(body, "html.parser").get_text(separator="\n")
-     fb2 = f"""<?xml version='1.0' encoding='utf-8'?>
+     return f"""<?xml version='1.0' encoding='utf-8'?>
  <FictionBook xmlns:xlink='http://www.w3.org/1999/xlink'>
  <description>
  <title-info>
@@ -30,8 +30,7 @@ def html_to_fb2(title: str, body: str) -> str:
  <p>{clean_text}</p>
  </section>
  </body>
- </FictionBook>"""
-     return fb2
+ </FictionBook>"""

  # ========== DuckDuckGo Search ==========
  def duckduckgo_search(query: str):
@@ -53,114 +52,71 @@ def duckduckgo_search(query: str):
              break
      return results

- # ========== OPDS Feed Generators ==========
- def generate_root_feed():
+ # ========== OPDS Feed Generator ==========
+ def generate_feed(entries):
      ns = "http://www.w3.org/2005/Atom"
      ET.register_namespace("", ns)
      feed = ET.Element("feed", xmlns=ns)
      ET.SubElement(feed, "id").text = "urn:uuid:duckopds-catalog"
      ET.SubElement(feed, "title").text = "DuckDuckGo OPDS Catalog"
      ET.SubElement(feed, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
- 
-     # Entry: Search Section
-     entry_search = ET.SubElement(feed, "entry")
-     ET.SubElement(entry_search, "id").text = "urn:uuid:duckopds-search-section"
-     ET.SubElement(entry_search, "title").text = "Search"
-     ET.SubElement(entry_search, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-     ET.SubElement(entry_search, "link", {
-         "rel": "subsection",
-         "href": "/opds/search",
-         "type": "application/atom+xml;profile=opds-catalog;kind=acquisition"
-     })
- 
-     # Entry: Cached Section
-     entry_cached = ET.SubElement(feed, "entry")
-     ET.SubElement(entry_cached, "id").text = "urn:uuid:duckopds-cached-section"
-     ET.SubElement(entry_cached, "title").text = "Cached"
-     ET.SubElement(entry_cached, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-     ET.SubElement(entry_cached, "link", {
-         "rel": "subsection",
-         "href": "/opds/cached",
-         "type": "application/atom+xml;profile=opds-catalog;kind=navigation"
-     })
- 
-     return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
- 
- 
- def generate_search_form_feed():
-     ns = "http://www.w3.org/2005/Atom"
-     ET.register_namespace("", ns)
-     feed = ET.Element("feed", xmlns=ns)
-     ET.SubElement(feed, "id").text = "urn:uuid:duckopds-search-form"
-     ET.SubElement(feed, "title").text = "Search"
-     ET.SubElement(feed, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-     # Templated search link
-     feed.append(ET.Element("link", {
-         "rel": "search",
-         "type": "application/atom+xml;profile=opds-catalog;kind=acquisition",
-         "href": "/opds/search?q={searchTerms}",
-         "templated": "true"
-     }))
-     return ET.tostring(feed, encoding="utf-8", xml_declaration=True)
- 
- 
- def generate_search_results_feed(query: str, results):
-     ns = "http://www.w3.org/2005/Atom"
-     ET.register_namespace("", ns)
-     feed = ET.Element("feed", xmlns=ns)
-     ET.SubElement(feed, "id").text = f"urn:uuid:duckopds-search-{quote(query)}"
-     ET.SubElement(feed, "title").text = f"Search results for '{query}'"
-     ET.SubElement(feed, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-     for title, url in results:
+     for entry_info in entries:
          entry = ET.SubElement(feed, "entry")
-         ET.SubElement(entry, "id").text = url
-         ET.SubElement(entry, "title").text = title
-         ET.SubElement(entry, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-         ET.SubElement(entry, "link", {
-             "rel": "http://opds-spec.org/acquisition",
-             "href": f"/download?url={quote(url, safe='')}",
-             "type": "application/fb2+xml"
-         })
+         ET.SubElement(entry, "id").text = entry_info['id']
+         ET.SubElement(entry, "title").text = entry_info['title']
+         ET.SubElement(entry, "updated").text = entry_info['updated']
+         ET.SubElement(entry, "link", entry_info['link'])
      return ET.tostring(feed, encoding="utf-8", xml_declaration=True)

  # ========== Routes ==========
  @app.get("/opds", include_in_schema=False)
- def opds_root() -> Response:
-     xml_data = generate_root_feed()
-     return Response(
-         content=xml_data,
-         media_type="application/atom+xml;profile=opds-catalog;kind=navigation"
-     )
- 
- @app.get("/opds/search")
- def opds_search(
-     q: Optional[str] = Query(None, description="Search query"),
-     searchType: Optional[str] = Query(None, alias="searchType")
- ) -> Response:
-     if not q:
-         xml_data = generate_search_form_feed()
-         return Response(content=xml_data,
-                         media_type="application/atom+xml;profile=opds-catalog;kind=search")
-     # perform actual search for q
-     results = duckduckgo_search(q)
-     xml_data = generate_search_results_feed(q, results)
-     return Response(
-         content=xml_data,
-         media_type="application/atom+xml;profile=opds-catalog;kind=acquisition"
-     )
- 
- @app.get("/opds/cached")
- def opds_cached() -> Response:
-     # placeholder empty feed
-     feed = ET.Element("feed", xmlns="http://www.w3.org/2005/Atom")
-     ET.SubElement(feed, "id").text = "urn:uuid:duckopds-cached"
-     ET.SubElement(feed, "title").text = "Cached Items"
-     ET.SubElement(feed, "updated").text = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-     xml_data = ET.tostring(feed, encoding="utf-8", xml_declaration=True)
-     return Response(
-         content=xml_data,
-         media_type="application/atom+xml;profile=opds-catalog;kind=navigation"
-     )
+ def root_opds() -> Response:
+     # Only search section
+     entries = [{
+         'id': 'urn:uuid:duckopds-search-section',
+         'title': 'Search',
+         'updated': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
+         'link': {
+             'rel': 'subsection',
+             'href': '/opds',  # self endpoint handles search form if no q
+             'type': 'application/atom+xml;profile=opds-catalog;kind=search'
+         }
+     }]
+     xml_data = generate_feed(entries)
+     return Response(content=xml_data,
+                     media_type="application/atom+xml;profile=opds-catalog;kind=navigation")
+ 
+ @app.get("/opds")
+ def opds_search(q: Optional[str] = Query(None, description="Search query")) -> Response:
+     entries = []
+     # Always include search entry at top
+     entries.append({
+         'id': 'urn:uuid:duckopds-search-section',
+         'title': 'Search',
+         'updated': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
+         'link': {
+             'rel': 'subsection',
+             'href': '/opds',
+             'type': 'application/atom+xml;profile=opds-catalog;kind=search'
+         }
+     })
+     if q:
+         results = duckduckgo_search(q)
+         for title, url in results:
+             entries.append({
+                 'id': url,
+                 'title': title,
+                 'updated': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
+                 'link': {
+                     'rel': 'http://opds-spec.org/acquisition',
+                     'href': f"/download?url={quote(url, safe='')}",
+                     'type': 'application/fb2+xml'
+                 }
+             })
+     xml_data = generate_feed(entries)
+     kind = 'acquisition' if q else 'search'
+     return Response(content=xml_data,
+                     media_type=f"application/atom+xml;profile=opds-catalog;kind={kind}")

  @app.get("/download")
  def download_fb2(url: str) -> Response:
@@ -168,8 +124,7 @@ def download_fb2(url: str) -> Response:
      res.raise_for_status()
      soup = BeautifulSoup(res.text, "html.parser")
      title = soup.title.string.strip() if soup.title and soup.title.string else "article"
-     body = str(soup.body)
-     fb2 = html_to_fb2(title, body)
+     fb2 = html_to_fb2(title, str(soup.body))
      filename = f"{quote(title, safe='').replace('%20','_')[:30]}.fb2"
      return Response(
          content=fb2,
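
For illustration, the consolidated `generate_feed` helper takes a list of plain dicts with `id`, `title`, `updated`, and `link` keys (the `link` value is passed straight to `ET.SubElement` as an attribute dict, so every value must be a string). A minimal sketch of that contract, run from the repo root with the project's dependencies installed; the entry values are invented placeholders:

# check_feed.py -- hypothetical helper script, not part of the commit
from datetime import datetime
from main import generate_feed  # helper added in this commit

entries = [{
    'id': 'urn:uuid:example-entry',   # placeholder id for the demo
    'title': 'Example result',
    'updated': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
    'link': {'rel': 'subsection', 'href': '/opds',
             'type': 'application/atom+xml;profile=opds-catalog;kind=search'},
}]
print(generate_feed(entries).decode("utf-8"))  # Atom XML with one <entry>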
 
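
One routing note on the new endpoints: FastAPI (via Starlette) resolves routes in registration order, so with `root_opds` and `opds_search` both registered on `GET /opds`, requests always reach `root_opds` and the `q` branch in `opds_search` is never executed. A sketch of one way to keep both behaviors behind a single handler, in place of the two above and reusing the helpers from this file; the `kind=navigation` media type for the empty-query case is carried over from `root_opds` as an assumption:

# Sketch only: a single GET /opds handler serving the search form (no q)
# and acquisition results (q given), so neither route shadows the other.
@app.get("/opds", include_in_schema=False)
def opds(q: Optional[str] = Query(None, description="Search query")) -> Response:
    entries = [{
        'id': 'urn:uuid:duckopds-search-section',
        'title': 'Search',
        'updated': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
        'link': {
            'rel': 'subsection',
            'href': '/opds',
            'type': 'application/atom+xml;profile=opds-catalog;kind=search'
        }
    }]
    if q:
        for title, url in duckduckgo_search(q):
            entries.append({
                'id': url,
                'title': title,
                'updated': datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
                'link': {
                    'rel': 'http://opds-spec.org/acquisition',
                    'href': f"/download?url={quote(url, safe='')}",
                    'type': 'application/fb2+xml'
                }
            })
    kind = 'acquisition' if q else 'navigation'
    return Response(content=generate_feed(entries),
                    media_type=f"application/atom+xml;profile=opds-catalog;kind={kind}")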
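
For an end-to-end smoke test of the committed routes, a small client sketch with `requests`; the base URL assumes the app is served locally (for example via `uvicorn main:app`), and the search term and article URL are placeholders:

import requests

BASE = "http://localhost:8000"  # assumption: app running locally

# Fetch the /opds feed for a query; which kind comes back depends on
# which of the two GET /opds handlers answers (see the routing note above).
feed = requests.get(f"{BASE}/opds", params={"q": "opds catalog"})
feed.raise_for_status()
print(feed.headers["content-type"])
print(feed.text[:200])

# Convert one result page to FB2 via /download.
book = requests.get(f"{BASE}/download", params={"url": "https://example.com/article"})
book.raise_for_status()
with open("article.fb2", "wb") as fh:
    fh.write(book.content)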