Refactor the `search_web` function to format each result according to its search type (news vs. general search); improve date handling and source extraction.
Browse files
app.py
CHANGED
@@ -146,28 +146,36 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
146 |
|
147 |
successful_extractions += 1
|
148 |
|
149 |
-
#
|
150 |
-
|
151 |
-
#
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
156 |
date_iso = "Unknown"
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
-
chunk = (
|
165 |
-
f"## {meta['title']}\n"
|
166 |
-
f"**Source:** {source} "
|
167 |
-
f"**Date:** {date_iso}\n"
|
168 |
-
f"**URL:** {meta['link']}\n\n"
|
169 |
-
f"{body.strip()}\n"
|
170 |
-
)
|
171 |
chunks.append(chunk)
|
172 |
|
173 |
if not chunks:
|
|
|
146 |
|
147 |
successful_extractions += 1
|
148 |
|
149 |
+
# Format the chunk based on search type
|
150 |
+
if search_type == "news":
|
151 |
+
# News results have date and source
|
152 |
+
try:
|
153 |
+
date_str = meta.get("date", "")
|
154 |
+
if date_str:
|
155 |
+
date_iso = dateparser.parse(date_str, fuzzy=True).strftime("%Y-%m-%d")
|
156 |
+
else:
|
157 |
+
date_iso = "Unknown"
|
158 |
+
except Exception:
|
159 |
date_iso = "Unknown"
|
160 |
+
|
161 |
+
chunk = (
|
162 |
+
f"## {meta['title']}\n"
|
163 |
+
f"**Source:** {meta.get('source', 'Unknown')} "
|
164 |
+
f"**Date:** {date_iso}\n"
|
165 |
+
f"**URL:** {meta['link']}\n\n"
|
166 |
+
f"{body.strip()}\n"
|
167 |
+
)
|
168 |
+
else:
|
169 |
+
# Search results don't have date/source but have domain
|
170 |
+
domain = meta['link'].split('/')[2].replace('www.', '')
|
171 |
+
|
172 |
+
chunk = (
|
173 |
+
f"## {meta['title']}\n"
|
174 |
+
f"**Domain:** {domain}\n"
|
175 |
+
f"**URL:** {meta['link']}\n\n"
|
176 |
+
f"{body.strip()}\n"
|
177 |
+
)
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
chunks.append(chunk)
|
180 |
|
181 |
if not chunks:
|