victor HF Staff committed on
Commit
4424462
·
1 Parent(s): 9d978bc

Refactor search_web function to enhance formatting for news and search results; improve date handling and source extraction.

Browse files
Files changed (1) hide show
  1. app.py +28 -20
app.py CHANGED
@@ -146,28 +146,36 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
146
 
147
  successful_extractions += 1
148
 
149
- # Parse and format date
150
- try:
151
- # For news results, date is in 'date' field; for search results, it might be in 'snippet'
152
- date_str = meta.get("date", "")
153
- if date_str:
154
- date_iso = dateparser.parse(date_str, fuzzy=True).strftime("%Y-%m-%d")
155
- else:
 
 
 
156
  date_iso = "Unknown"
157
- except Exception:
158
- date_iso = "Unknown"
159
-
160
- # Format the chunk
161
- # For search results, source might be in 'displayLink' or domain
162
- source = meta.get('source', meta.get('displayLink', meta['link'].split('/')[2]))
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- chunk = (
165
- f"## {meta['title']}\n"
166
- f"**Source:** {source} "
167
- f"**Date:** {date_iso}\n"
168
- f"**URL:** {meta['link']}\n\n"
169
- f"{body.strip()}\n"
170
- )
171
  chunks.append(chunk)
172
 
173
  if not chunks:
 
146
 
147
  successful_extractions += 1
148
 
149
+ # Format the chunk based on search type
150
+ if search_type == "news":
151
+ # News results have date and source
152
+ try:
153
+ date_str = meta.get("date", "")
154
+ if date_str:
155
+ date_iso = dateparser.parse(date_str, fuzzy=True).strftime("%Y-%m-%d")
156
+ else:
157
+ date_iso = "Unknown"
158
+ except Exception:
159
  date_iso = "Unknown"
160
+
161
+ chunk = (
162
+ f"## {meta['title']}\n"
163
+ f"**Source:** {meta.get('source', 'Unknown')} "
164
+ f"**Date:** {date_iso}\n"
165
+ f"**URL:** {meta['link']}\n\n"
166
+ f"{body.strip()}\n"
167
+ )
168
+ else:
169
+ # Search results don't have date/source but have domain
170
+ domain = meta['link'].split('/')[2].replace('www.', '')
171
+
172
+ chunk = (
173
+ f"## {meta['title']}\n"
174
+ f"**Domain:** {domain}\n"
175
+ f"**URL:** {meta['link']}\n\n"
176
+ f"{body.strip()}\n"
177
+ )
178
 
 
 
 
 
 
 
 
179
  chunks.append(chunk)
180
 
181
  if not chunks: