Refactor search_web function for improved readability: format parameters, enhance example usage, and streamline payload preparation.
Browse files
app.py
CHANGED
@@ -39,7 +39,9 @@ limiter = MovingWindowRateLimiter(storage)
|
|
39 |
rate_limit = parse("200/hour")
|
40 |
|
41 |
|
42 |
-
async def search_web(
|
|
|
|
|
43 |
"""
|
44 |
Search the web for information or fresh news, returning extracted content.
|
45 |
|
@@ -74,7 +76,7 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
74 |
Returns error message if API key is missing or search fails.
|
75 |
|
76 |
Examples:
|
77 |
-
- search_web("OpenAI GPT-5", "news"
|
78 |
- search_web("python tutorial", "search") - Get 4 general results about Python (default count)
|
79 |
- search_web("stock market today", "news", 10) - Get 10 news articles about today's market
|
80 |
- search_web("machine learning basics") - Get 4 general search results (all defaults)
|
@@ -86,7 +88,7 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
86 |
if num_results is None:
|
87 |
num_results = 4
|
88 |
num_results = max(1, min(20, num_results))
|
89 |
-
|
90 |
# Validate search_type
|
91 |
if search_type not in ["search", "news"]:
|
92 |
search_type = "search"
|
@@ -97,14 +99,16 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
97 |
return "Error: Rate limit exceeded. Please try again later (limit: 200 requests per hour)."
|
98 |
|
99 |
# Select endpoint based on search type
|
100 |
-
endpoint =
|
101 |
-
|
|
|
|
|
102 |
# Prepare payload
|
103 |
payload = {"q": query, "num": num_results}
|
104 |
if search_type == "news":
|
105 |
payload["type"] = "news"
|
106 |
payload["page"] = 1
|
107 |
-
|
108 |
async with httpx.AsyncClient(timeout=15) as client:
|
109 |
resp = await client.post(endpoint, headers=HEADERS, json=payload)
|
110 |
|
@@ -116,11 +120,9 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
116 |
results = resp.json().get("news", [])
|
117 |
else:
|
118 |
results = resp.json().get("organic", [])
|
119 |
-
|
120 |
if not results:
|
121 |
-
return
|
122 |
-
f"No {search_type} results found for query: '{query}'. Try a different search term or search type."
|
123 |
-
)
|
124 |
|
125 |
# Fetch HTML content concurrently
|
126 |
urls = [r["link"] for r in results]
|
@@ -152,12 +154,14 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
152 |
try:
|
153 |
date_str = meta.get("date", "")
|
154 |
if date_str:
|
155 |
-
date_iso = dateparser.parse(date_str, fuzzy=True).strftime(
|
|
|
|
|
156 |
else:
|
157 |
date_iso = "Unknown"
|
158 |
except Exception:
|
159 |
date_iso = "Unknown"
|
160 |
-
|
161 |
chunk = (
|
162 |
f"## {meta['title']}\n"
|
163 |
f"**Source:** {meta.get('source', 'Unknown')} "
|
@@ -167,15 +171,15 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
|
|
167 |
)
|
168 |
else:
|
169 |
# Search results don't have date/source but have domain
|
170 |
-
domain = meta[
|
171 |
-
|
172 |
chunk = (
|
173 |
f"## {meta['title']}\n"
|
174 |
f"**Domain:** {domain}\n"
|
175 |
f"**URL:** {meta['link']}\n\n"
|
176 |
f"{body.strip()}\n"
|
177 |
)
|
178 |
-
|
179 |
chunks.append(chunk)
|
180 |
|
181 |
if not chunks:
|
@@ -222,7 +226,7 @@ with gr.Blocks(title="Web Search MCP Server") as demo:
|
|
222 |
label="Search Type",
|
223 |
info="Choose search type",
|
224 |
)
|
225 |
-
|
226 |
with gr.Row():
|
227 |
num_results_input = gr.Slider(
|
228 |
minimum=1,
|
@@ -259,7 +263,9 @@ with gr.Blocks(title="Web Search MCP Server") as demo:
|
|
259 |
)
|
260 |
|
261 |
search_button.click(
|
262 |
-
fn=search_web,
|
|
|
|
|
263 |
)
|
264 |
|
265 |
|
|
|
39 |
rate_limit = parse("200/hour")
|
40 |
|
41 |
|
42 |
+
async def search_web(
|
43 |
+
query: str, search_type: str = "search", num_results: Optional[int] = 4
|
44 |
+
) -> str:
|
45 |
"""
|
46 |
Search the web for information or fresh news, returning extracted content.
|
47 |
|
|
|
76 |
Returns error message if API key is missing or search fails.
|
77 |
|
78 |
Examples:
|
79 |
+
- search_web("OpenAI GPT-5", "news") - Get 4 fresh news articles about OpenAI (default count)
|
80 |
- search_web("python tutorial", "search") - Get 4 general results about Python (default count)
|
81 |
- search_web("stock market today", "news", 10) - Get 10 news articles about today's market
|
82 |
- search_web("machine learning basics") - Get 4 general search results (all defaults)
|
|
|
88 |
if num_results is None:
|
89 |
num_results = 4
|
90 |
num_results = max(1, min(20, num_results))
|
91 |
+
|
92 |
# Validate search_type
|
93 |
if search_type not in ["search", "news"]:
|
94 |
search_type = "search"
|
|
|
99 |
return "Error: Rate limit exceeded. Please try again later (limit: 200 requests per hour)."
|
100 |
|
101 |
# Select endpoint based on search type
|
102 |
+
endpoint = (
|
103 |
+
SERPER_NEWS_ENDPOINT if search_type == "news" else SERPER_SEARCH_ENDPOINT
|
104 |
+
)
|
105 |
+
|
106 |
# Prepare payload
|
107 |
payload = {"q": query, "num": num_results}
|
108 |
if search_type == "news":
|
109 |
payload["type"] = "news"
|
110 |
payload["page"] = 1
|
111 |
+
|
112 |
async with httpx.AsyncClient(timeout=15) as client:
|
113 |
resp = await client.post(endpoint, headers=HEADERS, json=payload)
|
114 |
|
|
|
120 |
results = resp.json().get("news", [])
|
121 |
else:
|
122 |
results = resp.json().get("organic", [])
|
123 |
+
|
124 |
if not results:
|
125 |
+
return f"No {search_type} results found for query: '{query}'. Try a different search term or search type."
|
|
|
|
|
126 |
|
127 |
# Fetch HTML content concurrently
|
128 |
urls = [r["link"] for r in results]
|
|
|
154 |
try:
|
155 |
date_str = meta.get("date", "")
|
156 |
if date_str:
|
157 |
+
date_iso = dateparser.parse(date_str, fuzzy=True).strftime(
|
158 |
+
"%Y-%m-%d"
|
159 |
+
)
|
160 |
else:
|
161 |
date_iso = "Unknown"
|
162 |
except Exception:
|
163 |
date_iso = "Unknown"
|
164 |
+
|
165 |
chunk = (
|
166 |
f"## {meta['title']}\n"
|
167 |
f"**Source:** {meta.get('source', 'Unknown')} "
|
|
|
171 |
)
|
172 |
else:
|
173 |
# Search results don't have date/source but have domain
|
174 |
+
domain = meta["link"].split("/")[2].replace("www.", "")
|
175 |
+
|
176 |
chunk = (
|
177 |
f"## {meta['title']}\n"
|
178 |
f"**Domain:** {domain}\n"
|
179 |
f"**URL:** {meta['link']}\n\n"
|
180 |
f"{body.strip()}\n"
|
181 |
)
|
182 |
+
|
183 |
chunks.append(chunk)
|
184 |
|
185 |
if not chunks:
|
|
|
226 |
label="Search Type",
|
227 |
info="Choose search type",
|
228 |
)
|
229 |
+
|
230 |
with gr.Row():
|
231 |
num_results_input = gr.Slider(
|
232 |
minimum=1,
|
|
|
263 |
)
|
264 |
|
265 |
search_button.click(
|
266 |
+
fn=search_web,
|
267 |
+
inputs=[query_input, search_type_input, num_results_input],
|
268 |
+
outputs=output,
|
269 |
)
|
270 |
|
271 |
|