victor (HF Staff) committed
Commit 36a4f5e
1 Parent(s): 4424462

Refactor search_web function for improved readability: format parameters, enhance example usage, and streamline payload preparation.

Files changed (1)
  1. app.py +23 -17
app.py CHANGED
@@ -39,7 +39,9 @@ limiter = MovingWindowRateLimiter(storage)
 rate_limit = parse("200/hour")
 
 
-async def search_web(query: str, search_type: str = "search", num_results: Optional[int] = 4) -> str:
+async def search_web(
+    query: str, search_type: str = "search", num_results: Optional[int] = 4
+) -> str:
     """
     Search the web for information or fresh news, returning extracted content.
 
@@ -74,7 +76,7 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
     Returns error message if API key is missing or search fails.
 
     Examples:
-    - search_web("OpenAI GPT-5", "news", 5) - Get 5 fresh news articles about OpenAI
+    - search_web("OpenAI GPT-5", "news") - Get 5 fresh news articles about OpenAI
     - search_web("python tutorial", "search") - Get 4 general results about Python (default count)
     - search_web("stock market today", "news", 10) - Get 10 news articles about today's market
     - search_web("machine learning basics") - Get 4 general search results (all defaults)
@@ -86,7 +88,7 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
     if num_results is None:
         num_results = 4
     num_results = max(1, min(20, num_results))
-
+
     # Validate search_type
     if search_type not in ["search", "news"]:
         search_type = "search"
@@ -97,14 +99,16 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
         return "Error: Rate limit exceeded. Please try again later (limit: 200 requests per hour)."
 
     # Select endpoint based on search type
-    endpoint = SERPER_NEWS_ENDPOINT if search_type == "news" else SERPER_SEARCH_ENDPOINT
-
+    endpoint = (
+        SERPER_NEWS_ENDPOINT if search_type == "news" else SERPER_SEARCH_ENDPOINT
+    )
+
     # Prepare payload
     payload = {"q": query, "num": num_results}
     if search_type == "news":
         payload["type"] = "news"
         payload["page"] = 1
-
+
     async with httpx.AsyncClient(timeout=15) as client:
         resp = await client.post(endpoint, headers=HEADERS, json=payload)
 
@@ -116,11 +120,9 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
             results = resp.json().get("news", [])
         else:
             results = resp.json().get("organic", [])
-
+
         if not results:
-            return (
-                f"No {search_type} results found for query: '{query}'. Try a different search term or search type."
-            )
+            return f"No {search_type} results found for query: '{query}'. Try a different search term or search type."
 
         # Fetch HTML content concurrently
         urls = [r["link"] for r in results]
@@ -152,12 +154,14 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
             try:
                 date_str = meta.get("date", "")
                 if date_str:
-                    date_iso = dateparser.parse(date_str, fuzzy=True).strftime("%Y-%m-%d")
+                    date_iso = dateparser.parse(date_str, fuzzy=True).strftime(
+                        "%Y-%m-%d"
+                    )
                 else:
                     date_iso = "Unknown"
             except Exception:
                 date_iso = "Unknown"
-
+
             chunk = (
                 f"## {meta['title']}\n"
                 f"**Source:** {meta.get('source', 'Unknown')} "
@@ -167,15 +171,15 @@ async def search_web(query: str, search_type: str = "search", num_results: Optio
             )
         else:
            # Search results don't have date/source but have domain
-            domain = meta['link'].split('/')[2].replace('www.', '')
-
+            domain = meta["link"].split("/")[2].replace("www.", "")
+
             chunk = (
                 f"## {meta['title']}\n"
                 f"**Domain:** {domain}\n"
                 f"**URL:** {meta['link']}\n\n"
                 f"{body.strip()}\n"
             )
-
+
         chunks.append(chunk)
 
     if not chunks:
@@ -222,7 +226,7 @@ with gr.Blocks(title="Web Search MCP Server") as demo:
             label="Search Type",
             info="Choose search type",
         )
-
+
     with gr.Row():
        num_results_input = gr.Slider(
            minimum=1,
@@ -259,7 +263,9 @@ with gr.Blocks(title="Web Search MCP Server") as demo:
     )
 
     search_button.click(
-        fn=search_web, inputs=[query_input, search_type_input, num_results_input], outputs=output
+        fn=search_web,
+        inputs=[query_input, search_type_input, num_results_input],
+        outputs=output,
    )
 
 
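Note: the refactor reflows the signature, the endpoint/payload preparation, and the Gradio click wiring without changing how the tool is called. Below is a minimal sketch of exercising the reformatted function outside the Gradio UI; it assumes app.py is importable from the Space repository and that the Serper API key the module expects is configured in the environment (the variable name is not shown in this diff), so treat names here as illustrative only.

import asyncio

import app  # assumption: importing app.py also constructs the gr.Blocks demo defined at module level


async def main() -> None:
    # Same positional/keyword arguments as before the refactor; only formatting changed.
    # Per the diff, a news call builds the payload {"q": ..., "num": ..., "type": "news", "page": 1}
    # and POSTs it to SERPER_NEWS_ENDPOINT with HEADERS.
    news = await app.search_web("OpenAI GPT-5", search_type="news", num_results=5)

    # Defaults from the signature: search_type="search", num_results=4 (clamped to 1-20).
    general = await app.search_web("python tutorial")

    print(news)
    print(general)


asyncio.run(main())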