|
""" |
|
Web Search MCP Server - Feed LLMs with fresh sources |
|
==================================================== |
|
|
|
Prerequisites |
|
------------- |
|
$ pip install "gradio[mcp]" httpx trafilatura python-dateutil limits |
|
|
|
Environment |
|
----------- |
|
export SERPER_API_KEY="YOUR-KEY-HERE" |
|
|
|
Usage |
|
----- |
|
python app_mcp.py |
|
Then connect to: http://localhost:7860/gradio_api/mcp/sse |
|
""" |
|
|
|
import os |
|
import asyncio |
|
from typing import Optional |
|
import httpx |
|
import trafilatura |
|
import gradio as gr |
|
from dateutil import parser as dateparser |
|
from limits import parse |
|
from limits.aio.storage import MemoryStorage |
|
from limits.aio.strategies import MovingWindowRateLimiter |
|
|
|
|
|
# Serper.dev credentials and endpoint. The key is read once at import time;
# search_web() returns a readable error string when it is missing.
SERPER_API_KEY = os.getenv("SERPER_API_KEY")

SERPER_ENDPOINT = "https://google.serper.dev/news"

# NOTE(review): HEADERS captures SERPER_API_KEY at import time — setting the
# env var after the module is imported has no effect.
HEADERS = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}


# Process-wide async rate limiting: a single in-memory moving window shared by
# every caller of this server, capped at 200 search requests per rolling hour.
storage = MemoryStorage()

limiter = MovingWindowRateLimiter(storage)

rate_limit = parse("200/hour")
|
|
|
|
|
async def search_web(query: str, num_results: Optional[int] = 4) -> str:
    """
    Search the web for recent news and information, returning extracted content.

    This tool searches for recent news articles related to your query and extracts
    the main content from each article, providing you with fresh, relevant information
    from the web.

    Args:
        query (str): The search query. This is REQUIRED. Examples: "apple inc earnings",
            "climate change 2024", "AI developments"
        num_results (int): Number of results to fetch. This is OPTIONAL. Default is 4.
            Range: 1-20. More results = more context but longer response time.

    Returns:
        str: Formatted text containing extracted article content with metadata (title,
            source, date, URL, and main text) for each result, separated by dividers.
            Returns error message if API key is missing or search fails.

    Examples:
        - search_web("OpenAI news", 5) - Get 5 recent news articles about OpenAI
        - search_web("python 3.13 features") - Get 4 articles about Python 3.13
        - search_web("stock market today", 10) - Get 10 articles about today's market
    """
    if not SERPER_API_KEY:
        return "Error: SERPER_API_KEY environment variable is not set. Please set it to use this tool."

    # Gradio sliders can deliver floats; normalise to an int clamped to [1, 20].
    if num_results is None:
        num_results = 4
    num_results = max(1, min(20, int(num_results)))

    try:
        # Single process-wide moving-window limit shared by all callers.
        if not await limiter.hit(rate_limit, "global"):
            return "Error: Rate limit exceeded. Please try again later (limit: 200 requests per hour)."

        # Step 1: ask Serper for recent news items matching the query.
        payload = {"q": query, "type": "news", "num": num_results, "page": 1}
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.post(SERPER_ENDPOINT, headers=HEADERS, json=payload)

        if resp.status_code != 200:
            return f"Error: Search API returned status {resp.status_code}. Please check your API key and try again."

        news_items = resp.json().get("news", [])
        if not news_items:
            return (
                f"No results found for query: '{query}'. Try a different search term."
            )

        # Step 2: fetch all article pages concurrently. A missing "link" yields
        # an invalid URL whose fetch fails; return_exceptions=True keeps one bad
        # item from aborting the whole batch — it is simply skipped below.
        urls = [n.get("link", "") for n in news_items]
        async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
            tasks = [client.get(u) for u in urls]
            responses = await asyncio.gather(*tasks, return_exceptions=True)

        # Step 3: extract readable text from each page and format one chunk per
        # successfully-extracted article.
        chunks = []
        successful_extractions = 0

        for meta, response in zip(news_items, responses):
            # Fetch raised (timeout, DNS failure, invalid URL, ...): skip.
            if isinstance(response, Exception):
                continue

            # Error pages (404/403/...) would only yield junk text: skip.
            if response.status_code != 200:
                continue

            body = trafilatura.extract(
                response.text, include_formatting=False, include_comments=False
            )

            if not body:
                continue

            successful_extractions += 1

            # Serper dates are free-form ("2 hours ago", "Jun 3, 2024", ...);
            # normalise to ISO and fall back to the raw value when unparseable.
            try:
                date_iso = dateparser.parse(meta.get("date", ""), fuzzy=True).strftime(
                    "%Y-%m-%d"
                )
            except Exception:
                date_iso = meta.get("date", "Unknown")

            # Use .get() fallbacks so one incomplete news item degrades to
            # placeholder metadata instead of raising KeyError and discarding
            # every result via the outer except.
            chunk = (
                f"## {meta.get('title', 'Untitled')}\n"
                f"**Source:** {meta.get('source', 'Unknown')} "
                f"**Date:** {date_iso}\n"
                f"**URL:** {meta.get('link', '')}\n\n"
                f"{body.strip()}\n"
            )
            chunks.append(chunk)

        if not chunks:
            return f"Found {len(news_items)} results for '{query}', but couldn't extract readable content from any of them. The websites might be blocking automated access."

        result = "\n---\n".join(chunks)
        summary = f"Successfully extracted content from {successful_extractions} out of {len(news_items)} search results for query: '{query}'\n\n---\n\n"

        return summary + result

    except Exception as e:
        # Last-resort guard: this tool is consumed by LLMs over MCP, so every
        # failure is reported as a readable string rather than a raised error.
        return f"Error occurred while searching: {str(e)}. Please try again or check your query."
|
|
|
|
|
|
|
# Gradio UI: primarily an MCP tool host, but the widgets below also allow
# manual testing of search_web from a browser.
with gr.Blocks(title="Web Search MCP Server") as demo:
    gr.Markdown(
        """
    # 🔍 Web Search MCP Server

    This MCP server provides web search capabilities to LLMs. It searches for recent news
    and extracts the main content from articles.

    **Note:** This interface is primarily designed for MCP tool usage by LLMs, but you can
    also test it manually below.
    """
    )

    # Inputs: a required query textbox and an optional result-count slider,
    # side by side on one row.
    with gr.Row():
        query_box = gr.Textbox(
            info="Required: Enter your search query",
            label="Search Query",
            placeholder='e.g. "OpenAI news", "climate change 2024", "AI developments"',
        )
        results_slider = gr.Slider(
            label="Number of Results",
            info="Optional: How many articles to fetch (default: 4)",
            minimum=1,
            maximum=20,
            step=1,
            value=4,
        )

    # Output area for the formatted article chunks.
    results_box = gr.Textbox(
        info="The extracted article content will appear here",
        label="Extracted Content",
        lines=25,
        max_lines=50,
    )

    run_button = gr.Button("Search", variant="primary")

    # Clickable sample queries; never cached since results change constantly.
    gr.Examples(
        fn=search_web,
        inputs=[query_box, results_slider],
        outputs=results_box,
        cache_examples=False,
        examples=[
            ["OpenAI GPT-5 news", 5],
            ["climate change 2024", 4],
            ["artificial intelligence breakthroughs", 8],
            ["stock market today", 6],
            ["python programming updates", 4],
        ],
    )

    run_button.click(
        fn=search_web, inputs=[query_box, results_slider], outputs=results_box
    )
|
|
|
|
|
if __name__ == "__main__":
    # Launch the Gradio app with the MCP server enabled; tools are exposed
    # over SSE at /gradio_api/mcp/sse (see module docstring for the full URL).
    demo.launch(mcp_server=True, show_api=True)
|
|