#!/usr/bin/env python3
"""
MCP Server for Hugging Face Dataset and Model Search API using Gradio
"""
import os
from typing import Optional
import gradio as gr
import httpx
# Initialize HTTP client
client = httpx.Client(timeout=30.0)
base_url = os.getenv("HF_SEARCH_API_URL", "http://localhost:8000")
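

# Shared by the model search/similarity/trending tools below to render parameter counts.
def format_param_count(param_count: int) -> str:
    """Format a raw parameter count as a short human-readable string (e.g. 1_300_000_000 -> '1.3B')."""
    if param_count >= 1_000_000_000:
        return f"{param_count / 1_000_000_000:.1f}B"
    if param_count >= 1_000_000:
        return f"{param_count / 1_000_000:.1f}M"
    if param_count >= 1_000:
        return f"{param_count / 1_000:.1f}K"
    return str(param_count)
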
def search_datasets(
query: str,
k: int = 5,
sort_by: str = "similarity",
min_likes: int = 0,
min_downloads: int = 0
) -> str:
"""
Search for datasets based on a text query.
Args:
query: Search query text
k: Number of results to return (1-100)
sort_by: Sort method for results (similarity, likes, downloads, trending)
min_likes: Minimum likes filter
min_downloads: Minimum downloads filter
Returns:
Formatted search results with dataset IDs, summaries, and metadata
"""
params = {
"query": query,
"k": k,
"sort_by": sort_by,
"min_likes": min_likes,
"min_downloads": min_downloads
}
response = client.get(f"{base_url}/search/datasets", params=params)
response.raise_for_status()
data = response.json()
results = data.get("results", [])
if not results:
return "No datasets found."
output = []
for i, result in enumerate(results, 1):
output.append(f"{i}. **{result['dataset_id']}**")
output.append(f" - Summary: {result['summary']}")
output.append(f" - Similarity: {result['similarity']:.3f}")
output.append(f" - Likes: {result['likes']:,} | Downloads: {result['downloads']:,}")
output.append("")
return "\n".join(output)
def find_similar_datasets(
dataset_id: str,
k: int = 5,
sort_by: str = "similarity",
min_likes: int = 0,
min_downloads: int = 0
) -> str:
"""
Find datasets similar to a specified dataset.
Args:
dataset_id: Dataset ID to find similar datasets for
k: Number of results to return (1-100)
sort_by: Sort method for results (similarity, likes, downloads, trending)
min_likes: Minimum likes filter
min_downloads: Minimum downloads filter
Returns:
Formatted list of similar datasets with metadata
"""
params = {
"dataset_id": dataset_id,
"k": k,
"sort_by": sort_by,
"min_likes": min_likes,
"min_downloads": min_downloads
}
response = client.get(f"{base_url}/similarity/datasets", params=params)
response.raise_for_status()
data = response.json()
results = data.get("results", [])
if not results:
return "No similar datasets found."
output = []
for i, result in enumerate(results, 1):
output.append(f"{i}. **{result['dataset_id']}**")
output.append(f" - Summary: {result['summary']}")
output.append(f" - Similarity: {result['similarity']:.3f}")
output.append(f" - Likes: {result['likes']:,} | Downloads: {result['downloads']:,}")
output.append("")
return "\n".join(output)
def search_models(
query: str,
k: int = 5,
sort_by: str = "similarity",
min_likes: int = 0,
min_downloads: int = 0,
min_param_count: int = 0,
max_param_count: Optional[int] = None
) -> str:
"""
Search for models based on a text query with optional parameter count filtering.
Args:
query: Search query text
k: Number of results to return (1-100)
sort_by: Sort method for results (similarity, likes, downloads, trending)
min_likes: Minimum likes filter
min_downloads: Minimum downloads filter
min_param_count: Minimum parameter count (excludes models with unknown params)
max_param_count: Maximum parameter count (None for no limit)
Returns:
Formatted search results with model IDs, summaries, and metadata
"""
params = {
"query": query,
"k": k,
"sort_by": sort_by,
"min_likes": min_likes,
"min_downloads": min_downloads,
"min_param_count": min_param_count
}
if max_param_count is not None:
params["max_param_count"] = max_param_count
response = client.get(f"{base_url}/search/models", params=params)
response.raise_for_status()
data = response.json()
results = data.get("results", [])
if not results:
return "No models found."
output = []
for i, result in enumerate(results, 1):
output.append(f"{i}. **{result['model_id']}**")
output.append(f" - Summary: {result['summary']}")
output.append(f" - Similarity: {result['similarity']:.3f}")
output.append(f" - Likes: {result['likes']:,} | Downloads: {result['downloads']:,}")
        if result.get('param_count') is not None and result['param_count'] > 0:
            output.append(f"   - Parameters: {format_param_count(result['param_count'])}")
output.append("")
return "\n".join(output)
def find_similar_models(
model_id: str,
k: int = 5,
sort_by: str = "similarity",
min_likes: int = 0,
min_downloads: int = 0,
min_param_count: int = 0,
max_param_count: Optional[int] = None
) -> str:
"""
Find models similar to a specified model.
Args:
model_id: Model ID to find similar models for
k: Number of results to return (1-100)
sort_by: Sort method for results (similarity, likes, downloads, trending)
min_likes: Minimum likes filter
min_downloads: Minimum downloads filter
min_param_count: Minimum parameter count (excludes models with unknown params)
max_param_count: Maximum parameter count (None for no limit)
Returns:
Formatted list of similar models with metadata
"""
params = {
"model_id": model_id,
"k": k,
"sort_by": sort_by,
"min_likes": min_likes,
"min_downloads": min_downloads,
"min_param_count": min_param_count
}
if max_param_count is not None:
params["max_param_count"] = max_param_count
response = client.get(f"{base_url}/similarity/models", params=params)
response.raise_for_status()
data = response.json()
results = data.get("results", [])
if not results:
return "No similar models found."
output = []
for i, result in enumerate(results, 1):
output.append(f"{i}. **{result['model_id']}**")
output.append(f" - Summary: {result['summary']}")
output.append(f" - Similarity: {result['similarity']:.3f}")
output.append(f" - Likes: {result['likes']:,} | Downloads: {result['downloads']:,}")
        if result.get('param_count') is not None and result['param_count'] > 0:
            output.append(f"   - Parameters: {format_param_count(result['param_count'])}")
output.append("")
return "\n".join(output)
def get_trending_models(
limit: int = 10,
min_likes: int = 0,
min_downloads: int = 0,
min_param_count: int = 0,
max_param_count: Optional[int] = None
) -> str:
"""
Get trending models with their summaries and optional filtering.
Args:
limit: Number of results to return (1-100)
min_likes: Minimum likes filter
min_downloads: Minimum downloads filter
min_param_count: Minimum parameter count (excludes models with unknown params)
max_param_count: Maximum parameter count (None for no limit)
Returns:
Formatted list of trending models with metadata
"""
params = {
"limit": limit,
"min_likes": min_likes,
"min_downloads": min_downloads,
"min_param_count": min_param_count
}
if max_param_count is not None:
params["max_param_count"] = max_param_count
response = client.get(f"{base_url}/trending/models", params=params)
response.raise_for_status()
data = response.json()
results = data.get("results", [])
if not results:
return "No trending models found."
output = []
for i, result in enumerate(results, 1):
output.append(f"{i}. **{result['model_id']}**")
output.append(f" - Summary: {result['summary']}")
output.append(f" - Similarity: {result['similarity']:.3f}")
output.append(f" - Likes: {result['likes']:,} | Downloads: {result['downloads']:,}")
        if result.get('param_count') is not None and result['param_count'] > 0:
            output.append(f"   - Parameters: {format_param_count(result['param_count'])}")
output.append("")
return "\n".join(output)
def get_trending_datasets(
limit: int = 10,
min_likes: int = 0,
min_downloads: int = 0
) -> str:
"""
Get trending datasets with their summaries.
Args:
limit: Number of results to return (1-100)
min_likes: Minimum likes filter
min_downloads: Minimum downloads filter
Returns:
Formatted list of trending datasets with metadata
"""
params = {
"limit": limit,
"min_likes": min_likes,
"min_downloads": min_downloads
}
response = client.get(f"{base_url}/trending/datasets", params=params)
response.raise_for_status()
data = response.json()
results = data.get("results", [])
if not results:
return "No trending datasets found."
output = []
for i, result in enumerate(results, 1):
output.append(f"{i}. **{result['dataset_id']}**")
output.append(f" - Summary: {result['summary']}")
output.append(f" - Similarity: {result['similarity']:.3f}")
output.append(f" - Likes: {result['likes']:,} | Downloads: {result['downloads']:,}")
output.append("")
return "\n".join(output)
def download_model_card(model_id: str) -> str:
"""
    Download the model card (README.md) for a Hugging Face model.
Args:
model_id: The model ID (e.g., 'username/model-name')
Returns:
The content of the model card (README.md)
"""
url = f"https://huggingface.co/{model_id}/raw/main/README.md"
response = client.get(url)
response.raise_for_status()
return response.text
def download_dataset_card(dataset_id: str) -> str:
"""
    Download the dataset card (README.md) for a Hugging Face dataset.
Args:
dataset_id: The dataset ID (e.g., 'username/dataset-name')
Returns:
The content of the dataset card (README.md)
"""
url = f"https://huggingface.co/datasets/{dataset_id}/raw/main/README.md"
response = client.get(url)
response.raise_for_status()
return response.text
# Create Gradio interface
with gr.Blocks(title="HuggingFace Search MCP Server") as demo:
gr.Markdown("# HuggingFace Search MCP Server")
gr.Markdown("This server provides semantic search capabilities for HuggingFace models and datasets.")
with gr.Tab("Search Datasets"):
gr.Interface(
fn=search_datasets,
inputs=[
gr.Textbox(label="Query", placeholder="Enter search query"),
gr.Slider(1, 100, value=5, step=1, label="Number of results"),
gr.Dropdown(["similarity", "likes", "downloads", "trending"], value="similarity", label="Sort by"),
gr.Number(value=0, label="Minimum likes"),
gr.Number(value=0, label="Minimum downloads")
],
outputs=gr.Markdown(label="Results"),
title="Search Datasets",
description="Search for datasets based on a text query"
)
with gr.Tab("Find Similar Datasets"):
gr.Interface(
fn=find_similar_datasets,
inputs=[
gr.Textbox(label="Dataset ID", placeholder="username/dataset-name"),
gr.Slider(1, 100, value=5, step=1, label="Number of results"),
gr.Dropdown(["similarity", "likes", "downloads", "trending"], value="similarity", label="Sort by"),
gr.Number(value=0, label="Minimum likes"),
gr.Number(value=0, label="Minimum downloads")
],
outputs=gr.Markdown(label="Results"),
title="Find Similar Datasets",
description="Find datasets similar to a specified dataset"
)
with gr.Tab("Search Models"):
gr.Interface(
fn=search_models,
inputs=[
gr.Textbox(label="Query", placeholder="Enter search query"),
gr.Slider(1, 100, value=5, step=1, label="Number of results"),
gr.Dropdown(["similarity", "likes", "downloads", "trending"], value="similarity", label="Sort by"),
gr.Number(value=0, label="Minimum likes"),
gr.Number(value=0, label="Minimum downloads"),
gr.Number(value=0, label="Minimum parameter count"),
gr.Number(value=None, label="Maximum parameter count (leave empty for no limit)")
],
outputs=gr.Markdown(label="Results"),
title="Search Models",
description="Search for models based on a text query with optional parameter count filtering"
)
with gr.Tab("Find Similar Models"):
gr.Interface(
fn=find_similar_models,
inputs=[
gr.Textbox(label="Model ID", placeholder="username/model-name"),
gr.Slider(1, 100, value=5, step=1, label="Number of results"),
gr.Dropdown(["similarity", "likes", "downloads", "trending"], value="similarity", label="Sort by"),
gr.Number(value=0, label="Minimum likes"),
gr.Number(value=0, label="Minimum downloads"),
gr.Number(value=0, label="Minimum parameter count"),
gr.Number(value=None, label="Maximum parameter count (leave empty for no limit)")
],
outputs=gr.Markdown(label="Results"),
title="Find Similar Models",
description="Find models similar to a specified model"
)
with gr.Tab("Trending Models"):
gr.Interface(
fn=get_trending_models,
inputs=[
gr.Slider(1, 100, value=10, step=1, label="Number of results"),
gr.Number(value=0, label="Minimum likes"),
gr.Number(value=0, label="Minimum downloads"),
gr.Number(value=0, label="Minimum parameter count"),
gr.Number(value=None, label="Maximum parameter count (leave empty for no limit)")
],
outputs=gr.Markdown(label="Results"),
title="Get Trending Models",
description="Get trending models with their summaries and optional filtering"
)
with gr.Tab("Trending Datasets"):
gr.Interface(
fn=get_trending_datasets,
inputs=[
gr.Slider(1, 100, value=10, step=1, label="Number of results"),
gr.Number(value=0, label="Minimum likes"),
gr.Number(value=0, label="Minimum downloads")
],
outputs=gr.Markdown(label="Results"),
title="Get Trending Datasets",
description="Get trending datasets with their summaries"
)
with gr.Tab("Download Model Card"):
gr.Interface(
fn=download_model_card,
inputs=gr.Textbox(label="Model ID", placeholder="username/model-name"),
outputs=gr.Textbox(label="Model Card Content", lines=20),
title="Download Model Card",
description="Download the README card for a HuggingFace model"
)
with gr.Tab("Download Dataset Card"):
gr.Interface(
fn=download_dataset_card,
inputs=gr.Textbox(label="Dataset ID", placeholder="username/dataset-name"),
outputs=gr.Textbox(label="Dataset Card Content", lines=20),
title="Download Dataset Card",
description="Download the README card for a HuggingFace dataset"
)
if __name__ == "__main__":
# Launch with MCP server enabled
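    # With mcp_server=True, Gradio also exposes the functions above as MCP tools;
    # the MCP endpoint is typically served at http://<host>:<port>/gradio_api/mcp/sse.
    # A hypothetical client configuration (e.g. an mcpServers entry), assuming the
    # default local port:
    #
    #   {
    #     "mcpServers": {
    #       "hf-search": {"url": "http://localhost:7860/gradio_api/mcp/sse"}
    #     }
    #   }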
demo.launch(mcp_server=True)