from pathlib import Path

import pandas as pd
import requests
from smolagents import MCPClient


def extracthackathontools() -> pd.DataFrame:
    """Scrape the MCP tools exposed by the hackathon Spaces.

    Lists the Spaces of the ``Agents-MCP-Hackathon`` author through the
    Hugging Face API, keeps those tagged ``mcp-server``, connects to each
    one's Gradio MCP SSE endpoint and records one row per tool. The result
    is meant to be downloadable as a bonus feature of the MCP Explorer.

    Returns:
        A DataFrame with one row per tool and the columns
        "Gradio MCP endpoint", "Tool name", "Tool description",
        "Tool inputs", "Space name", "HF Space URL", "Likes",
        "Created at" and "Tags". It is empty when no tool could be fetched.

    Raises:
        RuntimeError: If the list of Spaces cannot be fetched.
    """
    HF_API = "https://huggingface.co/api/spaces?author=Agents-MCP-Hackathon"
    try:
        # A timeout keeps the script from hanging forever on a stalled
        # connection; a timeout surfaces as the same RuntimeError below.
        resp = requests.get(HF_API, timeout=30)
        resp.raise_for_status()
    except Exception as e:
        raise RuntimeError(f"Failed to fetch Spaces list: {e}") from e

    spaces = resp.json()
    rows = []
    for space in spaces:
        tags = space.get("tags", [])
        if "mcp-server" not in tags:
            continue

        space_id = space["id"]
        # Build the Space's host name from its id: "/" and "_" become "-"
        # and everything is lower-cased.
        space_slug = space_id.replace("/", "-").replace("_", "-").lower()
        mcp_base_url = f"https://{space_slug}.hf.space/gradio_api/mcp/sse"

        # Best effort: a Space whose endpoint fails is reported and skipped.
        try:
            print(mcp_base_url)
            # NOTE: the original author flagged this way of building the
            # client as possibly deprecated soon - revisit when upgrading.
            mcp_client = MCPClient({"url": mcp_base_url, "transport": "sse"})
            try:
                tools = mcp_client.get_tools()
                print(len(tools))
            finally:
                # Always release the connection, even if get_tools() fails.
                mcp_client.disconnect()
        except Exception as e:
            print(f"[Warning] Could not fetch tools from {mcp_base_url}: {e}")
            continue

        # General Space information, shared by every tool row of this Space.
        name = space_id.rpartition("/")[2]
        hf_url = f"https://huggingface.co/spaces/{space_id}"
        created_at = space.get("createdAt", "")
        n_likes = space.get("likes", 0)
        joined_tags = ", ".join(tags)

        for tool in tools:
            rows.append({
                "Gradio MCP endpoint": mcp_base_url,
                "Tool name": tool.name,
                "Tool description": tool.description,
                # Iterating tool.inputs yields the input names.
                "Tool inputs": ", ".join(tool.inputs),
                "Space name": name,
                "HF Space URL": hf_url,
                "Likes": n_likes,
                "Created at": created_at,
                "Tags": joined_tags,
            })

    return pd.DataFrame(rows)


# NOTE: the scrape runs at import time, exactly as in the original script.
OUTPUT_PATH = "./data/hackathon_mcp_tools.parquet"

df = extracthackathontools()
# Create the target directory first so a long scrape is not lost to a
# missing "./data" folder.
Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(OUTPUT_PATH, index=False)