yijun-lee committed
Commit e6c3213 · verified · 1 Parent(s): b7dab8e

Upload app.py

Files changed (1)
app.py +504 -0
app.py ADDED
@@ -0,0 +1,504 @@
import gradio as gr
import requests
from typing import List, Dict
from huggingface_hub import HfApi
import os
from dotenv import load_dotenv
from pinecone import Pinecone
from openai import OpenAI

# Load environment variables from a local .env file, if present
load_dotenv()

# Initialize the Hugging Face API client, authenticated when a token is available
HF_TOKEN = os.getenv("HF_TOKEN")
api = HfApi(token=HF_TOKEN) if HF_TOKEN else HfApi()

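# The app reads the following environment variables (via .env or the host
# environment): HF_TOKEN (optional, for authenticated HF API calls),
# SMITHERY_TOKEN, PINECONE_API_KEY, and OPENAI_API_KEY. A minimal .env sketch
# (values are placeholders):
#
#   HF_TOKEN=hf_...
#   SMITHERY_TOKEN=...
#   PINECONE_API_KEY=...
#   OPENAI_API_KEY=sk-...
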
def keyword_search_hf_spaces(query: str = "", limit: int = 3) -> Dict:
    """
    Search for MCPs in Hugging Face Spaces.

    Args:
        query: Search query string
        limit: Maximum number of results to return (default: 3)

    Returns:
        Dictionary containing search results with MCP information
    """
    try:
        print(f"Debug - Search query: '{query}'")  # Debug log

        # Use the list_spaces API with the mcp-server filter, sorted by likes
        spaces = list(api.list_spaces(
            search=query,
            sort="likes",
            direction=-1,  # Descending order
            filter="mcp-server"
        ))

        results = []
        for space in spaces[:limit]:  # Process up to `limit` matches
            try:
                space_info = {
                    "id": space.id,
                    "likes": space.likes,
                    "trending_score": space.trending_score,
                    "source": "huggingface"
                }
                results.append(space_info)
            except Exception as e:
                print(f"Error processing space {space.id}: {str(e)}")
                continue

        return {
            "results": results,
            "total": len(results)
        }
    except Exception as e:
        print(f"Debug - Critical error in keyword_search_hf_spaces: {str(e)}")
        return {
            "error": str(e),
            "results": [],
            "total": 0
        }

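# Illustrative sketch (hypothetical helper, not called by the app): what a
# direct call to keyword_search_hf_spaces looks like. The query string and the
# values in the comment are made-up placeholders.
def _example_keyword_hf_search() -> Dict:
    out = keyword_search_hf_spaces("image generation", limit=2)
    # On success, `out` has the shape:
    #   {"results": [{"id": "<owner>/<space>", "likes": 0, "trending_score": None,
    #                 "source": "huggingface"}, ...], "total": 2}
    return out
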
def keyword_search_smithery(query: str = "", limit: int = 3) -> Dict:
    """
    Search for MCPs in the Smithery Registry.

    Args:
        query: Search query string
        limit: Maximum number of results to return (default: 3)

    Returns:
        Dictionary containing search results with MCP information
    """
    try:
        # Get Smithery token from environment
        SMITHERY_TOKEN = os.getenv("SMITHERY_TOKEN")
        if not SMITHERY_TOKEN:
            return {
                "error": "SMITHERY_TOKEN not found",
                "results": [],
                "total": 0
            }

        # Prepare headers and query parameters
        headers = {
            'Authorization': f'Bearer {SMITHERY_TOKEN}'
        }

        # Restrict results to deployed servers
        search_query = f"{query} is:deployed"

        params = {
            'q': search_query,
            'page': 1,
            'pageSize': 100  # Request the maximum page size
        }

        # Make the API request
        response = requests.get(
            'https://registry.smithery.ai/servers',
            headers=headers,
            params=params
        )

        if response.status_code != 200:
            return {
                "error": f"Smithery API error: {response.status_code}",
                "results": [],
                "total": 0
            }

        # Parse the response
        data = response.json()
        results = []

        # Sort servers by useCount and keep the top results up to `limit`
        servers = sorted(data.get('servers', []), key=lambda x: x.get('useCount', 0), reverse=True)[:limit]

        for server in servers:
            server_info = {
                "id": server.get('qualifiedName'),
                "name": server.get('displayName'),
                "description": server.get('description'),
                "likes": server.get('useCount', 0),
                "source": "smithery"
            }
            results.append(server_info)

        return {
            "results": results,
            "total": len(results)
        }

    except Exception as e:
        return {
            "error": str(e),
            "results": [],
            "total": 0
        }

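# Illustrative sketch (hypothetical helper, not called by the app): the raw
# Smithery registry request that keyword_search_smithery wraps. The query is a
# placeholder; the endpoint and the q/page/pageSize parameters are the ones
# used above.
def _example_smithery_query() -> Dict:
    token = os.getenv("SMITHERY_TOKEN", "")
    resp = requests.get(
        "https://registry.smithery.ai/servers",
        headers={"Authorization": f"Bearer {token}"},
        params={"q": "weather is:deployed", "page": 1, "pageSize": 10},
    )
    # The JSON body is expected to contain a "servers" list with fields such as
    # qualifiedName, displayName, description, and useCount.
    return resp.json()
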
def keyword_search(query: str, sources: List[str], limit: int = 3) -> Dict:
    """
    Search for MCPs using keyword matching.

    Args:
        query: Keyword search query
        sources: List of sources to search from ('huggingface', 'smithery')
        limit: Maximum number of results to return per source (default: 3)

    Returns:
        Dictionary containing combined search results
    """
    all_results = []

    if "huggingface" in sources:
        hf_results = keyword_search_hf_spaces(query, limit)
        all_results.extend(hf_results.get("results", []))

    if "smithery" in sources:
        smithery_results = keyword_search_smithery(query, limit)
        all_results.extend(smithery_results.get("results", []))

    return {
        "results": all_results,
        "total": len(all_results),
        "search_type": "keyword"
    }

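# Illustrative sketch (hypothetical helper, not called by the app): combining
# both sources in one keyword search. Since `limit` is applied per source, a
# call like this can return up to 2 * limit results in total.
def _example_combined_keyword_search() -> Dict:
    return keyword_search("image generation", ["huggingface", "smithery"], limit=3)
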
def embedding_search_hf_spaces(query: str = "", limit: int = 3) -> Dict:
    """
    Search for MCPs in Hugging Face Spaces using semantic embedding matching.

    Args:
        query: Natural language search query
        limit: Maximum number of results to return (default: 3)

    Returns:
        Dictionary containing search results with MCP information
    """
    try:
        # Read Pinecone and OpenAI credentials
        pinecone_api_key = os.getenv('PINECONE_API_KEY')
        openai_api_key = os.getenv('OPENAI_API_KEY')

        if not pinecone_api_key or not openai_api_key:
            return {
                "error": "API keys not found",
                "results": [],
                "total": 0
            }

        # Initialize clients
        pc = Pinecone(api_key=pinecone_api_key)
        index = pc.Index("hf-mcp")
        client = OpenAI(api_key=openai_api_key)

        # Generate the query embedding with OpenAI
        response = client.embeddings.create(
            input=query,
            model="text-embedding-3-large"
        )
        query_embedding = response.data[0].embedding

        # Search Pinecone with the generated embedding
        results = index.query(
            namespace="",
            vector=query_embedding,
            top_k=limit
        )

        # Process matches and fetch detailed information
        space_results = []
        if not results.matches:
            return {
                "results": [],
                "total": 0
            }

        for match in results.matches:
            space_id = match.id
            try:
                # Remove the 'spaces/' prefix if present
                repo_id = space_id.replace('spaces/', '')

                # Get space information from the HF API
                space = api.space_info(repo_id)
                space_info = {
                    "id": space.id,
                    "likes": space.likes,
                    "trending_score": space.trending_score,
                    "source": "huggingface",
                    "score": match.score  # Similarity score
                }
                space_results.append(space_info)
            except Exception:
                # Skip spaces that can no longer be resolved
                continue

        return {
            "results": space_results,
            "total": len(space_results)
        }

    except Exception as e:
        return {
            "error": str(e),
            "results": [],
            "total": 0
        }

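# Note (assumption): text-embedding-3-large produces 3072-dimensional vectors
# by default, so the "hf-mcp" Pinecone index is presumably configured with
# dimension 3072 and ids like "spaces/<owner>/<space>", matching the 'spaces/'
# prefix stripped above. A minimal sketch of the embed step on its own:
def _example_embed_query(text: str) -> list:
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    return client.embeddings.create(input=text, model="text-embedding-3-large").data[0].embedding
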
def embedding_search_smithery(query: str = "", limit: int = 3) -> Dict:
    """
    Search for MCPs in the Smithery Registry using semantic embedding matching.

    Args:
        query: Natural language search query
        limit: Maximum number of results to return (default: 3)

    Returns:
        Dictionary containing search results with MCP information
    """
    try:
        # Read Pinecone, OpenAI, and Smithery credentials
        pinecone_api_key = os.getenv('PINECONE_API_KEY')
        openai_api_key = os.getenv('OPENAI_API_KEY')
        smithery_token = os.getenv('SMITHERY_TOKEN')

        if not pinecone_api_key or not openai_api_key or not smithery_token:
            return {
                "error": "API keys not found",
                "results": [],
                "total": 0
            }

        # Initialize clients
        pc = Pinecone(api_key=pinecone_api_key)
        index = pc.Index("smithery-mcp")
        client = OpenAI(api_key=openai_api_key)

        # Generate the query embedding with OpenAI
        response = client.embeddings.create(
            input=query,
            model="text-embedding-3-large"
        )
        query_embedding = response.data[0].embedding

        # Search Pinecone with the generated embedding
        results = index.query(
            namespace="",
            vector=query_embedding,
            top_k=limit
        )

        # Process matches and fetch detailed information from Smithery
        server_results = []
        if not results.matches:
            return {
                "results": [],
                "total": 0
            }

        # Prepare headers for the Smithery API
        headers = {
            'Authorization': f'Bearer {smithery_token}'
        }

        for match in results.matches:
            server_id = match.id
            try:
                # Get server information from the Smithery API
                response = requests.get(
                    f'https://registry.smithery.ai/servers/{server_id}',
                    headers=headers
                )

                if response.status_code != 200:
                    continue

                server = response.json()
                server_info = {
                    "id": server.get('qualifiedName'),
                    "name": server.get('displayName'),
                    "description": server.get('description'),
                    "likes": server.get('useCount', 0),
                    "source": "smithery",
                    "score": match.score  # Similarity score
                }
                server_results.append(server_info)
            except Exception:
                # Skip servers whose details cannot be fetched
                continue

        return {
            "results": server_results,
            "total": len(server_results)
        }

    except Exception as e:
        return {
            "error": str(e),
            "results": [],
            "total": 0
        }

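# Note (assumption): the ids stored in the "smithery-mcp" index are Smithery
# qualifiedNames, since match.id is used directly in the /servers/{id} detail
# lookup above. A sketch of that lookup on its own (hypothetical helper):
def _example_smithery_detail(qualified_name: str) -> Dict:
    resp = requests.get(
        f"https://registry.smithery.ai/servers/{qualified_name}",
        headers={"Authorization": f"Bearer {os.getenv('SMITHERY_TOKEN', '')}"},
    )
    return resp.json() if resp.status_code == 200 else {}
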
def embedding_search(query: str, sources: List[str], limit: int = 3) -> Dict:
    """
    Search for MCPs using semantic embedding matching.

    Args:
        query: Natural language search query
        sources: List of sources to search from ('huggingface', 'smithery')
        limit: Maximum number of results to return per source (default: 3)

    Returns:
        Dictionary containing combined search results
    """
    all_results = []

    if "huggingface" in sources:
        try:
            hf_results = embedding_search_hf_spaces(query, limit)
            all_results.extend(hf_results.get("results", []))
        except Exception:
            # Fall back to keyword search if vector search fails
            hf_results = keyword_search_hf_spaces(query, limit)
            all_results.extend(hf_results.get("results", []))

    if "smithery" in sources:
        try:
            smithery_results = embedding_search_smithery(query, limit)
            all_results.extend(smithery_results.get("results", []))
        except Exception:
            # Fall back to keyword search if vector search fails
            smithery_results = keyword_search_smithery(query, limit)
            all_results.extend(smithery_results.get("results", []))

    return {
        "results": all_results,
        "total": len(all_results),
        "search_type": "embedding"
    }

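# Illustrative sketch (hypothetical helper, not called by the app): the two
# search modes side by side. Keyword search hits the registries directly,
# while embedding search goes through Pinecone and falls back to keyword
# search per source if the vector path raises.
def _example_compare_search_modes(query: str) -> Dict:
    return {
        "keyword": keyword_search(query, ["huggingface"], limit=3),
        "embedding": embedding_search(query, ["huggingface"], limit=3),
    }
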
# Create the Gradio interface
with gr.Blocks(title="🚦 Router MCP", css="""
#client_radio {
    margin-top: 0 !important;
    padding-top: 0 !important;
}
#client_radio .radio-group {
    gap: 0.5rem !important;
}
""") as demo:
    gr.Markdown("# 🚦 Router MCP")
    gr.Markdown("### Search MCP-compatible Spaces using natural language")

    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(
                label="Describe the MCP Server you're looking for",
                placeholder="e.g., 'I need an MCP Server that can generate images'"
            )

            gr.Markdown("### Select sources to search")
            hf_checkbox = gr.Checkbox(label="Hugging Face Spaces", value=True)
            smithery_checkbox = gr.Checkbox(label="Smithery", value=False)
            registry_checkbox = gr.Checkbox(label="Registry (Coming Soon)", value=False, interactive=False)

            result_limit = gr.Number(
                label="Maximum number of results for each source",
                value=3,
                minimum=1,
                maximum=20,
                step=1
            )

            gr.Markdown("### Select your MCP Client")
            client_radio = gr.Radio(
                choices=["Cursor", "Windsurf", "Claude Desktop", "VS Code", "Gradio"],
                label="",
                value="Cursor",
                interactive=True,
                elem_id="client_radio"
            )

            with gr.Row():
                keyword_search_button = gr.Button("Keyword Search")
                embedding_search_button = gr.Button("Semantic Search")

        with gr.Column():
            results_output = gr.JSON(label="Search Results")

    # Set up event handlers
    # NOTE: this helper is currently unused; the checkboxes are passed to the
    # handlers as inputs instead, since component .value only holds the initial value.
    def get_sources():
        return ["huggingface" if hf_checkbox.value else "", "smithery" if smithery_checkbox.value else ""]

    def handle_keyword_mcp_search(query: str, hf: bool, sm: bool, limit: int) -> Dict:
        """
        Handle keyword-based search for MCP servers across the selected sources.
        If the client (such as Cursor or Claude) encounters a task it cannot
        handle, it can use MCP to search for other tools that can.

        Args:
            query (str): The search query string to find matching MCP servers
            hf (bool): Whether to include Hugging Face Spaces in the search (converted to "huggingface" string if True)
            sm (bool): Whether to include Smithery in the search (converted to "smithery" string if True)
            limit (int): Maximum number of results to return per source (default: 3)

        Returns:
            Dict: A dictionary containing the search results with the following keys:
                - results: List of found MCP servers
                - total: Total number of results
                - search_type: Type of search performed ("keyword")
        """
        return keyword_search(
            query,
            ["huggingface" if hf else "", "smithery" if sm else ""],
            int(limit)
        )

    def handle_embedding_mcp_search(query: str, hf: bool, sm: bool, limit: int) -> Dict:
        """
        Handle semantic embedding-based search for MCP servers across the
        selected sources. If the client (such as Cursor or Claude) encounters a
        task it cannot handle, it can use MCP to search for other tools that can.

        Args:
            query (str): The natural language search query to find semantically similar MCP servers
            hf (bool): Whether to include Hugging Face Spaces in the search (converted to "huggingface" string if True)
            sm (bool): Whether to include Smithery in the search (converted to "smithery" string if True)
            limit (int): Maximum number of results to return per source (default: 3)

        Returns:
            Dict: A dictionary containing the search results with the following keys:
                - results: List of found MCP servers with similarity scores
                - total: Total number of results
                - search_type: Type of search performed ("embedding")
        """
        return embedding_search(
            query,
            ["huggingface" if hf else "", "smithery" if sm else ""],
            int(limit)
        )

    keyword_search_button.click(
        fn=handle_keyword_mcp_search,
        inputs=[query_input, hf_checkbox, smithery_checkbox, result_limit],
        outputs=results_output
    )

    embedding_search_button.click(
        fn=handle_embedding_mcp_search,
        inputs=[query_input, hf_checkbox, smithery_checkbox, result_limit],
        outputs=results_output
    )

    # query_input.submit(
    #     fn=handle_embedding_mcp_search,
    #     inputs=[query_input, hf_checkbox, smithery_checkbox, result_limit],
    #     outputs=results_output
    # )

if __name__ == "__main__":
    demo.launch(mcp_server=True)
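# Note (assumption): with mcp_server=True, recent Gradio versions expose the
# registered handler functions as MCP tools over SSE, typically at
#   http://<host>:<port>/gradio_api/mcp/sse
# so a client such as Cursor could point at this Space with a config like:
#   {"mcpServers": {"router-mcp": {"url": "https://<space-url>/gradio_api/mcp/sse"}}}
# The exact URL scheme depends on the Gradio version and hosting setup.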