# Hugging Face Space (status: Running)
import gradio as gr | |
import torch | |
import numpy as np | |
from PIL import Image | |
import os | |
import json | |
import base64 | |
from io import BytesIO | |
import requests | |
from typing import Dict, List, Any, Optional | |
from transformers.pipelines import pipeline | |
# MCP imports | |
from modelcontextprotocol.server import Server | |
from modelcontextprotocol.server.gradio import GradioServerTransport | |
from modelcontextprotocol.types import ( | |
CallToolRequestSchema, | |
ErrorCode, | |
ListToolsRequestSchema, | |
McpError, | |
) | |
# Load the vision embedding pipeline once, at import time, so every request
# reuses the same instance.
# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run locally; confirm the repository (and ideally a pinned
# revision) is trusted.
model = pipeline(
    task="image-feature-extraction",
    model="nomic-ai/nomic-embed-vision-v1.5",
    trust_remote_code=True,
)
# Function to generate embeddings from an image
def _flatten_model_output(result):
    """Convert a raw pipeline result into a flat Python list.

    Tensors and arrays are flattened, as are numeric (possibly nested) lists,
    so the length of the returned list is the total number of values.
    Returns None when the result is None or cannot be turned into a list.
    """
    if result is None:
        print("Result is None")
        return None

    if isinstance(result, torch.Tensor):
        return result.detach().cpu().numpy().flatten().tolist()

    if isinstance(result, np.ndarray):
        return result.flatten().tolist()

    if isinstance(result, list):
        # A list of tensors/arrays: only the first entry is used.
        if result and isinstance(result[0], (torch.Tensor, np.ndarray)):
            return result[0].flatten().tolist()

        # Plain (possibly nested) lists of numbers. These are flattened too,
        # so they are treated like the tensor/array branches above; without
        # this, a nested result would report its outer length as the
        # embedding dimension.
        # NOTE(review): presumably the pipeline returns nested lists such as
        # batch x tokens x features - confirm the shape for this model.
        try:
            as_array = np.asarray(result)
        except (TypeError, ValueError):
            # Ragged or otherwise non-rectangular data: leave it untouched.
            return result
        if as_array.dtype.kind not in "iuf":
            # Non-numeric content (objects, strings, ...): leave it untouched.
            return result
        return as_array.flatten().tolist()

    # Last resort: anything iterable becomes a list.
    try:
        return list(result)
    except TypeError:
        print(f"Couldn't convert result of type {type(result)} to list")
        return None


def generate_embedding(image):
    """Run the embedding model on an image.

    Args:
        image: A PIL image, or array data accepted by ``Image.fromarray``.
            ``None`` is allowed and yields ``None``.

    Returns:
        A dict ``{"embedding": <flat list>, "dimension": <len of that list>}``,
        or ``None`` when there is no input, the model output cannot be
        converted to a list, or the model call raises. Failures are printed
        rather than raised.
    """
    if image is None:
        return None

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    try:
        embedding_list = _flatten_model_output(model(image))
    except Exception as e:
        # Best-effort contract: callers expect None instead of an exception.
        print(f"Error generating embedding: {str(e)}")
        return None

    if embedding_list is None:
        return None

    return {
        "embedding": embedding_list,
        "dimension": len(embedding_list),
    }
# Gradio Interface: one image input, a button, and two outputs
# (the raw embedding as JSON and a short dimension summary).
with gr.Blocks() as demo:
    gr.Markdown("# Nomic Vision Embedding Model (nomic-ai/nomic-embed-vision-v1.5)")
    gr.Markdown("Upload an image to generate embeddings using the Nomic Vision model.")

    with gr.Row():
        # Left column: input controls
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            embed_btn = gr.Button("Generate Embedding")
        # Right column: results
        with gr.Column():
            embedding_json = gr.JSON(label="Embedding Output")
            embedding_dim = gr.Textbox(label="Embedding Dimension")

    def update_embedding(img):
        """Embed ``img`` and return the new values keyed by output component."""
        payload = generate_embedding(img)
        if payload is not None:
            dim_text = f"Dimension: {len(payload['embedding'])}"
        else:
            dim_text = "No embedding generated"
        # payload is None on failure, which clears the JSON view.
        return {
            embedding_json: payload,
            embedding_dim: dim_text,
        }

    embed_btn.click(
        fn=update_embedding,
        inputs=[input_image],
        outputs=[embedding_json, embedding_dim],
    )
# MCP Server Implementation
class NomicEmbeddingServer:
    """MCP server exposing one tool, ``embed_image``, backed by ``generate_embedding``.

    NOTE(review): confirm that the ``modelcontextprotocol`` package and the
    ``Server(info, options)`` / ``set_request_handler`` API used here are
    available in the deployment environment.
    """

    # Seconds to wait for a remote image before giving up, so a slow or
    # unresponsive host cannot hang a tool call indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Create the underlying server and register the tool handlers."""
        self.server = Server(
            {
                "name": "nomic-embedding-server",
                "version": "0.1.0",
            },
            {
                "capabilities": {
                    "tools": {},
                },
            },
        )
        self.setup_tool_handlers()
        # Error handling
        self.server.onerror = lambda error: print(f"[MCP Error] {error}")

    def setup_tool_handlers(self):
        """Register the list-tools and call-tool request handlers."""
        self.server.set_request_handler(ListToolsRequestSchema, self.handle_list_tools)
        self.server.set_request_handler(CallToolRequestSchema, self.handle_call_tool)

    async def handle_list_tools(self, request):
        """Return the tool listing: a single ``embed_image`` tool.

        The input schema accepts either ``image_url`` or ``image_data``
        (base64); at least one of them is required.
        """
        return {
            "tools": [
                {
                    "name": "embed_image",
                    "description": "Generate embeddings for an image using nomic-ai/nomic-embed-vision-v1.5",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "image_url": {
                                "type": "string",
                                "description": "URL of the image to embed",
                            },
                            "image_data": {
                                "type": "string",
                                "description": "Base64-encoded image data (alternative to image_url)",
                            },
                        },
                        "anyOf": [
                            {"required": ["image_url"]},
                            {"required": ["image_data"]},
                        ],
                    },
                }
            ]
        }

    async def handle_call_tool(self, request):
        """Execute the ``embed_image`` tool.

        Args:
            request: Object exposing ``params.name`` and ``params.arguments``.

        Returns:
            A result dict with one text content item: the JSON-encoded
            embedding on success, or an error message with ``"isError": True``
            when loading the image or computing the embedding fails.

        Raises:
            McpError: ``MethodNotFound`` for an unknown tool name, or
                ``InvalidParams`` when neither ``image_url`` nor
                ``image_data`` is supplied.
        """
        if request.params.name != "embed_image":
            raise McpError(
                ErrorCode.MethodNotFound,
                f"Unknown tool: {request.params.name}"
            )

        # Arguments may be absent altogether; treat that as "nothing supplied".
        args = request.params.arguments or {}

        # Validate before entering the try block so this protocol-level error
        # is not swallowed by the generic handler below.
        if "image_url" not in args and "image_data" not in args:
            raise McpError(
                ErrorCode.InvalidParams,
                "Either image_url or image_data must be provided"
            )

        try:
            if "image_url" in args:
                # SECURITY NOTE: the URL comes from the client and is fetched
                # from this server; restrict allowed hosts if the server can
                # reach internal network resources.
                response = requests.get(args["image_url"], timeout=self.REQUEST_TIMEOUT)
                # Fail on HTTP errors instead of handing an error page to PIL.
                response.raise_for_status()
                image = Image.open(BytesIO(response.content))
            else:
                image_data = base64.b64decode(args["image_data"])
                image = Image.open(BytesIO(image_data))

            # Generate embedding
            result = generate_embedding(image)
            if result is None:
                # generate_embedding signals failure with None; report it as
                # an error instead of returning the JSON text "null".
                raise ValueError("model returned no embedding")

            text = json.dumps(result, indent=2)
        except Exception as e:
            return {
                "content": [
                    {
                        "type": "text",
                        "text": f"Error generating embedding: {str(e)}",
                    }
                ],
                "isError": True,
            }

        return {
            "content": [
                {
                    "type": "text",
                    "text": text,
                }
            ]
        }
# Build the MCP server at import time and attach it to the Gradio app
# through the Gradio transport.
embedding_server = NomicEmbeddingServer()
transport = GradioServerTransport(demo)
embedding_server.server.connect(transport)

# Start the web UI only when run as a script.
if __name__ == "__main__":
    # Hugging Face Spaces needs an explicit server name and port.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)