""" CLIP Image Embedding Generator A simple Gradio-based application for generating CLIP embeddings from uploaded images. Uses OpenAI's CLIP model with proper preprocessing. """ import gradio as gr from transformers import CLIPProcessor, CLIPModel from PIL import Image import torch import numpy as np import spaces # Load model/processor model: CLIPModel = CLIPModel.from_pretrained("openai/clip-vit-large-patch14") processor: CLIPProcessor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14") model.eval() @spaces.GPU def get_embedding(image: Image.Image) -> str: """ Generate CLIP embedding for an image. Args: image (Image.Image): PIL Image object to process Returns: str: The full embedding array as a string """ device: str = "cuda" if torch.cuda.is_available() else "cpu" # Use CLIP's built-in preprocessing inputs = processor(images=image, return_tensors="pt").to(device) model_device = model.to(device) with torch.no_grad(): emb: torch.Tensor = model_device.get_image_features(**inputs) # L2 normalize the embeddings emb = emb / emb.norm(p=2, dim=-1, keepdim=True) # Convert to numpy and return as string emb_numpy = emb.cpu().numpy().squeeze() return str(emb_numpy.tolist()) # Create Gradio interface demo: gr.Interface = gr.Interface( fn=get_embedding, inputs=gr.Image(type="pil", label="Upload Image"), outputs=gr.Textbox(label="Embedding", lines=20, max_lines=30), allow_flagging="never", title="CLIP Image Embedding Generator", description="Upload an image to generate its CLIP embedding vector.", theme=gr.themes.Soft() ) if __name__ == "__main__": demo.launch(mcp_server=True)