"""Gradio app that turns an image or a text snippet into a CLIP embedding."""

from typing import Optional, Union

import gradio as gr
import numpy as np
from PIL import Image
from sentence_transformers import SentenceTransformer

# Loaded once at import time so every request reuses the same model instance.
model = SentenceTransformer('clip-ViT-B-32', cache_folder='/app/cache')


def image_to_embedding(
    img: Optional[np.ndarray] = None,
    txt: Optional[str] = None,
) -> Union[np.ndarray, list]:
    """Encode an image or a text string with the module-level model.

    When both inputs are supplied the image takes precedence and the text
    is ignored.

    Args:
        img: Image as an array accepted by ``PIL.Image.fromarray``, or
            ``None`` when no image was provided.
        txt: Text to encode; ``None`` and the empty string both count as
            "no text".

    Returns:
        An empty list when neither input is provided; otherwise whatever
        ``model.encode`` returns for a one-element batch holding the chosen
        input.
    """
    if img is None and not txt:
        # Nothing to encode; keep the historical empty-list result so the
        # output textbox still shows "[]".
        return []

    # The image wins over the text when both are present.
    payload = Image.fromarray(img) if img is not None else txt
    return model.encode(sentences=[payload], batch_size=128)


# NOTE(review): cache_examples=True is passed without any `examples=`;
# presumably it has no effect here -- confirm against the installed Gradio.
demo = gr.Interface(
    fn=image_to_embedding,
    inputs=["image", "textbox"],
    outputs="textbox",
    cache_examples=True,
)

# Bind to all interfaces so the app is reachable from outside the host
# (e.g. when running inside a container).
demo.launch(server_name="0.0.0.0")