import open_clip
from PIL import Image
import gradio as gr

# Load the CoCa captioning model and its image preprocessing transform.
model, _, transform = open_clip.create_model_and_transforms(
    model_name="coca_ViT-L-14",
    pretrained="laion2B-s13B-b90k"
)


def resize_background(img):
    """Pad a non-square image onto a white 512x512 canvas, then resize to 512x512."""
    # Shrink oversized images first so the centered paste offsets stay non-negative.
    img.thumbnail((512, 512), Image.LANCZOS)
    width, height = img.size
    aspect_ratio = width / height
    if aspect_ratio != 1:
        new_img = Image.new("RGB", (512, 512), color=(255, 255, 255))
        new_img.paste(img, (int((512 - width) / 2), int((512 - height) / 2)))
        img = new_img
    img = img.resize((512, 512), Image.LANCZOS)
    return img


# Caption a single image.
def generate_caption(image):
    im = resize_background(image)
    # Transform the image and add a batch dimension.
    im = transform(im).unsqueeze(0)
    generated = model.generate(im)
    generated = generated.detach()
    # Decode the token ids and strip the <start_of_text>/<end_of_text> markers.
    return open_clip.decode(generated[0]).split("<start_of_text>")[1].split("<end_of_text>")[0]


with gr.Blocks() as demo:
    gr.Markdown("## Captioning with OpenCLIP CoCa")
    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Image to Caption", type="pil")
            out = gr.Textbox(label="Caption")
            btn = gr.Button("Generate caption")
    btn.click(fn=generate_caption, inputs=image, outputs=out)

demo.launch()
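# A minimal usage sketch without the Gradio UI: call generate_caption directly
# on a PIL image. "example.jpg" is a placeholder path, not part of the original
# demo; run this instead of demo.launch() for a one-off caption:
#
#   caption = generate_caption(Image.open("example.jpg").convert("RGB"))
#   print(caption)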