Spaces:

Mediocreatmybest
/

PipelineImageCaption

Runtime error

File size: 1,345 Bytes

bca9f16
6b8d35c
44717e7
 
 
bca9f16
6b8d35c
bca9f16
 
 
 
52fd1d4
bca9f16
6b8d35c
bca9f16
44717e7
 
 
 
 
 
 
 
52fd1d4
 
 
 
 
bca9f16
 
6b8d35c
52fd1d4
 
6b8d35c
0e10238
44717e7

import torch
import gradio as gr
from PIL import Image
import requests
import io
from transformers import pipeline

# Maps the short name shown in the model dropdown to the Hugging Face Hub
# repository id handed to `transformers.pipeline`. Insertion order is the
# order in which the choices are listed in the UI.
CAPTION_MODELS = {
    "blip-base": "Salesforce/blip-image-captioning-base",
    "blip-large": "Salesforce/blip-image-captioning-large",
    "vit-gpt2-coco-en": "ydshieh/vit-gpt2-coco-en",
    "blip2-2.7b-fp16": "Mediocreatmybest/blip2-opt-2.7b-fp16-sharded",
}

# Most recently used captioning pipeline, keyed by model choice. Only one
# entry is kept so that switching models never holds two sets of weights in
# memory at the same time.
_CAPTIONER_CACHE = {}


def _get_captioner(model_choice):
    """Return the image-to-text pipeline for *model_choice*, loading it on first use."""
    try:
        model_id = CAPTION_MODELS[model_choice]
    except KeyError:
        raise KeyError(
            f"Unknown model choice {model_choice!r}; "
            f"expected one of {list(CAPTION_MODELS)}"
        ) from None

    captioner = _CAPTIONER_CACHE.get(model_choice)
    if captioner is None:
        # Drop the previously cached model *before* loading the next one to
        # keep peak memory at a single model.
        _CAPTIONER_CACHE.clear()
        captioner = pipeline(task="image-to-text",
                             model=model_id,
                             max_new_tokens=30,
                             device_map="cpu", use_fast=True
                             )
        _CAPTIONER_CACHE[model_choice] = captioner
    return captioner


# Simple caption creation
def caption_image(model_choice, image_input):
    """Generate a caption for an image with the selected model.

    Args:
        model_choice: A key of ``CAPTION_MODELS``.
        image_input: A string (URL or local file path, forwarded to the
            pipeline unchanged), a ``PIL.Image.Image``, or the raw encoded
            bytes of an image file.

    Returns:
        The generated caption with surrounding whitespace removed.

    Raises:
        ValueError: If no image was supplied (``None`` or empty string/bytes).
        KeyError: If ``model_choice`` is not a key of ``CAPTION_MODELS``.
    """
    # Validate the image first so that a missing upload never triggers a
    # (slow) model load.
    if image_input is None or (
        isinstance(image_input, (str, bytes, bytearray)) and not image_input
    ):
        raise ValueError("No image was provided.")

    if isinstance(image_input, (str, Image.Image)):
        image = image_input
    else:  # Raw bytes of an uploaded file
        # Decode in memory and hand the PIL image to the pipeline directly.
        # Writing a shared 'temp_image_file.jpg' would let concurrent requests
        # overwrite each other and fails for modes JPEG cannot store (RGBA/P).
        image = Image.open(io.BytesIO(image_input))
        image.load()  # decode now so a corrupt upload fails here

    captioner = _get_captioner(model_choice)
    caption = captioner(image)[0]['generated_text']
    return str(caption).strip()

def launch(model_choice, input):
    """Callback wired into the Gradio interface.

    Forwards the selected model name and the value delivered by the image
    input component to ``caption_image`` and returns its caption string.
    The parameter name ``input`` shadows the builtin but is kept so that
    keyword callers keep working.
    """
    caption = caption_image(model_choice=model_choice, image_input=input)
    return caption

# Build and start the web UI: a model selector plus an image upload, wired to
# `launch`, with the caption shown as plain text.
model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS), label='Model Choice')
# Gradio has no `gr.Data` component (referencing it raises AttributeError as
# soon as the script starts). `gr.Image(type="filepath")` passes the handler a
# path string, which `caption_image` forwards to the pipeline as-is.
image_upload = gr.Image(type="filepath", label="Upload Image")
iface = gr.Interface(launch, inputs=[model_dropdown, image_upload], outputs="text")
iface.launch()