Mediocreatmybest's picture
Update app.py
44717e7
raw
history blame
1.35 kB
import torch
import gradio as gr
from PIL import Image
import requests
import io
from transformers import pipeline
# Captioning models offered in the UI.
# Key: short name listed in the model dropdown.
# Value: model identifier handed to `transformers.pipeline(model=...)`.
CAPTION_MODELS = {
    'blip-base': 'Salesforce/blip-image-captioning-base',
    'blip-large': 'Salesforce/blip-image-captioning-large',
    'vit-gpt2-coco-en': 'ydshieh/vit-gpt2-coco-en',
    'blip2-2.7b-fp16': 'Mediocreatmybest/blip2-opt-2.7b-fp16-sharded',
}
# Simple caption creation
# Holds at most one loaded pipeline, keyed by its CAPTION_MODELS short name.
# Only the most recently used model is kept so memory use stays bounded
# while repeated requests for the same model skip the expensive reload.
_captioner_cache = {}


def _get_captioner(model_choice):
    """Return the image-to-text pipeline for *model_choice*, loading it on demand.

    Raises:
        KeyError: if *model_choice* is not a key of ``CAPTION_MODELS``.
    """
    # Look the id up first so an unknown choice does not evict the cached model.
    model_id = CAPTION_MODELS[model_choice]
    if model_choice not in _captioner_cache:
        # Drop the previous model before loading the next one.
        _captioner_cache.clear()
        _captioner_cache[model_choice] = pipeline(
            task="image-to-text",
            model=model_id,
            max_new_tokens=30,
            device_map="cpu",
            use_fast=True,
        )
    return _captioner_cache[model_choice]


def caption_image(model_choice, image_input):
    """Generate a caption for an image with the selected model.

    Args:
        model_choice: Key of ``CAPTION_MODELS`` selecting the model.
        image_input: Either a string (passed to the pipeline unchanged; the
            pipeline resolves URLs and local paths itself), an already opened
            ``PIL.Image.Image``, or the raw bytes of an uploaded image file.

    Returns:
        The generated caption with surrounding whitespace stripped.

    Raises:
        KeyError: if *model_choice* is not a key of ``CAPTION_MODELS``.
        ValueError: if *image_input* is ``None`` (nothing was provided).
    """
    if image_input is None:
        raise ValueError("No image provided")

    if isinstance(image_input, (str, Image.Image)):
        image = image_input
    else:
        # Decode the upload in memory. The pipeline accepts PIL images
        # directly, so no temporary file is needed: this avoids a shared
        # fixed filename being overwritten by concurrent requests and the
        # failure Pillow raises when saving RGBA/P-mode images as JPEG.
        image = Image.open(io.BytesIO(image_input))

    captioner = _get_captioner(model_choice)
    caption = captioner(image)[0]['generated_text']
    return str(caption).strip()
def launch(model_choice, input):
    """Interface callback: caption *input* using the model named by *model_choice*.

    Both arguments are forwarded unchanged to ``caption_image`` and its
    result is returned as-is.
    """
    caption = caption_image(model_choice=model_choice, image_input=input)
    return caption
# --- UI wiring -------------------------------------------------------------
# Dropdown listing the short model names, in CAPTION_MODELS order.
model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS), label='Model Choice')

# `gr.Data` is not a Gradio component, so the original line failed at startup.
# `gr.File(type="binary")` hands the upload to `launch` as raw bytes, which is
# the form `caption_image` decodes. A file component cannot take a typed URL,
# so the label no longer advertises one.
image_upload = gr.File(type="binary", label="Upload Image")

iface = gr.Interface(launch, inputs=[model_dropdown, image_upload], outputs="text")
iface.launch()