import os
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForCausalLM  # Auto classes resolve the checkpoint's custom Flamingo code

EXAMPLES_DIR = 'examples'
DEFAULT_PROMPT = "<image>"

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# trust_remote_code=True is required so the Auto class can load the custom Flamingo model code shipped with the checkpoint
model = AutoModelForCausalLM.from_pretrained('dhansmair/flamingo-mini', trust_remote_code=True)
model.to(device)
model.eval()
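# Optional: running in half precision on GPU roughly halves memory use. This is a
# hedged sketch, not part of the original demo; it assumes the custom Flamingo
# weights behave well in fp16.
# if device.type == 'cuda':
#     model.half()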

# The processor takes no `device` argument; its outputs are moved to the device
# inside predict_caption below
processor = AutoProcessor.from_pretrained('dhansmair/flamingo-mini')

# Set up some example images
examples = []
if os.path.isdir(EXAMPLES_DIR):
    for file in os.listdir(EXAMPLES_DIR):
        path = os.path.join(EXAMPLES_DIR, file)
        examples.append([path, DEFAULT_PROMPT])


def predict_caption(image, prompt):
    assert isinstance(prompt, str)

    # Preprocess the image and prompt. The keyword names follow the checkpoint's
    # remote processor code; return_tensors="pt" is the standard transformers
    # convention for getting PyTorch tensors back.
    inputs = processor(images=image, prompt=prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # generate() returns token IDs, which must be decoded back into text
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_length=50)
    caption = processor.batch_decode(output_ids, skip_special_tokens=True)

    if isinstance(caption, list):
        caption = caption[0]

    return caption


iface = gr.Interface(
    fn=predict_caption, 
    inputs=[gr.Image(type="pil"), gr.Textbox(value=DEFAULT_PROMPT, label="Prompt")], 
    examples=examples or None,  # pass None rather than an empty list when no examples exist
    outputs="text"
)

iface.launch(debug=True)
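
# Quick smoke test without the UI (a sketch; "examples/cat.jpg" is a hypothetical
# path, not shipped with this script):
# from PIL import Image
# print(predict_caption(Image.open("examples/cat.jpg"), DEFAULT_PROMPT))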