File size: 1,065 Bytes
c63408e
4a51cdd
c63408e
 
 
b9bb3e9
 
 
c63408e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import gradio as gr

import transformers
from transformers import AutoProcessor
from transformers import AutoModelForCausalLM
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Resize
import os
from PIL import Image
# Identifier of the saved captioning checkpoint handed to from_pretrained().
# NOTE(review): despite the "folder" name this looks like a Hugging Face Hub
# repo id rather than a local directory — confirm.
saved_folder_path = "sudeep-007/saved_model"
# Load the processor and the causal-LM model once at module import so that
# generate_caption() reuses the same instances on every call.
processor = AutoProcessor.from_pretrained(saved_folder_path)
model = AutoModelForCausalLM.from_pretrained(saved_folder_path)
def generate_caption(image):
    """Generate a text caption for an image with the module-level model.

    Args:
        image: Image data in a form accepted by ``PIL.Image.fromarray``
            (presumably the NumPy array that ``gr.Image`` passes to its
            callback — confirm against the component's ``type`` setting),
            or ``None`` when no image is currently supplied.

    Returns:
        The decoded caption string for the image, or an empty string when
        ``image`` is ``None``.
    """
    # The interface is created with live=True, so Gradio can invoke this
    # callback while the image input is empty (e.g. after it is cleared).
    # There is nothing to caption then, and Image.fromarray(None) would raise.
    if image is None:
        return ""

    pil_image = Image.fromarray(image)
    inputs = processor(images=pil_image, return_tensors="pt")

    # max_length=50 bounds the length of the generated token sequence.
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        max_length=50,
    )

    # batch_decode yields one string per generated sequence; only a single
    # image is processed here, so the first entry is the caption.
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]
# Wire the captioning function into a simple image-in / text-out Gradio UI.
# live=True re-runs generate_caption whenever the image input changes instead
# of waiting for an explicit submit.
interface = gr.Interface(
    generate_caption,
    gr.Image(),
    gr.Textbox(),
    live=True,
)

# Enable request queuing, then start the server with a public share link.
interface.queue().launch(share=True)