"""Gradio demo that captions an uploaded image with a pretrained model.

The processor and model are loaded once at import time from
``saved_folder_path`` and reused by every request.
"""

import os

import gradio as gr
import pandas as pd
import torch
import transformers
from datasets import load_dataset
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Resize
from transformers import AutoModelForCausalLM
from transformers import AutoProcessor

# Model identifier/path passed to ``from_pretrained`` for both the processor
# and the model.
saved_folder_path = "sudeep-007/saved_model"

# Upper bound passed to ``model.generate`` as ``max_length``.
MAX_CAPTION_LENGTH = 50

processor = AutoProcessor.from_pretrained(saved_folder_path)
model = AutoModelForCausalLM.from_pretrained(saved_folder_path)


def generate_caption(image):
    """Return a generated caption for *image*.

    Args:
        image: Array-like image accepted by ``PIL.Image.fromarray`` (this is
            what the ``gr.Image()`` input below hands to the callback), or
            ``None`` when no image is currently present in the UI.

    Returns:
        The decoded caption string, or an empty string when *image* is
        ``None``.
    """
    # With ``live=True`` the callback also fires when the image is cleared,
    # in which case there is nothing to caption; avoid crashing in
    # ``Image.fromarray``.
    if image is None:
        return ""

    # Process the image
    pil_image = Image.fromarray(image)
    inputs = processor(images=pil_image, return_tensors="pt")
    pixel_values = inputs.pixel_values

    # Generate caption
    generated_ids = model.generate(
        pixel_values=pixel_values,
        max_length=MAX_CAPTION_LENGTH,
    )
    # ``batch_decode`` returns one string per sequence; a single image was
    # submitted, so take the first entry.
    generated_caption = processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )[0]
    return generated_caption


interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(),
    outputs=gr.Textbox(),
    live=True,
)
interface.queue()
interface.launch(share=True)