"""Gradio demo that captions an uploaded image with a pretrained model.

The processor and the causal language model are both loaded once, at import
time, from ``saved_folder_path``. A live ``gr.Interface`` feeds each uploaded
image to ``generate_caption`` and shows the decoded text in a textbox.
"""

import os

import gradio as gr
import torch
import transformers
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Resize
from transformers import AutoModelForCausalLM, AutoProcessor

# Identifier handed to ``from_pretrained`` for both the processor and the model.
# NOTE(review): looks like a Hugging Face Hub repo id rather than a local
# folder -- confirm.
saved_folder_path = "sudeep-007/saved_model"

# Loaded once at module level so every request reuses the same objects.
processor = AutoProcessor.from_pretrained(saved_folder_path)
model = AutoModelForCausalLM.from_pretrained(saved_folder_path)


def generate_caption(image) -> str:
    """Return a generated caption for ``image``.

    Args:
        image: Array-like pixel data as delivered by ``gr.Image()`` (anything
            ``PIL.Image.fromarray`` accepts), or ``None`` when the image
            component is empty.

    Returns:
        The first decoded caption with special tokens stripped, or an empty
        string when no image was supplied.
    """
    # The interface runs in live mode, so clearing the image triggers a call
    # with ``None``; return an empty caption instead of crashing in fromarray.
    if image is None:
        return ""

    pil_image = Image.fromarray(image)
    inputs = processor(images=pil_image, return_tensors="pt")

    # Cap the generated sequence at 50 tokens.
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        max_length=50,
    )
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]


interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(),
    outputs=gr.Textbox(),
    live=True,
)

if __name__ == "__main__":
    # Only start the web server when executed as a script, so importing this
    # module (e.g. to reuse ``generate_caption``) does not launch the app.
    interface.queue()
    interface.launch(share=True)