from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# Load the BLIP captioning processor and model from the local checkpoint.
processor = BlipProcessor.from_pretrained("src/models/Caption")
model = BlipForConditionalGeneration.from_pretrained("src/models/Caption")


def generateCaption(image_path):
    """Generate a natural-language caption for the image at image_path."""
    # BLIP expects RGB input, so convert (handles grayscale/RGBA sources).
    image = Image.open(image_path).convert("RGB")
    # Preprocess the image into PyTorch tensors.
    inputs = processor(images=image, return_tensors="pt")
    # Disable gradient tracking for inference to save memory.
    with torch.no_grad():
        output = model.generate(**inputs)
    # Decode the generated token IDs to text, dropping special tokens.
    caption = processor.decode(output[0], skip_special_tokens=True)
    return caption
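

# A minimal usage sketch. The image path below is hypothetical; point it
# at any local image file to try the captioner.
if __name__ == "__main__":
    print(generateCaption("sample.jpg"))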