File size: 867 Bytes
1301c16 166c58e 32fee45 75c5fec 32fee45 75c5fec e822296 32fee45 75c5fec 32fee45 75c5fec 32fee45 75c5fec 32fee45 73f9adb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
---
license: mit
library_name: transformers
pipeline_tag: image-to-text
---
# Load model
# The processor and the captioning model are loaded from the same checkpoint.
from transformers import AutoProcessor, BlipForConditionalGeneration

processor = AutoProcessor.from_pretrained("trunks/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("trunks/blip-image-captioning-base")

# prepare image for model
from PIL import Image
from IPython.display import display

img1 = Image.open("imagepath/img.jpeg")
width, height = img1.size
# Down-scaled copy (30% of each dimension) used only for the preview below;
# the original, full-size image is what gets passed to the processor.
img1_resized = img1.resize((int(0.3 * width), int(0.3 * height)))
display(img1_resized)

# testing image
# Turn the image into PyTorch tensors ("pt") the model can consume.
inputs = processor(images=img1, return_tensors="pt")
pixel_values = inputs.pixel_values
# Generate caption token ids, with the sequence capped at 50 tokens.
generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
# Decode the batch of ids back to text and take the first (only) caption.
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(generated_caption)
|