import sys

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP captioning model and its processor.
# "Salesforce/blip-image-captioning-base" is the standard public checkpoint;
# swap in a local path if you have a fine-tuned copy.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Load the image to describe (path passed on the command line)
if len(sys.argv) != 2:
    sys.exit(f"Usage: {sys.argv[0]} <image_path>")
image_path = sys.argv[1]
raw_image = Image.open(image_path).convert("RGB")

# Preprocess the image into model inputs (pixel values as PyTorch tensors)
inputs = processor(images=raw_image, return_tensors="pt")

# Generate a caption; no_grad skips building an autograd graph during inference
with torch.no_grad():
    generated_ids = model.generate(**inputs)

# Decode the generated token IDs back into text
description = processor.decode(generated_ids[0], skip_special_tokens=True)

# Print the description
print("Generated Description:\n", description)
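
# Optional: BLIP also supports conditional captioning, where a short text
# prompt steers the generated caption. This is a minimal sketch reusing the
# processor, model, and raw_image loaded above; the prompt string is an
# illustrative choice, not part of the original script.
prompt = "a photography of"
cond_inputs = processor(images=raw_image, text=prompt, return_tensors="pt")
with torch.no_grad():
    conditional_ids = model.generate(**cond_inputs, max_new_tokens=50)
print("Conditional Description:\n",
      processor.decode(conditional_ids[0], skip_special_tokens=True))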