import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image

# Define the model name from Hugging Face
MODEL_NAME = "deepseek-ai/deepseek-vl2-small"

# Pick the device once so the model and the inputs always land in the same place
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and processor
# trust_remote_code lets the hub repo supply its custom processor/model code if it ships any
processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    trust_remote_code=True,
).to(DEVICE)

# Test the model with an image
def predict(image_path):
    image = Image.open(image_path).convert("RGB")
    # Process input and move the tensors to the same device as the model
    inputs = processor(images=image, return_tensors="pt").to(DEVICE)
    # Generate output
    output = model.generate(**inputs)
    # Decode response
    generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]
    return generated_text

# Example usage
if __name__ == "__main__":
    test_image_path = "test.jpg"  # Replace with an actual image path
    print("Generated Output:", predict(test_image_path))
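Vision-language models usually give more useful output when the image is paired with a text prompt rather than passed alone. The variant below is a minimal sketch of that pattern; it assumes the processor follows the standard transformers interface that accepts text alongside images, and the prompt string, function name, and max_new_tokens value are illustrative rather than part of the original example.

def predict_with_prompt(image_path, prompt="Describe this image."):
    image = Image.open(image_path).convert("RGB")
    # Pair the image with a text prompt (assumes a text= keyword is supported)
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(DEVICE)
    # Cap the response length so generation terminates quickly
    output = model.generate(**inputs, max_new_tokens=128)
    return processor.batch_decode(output, skip_special_tokens=True)[0]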