import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image

# Define the model name from Hugging Face
MODEL_NAME = "deepseek-ai/deepseek-vl2-small"

# Pick the device once so the model and the inputs always land in the same place
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and processor
# trust_remote_code lets the hub repo supply its custom processor/model code if it ships any
processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    trust_remote_code=True,
).to(DEVICE)

# Test the model with an image
def predict(image_path):
    image = Image.open(image_path).convert("RGB")
    # Process input and move the tensors to the same device as the model
    inputs = processor(images=image, return_tensors="pt").to(DEVICE)
    # Generate output
    output = model.generate(**inputs)
    # Decode response
    generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]
    return generated_text

# Example usage
if __name__ == "__main__":
    test_image_path = "test.jpg"  # Replace with an actual image path
    print("Generated Output:", predict(test_image_path))
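Vision-language models usually give more useful output when the image is paired with a text prompt rather than passed alone. The variant below is a minimal sketch of that pattern; it assumes the processor follows the standard transformers interface that accepts text alongside images, and the prompt string, function name, and max_new_tokens value are illustrative rather than part of the original example.

def predict_with_prompt(image_path, prompt="Describe this image."):
    image = Image.open(image_path).convert("RGB")
    # Pair the image with a text prompt (assumes a text= keyword is supported)
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(DEVICE)
    # Cap the response length so generation terminates quickly
    output = model.generate(**inputs, max_new_tokens=128)
    return processor.batch_decode(output, skip_special_tokens=True)[0]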