from io import BytesIO

import requests
from PIL import Image
from transformers import pipeline


class ImageOCRService:
    """Extract text from remote images with an image-text-to-text pipeline.

    The pipeline is created once in ``__init__`` and reused by every call
    to :meth:`extract_text`.
    """

    def __init__(self):
        """Build the ``image-text-to-text`` pipeline used for extraction."""
        self.pipe = pipeline(
            "image-text-to-text",
            model="ds4sd/SmolDocling-256M-preview",
        )

    def extract_text(self, image_url: str, *, timeout: float = 30.0) -> str:
        """Download the image at *image_url* and return the generated text.

        Args:
            image_url: URL of the image to fetch.
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            The text generated by the pipeline, or ``""`` when the pipeline
            returns an empty result.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(image_url, timeout=timeout)
        # Fail on 4xx/5xx here rather than feeding an error page to PIL.
        response.raise_for_status()

        # convert() returns a fully loaded copy, so the source image can be
        # closed as soon as the conversion is done.
        with Image.open(BytesIO(response.content)) as source_image:
            image = source_image.convert("RGB")

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": "extract text from image"},
                ],
            }
        ]
        result = self.pipe(messages)
        if not result:
            return ""
        return self._generated_text_to_str(result[0]["generated_text"])

    @staticmethod
    def _generated_text_to_str(generated) -> str:
        """Normalize a pipeline ``generated_text`` value to a plain string.

        A string is returned unchanged. A list is treated as a chat
        transcript and the content of its last message is returned; when
        that content is itself a list of parts, their ``"text"`` fields are
        concatenated.
        """
        if isinstance(generated, str):
            return generated
        if isinstance(generated, list):
            if not generated:
                return ""
            # NOTE(review): for chat-style input the pipeline appears to
            # return the whole conversation with the assistant reply last --
            # confirm against the installed transformers version.
            last_message = generated[-1]
            if isinstance(last_message, dict):
                content = last_message.get("content", "")
            else:
                content = last_message
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                return "".join(
                    part.get("text", "")
                    for part in content
                    if isinstance(part, dict)
                )
            return "" if content is None else str(content)
        return "" if generated is None else str(generated)