from PIL import Image
from io import BytesIO
import requests
from transformers import pipeline

class ImageOCRService:
    """Extract text from remote images with an image-text-to-text pipeline.

    The pipeline is created once in ``__init__`` and reused by every
    ``extract_text`` call.
    """

    DEFAULT_MODEL = "ds4sd/SmolDocling-256M-preview"
    DEFAULT_PROMPT = "extract text from image"
    DEFAULT_TIMEOUT = 30.0  # seconds, applied to the image download

    def __init__(self, model: str = DEFAULT_MODEL, timeout: float = DEFAULT_TIMEOUT):
        """Create the underlying ``image-text-to-text`` pipeline.

        Args:
            model: Model identifier handed to ``transformers.pipeline``.
            timeout: Seconds to wait for the image download before giving up.
        """
        self.timeout = timeout
        self.pipe = pipeline("image-text-to-text", model=model)

    def extract_text(self, image_url: str) -> str:
        """Download the image at *image_url* and return the text the model reads.

        Args:
            image_url: URL of the image to fetch with an HTTP GET.

        Returns:
            The generated text, or ``""`` when the pipeline returns nothing.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an error status.
        """
        # Fall back to the class default so instances that never ran this
        # __init__ (e.g. a subclass that only sets ``pipe``) keep working.
        timeout = getattr(self, "timeout", self.DEFAULT_TIMEOUT)

        # Without a timeout a stalled server would block this call forever.
        response = requests.get(image_url, timeout=timeout)
        # Fail on HTTP errors here rather than letting PIL choke on an
        # HTML error page further down.
        response.raise_for_status()

        # convert() returns an independent image, so the decoder handle
        # opened by Image.open can be released right away.
        with Image.open(BytesIO(response.content)) as raw_image:
            image = raw_image.convert("RGB")

        result = self.pipe([{
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": self.DEFAULT_PROMPT},
            ],
        }])
        return self._generated_text(result)

    @staticmethod
    def _generated_text(result) -> str:
        """Reduce a pipeline result to the plain generated string.

        For chat-style input, ``generated_text`` may be either a plain string
        or the chat transcript (a list of message dicts whose last entry is
        the model's reply). Both shapes are normalised to ``str`` here.
        """
        if not result:
            return ""

        generated = result[0]["generated_text"]
        if isinstance(generated, str):
            return generated
        if not generated:
            return ""

        # Chat transcript: the reply is the final message.
        last_message = generated[-1]
        if isinstance(last_message, dict):
            content = last_message.get("content")
        else:
            content = last_message

        if content is None:
            return ""
        if isinstance(content, str):
            return content

        # Multimodal content: a list of typed parts; keep only the text ones.
        return "".join(
            part.get("text", "")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )