Spaces:

sundeveloper
/

OCR-img2txt

Sleeping

File size: 848 Bytes

e8a1990

# app.py

from PIL import Image
import pytesseract
from docx import Document

# Make sure the Tesseract path is correct.
# Tells pytesseract which Tesseract executable to invoke.
# NOTE(review): this is a relative path naming a Windows ".exe"; presumably it
# is resolved against the current working directory — confirm it matches the
# deployment target.
pytesseract.pytesseract.tesseract_cmd = r"Tesseract-OCR/tesseract.exe"

class OCRProcessor:
    """Run Tesseract OCR on an image and export the recognized text to .docx."""

    def __init__(self):
        # Tesseract language codes stored on the instance. Nothing in this
        # class validates ``lang`` against this list.
        self.languages = ["eng", "hin", "guj", "san", "tam", "tel"]

    def extract_text(self, image_path, lang="eng"):
        """Return the text Tesseract recognizes in the given image.

        Args:
            image_path: Passed to ``PIL.Image.open``.
            lang: Tesseract language code, forwarded unchanged to
                ``pytesseract.image_to_string``.

        Returns:
            Whatever ``pytesseract.image_to_string`` returns for the image.

        Raises:
            Any exception raised by ``Image.open`` or pytesseract propagates
            to the caller.
        """
        # Use the image as a context manager so the underlying file handle is
        # released as soon as OCR finishes instead of lingering until GC.
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image, lang=lang)

    def save_as_docx(self, ocr_text, file_path="OCR_Result.docx"):
        """Write *ocr_text* as a single paragraph into a new .docx file.

        Args:
            ocr_text: Text to store. ``None`` or whitespace-only text means
                there is nothing to save.
            file_path: Destination passed to ``Document.save``.

        Returns:
            ``file_path`` when the document was saved; ``None`` when there
            was no text to save; otherwise the message of the exception that
            occurred, as a string. This method does not raise, so callers
            must tell a returned path apart from a returned error message
            themselves.
        """
        # Missing text is "nothing to save", not an error to report.
        if ocr_text is None:
            return None

        try:
            # Check for blank text before building a Document so no work is
            # done when nothing will be written.
            if not ocr_text.strip():
                return None

            document = Document()
            document.add_paragraph(ocr_text)
            document.save(file_path)
            return file_path
        except Exception as exc:
            # Kept for backward compatibility: failures are reported via the
            # return value rather than raised.
            return str(exc)