File size: 848 Bytes
e8a1990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# app.py

from PIL import Image
import pytesseract
from docx import Document

# Make sure the Tesseract path is correct.
# NOTE(review): this is a relative path, so it presumably depends on the
# process's working directory — confirm against how the app is launched.
pytesseract.pytesseract.tesseract_cmd = r"Tesseract-OCR/tesseract.exe"

class OCRProcessor:
    """Run Tesseract OCR on images and export the recognized text to .docx."""

    def __init__(self):
        # Language codes kept on the instance; no method in this class reads
        # them (presumably consumed by a caller/UI — verify against usage).
        self.languages = ["eng", "hin", "guj", "san", "tam", "tel"]

    def extract_text(self, image_path, lang="eng"):
        """Return the text Tesseract recognizes in the image at *image_path*.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.
            lang: Language string forwarded to ``pytesseract.image_to_string``.

        Returns:
            The recognized text as returned by pytesseract.

        Raises:
            Whatever ``Image.open`` or ``pytesseract.image_to_string`` raise;
            no exception is caught here.
        """
        # Use a context manager so the image's file handle is released as soon
        # as OCR finishes instead of lingering until garbage collection.
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image, lang=lang)

    def save_as_docx(self, ocr_text, file_path="OCR_Result.docx"):
        """Write *ocr_text* as a single paragraph into a new .docx file.

        Args:
            ocr_text: Text to save. ``None``, empty, or whitespace-only text
                means there is nothing to save.
            file_path: Destination path for the document.

        Returns:
            ``file_path`` when the document was saved; ``None`` when there was
            no text to save; otherwise the exception message as a string.
            The error-as-string result is kept for backward compatibility, so
            callers cannot tell it apart from a path by type alone.
        """
        try:
            # Check for content first so no document object is built (and no
            # file is written) when there is nothing to save. Treat None the
            # same as empty text rather than reporting an AttributeError.
            if ocr_text is None or not ocr_text.strip():
                return None

            doc = Document()
            doc.add_paragraph(ocr_text)
            doc.save(file_path)
            return file_path
        except Exception as e:
            # Best-effort contract: report the failure as text, never raise.
            return str(e)