File size: 2,269 Bytes
932e485
9f7a699
932e485
3d48a10
7550ca1
 
93c87da
 
932e485
 
 
88f28f0
932e485
 
 
3d48a10
7550ca1
 
 
 
 
 
 
 
 
 
 
 
 
 
3d48a10
932e485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7550ca1
 
 
 
932e485
 
 
 
 
 
 
 
 
7550ca1
 
 
 
932e485
986b927
4bc7466
986b927
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from flask import Flask, render_template, request, redirect, flash, url_for
import os
from werkzeug.utils import secure_filename
from paddleocr import PaddleOCR
from PIL import Image
import gc

# Directory (relative to the working directory) that receives uploaded images.
UPLOAD_FOLDER = os.path.join('static', 'uploads')

app = Flask(__name__)
# The signing key is read from the SECRET_KEY environment variable. The literal
# fallback is only a placeholder and must be overridden in production.
app.secret_key = os.environ.get('SECRET_KEY', 'change-this')
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Create the upload directory up front; an already existing one is accepted.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# A single OCR engine is built at import time and shared by all requests:
# English, CPU only, angle classifier disabled, model directories under /tmp.
_OCR_OPTIONS = dict(
    use_angle_cls=False,
    use_gpu=False,
    lang='en',
    det_model_dir='/tmp/ocr_models/det',
    rec_model_dir='/tmp/ocr_models/rec',
    cls_model_dir='/tmp/ocr_models/cls',
)
ocr = PaddleOCR(**_OCR_OPTIONS)

def resize_image(image_path, max_size=(1024, 1024)):
    """Shrink the image stored at ``image_path`` so it fits inside ``max_size``.

    The file is overwritten in place, and only when it actually exceeds the
    bounding box. Images that already fit are left untouched so they are not
    needlessly re-encoded (which would degrade lossy formats such as JPEG).

    Args:
        image_path: Path of the image file to downscale.
        max_size: ``(width, height)`` bounding box in pixels. Defaults to
            1024x1024, the limit this function previously hard-coded.

    Errors raised by Pillow while opening or saving the file propagate to
    the caller.
    """
    max_width, max_height = max_size
    with Image.open(image_path) as img:
        if img.width <= max_width and img.height <= max_height:
            # Nothing to shrink: skip the save so the original bytes survive.
            return
        # thumbnail() resizes in place and never enlarges the image.
        img.thumbnail((max_width, max_height))
        img.save(image_path)

def _collect_text_lines(ocr_result):
    """Flatten a PaddleOCR result into a list of recognized text strings.

    ``ocr_result`` is expected to be a list with one entry per processed
    image, each entry being a list of ``(box, (text, score))`` items. An
    entry (or the whole result) may be ``None``/empty when no text was
    detected; such entries are skipped instead of raising ``TypeError``.
    """
    lines = []
    for page in ocr_result or []:
        if not page:
            continue
        lines.extend(text for _box, (text, _score) in page)
    return lines


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the upload page and, on POST, run OCR on the uploaded image.

    GET renders ``index.html`` with no result. POST expects a file field
    named ``image``; the file is stored in the upload folder, downscaled,
    passed to the shared OCR engine, and the recognized lines are rendered
    joined by newlines together with the stored file name.

    Invalid submissions (missing field, empty or unusable file name, file
    that cannot be processed as an image) flash a message and redirect back
    to the same URL.
    """
    extracted_text = None
    image_file = None

    if request.method == 'POST':
        # Check file in request
        if 'image' not in request.files:
            flash('No file part in the request.')
            return redirect(request.url)
        file = request.files['image']
        if not file.filename:
            flash('No image selected.')
            return redirect(request.url)

        # secure_filename() may strip the name down to an empty string, which
        # would make the target path the upload directory itself.
        filename = secure_filename(file.filename)
        if not filename:
            flash('Invalid file name.')
            return redirect(request.url)

        # Save uploaded file
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(file_path)

        # Resize the image to optimize memory usage. Pillow signals
        # unreadable image data with OSError subclasses and an unsupported
        # extension with ValueError; treat both as a bad upload.
        try:
            resize_image(file_path)
        except (OSError, ValueError):
            os.remove(file_path)  # do not keep unusable uploads around
            flash('The uploaded file is not a valid image.')
            return redirect(request.url)

        # Run PaddleOCR on the resized image (CPU mode)
        result = ocr.ocr(file_path, cls=False)
        extracted_text = "\n".join(_collect_text_lines(result))
        image_file = filename

        # Clear memory after processing
        del result
        gc.collect()

    return render_template('index.html', extracted_text=extracted_text, image_file=image_file)

if __name__ == '__main__':
    # Debug mode enables the interactive debugger, which lets anyone who can
    # reach the server execute code, so it is opt-in through the FLASK_DEBUG
    # environment variable instead of being always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled)