Spaces:
Running
Running
File size: 2,269 Bytes
932e485 9f7a699 932e485 3d48a10 7550ca1 93c87da 932e485 88f28f0 932e485 3d48a10 7550ca1 3d48a10 932e485 7550ca1 932e485 7550ca1 932e485 986b927 4bc7466 986b927 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
from flask import Flask, render_template, request, redirect, flash, url_for
import os
from werkzeug.utils import secure_filename
from paddleocr import PaddleOCR
from PIL import Image
import gc
app = Flask(__name__)

# Key Flask uses to sign the session cookie (flash() messages travel in it).
# NOTE(review): the literal fallback is only a placeholder -- SECRET_KEY should
# be set in the environment for any real deployment.
app.secret_key = os.environ.get('SECRET_KEY', 'change-this')

# Uploaded images are written below static/ (presumably so the template can
# display them); create the directory up front so the first upload succeeds.
UPLOAD_FOLDER = os.path.join('static', 'uploads')
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# A single CPU-only English OCR engine, built once at import time and shared by
# all requests. Angle classification is switched off; the three model
# directories point below /tmp/ocr_models.
ocr = PaddleOCR(
    lang='en',
    use_gpu=False,
    use_angle_cls=False,
    det_model_dir='/tmp/ocr_models/det',
    rec_model_dir='/tmp/ocr_models/rec',
    cls_model_dir='/tmp/ocr_models/cls',
)
def resize_image(image_path, max_size=(1024, 1024)):
    """Shrink the image at *image_path* in place so it fits within *max_size*.

    Downscaling before OCR keeps memory usage bounded. The aspect ratio is
    preserved and images are never enlarged. An image that already fits is
    left untouched on disk instead of being re-encoded.

    Args:
        image_path: Path of the image file; overwritten when it is shrunk.
        max_size: ``(width, height)`` bounding box in pixels.

    Raises:
        OSError: Propagated from Pillow when the file cannot be opened,
            identified as an image, or written back.
    """
    max_width, max_height = max_size
    with Image.open(image_path) as img:
        if img.width <= max_width and img.height <= max_height:
            # Nothing to shrink: skip the needless (and for JPEG, lossy)
            # re-encode of the file.
            return
        img.thumbnail(max_size)
        img.save(image_path)
# Extensions accepted for upload. Files are stored below the publicly served
# static/ folder, so arbitrary types (e.g. .html) must never be written there.
_ALLOWED_EXTENSIONS = frozenset(
    {'bmp', 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'webp'}
)


def _has_allowed_extension(filename):
    """Return True when *filename* ends in one of the accepted image extensions."""
    _, ext = os.path.splitext(filename)
    return ext.lower().lstrip('.') in _ALLOWED_EXTENSIONS


def _collect_text(result):
    """Flatten an OCR result into the list of recognized text strings.

    *result* is expected to hold one entry per page, each a sequence of
    ``(box, (text, score))`` items. Empty or ``None`` pages -- which PaddleOCR
    can report when it detects no text -- are skipped.
    """
    lines = []
    for page in result or []:
        if not page:
            continue
        lines.extend(text for _box, (text, _score) in page)
    return lines


@app.route('/', methods=['GET', 'POST'])
def index():
    """Show the upload form and, on POST, run OCR on the uploaded image.

    GET renders ``index.html`` with no result. POST validates the ``image``
    form field, stores the file in the upload folder, downsizes it, runs
    PaddleOCR on it and renders ``index.html`` with the recognized text
    (one line per detection) and the stored file name.

    Any validation failure flashes a message and redirects back to the form.
    """
    if request.method != 'POST':
        return render_template('index.html', extracted_text=None, image_file=None)

    if 'image' not in request.files:
        flash('No file part in the request.')
        return redirect(request.url)

    file = request.files['image']
    if not file.filename:
        flash('No image selected.')
        return redirect(request.url)

    # secure_filename() strips unsafe characters and can return an empty
    # string; joining that would make the save target the upload directory.
    filename = secure_filename(file.filename)
    if not filename:
        flash('Invalid file name.')
        return redirect(request.url)

    if not _has_allowed_extension(filename):
        flash('Unsupported file type.')
        return redirect(request.url)

    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    # Downsize before OCR to limit memory usage.
    try:
        resize_image(file_path)
    except OSError:
        # Not a readable image: do not leave it in the served folder.
        if os.path.exists(file_path):
            os.remove(file_path)
        flash('The uploaded file could not be read as an image.')
        return redirect(request.url)

    # Run PaddleOCR on the resized image (no angle classification).
    result = ocr.ocr(file_path, cls=False)
    extracted_text = "\n".join(_collect_text(result))

    # Release the OCR output promptly to keep memory usage down.
    del result
    gc.collect()

    return render_template('index.html', extracted_text=extracted_text, image_file=filename)
if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution from the browser. Keep it off unless explicitly
    # requested for local development, e.g. FLASK_DEBUG=1 python app.py
    debug_requested = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    app.run(debug=debug_requested)
|