Spaces:

gahanmakwana
/

my-ocr-demo

Running

App Files Files Community

gahanmakwana committed on Apr 27

Commit

6c59b39

1 Parent(s): 8a90682

better interface

Browse files

Files changed (4) hide show

app.py +41 -84
requirements.txt +36 -23
static/style.css +24 -0
templates/index.html +38 -29

app.py CHANGED Viewed

@@ -1,93 +1,50 @@
-# from flask import Flask, render_template, request, send_from_directory
-# from paddleocr import PaddleOCR
-# import os
-# app = Flask(__name__)
-# # Upload folder
-# UPLOAD_FOLDER = 'uploads'
-# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-# if not os.path.exists(UPLOAD_FOLDER):
-#     os.makedirs(UPLOAD_FOLDER)
-# # Initialize OCR
-# ocr = PaddleOCR(use_angle_cls=True, lang='en')
-# @app.route('/', methods=['GET', 'POST'])
-# def upload_file():
-#     text = None
-#     filename = None
-#     if request.method == 'POST':
-#         file = request.files.get('file')
-#         if not file or file.filename == '':
-#             return render_template('index.html', error="No file selected")
-#         filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
-#         file.save(filepath)
-#         # Run OCR
-#         result = ocr.ocr(filepath, cls=True)
-#         extracted_text = ""
-#         for line in result:
-#             for word_info in line:
-#                 extracted_text += word_info[1][0] + " "
-#         text = extracted_text
-#         filename = file.filename
-#     return render_template('index.html', text=text, filename=filename)
-# @app.route('/uploads/<filename>')
-# def uploaded_file(filename):
-#     return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
-# if __name__ == '__main__':
-#     port = int(os.environ.get('PORT', 5000))  # <-- IMPORTANT
-#     app.run(host='0.0.0.0', port=port)
-from flask import Flask, render_template, request
 import os
-import time
 app = Flask(__name__)
-# Lightweight OCR loader
-def get_ocr():
-    from paddleocr import PaddleOCR
-    return PaddleOCR(
-        lang='en',
-        use_angle_cls=False,
-        use_gpu=False,
-        det_limit_side_len=480,
-        thread_num=1
-    )
-@app.route('/', methods=['GET', 'POST'])
-def upload_file():
-    if request.method == 'POST':
-        file = request.files.get('file')
-        if not file or file.filename == '':
-            return render_template('index.html', error="Please select a file")
-        try:
-            # Verify file size
-            file.seek(0, os.SEEK_END)
-            if file.tell() > 300000:
-                return render_template('index.html', error="Max 300KB file size")
-            file.seek(0)
-            # Process with OCR
-            ocr = get_ocr()
-            result = ocr.ocr(file.stream, cls=False)
-            text = ' '.join([word[1][0] for line in result[0] for word in line if len(word) >= 2])
-            return render_template('index.html', text=text)
-        except Exception as e:
-            return render_template('index.html', error="Processing error")
-    return render_template('index.html')
 if __name__ == '__main__':
-    port = int(os.environ.get('PORT', 5000))
-    app.run(host='0.0.0.0', port=port)

+from flask import Flask, request, render_template, redirect
+from werkzeug.utils import secure_filename
+from paddleocr import PaddleOCR
 import os
 app = Flask(__name__)
+UPLOAD_FOLDER = 'static/uploads'
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+# Allow only certain image file extensions
+ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
+def allowed_file(filename):
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+# Initialize PaddleOCR once (loads models)
+ocr_engine = PaddleOCR(use_angle_cls=True, lang='en')
+@app.route('/', methods=['GET', 'POST'])
+def index():
+    extracted_text = ''
+    filename = None
+    if request.method == 'POST':
+        # Check if an image file was submitted
+        if 'image' not in request.files:
+            return redirect(request.url)
+        file = request.files['image']
+        if file and allowed_file(file.filename):
+            # Secure the filename and save to upload folder
+            filename = secure_filename(file.filename)
+            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+            file.save(filepath)
+            # Run OCR on the uploaded image
+            result = ocr_engine.ocr(filepath, cls=True)
+            # Un-nest result if PaddleOCR returns a nested list
+            if result and isinstance(result[0], list) and len(result) == 1:
+                result = result[0]
+            # Extract recognized text lines
+            extracted_text_lines = [line[1][0] for line in result]
+            extracted_text = '\n'.join(extracted_text_lines)
+    # Render the template, passing in filename and extracted text
+    return render_template('index.html', filename=filename, extracted_text=extracted_text)
 if __name__ == '__main__':
+    # Bind to PORT for Render (default 10000) on all interfaces
+    port = int(os.environ.get('PORT', 10000))
+    app.run(host='0.0.0.0', port=port)

requirements.txt CHANGED Viewed

@@ -1,23 +1,36 @@
-# flask==3.0.2
-# paddleocr==2.7.0.2
-# paddlepaddle==2.5.2
-# opencv-python-headless==4.8.1.78
-# shapely==2.0.1
-# scikit-image==0.18.3
-# imgaug==0.4.0
-# pyclipper
-# lmdb
-# tqdm
-# numpy==1.24.3
-# visualdl
-# python-Levenshtein
-# werkzeug==2.2.3
-# markupsafe
-# click
-# blinker
-flask==2.2.5
-paddleocr==2.7.0.3
-paddlepaddle==2.5.2  # Updated to compatible version
-opencv-python-headless==4.8.1.78
-numpy==1.26.0
-pyclipper==1.3.0.post6

+# Flask==3.1.0
+# Werkzeug==2.2.3
+# Jinja2==3.0.3
+# MarkupSafe==2.1.1
+# Click==8.1.7
+# Blinker==1.6.2
+# numpy==1.26.4
+# paddlepaddle==3.0.0
+# paddleocr==2.10.0
+# opencv-python-headless==4.11.0.86
+# opencv-contrib-python-headless==4.11.0.86
+# scikit-image==0.20.0
+# shapely==2.0.1
+# pyclipper==1.3.0
+# lmdb==1.4.1
+# tqdm==4.66.1
+# rapidfuzz==2.19.0
+# cython==0.29.36
+# Pillow==10.0.1
+# pyyaml==6.0
+# requests==2.31.0
+# albumentations==1.3.1
+# albucore==0.1.2
+# packaging==23.1
+Flask
+paddleocr>=2.0.1
+paddlepaddle
+opencv-python-headless
+numpy
+click
+jinja2
+markupsafe
+blinker

static/style.css CHANGED Viewed

@@ -79,3 +79,27 @@ button:hover {
     from { opacity: 0; transform: translateY(30px);}
     to { opacity: 1; transform: translateY(0);}
 }

     from { opacity: 0; transform: translateY(30px);}
     to { opacity: 1; transform: translateY(0);}
 }
+#result-img {
+  max-width: 100%;
+  height: auto;
+  margin-top: 10px;
+  border: 1px solid #ddd;
+  border-radius: 8px;
+  padding: 4px;
+}
+#extracted-text {
+  text-align: left;
+  background: #fff;
+  padding: 15px;
+  border-radius: 8px;
+  border: 1px solid #ddd;
+}
+#spinner {
+  /* Center the spinner overlay */
+  position: fixed;
+  top: 50%;
+  left: 50%;
+  transform: translate(-50%, -50%);
+  z-index: 1000;
+}

templates/index.html CHANGED Viewed

@@ -1,39 +1,48 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>OCR Application</title>
-    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
-    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&display=swap" rel="stylesheet">
 </head>
 <body>
-    <div class="container">
-        <h1>Image Text Extractor</h1>
-        <!-- Upload Form -->
-        <form method="POST" enctype="multipart/form-data" class="upload-form">
-            <input type="file" name="file" id="file" required>
-            <button type="submit">Upload Image</button>
-        </form>
-        {% if error %}
-            <div class="error">{{ error }}</div>
-        {% endif %}
-        {% if filename %}
-            <div class="preview">
-                <h2>Uploaded Image</h2>
-                <img src="{{ url_for('uploaded_file', filename=filename) }}" alt="Uploaded Image">
-            </div>
-        {% endif %}
-        {% if text %}
-            <div class="output">
-                <h2>Extracted Text</h2>
-                <p>{{ text }}</p>
-            </div>
-        {% endif %}
     </div>
 </body>
 </html>

 <!DOCTYPE html>
 <html lang="en">
 <head>
+  <meta charset="UTF-8">
+  <title>PaddleOCR Web App</title>
+  <!-- Bootstrap CSS -->
+  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5/dist/css/bootstrap.min.css" rel="stylesheet">
+  <!-- Custom styles -->
+  <link href="{{ url_for('static', filename='style.css') }}" rel="stylesheet">
 </head>
 <body>
+  <div class="container text-center">
+    <h1 class="mt-4">OCR with PaddleOCR</h1>
+    <!-- Upload form -->
+    <form id="upload-form" method="post" action="/" enctype="multipart/form-data" class="mt-4">
+      <div class="mb-3">
+        <input type="file" class="form-control" name="image" accept="image/*" required>
+      </div>
+      <button type="submit" class="btn btn-primary">Upload Image</button>
+    </form>
+    <!-- Loading spinner (hidden by default) -->
+    <div id="spinner" class="mt-3 d-none">
+      <div class="spinner-border text-primary" role="status">
+        <span class="visually-hidden">Processing...</span>
+      </div>
+    </div>
+    {% if filename %}
+    <!-- Display the uploaded image and extracted text -->
+    <div class="mt-4">
+      <h4>Uploaded Image:</h4>
+      <img src="{{ url_for('static', filename='uploads/' + filename) }}"
+           alt="Uploaded Image" class="img-fluid" id="result-img">
+      <h4 class="mt-3">Extracted Text:</h4>
+      <pre id="extracted-text">{{ extracted_text }}</pre>
     </div>
+    {% endif %}
+  </div>
+  <!-- Show spinner on form submit -->
+  <script>
+    document.getElementById('upload-form').onsubmit = function() {
+      document.getElementById('spinner').classList.remove('d-none');
+    };
+  </script>
 </body>
 </html>