Spaces:

gahanmakwana
/

my-ocr-demo

Sleeping

App Files Files Community

gahanmakwana committed on Apr 27

Commit

9f7a699

1 Parent(s): cc4db8d

new files

Browse files

Files changed (5) hide show

.gitignore +1 -180
app.py +30 -29
requirements.txt +25 -34
static/style.css +59 -2
templates/index.html +23 -21

.gitignore CHANGED Viewed

@@ -1,187 +1,8 @@
-<<<<<<< HEAD
-venv/
 __pycache__/
 *.pyc
 *.pyo
 *.pyd
-*.db
-*.sqlite3
 uploads/
 .env
-.DS_Store
-=======
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-# C extensions
-*.so
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-# Translations
-*.mo
-*.pot
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-# Flask stuff:
-instance/
-.webassets-cache
-# Scrapy stuff:
-.scrapy
-# Sphinx documentation
-docs/_build/
-# PyBuilder
-.pybuilder/
-target/
-# Jupyter Notebook
-.ipynb_checkpoints
-# IPython
-profile_default/
-ipython_config.py
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-# UV
-#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#uv.lock
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-#   in version control.
-#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
-.pdm.toml
-.pdm-python
-.pdm-build/
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-# SageMath parsed files
-*.sage.py
-# Environments
-.env
-.venv
-env/
 venv/
-ENV/
-env.bak/
-venv.bak/
-# Spyder project settings
-.spyderproject
-.spyproject
-# Rope project settings
-.ropeproject
-# mkdocs documentation
-/site
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-# Pyre type checker
-.pyre/
-# pytype static type analyzer
-.pytype/
-# Cython debug symbols
-cython_debug/
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-# Ruff stuff:
-.ruff_cache/
-# PyPI configuration file
-.pypirc
->>>>>>> 3b30c382f1257df381611f68a53c832ba232d59a

 __pycache__/
 *.pyc
 *.pyo
 *.pyd
 uploads/
 .env
 venv/
+.DS_Store

app.py CHANGED Viewed

@@ -1,50 +1,51 @@
-from flask import Flask, request, render_template, redirect
 from werkzeug.utils import secure_filename
 from paddleocr import PaddleOCR
-import os
 app = Flask(__name__)
-UPLOAD_FOLDER = 'static/uploads'
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
-# Allow only certain image file extensions
 ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
-# Initialize PaddleOCR once (loads models)
-ocr_engine = PaddleOCR(use_angle_cls=True, lang='en')
 @app.route('/', methods=['GET', 'POST'])
 def index():
-    extracted_text = ''
     filename = None
     if request.method == 'POST':
-        # Check if an image file was submitted
-        if 'image' not in request.files:
-            return redirect(request.url)
-        file = request.files['image']
-        if file and allowed_file(file.filename):
-            # Secure the filename and save to upload folder
-            filename = secure_filename(file.filename)
-            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-            file.save(filepath)
-            # Run OCR on the uploaded image
-            result = ocr_engine.ocr(filepath, cls=True)
-            # Un-nest result if PaddleOCR returns a nested list
-            if result and isinstance(result[0], list) and len(result) == 1:
-                result = result[0]
-            # Extract recognized text lines
-            extracted_text_lines = [line[1][0] for line in result]
-            extracted_text = '\n'.join(extracted_text_lines)
-    # Render the template, passing in filename and extracted text
     return render_template('index.html', filename=filename, extracted_text=extracted_text)
 if __name__ == '__main__':
-    # Bind to PORT for Render (default 10000) on all interfaces
-    port = int(os.environ.get('PORT', 10000))
     app.run(host='0.0.0.0', port=port)

+import os
+from flask import Flask, render_template, request, send_from_directory
 from werkzeug.utils import secure_filename
 from paddleocr import PaddleOCR
 app = Flask(__name__)
+# Configuration: uploaded images are written to UPLOAD_FOLDER (a relative path)
+UPLOAD_FOLDER = 'uploads'
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)  # create the folder at import time if it is missing
 ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}  # extensions accepted by allowed_file()
 def allowed_file(filename):
     """Report whether *filename* ends in one of ALLOWED_EXTENSIONS (case-insensitive)."""
     # A name without any dot has no extension to check.
     if '.' not in filename:
         return False
     extension = filename.rsplit('.', 1)[1]
     return extension.lower() in ALLOWED_EXTENSIONS
+# Initialize the PaddleOCR engine once at import time (lang='en', use_angle_cls enabled); index() reuses it for every upload
+ocr = PaddleOCR(use_angle_cls=True, lang='en')
 def _extract_text_lines(result):
     """Return the recognized strings found in a PaddleOCR ``ocr()`` result.
     Accepts either a flat list of ``[box, (text, score)]`` entries or a list
     holding one such list per image. ``None`` entries, which PaddleOCR 2.x
     uses for an image where nothing was detected, are skipped instead of
     raising ``TypeError``.
     """
     def is_line(entry):
         # A single detection looks like [box, (text, score)].
         return (
             isinstance(entry, (list, tuple))
             and len(entry) == 2
             and isinstance(entry[1], (list, tuple))
             and len(entry[1]) > 0
             and isinstance(entry[1][0], str)
         )
     lines = []
     for entry in result or []:
         if is_line(entry):
             lines.append(entry[1][0])
         elif isinstance(entry, (list, tuple)):
             # Per-image list of detections: collect the text of each one.
             lines.extend(item[1][0] for item in entry if is_line(item))
     return lines
 @app.route('/', methods=['GET', 'POST'])
 def index():
     """Render the upload page; on POST, save the image and run OCR on it.
     Returns the rendered ``index.html``. A missing, unnamed or disallowed
     upload renders the page with an ``error`` message; otherwise the page
     receives the saved ``filename`` and the ``extracted_text`` (or
     "No text detected." when OCR finds nothing).
     """
     filename = None
     extracted_text = None
     if request.method == 'POST':
         file = request.files.get('image')
         if not file or file.filename == '' or not allowed_file(file.filename):
             return render_template('index.html', error="Please upload a valid image file (png/jpg/jpeg/gif).")
         # Save the uploaded file under a sanitized name
         filename = secure_filename(file.filename)
         filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
         file.save(filepath)
         # Run OCR; the result may be nested per image and may contain None
         result = ocr.ocr(filepath, cls=True)
         lines = _extract_text_lines(result)
         extracted_text = "\n".join(lines) if lines else "No text detected."
     return render_template('index.html', filename=filename, extracted_text=extracted_text)
+@app.route('/uploads/<filename>')
+def uploaded_file(filename):
+    return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
 if __name__ == '__main__':
+    port = int(os.environ.get('PORT', 5000))  # PORT env var overrides the default of 5000
     app.run(host='0.0.0.0', port=port)  # listen on all interfaces

requirements.txt CHANGED Viewed

@@ -1,37 +1,28 @@
-# Flask==3.1.0
-# Werkzeug==2.2.3
-# Jinja2==3.0.3
-# MarkupSafe==2.1.1
-# Click==8.1.7
-# Blinker==1.6.2
-# numpy==1.26.4
-# paddlepaddle==3.0.0
-# paddleocr==2.10.0
-# opencv-python-headless==4.11.0.86
-# opencv-contrib-python-headless==4.11.0.86
-# scikit-image==0.20.0
-# shapely==2.0.1
-# pyclipper==1.3.0
-# lmdb==1.4.1
-# tqdm==4.66.1
-# rapidfuzz==2.19.0
-# cython==0.29.36
-# Pillow==10.0.1
-# pyyaml==6.0
-# requests==2.31.0
-# albumentations==1.3.1
-# albucore==0.1.2
-# packaging==23.1
-Flask
-paddleocr>=2.0.1
-paddlepaddle
-opencv-python-headless
-numpy
-click
-jinja2
-markupsafe
-blinker
-gunicorn

+# Flask 3.1 requires Werkzeug>=3.1, Jinja2>=3.1.2 and blinker>=1.9; the pins below satisfy that
+Flask==3.1.0
+Werkzeug==3.1.3
+Jinja2==3.1.6
+MarkupSafe==2.1.1
+Click==8.1.7
+Blinker==1.9.0
+numpy==1.24.3
+paddlepaddle==3.0.0
+paddleocr==2.10.0
+opencv-python-headless==4.11.0.86
+scikit-image==0.20.0
+shapely==2.0.1
+pyclipper==1.3.0
+lmdb==1.4.1
+tqdm==4.66.1
+rapidfuzz==2.19.0
+cython==0.29.36
+Pillow==10.0.1
+pyyaml==6.0
+requests==2.31.0
+albumentations==1.3.1
+albucore==0.1.2
+packaging==23.1
+gunicorn==20.1.0

static/style.css CHANGED Viewed

@@ -1,4 +1,4 @@
-body {
     font-family: 'Poppins', sans-serif;
     background: linear-gradient(135deg, #74ebd5, #ACB6E5);
     margin: 0;
@@ -96,10 +96,67 @@ button:hover {
   border: 1px solid #ddd;
 }
 #spinner {
-  /* Center the spinner overlay */
   position: fixed;
   top: 50%;
   left: 50%;
   transform: translate(-50%, -50%);
   z-index: 1000;
 }

+/* body {
     font-family: 'Poppins', sans-serif;
     background: linear-gradient(135deg, #74ebd5, #ACB6E5);
     margin: 0;
   border: 1px solid #ddd;
 }
 #spinner {
   position: fixed;
   top: 50%;
   left: 50%;
   transform: translate(-50%, -50%);
   z-index: 1000;
+} */
+body {
+  font-family: 'Poppins', sans-serif;
+  background: linear-gradient(135deg, #74ebd5, #ACB6E5);
+  min-height: 100vh;
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  margin: 0;
+}
+.container {
+  background: #fff;
+  padding: 30px 40px;
+  border-radius: 20px;
+  box-shadow: 0 10px 30px rgba(0,0,0,0.2);
+  max-width: 600px;
+  width: 100%;
+  animation: fadeIn 1s ease-in;
+}
+h1 { margin-bottom: 20px; color: #333; font-weight: 600; }
+.upload-form {
+  display: flex;
+  flex-direction: column;
+  gap: 15px;
+}
+#spinner {
+  position: fixed;
+  top: 50%; left: 50%;
+  transform: translate(-50%, -50%);
+  z-index: 1000;
 }
+.preview img, #result-img {
+  max-width: 100%;
+  border-radius: 10px;
+  border: 1px solid #ddd;
+  margin-top: 10px;
+}
+.output, #extracted-text {
+  background: #f9f9f9;
+  padding: 15px;
+  border-radius: 10px;
+  text-align: left;
+  white-space: pre-wrap;
+  margin-top: 10px;
+}
+@keyframes fadeIn {
+  from { opacity: 0; transform: translateY(20px); }
+  to { opacity: 1; transform: translateY(0); }
+}

templates/index.html CHANGED Viewed

@@ -2,45 +2,47 @@
 <html lang="en">
 <head>
   <meta charset="UTF-8">
-  <title>PaddleOCR Web App</title>
-  <!-- Bootstrap CSS -->
   <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
-  <!-- Custom styles -->
   <link href="{{ url_for('static', filename='style.css') }}" rel="stylesheet">
 </head>
 <body>
   <div class="container text-center">
     <h1 class="mt-4">OCR with PaddleOCR</h1>
-    <!-- Upload form -->
-    <form id="upload-form" method="post" action="/" enctype="multipart/form-data" class="mt-4">
-      <div class="mb-3">
-        <input type="file" class="form-control" name="image" accept="image/*" required>
-      </div>
-      <button type="submit" class="btn btn-primary">Upload Image</button>
     </form>
-    <!-- Loading spinner (hidden by default) -->
-    <div id="spinner" class="mt-3 d-none">
       <div class="spinner-border text-primary" role="status">
         <span class="visually-hidden">Processing...</span>
       </div>
     </div>
     {% if filename %}
-    <!-- Display the uploaded image and extracted text -->
-    <div class="mt-4">
-      <h4>Uploaded Image:</h4>
-      <img src="{{ url_for('static', filename='uploads/' + filename) }}"
-           alt="Uploaded Image" class="img-fluid" id="result-img">
-      <h4 class="mt-3">Extracted Text:</h4>
-      <pre id="extracted-text">{{ extracted_text }}</pre>
-    </div>
     {% endif %}
   </div>
-  <!-- Show spinner on form submit -->
   <script>
-    document.getElementById('upload-form').onsubmit = function() {
       document.getElementById('spinner').classList.remove('d-none');
     };
   </script>

 <html lang="en">
 <head>
   <meta charset="UTF-8">
+  <title>OCR Application</title>
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <!-- Bootstrap 5 -->
   <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
+  <!-- Google Font -->
+  <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&display=swap" rel="stylesheet">
+  <!-- Custom Styles -->
   <link href="{{ url_for('static', filename='style.css') }}" rel="stylesheet">
 </head>
 <body>
   <div class="container text-center">
     <h1 class="mt-4">OCR with PaddleOCR</h1>
+    <form id="upload-form" method="post" enctype="multipart/form-data" class="upload-form mx-auto">
+      <input type="file" name="image" accept="image/*" class="form-control mb-3" required>
+      <button type="submit" class="btn btn-primary">Upload & Extract Text</button>
     </form>
+    <div id="spinner" class="d-none">
       <div class="spinner-border text-primary" role="status">
         <span class="visually-hidden">Processing...</span>
       </div>
     </div>
     {% if filename %}
+      <div class="preview mt-4">
+        <h4>Uploaded Image</h4>
+        <img id="result-img" src="{{ url_for('uploaded_file', filename=filename) }}" alt="Uploaded Image">
+      </div>
+    {% endif %}
+    {% if extracted_text %}
+      <div class="output mt-4">
+        <h4>Extracted Text</h4>
+        <pre id="extracted-text">{{ extracted_text }}</pre>
+      </div>
     {% endif %}
   </div>
   <script>
+    document.getElementById('upload-form').onsubmit = () => {
       document.getElementById('spinner').classList.remove('d-none');
     };
   </script>