gahanmakwana commited on
Commit
9f7a699
·
1 Parent(s): cc4db8d
Files changed (5) hide show
  1. .gitignore +1 -180
  2. app.py +30 -29
  3. requirements.txt +25 -34
  4. static/style.css +59 -2
  5. templates/index.html +23 -21
.gitignore CHANGED
@@ -1,187 +1,8 @@
1
- <<<<<<< HEAD
2
- venv/
3
  __pycache__/
4
  *.pyc
5
  *.pyo
6
  *.pyd
7
- *.db
8
- *.sqlite3
9
  uploads/
10
  .env
11
- .DS_Store
12
- =======
13
- # Byte-compiled / optimized / DLL files
14
- __pycache__/
15
- *.py[cod]
16
- *$py.class
17
-
18
- # C extensions
19
- *.so
20
-
21
- # Distribution / packaging
22
- .Python
23
- build/
24
- develop-eggs/
25
- dist/
26
- downloads/
27
- eggs/
28
- .eggs/
29
- lib/
30
- lib64/
31
- parts/
32
- sdist/
33
- var/
34
- wheels/
35
- share/python-wheels/
36
- *.egg-info/
37
- .installed.cfg
38
- *.egg
39
- MANIFEST
40
-
41
- # PyInstaller
42
- # Usually these files are written by a python script from a template
43
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
44
- *.manifest
45
- *.spec
46
-
47
- # Installer logs
48
- pip-log.txt
49
- pip-delete-this-directory.txt
50
-
51
- # Unit test / coverage reports
52
- htmlcov/
53
- .tox/
54
- .nox/
55
- .coverage
56
- .coverage.*
57
- .cache
58
- nosetests.xml
59
- coverage.xml
60
- *.cover
61
- *.py,cover
62
- .hypothesis/
63
- .pytest_cache/
64
- cover/
65
-
66
- # Translations
67
- *.mo
68
- *.pot
69
-
70
- # Django stuff:
71
- *.log
72
- local_settings.py
73
- db.sqlite3
74
- db.sqlite3-journal
75
-
76
- # Flask stuff:
77
- instance/
78
- .webassets-cache
79
-
80
- # Scrapy stuff:
81
- .scrapy
82
-
83
- # Sphinx documentation
84
- docs/_build/
85
-
86
- # PyBuilder
87
- .pybuilder/
88
- target/
89
-
90
- # Jupyter Notebook
91
- .ipynb_checkpoints
92
-
93
- # IPython
94
- profile_default/
95
- ipython_config.py
96
-
97
- # pyenv
98
- # For a library or package, you might want to ignore these files since the code is
99
- # intended to run in multiple environments; otherwise, check them in:
100
- # .python-version
101
-
102
- # pipenv
103
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
104
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
105
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
106
- # install all needed dependencies.
107
- #Pipfile.lock
108
-
109
- # UV
110
- # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
111
- # This is especially recommended for binary packages to ensure reproducibility, and is more
112
- # commonly ignored for libraries.
113
- #uv.lock
114
-
115
- # poetry
116
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
117
- # This is especially recommended for binary packages to ensure reproducibility, and is more
118
- # commonly ignored for libraries.
119
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
120
- #poetry.lock
121
-
122
- # pdm
123
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
124
- #pdm.lock
125
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
126
- # in version control.
127
- # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
128
- .pdm.toml
129
- .pdm-python
130
- .pdm-build/
131
-
132
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
133
- __pypackages__/
134
-
135
- # Celery stuff
136
- celerybeat-schedule
137
- celerybeat.pid
138
-
139
- # SageMath parsed files
140
- *.sage.py
141
-
142
- # Environments
143
- .env
144
- .venv
145
- env/
146
  venv/
147
- ENV/
148
- env.bak/
149
- venv.bak/
150
-
151
- # Spyder project settings
152
- .spyderproject
153
- .spyproject
154
-
155
- # Rope project settings
156
- .ropeproject
157
-
158
- # mkdocs documentation
159
- /site
160
-
161
- # mypy
162
- .mypy_cache/
163
- .dmypy.json
164
- dmypy.json
165
-
166
- # Pyre type checker
167
- .pyre/
168
-
169
- # pytype static type analyzer
170
- .pytype/
171
-
172
- # Cython debug symbols
173
- cython_debug/
174
-
175
- # PyCharm
176
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
177
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
178
- # and can be added to the global gitignore or merged into this file. For a more nuclear
179
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
180
- #.idea/
181
-
182
- # Ruff stuff:
183
- .ruff_cache/
184
-
185
- # PyPI configuration file
186
- .pypirc
187
- >>>>>>> 3b30c382f1257df381611f68a53c832ba232d59a
 
 
 
1
  __pycache__/
2
  *.pyc
3
  *.pyo
4
  *.pyd
 
 
5
  uploads/
6
  .env
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  venv/
8
+ .DS_Store
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,50 +1,51 @@
1
- from flask import Flask, request, render_template, redirect
 
2
  from werkzeug.utils import secure_filename
3
  from paddleocr import PaddleOCR
4
- import os
5
 
6
  app = Flask(__name__)
7
- UPLOAD_FOLDER = 'static/uploads'
 
 
8
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
9
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
10
 
11
- # Allow only certain image file extensions
12
  ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
13
  def allowed_file(filename):
14
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
15
 
16
- # Initialize PaddleOCR once (loads models)
17
- ocr_engine = PaddleOCR(use_angle_cls=True, lang='en')
18
 
19
  @app.route('/', methods=['GET', 'POST'])
20
  def index():
21
- extracted_text = ''
22
  filename = None
 
23
 
24
  if request.method == 'POST':
25
- # Check if an image file was submitted
26
- if 'image' not in request.files:
27
- return redirect(request.url)
28
- file = request.files['image']
29
- if file and allowed_file(file.filename):
30
- # Secure the filename and save to upload folder
31
- filename = secure_filename(file.filename)
32
- filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
33
- file.save(filepath)
34
-
35
- # Run OCR on the uploaded image
36
- result = ocr_engine.ocr(filepath, cls=True)
37
- # Un-nest result if PaddleOCR returns a nested list
38
- if result and isinstance(result[0], list) and len(result) == 1:
39
- result = result[0]
40
- # Extract recognized text lines
41
- extracted_text_lines = [line[1][0] for line in result]
42
- extracted_text = '\n'.join(extracted_text_lines)
43
-
44
- # Render the template, passing in filename and extracted text
45
  return render_template('index.html', filename=filename, extracted_text=extracted_text)
46
 
 
 
 
 
47
  if __name__ == '__main__':
48
- # Bind to PORT for Render (default 10000) on all interfaces
49
- port = int(os.environ.get('PORT', 10000))
50
  app.run(host='0.0.0.0', port=port)
 
1
+ import os
2
+ from flask import Flask, render_template, request, send_from_directory
3
  from werkzeug.utils import secure_filename
4
  from paddleocr import PaddleOCR
 
5
 
6
  app = Flask(__name__)
7
+
8
+ # Configuration
9
+ UPLOAD_FOLDER = 'uploads'
10
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
11
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
12
 
 
13
  ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
14
  def allowed_file(filename):
15
  return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
16
 
17
+ # Initialize OCR model
18
+ ocr = PaddleOCR(use_angle_cls=True, lang='en')
19
 
20
  @app.route('/', methods=['GET', 'POST'])
21
  def index():
 
22
  filename = None
23
+ extracted_text = None
24
 
25
  if request.method == 'POST':
26
+ file = request.files.get('image')
27
+ if not file or file.filename == '' or not allowed_file(file.filename):
28
+ return render_template('index.html', error="Please upload a valid image file (png/jpg/jpeg/gif).")
29
+
30
+ # Save the uploaded file
31
+ filename = secure_filename(file.filename)
32
+ filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
33
+ file.save(filepath)
34
+
35
+ # Run OCR
36
+ result = ocr.ocr(filepath, cls=True)
37
+ # Flatten if nested
38
+ if isinstance(result, list) and len(result) == 1 and isinstance(result[0], list):
39
+ result = result[0]
40
+ lines = [line[1][0] for line in result]
41
+ extracted_text = "\n".join(lines) if lines else "No text detected."
42
+
 
 
 
43
  return render_template('index.html', filename=filename, extracted_text=extracted_text)
44
 
45
+ @app.route('/uploads/<filename>')
46
+ def uploaded_file(filename):
47
+ return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
48
+
49
  if __name__ == '__main__':
50
+ port = int(os.environ.get('PORT', 5000))
 
51
  app.run(host='0.0.0.0', port=port)
requirements.txt CHANGED
@@ -1,37 +1,28 @@
1
- # Flask==3.1.0
2
- # Werkzeug==2.2.3
3
- # Jinja2==3.0.3
4
- # MarkupSafe==2.1.1
5
- # Click==8.1.7
6
- # Blinker==1.6.2
7
 
8
- # numpy==1.26.4
9
- # paddlepaddle==3.0.0
10
- # paddleocr==2.10.0
11
 
12
- # opencv-python-headless==4.11.0.86
13
- # opencv-contrib-python-headless==4.11.0.86
14
 
15
- # scikit-image==0.20.0
16
- # shapely==2.0.1
17
- # pyclipper==1.3.0
18
- # lmdb==1.4.1
19
- # tqdm==4.66.1
20
- # rapidfuzz==2.19.0
21
- # cython==0.29.36
22
- # Pillow==10.0.1
23
- # pyyaml==6.0
24
- # requests==2.31.0
25
- # albumentations==1.3.1
26
- # albucore==0.1.2
27
- # packaging==23.1
28
- Flask
29
- paddleocr>=2.0.1
30
- paddlepaddle
31
- opencv-python-headless
32
- numpy
33
- click
34
- jinja2
35
- markupsafe
36
- blinker
37
- gunicorn
 
1
+ Flask==3.1.0
2
+ Werkzeug==2.2.3
3
+ Jinja2==3.0.3
4
+ MarkupSafe==2.1.1
5
+ Click==8.1.7
6
+ Blinker==1.6.2
7
 
8
+ numpy==1.24.3
9
+ paddlepaddle==3.0.0
10
+ paddleocr==2.10.0
11
 
12
+ opencv-python-headless==4.11.0.86
 
13
 
14
+ scikit-image==0.20.0
15
+ shapely==2.0.1
16
+ pyclipper==1.3.0
17
+ lmdb==1.4.1
18
+ tqdm==4.66.1
19
+ rapidfuzz==2.19.0
20
+ cython==0.29.36
21
+ Pillow==10.0.1
22
+ pyyaml==6.0
23
+ requests==2.31.0
24
+ albumentations==1.3.1
25
+ albucore==0.1.2
26
+ packaging==23.1
27
+
28
+ gunicorn==20.1.0
 
 
 
 
 
 
 
 
static/style.css CHANGED
@@ -1,4 +1,4 @@
1
- body {
2
  font-family: 'Poppins', sans-serif;
3
  background: linear-gradient(135deg, #74ebd5, #ACB6E5);
4
  margin: 0;
@@ -96,10 +96,67 @@ button:hover {
96
  border: 1px solid #ddd;
97
  }
98
  #spinner {
99
- /* Center the spinner overlay */
100
  position: fixed;
101
  top: 50%;
102
  left: 50%;
103
  transform: translate(-50%, -50%);
104
  z-index: 1000;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* body {
2
  font-family: 'Poppins', sans-serif;
3
  background: linear-gradient(135deg, #74ebd5, #ACB6E5);
4
  margin: 0;
 
96
  border: 1px solid #ddd;
97
  }
98
  #spinner {
99
+
100
  position: fixed;
101
  top: 50%;
102
  left: 50%;
103
  transform: translate(-50%, -50%);
104
  z-index: 1000;
105
+ } */
106
+ body {
107
+ font-family: 'Poppins', sans-serif;
108
+ background: linear-gradient(135deg, #74ebd5, #ACB6E5);
109
+ min-height: 100vh;
110
+ display: flex;
111
+ justify-content: center;
112
+ align-items: center;
113
+ margin: 0;
114
+ }
115
+
116
+ .container {
117
+ background: #fff;
118
+ padding: 30px 40px;
119
+ border-radius: 20px;
120
+ box-shadow: 0 10px 30px rgba(0,0,0,0.2);
121
+ max-width: 600px;
122
+ width: 100%;
123
+ animation: fadeIn 1s ease-in;
124
+ }
125
+
126
+ h1 { margin-bottom: 20px; color: #333; font-weight: 600; }
127
+
128
+ .upload-form {
129
+ display: flex;
130
+ flex-direction: column;
131
+ gap: 15px;
132
+ }
133
+
134
+ #spinner {
135
+ position: fixed;
136
+ top: 50%; left: 50%;
137
+ transform: translate(-50%, -50%);
138
+ z-index: 1000;
139
  }
140
+
141
+ .preview img, #result-img {
142
+ max-width: 100%;
143
+ border-radius: 10px;
144
+ border: 1px solid #ddd;
145
+ margin-top: 10px;
146
+ }
147
+
148
+ .output, #extracted-text {
149
+ background: #f9f9f9;
150
+ padding: 15px;
151
+ border-radius: 10px;
152
+ text-align: left;
153
+ white-space: pre-wrap;
154
+ margin-top: 10px;
155
+ }
156
+
157
+ @keyframes fadeIn {
158
+ from { opacity: 0; transform: translateY(20px); }
159
+ to { opacity: 1; transform: translateY(0); }
160
+ }
161
+
162
+
templates/index.html CHANGED
@@ -2,45 +2,47 @@
2
  <html lang="en">
3
  <head>
4
  <meta charset="UTF-8">
5
- <title>PaddleOCR Web App</title>
6
- <!-- Bootstrap CSS -->
 
7
  <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
8
- <!-- Custom styles -->
 
 
9
  <link href="{{ url_for('static', filename='style.css') }}" rel="stylesheet">
10
  </head>
11
  <body>
12
  <div class="container text-center">
13
  <h1 class="mt-4">OCR with PaddleOCR</h1>
14
- <!-- Upload form -->
15
- <form id="upload-form" method="post" action="/" enctype="multipart/form-data" class="mt-4">
16
- <div class="mb-3">
17
- <input type="file" class="form-control" name="image" accept="image/*" required>
18
- </div>
19
- <button type="submit" class="btn btn-primary">Upload Image</button>
20
  </form>
21
 
22
- <!-- Loading spinner (hidden by default) -->
23
- <div id="spinner" class="mt-3 d-none">
24
  <div class="spinner-border text-primary" role="status">
25
  <span class="visually-hidden">Processing...</span>
26
  </div>
27
  </div>
28
 
29
  {% if filename %}
30
- <!-- Display the uploaded image and extracted text -->
31
- <div class="mt-4">
32
- <h4>Uploaded Image:</h4>
33
- <img src="{{ url_for('static', filename='uploads/' + filename) }}"
34
- alt="Uploaded Image" class="img-fluid" id="result-img">
35
- <h4 class="mt-3">Extracted Text:</h4>
36
- <pre id="extracted-text">{{ extracted_text }}</pre>
37
- </div>
 
 
 
38
  {% endif %}
39
  </div>
40
 
41
- <!-- Show spinner on form submit -->
42
  <script>
43
- document.getElementById('upload-form').onsubmit = function() {
44
  document.getElementById('spinner').classList.remove('d-none');
45
  };
46
  </script>
 
2
  <html lang="en">
3
  <head>
4
  <meta charset="UTF-8">
5
+ <title>OCR Application</title>
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <!-- Bootstrap 5 -->
8
  <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
9
+ <!-- Google Font -->
10
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&display=swap" rel="stylesheet">
11
+ <!-- Custom Styles -->
12
  <link href="{{ url_for('static', filename='style.css') }}" rel="stylesheet">
13
  </head>
14
  <body>
15
  <div class="container text-center">
16
  <h1 class="mt-4">OCR with PaddleOCR</h1>
17
+
18
+ <form id="upload-form" method="post" enctype="multipart/form-data" class="upload-form mx-auto">
19
+ <input type="file" name="image" accept="image/*" class="form-control mb-3" required>
20
+ <button type="submit" class="btn btn-primary">Upload & Extract Text</button>
 
 
21
  </form>
22
 
23
+ <div id="spinner" class="d-none">
 
24
  <div class="spinner-border text-primary" role="status">
25
  <span class="visually-hidden">Processing...</span>
26
  </div>
27
  </div>
28
 
29
  {% if filename %}
30
+ <div class="preview mt-4">
31
+ <h4>Uploaded Image</h4>
32
+ <img id="result-img" src="{{ url_for('uploaded_file', filename=filename) }}" alt="Uploaded Image">
33
+ </div>
34
+ {% endif %}
35
+
36
+ {% if extracted_text %}
37
+ <div class="output mt-4">
38
+ <h4>Extracted Text</h4>
39
+ <pre id="extracted-text">{{ extracted_text }}</pre>
40
+ </div>
41
  {% endif %}
42
  </div>
43
 
 
44
  <script>
45
+ document.getElementById('upload-form').onsubmit = () => {
46
  document.getElementById('spinner').classList.remove('d-none');
47
  };
48
  </script>