gahanmakwana committed on
Commit
6c59b39
·
1 Parent(s): 8a90682

better interface

Browse files
Files changed (4) hide show
  1. app.py +41 -84
  2. requirements.txt +36 -23
  3. static/style.css +24 -0
  4. templates/index.html +38 -29
app.py CHANGED
@@ -1,93 +1,50 @@
1
- # from flask import Flask, render_template, request, send_from_directory
2
- # from paddleocr import PaddleOCR
3
- # import os
4
-
5
- # app = Flask(__name__)
6
-
7
- # # Upload folder
8
- # UPLOAD_FOLDER = 'uploads'
9
- # app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
10
- # if not os.path.exists(UPLOAD_FOLDER):
11
- # os.makedirs(UPLOAD_FOLDER)
12
-
13
- # # Initialize OCR
14
- # ocr = PaddleOCR(use_angle_cls=True, lang='en')
15
-
16
- # @app.route('/', methods=['GET', 'POST'])
17
- # def upload_file():
18
- # text = None
19
- # filename = None
20
- # if request.method == 'POST':
21
- # file = request.files.get('file')
22
- # if not file or file.filename == '':
23
- # return render_template('index.html', error="No file selected")
24
-
25
- # filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
26
- # file.save(filepath)
27
-
28
- # # Run OCR
29
- # result = ocr.ocr(filepath, cls=True)
30
- # extracted_text = ""
31
- # for line in result:
32
- # for word_info in line:
33
- # extracted_text += word_info[1][0] + " "
34
-
35
- # text = extracted_text
36
- # filename = file.filename
37
-
38
- # return render_template('index.html', text=text, filename=filename)
39
-
40
- # @app.route('/uploads/<filename>')
41
- # def uploaded_file(filename):
42
- # return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
43
-
44
- # if __name__ == '__main__':
45
- # port = int(os.environ.get('PORT', 5000)) # <-- IMPORTANT
46
- # app.run(host='0.0.0.0', port=port)
47
-
48
- from flask import Flask, render_template, request
49
  import os
50
- import time
51
 
52
  app = Flask(__name__)
 
 
 
53
 
54
- # Lightweight OCR loader
55
- def get_ocr():
56
- from paddleocr import PaddleOCR
57
- return PaddleOCR(
58
- lang='en',
59
- use_angle_cls=False,
60
- use_gpu=False,
61
- det_limit_side_len=480,
62
- thread_num=1
63
- )
64
-
65
- @app.route('/', methods=['GET', 'POST'])
66
- def upload_file():
67
- if request.method == 'POST':
68
- file = request.files.get('file')
69
- if not file or file.filename == '':
70
- return render_template('index.html', error="Please select a file")
71
-
72
- try:
73
- # Verify file size
74
- file.seek(0, os.SEEK_END)
75
- if file.tell() > 300000:
76
- return render_template('index.html', error="Max 300KB file size")
77
- file.seek(0)
78
 
79
- # Process with OCR
80
- ocr = get_ocr()
81
- result = ocr.ocr(file.stream, cls=False)
82
- text = ' '.join([word[1][0] for line in result[0] for word in line if len(word) >= 2])
83
-
84
- return render_template('index.html', text=text)
85
 
86
- except Exception as e:
87
- return render_template('index.html', error="Processing error")
 
 
88
 
89
- return render_template('index.html')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  if __name__ == '__main__':
92
- port = int(os.environ.get('PORT', 5000))
93
- app.run(host='0.0.0.0', port=port)
 
 
1
+ from flask import Flask, request, render_template, redirect
2
+ from werkzeug.utils import secure_filename
3
+ from paddleocr import PaddleOCR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import os
 
5
 
6
  app = Flask(__name__)
7
+ UPLOAD_FOLDER = 'static/uploads'
8
+ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
9
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
10
 
11
+ # Allow only certain image file extensions
12
+ ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
13
+ def allowed_file(filename):
14
+ return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # Initialize PaddleOCR once (loads models)
17
+ ocr_engine = PaddleOCR(use_angle_cls=True, lang='en')
 
 
 
 
18
 
19
+ @app.route('/', methods=['GET', 'POST'])
20
+ def index():
21
+ extracted_text = ''
22
+ filename = None
23
 
24
+ if request.method == 'POST':
25
+ # Check if an image file was submitted
26
+ if 'image' not in request.files:
27
+ return redirect(request.url)
28
+ file = request.files['image']
29
+ if file and allowed_file(file.filename):
30
+ # Secure the filename and save to upload folder
31
+ filename = secure_filename(file.filename)
32
+ filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
33
+ file.save(filepath)
34
+
35
+ # Run OCR on the uploaded image
36
+ result = ocr_engine.ocr(filepath, cls=True)
37
+ # Un-nest result if PaddleOCR returns a nested list
38
+ if result and isinstance(result[0], list) and len(result) == 1:
39
+ result = result[0]
40
+ # Extract recognized text lines
41
+ extracted_text_lines = [line[1][0] for line in result]
42
+ extracted_text = '\n'.join(extracted_text_lines)
43
+
44
+ # Render the template, passing in filename and extracted text
45
+ return render_template('index.html', filename=filename, extracted_text=extracted_text)
46
 
47
  if __name__ == '__main__':
48
+ # Bind to PORT for Render (default 10000) on all interfaces
49
+ port = int(os.environ.get('PORT', 10000))
50
+ app.run(host='0.0.0.0', port=port)
requirements.txt CHANGED
@@ -1,23 +1,36 @@
1
- # flask==3.0.2
2
- # paddleocr==2.7.0.2
3
- # paddlepaddle==2.5.2
4
- # opencv-python-headless==4.8.1.78
5
- # shapely==2.0.1
6
- # scikit-image==0.18.3
7
- # imgaug==0.4.0
8
- # pyclipper
9
- # lmdb
10
- # tqdm
11
- # numpy==1.24.3
12
- # visualdl
13
- # python-Levenshtein
14
- # werkzeug==2.2.3
15
- # markupsafe
16
- # click
17
- # blinker
18
- flask==2.2.5
19
- paddleocr==2.7.0.3
20
- paddlepaddle==2.5.2 # Updated to compatible version
21
- opencv-python-headless==4.8.1.78
22
- numpy==1.26.0
23
- pyclipper==1.3.0.post6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Flask==3.1.0
2
+ # Werkzeug==2.2.3
3
+ # Jinja2==3.0.3
4
+ # MarkupSafe==2.1.1
5
+ # Click==8.1.7
6
+ # Blinker==1.6.2
7
+
8
+ # numpy==1.26.4
9
+ # paddlepaddle==3.0.0
10
+ # paddleocr==2.10.0
11
+
12
+ # opencv-python-headless==4.11.0.86
13
+ # opencv-contrib-python-headless==4.11.0.86
14
+
15
+ # scikit-image==0.20.0
16
+ # shapely==2.0.1
17
+ # pyclipper==1.3.0
18
+ # lmdb==1.4.1
19
+ # tqdm==4.66.1
20
+ # rapidfuzz==2.19.0
21
+ # cython==0.29.36
22
+ # Pillow==10.0.1
23
+ # pyyaml==6.0
24
+ # requests==2.31.0
25
+ # albumentations==1.3.1
26
+ # albucore==0.1.2
27
+ # packaging==23.1
28
+ Flask
29
+ paddleocr>=2.0.1
30
+ paddlepaddle
31
+ opencv-python-headless
32
+ numpy
33
+ click
34
+ jinja2
35
+ markupsafe
36
+ blinker
static/style.css CHANGED
@@ -79,3 +79,27 @@ button:hover {
79
  from { opacity: 0; transform: translateY(30px);}
80
  to { opacity: 1; transform: translateY(0);}
81
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  from { opacity: 0; transform: translateY(30px);}
80
  to { opacity: 1; transform: translateY(0);}
81
  }
82
+
83
+ #result-img {
84
+ max-width: 100%;
85
+ height: auto;
86
+ margin-top: 10px;
87
+ border: 1px solid #ddd;
88
+ border-radius: 8px;
89
+ padding: 4px;
90
+ }
91
+ #extracted-text {
92
+ text-align: left;
93
+ background: #fff;
94
+ padding: 15px;
95
+ border-radius: 8px;
96
+ border: 1px solid #ddd;
97
+ }
98
+ #spinner {
99
+ /* Center the spinner overlay */
100
+ position: fixed;
101
+ top: 50%;
102
+ left: 50%;
103
+ transform: translate(-50%, -50%);
104
+ z-index: 1000;
105
+ }
templates/index.html CHANGED
@@ -1,39 +1,48 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
3
  <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>OCR Application</title>
7
- <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
8
- <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&display=swap" rel="stylesheet">
 
9
  </head>
10
  <body>
11
- <div class="container">
12
- <h1>Image Text Extractor</h1>
 
 
 
 
 
 
 
13
 
14
- <!-- Upload Form -->
15
- <form method="POST" enctype="multipart/form-data" class="upload-form">
16
- <input type="file" name="file" id="file" required>
17
- <button type="submit">Upload Image</button>
18
- </form>
19
-
20
- {% if error %}
21
- <div class="error">{{ error }}</div>
22
- {% endif %}
23
-
24
- {% if filename %}
25
- <div class="preview">
26
- <h2>Uploaded Image</h2>
27
- <img src="{{ url_for('uploaded_file', filename=filename) }}" alt="Uploaded Image">
28
- </div>
29
- {% endif %}
30
 
31
- {% if text %}
32
- <div class="output">
33
- <h2>Extracted Text</h2>
34
- <p>{{ text }}</p>
35
- </div>
36
- {% endif %}
 
 
37
  </div>
 
 
 
 
 
 
 
 
 
38
  </body>
39
  </html>
 
1
  <!DOCTYPE html>
2
  <html lang="en">
3
  <head>
4
+ <meta charset="UTF-8">
5
+ <title>PaddleOCR Web App</title>
6
+ <!-- Bootstrap CSS -->
7
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5/dist/css/bootstrap.min.css" rel="stylesheet">
8
+ <!-- Custom styles -->
9
+ <link href="{{ url_for('static', filename='style.css') }}" rel="stylesheet">
10
  </head>
11
  <body>
12
+ <div class="container text-center">
13
+ <h1 class="mt-4">OCR with PaddleOCR</h1>
14
+ <!-- Upload form -->
15
+ <form id="upload-form" method="post" action="/" enctype="multipart/form-data" class="mt-4">
16
+ <div class="mb-3">
17
+ <input type="file" class="form-control" name="image" accept="image/*" required>
18
+ </div>
19
+ <button type="submit" class="btn btn-primary">Upload Image</button>
20
+ </form>
21
 
22
+ <!-- Loading spinner (hidden by default) -->
23
+ <div id="spinner" class="mt-3 d-none">
24
+ <div class="spinner-border text-primary" role="status">
25
+ <span class="visually-hidden">Processing...</span>
26
+ </div>
27
+ </div>
 
 
 
 
 
 
 
 
 
 
28
 
29
+ {% if filename %}
30
+ <!-- Display the uploaded image and extracted text -->
31
+ <div class="mt-4">
32
+ <h4>Uploaded Image:</h4>
33
+ <img src="{{ url_for('static', filename='uploads/' + filename) }}"
34
+ alt="Uploaded Image" class="img-fluid" id="result-img">
35
+ <h4 class="mt-3">Extracted Text:</h4>
36
+ <pre id="extracted-text">{{ extracted_text }}</pre>
37
  </div>
38
+ {% endif %}
39
+ </div>
40
+
41
+ <!-- Show spinner on form submit -->
42
+ <script>
43
+ document.getElementById('upload-form').onsubmit = function() {
44
+ document.getElementById('spinner').classList.remove('d-none');
45
+ };
46
+ </script>
47
  </body>
48
  </html>