gahanmakwana committed on
Commit
bbbfa2a
·
1 Parent(s): 984b417

fix: add werkzeug dependency

Browse files
Files changed (1) hide show
  1. app.py +94 -16
app.py CHANGED
@@ -1,6 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from flask import Flask, render_template, request, send_from_directory
2
  from paddleocr import PaddleOCR
3
  import os
 
4
 
5
  app = Flask(__name__)
6
 
@@ -10,38 +58,68 @@ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
10
  if not os.path.exists(UPLOAD_FOLDER):
11
  os.makedirs(UPLOAD_FOLDER)
12
 
13
- # Initialize OCR
14
- ocr = PaddleOCR(use_angle_cls=True, lang='en')
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  @app.route('/', methods=['GET', 'POST'])
17
  def upload_file():
18
  text = None
19
  filename = None
 
 
20
  if request.method == 'POST':
21
  file = request.files.get('file')
22
  if not file or file.filename == '':
23
  return render_template('index.html', error="No file selected")
24
 
25
- filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
26
- file.save(filepath)
 
 
 
 
27
 
28
- # Run OCR
29
- result = ocr.ocr(filepath, cls=True)
30
- extracted_text = ""
31
- for line in result:
32
- for word_info in line:
33
- extracted_text += word_info[1][0] + " "
34
 
35
- text = extracted_text
36
- filename = file.filename
 
 
 
 
 
 
 
 
37
 
38
- return render_template('index.html', text=text, filename=filename)
 
 
 
 
 
 
 
39
 
40
  @app.route('/uploads/<filename>')
41
  def uploaded_file(filename):
42
  return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
43
 
44
  if __name__ == '__main__':
45
- port = int(os.environ.get('PORT', 5000)) # <-- IMPORTANT
46
- app.run(host='0.0.0.0', port=port)
47
-
 
1
+ # from flask import Flask, render_template, request, send_from_directory
2
+ # from paddleocr import PaddleOCR
3
+ # import os
4
+
5
+ # app = Flask(__name__)
6
+
7
+ # # Upload folder
8
+ # UPLOAD_FOLDER = 'uploads'
9
+ # app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
10
+ # if not os.path.exists(UPLOAD_FOLDER):
11
+ # os.makedirs(UPLOAD_FOLDER)
12
+
13
+ # # Initialize OCR
14
+ # ocr = PaddleOCR(use_angle_cls=True, lang='en')
15
+
16
+ # @app.route('/', methods=['GET', 'POST'])
17
+ # def upload_file():
18
+ # text = None
19
+ # filename = None
20
+ # if request.method == 'POST':
21
+ # file = request.files.get('file')
22
+ # if not file or file.filename == '':
23
+ # return render_template('index.html', error="No file selected")
24
+
25
+ # filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
26
+ # file.save(filepath)
27
+
28
+ # # Run OCR
29
+ # result = ocr.ocr(filepath, cls=True)
30
+ # extracted_text = ""
31
+ # for line in result:
32
+ # for word_info in line:
33
+ # extracted_text += word_info[1][0] + " "
34
+
35
+ # text = extracted_text
36
+ # filename = file.filename
37
+
38
+ # return render_template('index.html', text=text, filename=filename)
39
+
40
+ # @app.route('/uploads/<filename>')
41
+ # def uploaded_file(filename):
42
+ # return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
43
+
44
+ # if __name__ == '__main__':
45
+ # port = int(os.environ.get('PORT', 5000)) # <-- IMPORTANT
46
+ # app.run(host='0.0.0.0', port=port)
47
+
48
  from flask import Flask, render_template, request, send_from_directory
49
  from paddleocr import PaddleOCR
50
  import os
51
+ import time
52
 
53
  app = Flask(__name__)
54
 
 
58
  if not os.path.exists(UPLOAD_FOLDER):
59
  os.makedirs(UPLOAD_FOLDER)
60
 
61
# Initialize the OCR engine once at import time.
# If construction fails for any reason, `ocr` is set to None so the module
# still loads; the failure is printed here and callers must check for None.
try:
    ocr = PaddleOCR(
        use_angle_cls=True,
        lang='en',
        use_gpu=False,  # Disable GPU on Render
        rec_model_dir='paddle_models/rec',  # Cache models
        det_model_dir='paddle_models/det',
        cls_model_dir='paddle_models/cls',
        enable_mkldnn=True,  # CPU optimization
        # PaddleOCR's documented option for the CPU thread count is
        # `cpu_threads`; the previous `thread_num` keyword is not one of its
        # options, so the intended limit was not being applied.
        cpu_threads=2,  # Limit threads to prevent OOM
    )
except Exception as e:
    print(f"OCR initialization failed: {e}")
    ocr = None
76
 
77
  @app.route('/', methods=['GET', 'POST'])
78
  def upload_file():
79
  text = None
80
  filename = None
81
+ error = None
82
+
83
  if request.method == 'POST':
84
  file = request.files.get('file')
85
  if not file or file.filename == '':
86
  return render_template('index.html', error="No file selected")
87
 
88
+ try:
89
+ # Save file with timestamp to prevent overwrites
90
+ timestamp = str(int(time.time()))
91
+ safe_filename = f"{timestamp}_{file.filename}"
92
+ filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_filename)
93
+ file.save(filepath)
94
 
95
+ # Check OCR initialization
96
+ if not ocr:
97
+ raise Exception("OCR engine not available")
 
 
 
98
 
99
+ # Run OCR with timeout safeguard
100
+ start_time = time.time()
101
+ result = ocr.ocr(filepath, cls=True)
102
+
103
+ # Process results
104
+ extracted_text = ""
105
+ if result and len(result) > 0:
106
+ for line in result[0]: # Note: result[0] contains the actual OCR data
107
+ if line and len(line) >= 2: # Check if line has text information
108
+ extracted_text += line[1][0] + " "
109
 
110
+ text = extracted_text.strip()
111
+ filename = safe_filename
112
+
113
+ except Exception as e:
114
+ error = f"Error processing file: {str(e)}"
115
+ print(error)
116
+
117
+ return render_template('index.html', text=text, filename=filename, error=error)
118
 
119
  @app.route('/uploads/<filename>')
120
  def uploaded_file(filename):
121
  return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
122
 
123
  if __name__ == '__main__':
124
+ port = int(os.environ.get('PORT', 5000))
125
+ app.run(host='0.0.0.0', port=port)