prthm11 committed on
Commit
cf438e2
·
verified ·
1 Parent(s): 18ade07

Update app_main.py

Browse files
Files changed (1) hide show
  1. app_main.py +20 -1
app_main.py CHANGED
@@ -10,10 +10,22 @@ import pytesseract
10
  from transformers import AutoProcessor, AutoModelForImageTextToText
11
  from langchain_community.document_loaders.image_captions import ImageCaptionLoader
12
  from werkzeug.utils import secure_filename
13
- import tempfile
14
 
15
  app = Flask(__name__)
16
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
18
  poppler_path=r"C:\poppler-23.11.0\Library\bin"
19
 
@@ -131,7 +143,9 @@ def index():
131
  @app.route('/process_pdf', methods=['POST'])
132
  def process_pdf():
133
  try:
 
134
  if 'pdf_file' not in request.files:
 
135
  return jsonify({"error": "Missing PDF file in form-data with key 'pdf_file'"}), 400
136
 
137
  pdf_file = request.files['pdf_file']
@@ -144,16 +158,21 @@ def process_pdf():
144
  saved_pdf_path = os.path.join(temp_dir, filename)
145
  pdf_file.save(saved_pdf_path)
146
 
 
 
147
  # Extract & process
148
  json_path = None
149
  output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
150
 
 
 
151
  return jsonify({
152
  "message": "✅ PDF processed successfully",
153
  "output_json": output_path,
154
  "sprites": result
155
  })
156
  except Exception as e:
 
157
  return jsonify({"error": f"❌ Failed to process PDF: {str(e)}"}), 500
158
 
159
  if __name__ == '__main__':
 
10
  from transformers import AutoProcessor, AutoModelForImageTextToText
11
  from langchain_community.document_loaders.image_captions import ImageCaptionLoader
12
  from werkzeug.utils import secure_filename
13
+ import tempfile, logging
14
 
15
  app = Flask(__name__)
16
 
17
+ # Configure logging
18
+ logging.basicConfig(
19
+ level=logging.DEBUG, # Use INFO or ERROR in production
20
+ format="%(asctime)s [%(levelname)s] %(message)s",
21
+ handlers=[
22
+ logging.FileHandler("app.log"),
23
+ logging.StreamHandler()
24
+ ]
25
+ )
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
30
  poppler_path=r"C:\poppler-23.11.0\Library\bin"
31
 
 
143
  @app.route('/process_pdf', methods=['POST'])
144
  def process_pdf():
145
  try:
146
+ logger.info("Received request to process PDF.")
147
  if 'pdf_file' not in request.files:
148
+ logger.warning("No PDF file found in request.")
149
  return jsonify({"error": "Missing PDF file in form-data with key 'pdf_file'"}), 400
150
 
151
  pdf_file = request.files['pdf_file']
 
158
  saved_pdf_path = os.path.join(temp_dir, filename)
159
  pdf_file.save(saved_pdf_path)
160
 
161
+ logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
162
+
163
  # Extract & process
164
  json_path = None
165
  output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
166
 
167
+ logger.info("Received request to process PDF.")
168
+
169
  return jsonify({
170
  "message": "✅ PDF processed successfully",
171
  "output_json": output_path,
172
  "sprites": result
173
  })
174
  except Exception as e:
175
+ logger.exception("❌ Failed to process PDF")
176
  return jsonify({"error": f"❌ Failed to process PDF: {str(e)}"}), 500
177
 
178
  if __name__ == '__main__':