prthm11 committed on
Commit
6a97041
·
verified ·
1 Parent(s): 1fe061a

Update app_main.py

Browse files
Files changed (1) hide show
  1. app_main.py +29 -21
app_main.py CHANGED
@@ -9,6 +9,8 @@ from dotenv import load_dotenv
9
  import pytesseract
10
  from transformers import AutoProcessor, AutoModelForImageTextToText
11
  from langchain_community.document_loaders.image_captions import ImageCaptionLoader
 
 
12
 
13
  app = Flask(__name__)
14
 
@@ -126,27 +128,33 @@ def index():
126
  return render_template('app_index.html')
127
 
128
  # API endpoint
129
- @app.route('/process_static_pdf', methods=['POST'])
130
- def process_static_pdf():
131
- # Option 1: Use hardcoded static PDF
132
- pdf_path = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\scratch_crab.pdf"
133
-
134
- # Optional: Allow override via JSON request body
135
- if request.json and "pdf_path" in request.json:
136
- pdf_path = request.json["pdf_path"]
137
-
138
- if not os.path.isfile(pdf_path):
139
- return jsonify({"error": f"File not found: {pdf_path}"}), 400
140
-
141
- # json_path = os.path.join(JSON_FOLDER_PATH, "extracted.json")
142
- json_path = None
143
- output_path, result = extract_images_from_pdf(pdf_path, json_path)
144
-
145
- return jsonify({
146
- "message": "✅ PDF processed successfully",
147
- "output_json": output_path,
148
- "sprites": result
149
- })
 
 
 
 
 
 
150
 
151
  if __name__ == '__main__':
152
  app.run(host='0.0.0.0', port=7860, debug=True)
 
9
  import pytesseract
10
  from transformers import AutoProcessor, AutoModelForImageTextToText
11
  from langchain_community.document_loaders.image_captions import ImageCaptionLoader
12
+ from werkzeug.utils import secure_filename
13
+ import tempfile
14
 
15
  app = Flask(__name__)
16
 
 
128
  return render_template('app_index.html')
129
 
130
  # API endpoint
131
+ @app.route('/process_pdf', methods=['POST'])
132
+ def process_pdf():
133
+ try:
134
+ if 'pdf_file' not in request.files:
135
+ return jsonify({"error": "Missing PDF file in form-data with key 'pdf_file'"}), 400
136
+
137
+ pdf_file = request.files['pdf_file']
138
+ if pdf_file.filename == '':
139
+ return jsonify({"error": "Empty filename"}), 400
140
+
141
+ # Save the uploaded PDF temporarily
142
+ filename = secure_filename(pdf_file.filename)
143
+ temp_dir = tempfile.mkdtemp()
144
+ saved_pdf_path = os.path.join(temp_dir, filename)
145
+ pdf_file.save(saved_pdf_path)
146
+
147
+ # Extract & process
148
+ json_path = None
149
+ output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
150
+
151
+ return jsonify({
152
+ "message": "✅ PDF processed successfully",
153
+ "output_json": output_path,
154
+ "sprites": result
155
+ })
156
+ except Exception as e:
157
+ return jsonify({"error": f"❌ Failed to process PDF: {str(e)}"}), 500
158
 
159
  if __name__ == '__main__':
160
  app.run(host='0.0.0.0', port=7860, debug=True)