prthm11 committed on
Commit
0f73e99
·
verified ·
1 Parent(s): 9179d5f

Upload 4 files

Browse files
Files changed (4) hide show
  1. dockerfile +39 -0
  2. extract_img_pdf.py +677 -0
  3. requirements.txt +6 -0
  4. templates/index.html +27 -0
dockerfile ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image with Python and common dependencies
FROM python:3.10-slim

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# Install system dependencies (OpenCV runtime libs, Tesseract OCR, Poppler)
RUN apt-get update && apt-get install -y \
    build-essential \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    tesseract-ocr \
    poppler-utils \
    libgl1 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Set the working directory first so every later relative path resolves under /app
WORKDIR /app

# Copy the requirements file BEFORE installing from it; doing this ahead of the
# application code also lets Docker cache the dependency layer between builds.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code and templates
COPY extract_img_pdf.py extract_img_pdf.py
COPY templates/ /app/templates
# NOTE(review): copying .env bakes its contents (e.g. SECRET_KEY) into an image
# layer; prefer injecting secrets as runtime environment variables.
COPY .env .env

# Create the runtime directories and make /app writable for a non-root user
RUN mkdir -p /app/cache /app/data /app/OUTPUTS && chmod -R 777 /app

# Expose the required port for HF Spaces
EXPOSE 7860

# Set the command to run the Flask app
CMD ["python", "extract_img_pdf.py"]
extract_img_pdf.py ADDED
@@ -0,0 +1,677 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #==================================================================================#
2
+ # Find Contours from Image and Convert into PDF #
3
+ #==================================================================================#
4
+ import cv2, os
5
+ import numpy as np
6
+ from imutils.perspective import four_point_transform
7
+ from PIL import Image
8
+ from unstructured.partition.pdf import partition_pdf
9
+ import json, base64, io
10
+ from flask import Flask, render_template, flash, redirect, url_for
11
+ from dotenv import load_dotenv
12
+ import pytesseract
13
+
14
load_dotenv()

app = Flask(__name__)
# flash() stores its messages in the session, and Flask refuses to use the
# session without a secret key. Fall back to a random per-process key when
# SECRET_KEY is not configured so the routes keep working (sessions then do
# not survive a restart or span multiple worker processes).
app.secret_key = os.getenv("SECRET_KEY") or os.urandom(24)

# The hard-coded install locations are Windows paths, so only use them as
# defaults on Windows. On other platforms the binaries are expected on PATH
# unless TESSERACT_CMD / POPPLER_PATH are set explicitly.
_ON_WINDOWS = os.name == "nt"
_tesseract_cmd = os.getenv(
    "TESSERACT_CMD",
    r"C:\Program Files\Tesseract-OCR\tesseract.exe" if _ON_WINDOWS else "",
)
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
poppler_path = os.getenv(
    "POPPLER_PATH",
    r"C:\poppler-23.11.0\Library\bin" if _ON_WINDOWS else "",
) or None

# Running index used to give each saved scan a unique file name.
count = 0

OUTPUT_FOLDER = "OUTPUTS"
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

# Make sure every output directory exists before any route writes to it.
for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
    os.makedirs(path, exist_ok=True)
34
+
35
+ # --- FUNCTION: Detect document contour ---
36
def detect_document_contour(image):
    """Return the largest four-cornered contour in a BGR image, or None.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur and
    binarised with Otsu's threshold. Contours are then visited from largest to
    smallest area; the first one whose area exceeds 1000 and whose polygonal
    approximation (tolerance: 2% of its perimeter) has exactly four vertices
    is returned. None is returned when no contour qualifies.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    found, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    for candidate in sorted(found, key=cv2.contourArea, reverse=True):
        # Skip regions too small to be a document.
        if cv2.contourArea(candidate) <= 1000:
            continue
        perimeter = cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) == 4:
            return polygon
    return None
52
+
53
+ # --- FUNCTION: Extract images from saved PDF ---
54
def extract_images_from_pdf(pdf_path, output_json_path):
    """Partition a PDF, dump its elements to JSON and display embedded images.

    Args:
        pdf_path: Path of the PDF file passed to ``partition_pdf``.
        output_json_path: Path of the JSON file that receives the list of
            element dictionaries (one ``to_dict()`` result per element).

    Every element whose metadata carries an ``image_base64`` payload is
    decoded and opened with ``Image.show``.
    """
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # embed base64 image data in the output
    )
    element_dicts = [element.to_dict() for element in elements]

    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(element_dicts, f, indent=4)

    # Reuse the in-memory dictionaries instead of re-reading the file that was
    # just written.
    for i, element in enumerate(element_dicts):
        encoded = element.get("metadata", {}).get("image_base64")
        if not encoded:
            continue
        image = Image.open(io.BytesIO(base64.b64decode(encoded)))
        # NOTE(review): Image.show needs a desktop image viewer; confirm this
        # is still wanted when the app runs inside a container.
        image.show(title=f"Extracted Image {i+1}")
74
+
75
+ # --- Route: Home Page ---
76
@app.route("/")
def index():
    """Render the ``index.html`` template as the home page."""
    return render_template("index.html")
79
+
80
+ # --- Route: Scan Document ---
81
@app.route("/scan")
def scan_document():
    """Run an interactive camera scan, then redirect to the home page.

    Frames from camera 0 are rotated by 180 degrees and shown in an OpenCV
    preview window at half size, with the detected document contour outlined
    in green. Pressing ESC aborts. Pressing 's' while a contour is detected
    saves the perspective-corrected document as a JPEG, converts it to a PDF,
    runs ``extract_images_from_pdf`` on that PDF and increments the global
    ``count`` used in the output file names.

    Outcomes are reported through flashed messages. The camera and any OpenCV
    windows are released even if processing raises.
    """
    global count

    # DirectShow (CAP_DSHOW) is a Windows-only backend; on other platforms let
    # OpenCV choose the capture backend itself.
    if os.name == "nt":
        cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
    else:
        cap = cv2.VideoCapture(0)

    scale = 0.5

    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

        while True:
            ret, frame = cap.read()
            if not ret:
                flash("Camera Error!", "error")
                break

            frame = cv2.rotate(frame, cv2.ROTATE_180)
            # Draw on a copy so the saved scan does not contain the overlay.
            display = frame.copy()
            contour = detect_document_contour(display)

            if contour is not None:
                cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)

            resized = cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0])))
            cv2.imshow("📷 Scan Document - Press 's' to Save, ESC to Exit", resized)

            key = cv2.waitKey(1) & 0xFF
            if key == 27:  # ESC
                break
            elif key == ord('s') and contour is not None:
                warped = four_point_transform(frame, contour.reshape(4, 2))
                image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
                pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
                json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")

                # cv2.imwrite reports failure through its return value only.
                if not cv2.imwrite(image_path, warped):
                    flash("Could not save the scanned image!", "error")
                    break

                # Close the image file as soon as the PDF has been written.
                with Image.open(image_path) as img:
                    img.convert("RGB").save(pdf_path)
                extract_images_from_pdf(pdf_path, json_path)

                flash("✅ Document scanned and saved!", "success")
                count += 1
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

    return redirect(url_for("index"))
129
+
130
+
131
+ # --- Run ---
132
if __name__ == "__main__":
    # Listen on all interfaces on port 7860, the port the Dockerfile exposes.
    app.run(debug=False, port=7860, host="0.0.0.0")
134
+ # while True:
135
+ # ret, frame = cap.read()
136
+ # if not ret:
137
+ # break
138
+
139
+ # frame = cv2.rotate(frame, cv2.ROTATE_180)
140
+ # display = frame.copy()
141
+
142
+ # contour = detect_document_contour(display)
143
+ # if contour is not None:
144
+ # cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)
145
+
146
+ # cv2.imshow("Document Scanner", cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0]))))
147
+
148
+ # key = cv2.waitKey(1) & 0xFF
149
+
150
+ # if key == 27: # ESC to exit
151
+ # break
152
+
153
+ # elif key == ord('s') and contour is not None:
154
+ # warped = four_point_transform(frame, contour.reshape(4, 2))
155
+ # image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
156
+ # pdf_path = os.path.join(PDF_FOLDER_PATH,f"scanned_colored_{count}.pdf")
157
+
158
+ # # Save the Image
159
+ # cv2.imwrite(image_path, warped)
160
+ # print(f"[INFO] Saved: {image_path}")
161
+
162
+ # # Convert to PDF
163
+ # img = Image.open(image_path)
164
+ # img_rgb = img.convert("RGB")
165
+ # img_rgb.save(pdf_path)
166
+ # print(f"[INFO] Converted to PDF: {pdf_path}")
167
+
168
+ # # Extract and show embedded images from PDF
169
+ # print(f"[INFO] Extracting embedded images from PDF...")
170
+ # # extract_images_from_pdf(pdf_path, JSON_FOLDER_PATH)
171
+
172
+ # count += 1
173
+ # cap.release()
174
+ # cv2.destroyAllWindows()
175
+
176
+
177
+ ''' Simple version Not a Flask APP '''
178
+ # import cv2, os, json, base64, io
179
+ # import numpy as np
180
+ # from imutils.perspective import four_point_transform
181
+ # from PIL import Image
182
+ # from unstructured.partition.pdf import partition_pdf
183
+ # import pytesseract
184
+
185
+ # # --- PATH CONFIGURATION ---
186
+ # pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
187
+ # POPPLER_PATH = r"C:\poppler-23.11.0\Library\bin"
188
+
189
+ # OUTPUT_FOLDER = "OUTPUTS"
190
+ # IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
191
+ # PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
192
+ # JSON_OUTPUT_FOLDER = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")
193
+
194
+ # for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_OUTPUT_FOLDER]:
195
+ # os.makedirs(path, exist_ok=True)
196
+
197
+ # # --- FUNCTION: Detect document contour ---
198
+ # def detect_document_contour(image):
199
+ # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
200
+ # blur = cv2.GaussianBlur(gray, (5, 5), 0)
201
+ # _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
202
+ # contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
203
+ # contours = sorted(contours, key=cv2.contourArea, reverse=True)
204
+
205
+ # for contour in contours:
206
+ # area = cv2.contourArea(contour)
207
+ # if area > 1000:
208
+ # peri = cv2.arcLength(contour, True)
209
+ # approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
210
+ # if len(approx) == 4:
211
+ # return approx
212
+ # return None
213
+
214
+ # # --- FUNCTION: Extract images from saved PDF ---
215
+ # def extract_images_from_pdf(pdf_path, output_json_path):
216
+ # elements = partition_pdf(
217
+ # filename=pdf_path,
218
+ # poppler_path=POPPLER_PATH,
219
+ # strategy="hi_res",
220
+ # extract_image_block_types=["Image"],
221
+ # extract_image_block_to_payload=True,
222
+ # )
223
+
224
+ # with open(output_json_path, "w") as f:
225
+ # json.dump([element.to_dict() for element in elements], f, indent=4)
226
+
227
+ # # Display extracted images
228
+ # with open(output_json_path, 'r') as file:
229
+ # file_elements = json.load(file)
230
+
231
+ # for i, element in enumerate(file_elements):
232
+ # if "image_base64" in element["metadata"]:
233
+ # image_data = base64.b64decode(element["metadata"]["image_base64"])
234
+ # image = Image.open(io.BytesIO(image_data))
235
+ # image.show(title=f"Extracted Image {i+1}")
236
+
237
+ # # --- WEBCAM SCANNER START ---
238
+ # # cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
239
+ # cap = cv2.VideoCapture("http://100.71.6.36:8080/video")
240
+ # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
241
+ # cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
242
+
243
+ # scale = 0.5
244
+ # count = 0
245
+
246
+ # while True:
247
+ # ret, frame = cap.read()
248
+ # if not ret:
249
+ # break
250
+
251
+ # frame = cv2.rotate(frame, cv2.ROTATE_180)
252
+ # display = frame.copy()
253
+
254
+ # contour = detect_document_contour(display)
255
+ # if contour is not None:
256
+ # cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)
257
+
258
+ # cv2.imshow("Document Scanner", cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0]))))
259
+
260
+ # key = cv2.waitKey(1) & 0xFF
261
+
262
+ # if key == 27: # ESC to exit
263
+ # break
264
+
265
+ # elif key == ord('s') and contour is not None:
266
+ # warped = four_point_transform(frame, contour.reshape(4, 2))
267
+
268
+ # image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
269
+ # pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
270
+ # json_path = os.path.join(JSON_OUTPUT_FOLDER, f"embedded_images_{count}.json")
271
+
272
+ # # Save Image
273
+ # cv2.imwrite(image_path, warped)
274
+ # print(f"[INFO] Saved image: {image_path}")
275
+
276
+ # # Convert to PDF
277
+ # img = Image.open(image_path)
278
+ # img_rgb = img.convert("RGB")
279
+ # img_rgb.save(pdf_path)
280
+ # print(f"[INFO] Converted to PDF: {pdf_path}")
281
+
282
+ # # Extract and show embedded images from PDF
283
+ # print(f"[INFO] Extracting embedded images from PDF...")
284
+ # extract_images_from_pdf(pdf_path, json_path)
285
+
286
+ # count += 1
287
+
288
+ # cap.release()
289
+ # cv2.destroyAllWindows()
290
+
291
+
292
+
293
+ '''
294
+ #==================================================================================#
295
+ # Extract Images from PDF #
296
+ #==================================================================================#
297
+ from unstructured.partition.pdf import partition_pdf
298
+ import pytesseract
299
+ pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
300
+
301
+ elements = partition_pdf(
302
+ filename=r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\page1.pdf",
303
+ poppler_path=r"C:\poppler-23.11.0\Library\bin",
304
+ strategy="hi_res",
305
+ extract_image_block_types=["Image"], # or ["Image", "Table"]
306
+ extract_image_block_to_payload=True, # Set to True to get base64 in output
307
+ )
308
+
309
+ import json, base64, io, os
310
+ from PIL import Image
311
+
312
+ # Save JSON output
313
+ os.makedirs("output", exist_ok=True)
314
+ with open("output/embedded-images-tables.json", "w") as f:
315
+ json.dump([element.to_dict() for element in elements], f, indent=4)
316
+
317
+
318
+ def get_image_block_types(input_json_file_path: str):
319
+ with open(input_json_file_path, 'r') as file:
320
+ file_elements = json.load(file)
321
+
322
+ for element in file_elements:
323
+ if "image_base64" in element["metadata"]:
324
+ image_data = base64.b64decode(element["metadata"]["image_base64"])
325
+ image = Image.open(io.BytesIO(image_data))
326
+ image.show()
327
+
328
+ # Example usage:
329
+ get_image_block_types("output/embedded-images-tables.json")'''
330
+
331
+ # from unstructured_client import UnstructuredClient
332
+ # from unstructured_client.models import operations, shared
333
+ # from unstructured.staging.base import elements_from_dicts, elements_to_json
334
+
335
+ # import os
336
+ # import base64
337
+ # from PIL import Image
338
+ # import io
339
+
340
+ # if __name__ == "__main__":
341
+ # client = UnstructuredClient(
342
+ # api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")
343
+ # )
344
+
345
+ # # Path to your PDF file
346
+ # local_input_filepath = "your-pdf-file.pdf"
347
+ # local_output_filepath = "output.json"
348
+
349
+ # with open(local_input_filepath, "rb") as f:
350
+ # files = shared.Files(
351
+ # content=f.read(),
352
+ # file_name=local_input_filepath
353
+ # )
354
+
355
+ # request = operations.PartitionRequest(
356
+ # shared.PartitionParameters(
357
+ # files=files,
358
+ # split_pdf_page=True,
359
+ # split_pdf_allow_failed=True,
360
+ # split_pdf_concurrency_level=15,
361
+ # # Extract Base64-encoded images and tables
362
+ # extract_image_block_types=["Image", "Table"]
363
+ # )
364
+ # )
365
+
366
+ # try:
367
+ # result = client.general.partition(request=request)
368
+
369
+ # for element in result.elements:
370
+ # if "image_base64" in element["metadata"]:
371
+ # # Decode and display the image
372
+ # image_data = base64.b64decode(element["metadata"]["image_base64"])
373
+ # image = Image.open(io.BytesIO(image_data))
374
+ # image.show() # This will open the image
375
+
376
+ # # Save results as JSON
377
+ # dict_elements = elements_from_dicts(element_dicts=result.elements)
378
+ # elements_to_json(
379
+ # elements=dict_elements,
380
+ # indent=2,
381
+ # filename=local_output_filepath
382
+ # )
383
+ # except Exception as e:
384
+ # print(e)
385
+
386
+ # -------------------------------------------------------------------------------------- #
387
+
388
+ # # STEP 1
389
+ # # import libraries
390
+ # import fitz # PyMuPDF
391
+ # import io
392
+ # from PIL import Image
393
+
394
+ # # STEP 2
395
+ # # file path you want to extract images from
396
+ # file = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images/page1_orig.pdf"
397
+
398
+ # # open the file
399
+ # pdf_file = fitz.open(file)
400
+
401
+ # # STEP 3
402
+ # # iterate over PDF pages
403
+ # for page_index in range(len(pdf_file)):
404
+
405
+ # # get the page itself
406
+ # page = pdf_file.load_page(page_index) # load the page
407
+ # image_list = page.get_images(full=True) # get images on the page
408
+
409
+ # # printing number of images found in this page
410
+ # if image_list:
411
+ # print(f"[+] Found a total of {len(image_list)} images on page {page_index}")
412
+ # else:
413
+ # print("[!] No images found on page", page_index)
414
+
415
+ # for image_index, img in enumerate(image_list, start=1):
416
+ # # get the XREF of the image
417
+ # xref = img[0]
418
+
419
+ # # extract the image bytes
420
+ # base_image = pdf_file.extract_image(xref)
421
+ # image_bytes = base_image["image"]
422
+
423
+ # # get the image extension
424
+ # image_ext = base_image["ext"]
425
+
426
+ # # save the image
427
+ # image_name = f"image{page_index+1}_{image_index}.{image_ext}"
428
+ # with open(image_name, "wb") as image_file:
429
+ # image_file.write(image_bytes)
430
+ # print(f"[+] Image saved as {image_name}")
431
+
432
+ # -------------------------------------------------------------------------------------- #
433
+
434
+ # from pdf2image import convert_from_path
435
+ # import numpy as np
436
+ # import cv2
437
+
438
+ # def extract_grid_cells_from_pdf(pdf_path, prefix="sub"):
439
+ # # Convert PDF's first page to image
440
+ # pages = convert_from_path(
441
+ # pdf_path,
442
+ # dpi=300,
443
+ # poppler_path=r"C:\poppler-23.11.0\Library\bin"
444
+ # )
445
+ # pil = pages[0]
446
+ # img = np.array(pil)[:, :, ::-1] # RGB→BGR
447
+
448
+ # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
449
+ # _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
450
+ # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
451
+ # dil = cv2.dilate(thresh, kernel, iterations=2)
452
+
453
+ # cnts, _ = cv2.findContours(dil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
454
+ # cells = [cv2.boundingRect(c) for c in cnts if cv2.contourArea(c) > 1000]
455
+ # cells = sorted(cells, key=lambda r: (r[1], r[0]))
456
+
457
+ # for i, (x, y, w, h) in enumerate(cells):
458
+ # crop = img[y:y+h, x:x+w]
459
+ # cv2.imwrite(f"{prefix}_{i:02d}.png", crop)
460
+ # print("Saved", f"{prefix}_{i:02d}.png")
461
+
462
+ # if __name__ == "__main__":
463
+ # extract_grid_cells_from_pdf(
464
+ # r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\page1_orig.pdf"
465
+ # )
466
+
467
+
468
+ # import cv2
469
+ # import layoutparser as lp
470
+ # from pdf2image import convert_from_path
471
+ # from reportlab.pdfgen import canvas
472
+ # from reportlab.lib.pagesizes import letter
473
+ # import numpy as np
474
+ # import tempfile
475
+ # import os
476
+
477
+ # # 1️⃣ Setup LayoutParser model
478
+ # model = lp.Detectron2LayoutModel(
479
+ # "lp://PrimaLayout/PrimaLayout/mask_rcnn_R_50_FPN_3x/config",
480
+ # label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
481
+ # )
482
+
483
+ # # 2️⃣ Utility to crop and save a layout region
484
+ # def crop_and_save(img, block, out_dir, idx):
485
+ # x1, y1, x2, y2 = map(int, block.block.x_1_y_2_x_2_y_2)
486
+ # cropped = img[y1:y2, x1:x2]
487
+ # path = os.path.join(out_dir, f"crop_{idx}.png")
488
+ # cv2.imwrite(path, cropped)
489
+ # return path
490
+
491
+ # # 3️⃣ Convert cropped images into multi-page PDF
492
+ # def imgs_to_pdf(img_paths, output_pdf):
493
+ # c = canvas.Canvas(output_pdf, pagesize=letter)
494
+ # w, h = letter
495
+ # for img in img_paths:
496
+ # c.drawImage(img, 0, 0, width=w, height=h)
497
+ # c.showPage()
498
+ # c.save()
499
+
500
+ # # 4️⃣ If user input is a PDF or image folder
501
+ # def process_document(pdf_path, output_pdf):
502
+ # imgs = convert_from_path(pdf_path)
503
+ # cropped_paths = []
504
+ # with tempfile.TemporaryDirectory() as tmp:
505
+ # for page_idx, pil_im in enumerate(imgs):
506
+ # img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
507
+ # layout = model.detect(img)
508
+ # for idx, block in enumerate(layout):
509
+ # path = crop_and_save(img, block, tmp, f"{page_idx}_{idx}")
510
+ # cropped_paths.append(path)
511
+ # imgs_to_pdf(cropped_paths, output_pdf)
512
+
513
+ # # 5️⃣ Real-time camera/video feed
514
+ # def process_video(output_pdf, src=0, frame_limit=100):
515
+ # cap = cv2.VideoCapture(src)
516
+ # idx = 0
517
+ # cropped_paths = []
518
+ # with tempfile.TemporaryDirectory() as tmp:
519
+ # while idx < frame_limit:
520
+ # ret, img = cap.read()
521
+ # if not ret:
522
+ # break
523
+ # layout = model.detect(img)
524
+ # for i, block in enumerate(layout):
525
+ # path = crop_and_save(img, block, tmp, f"{idx}_{i}")
526
+ # cropped_paths.append(path)
527
+ # idx += 1
528
+ # cap.release()
529
+ # imgs_to_pdf(cropped_paths, output_pdf)
530
+
531
+ # if __name__ == "__main__":
532
+ # import argparse
533
+
534
+ # ap = argparse.ArgumentParser()
535
+ # ap.add_argument("--input", required=True,
536
+ # help="path to PDF or 'cam' for camera")
537
+ # ap.add_argument("--output", required=True, help="output PDF path")
538
+ # ap.add_argument("--frames", type=int, default=50,
539
+ # help="frames to scan if using camera")
540
+ # args = ap.parse_args()
541
+
542
+ # if args.input.lower().endswith(".pdf"):
543
+ # process_document(args.input, args.output)
544
+ # elif args.input.lower() == "cam":
545
+ # process_video(args.output, src=0, frame_limit=args.frames)
546
+ # else:
547
+ # print("Unsupported input. Use PDF path or 'cam'.")
548
+
549
+ # import cv2
550
+ # from PIL import Image
551
+ # import numpy as np
552
+
553
+ # def get_contours(frame):
554
+ # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
555
+ # # Threshold to binary
556
+ # _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
557
+ # # Find contours
558
+ # contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
559
+ # return contours
560
+
561
+ # def extract_regions(frame, contours):
562
+ # rois = []
563
+ # for cnt in contours:
564
+ # x, y, w, h = cv2.boundingRect(cnt)
565
+ # if w*h < 1000: # skip small noise
566
+ # continue
567
+ # roi = frame[y:y+h, x:x+w]
568
+ # rois.append(roi)
569
+ # return rois
570
+
571
+ # def save_rois_as_pdf(rois, output_path):
572
+ # pil_imgs = []
573
+ # for roi in rois:
574
+ # rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
575
+ # pil = Image.fromarray(rgb)
576
+ # pil_imgs.append(pil)
577
+ # if pil_imgs:
578
+ # pil_imgs[0].save(output_path, save_all=True, append_images=pil_imgs[1:])
579
+ # print(f"Saved {len(pil_imgs)} regions to {output_path}")
580
+
581
+ # def main():
582
+ # cap = cv2.VideoCapture(0)
583
+ # all_rois = []
584
+ # print("Press 'c' to capture and extract; 'q' to quit.")
585
+
586
+ # while True:
587
+ # ret, frame = cap.read()
588
+ # if not ret:
589
+ # break
590
+ # cv2.imshow("Live Feed", frame)
591
+
592
+ # key = cv2.waitKey(1) & 0xFF
593
+ # if key == ord('c'):
594
+ # contours = get_contours(frame)
595
+ # rois = extract_regions(frame, contours)
596
+ # all_rois.extend(rois)
597
+ # print(f"Captured {len(rois)} regions.")
598
+ # elif key == ord('q'):
599
+ # break
600
+
601
+ # cap.release()
602
+ # cv2.destroyAllWindows()
603
+
604
+ # if all_rois:
605
+ # save_rois_as_pdf(all_rois, "output_contours.pdf")
606
+ # else:
607
+ # print("No regions captured.")
608
+
609
+ # if __name__ == "__main__":
610
+ # main()
611
+
612
+ # import cv2
613
+ # from PIL import Image
614
+ # import numpy as np
615
+
616
+ # def get_edge_contours(frame, low=50, high=150):
617
+ # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
618
+ # blurred = cv2.GaussianBlur(gray, (5, 5), 1.0)
619
+ # edges = cv2.Canny(blurred, low, high)
620
+ # contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
621
+ # return contours, edges
622
+
623
+ # def extract_edge_rois(frame, contours, min_area=1000):
624
+ # rois = []
625
+ # for cnt in contours:
626
+ # x, y, w, h = cv2.boundingRect(cnt)
627
+ # if w * h < min_area:
628
+ # continue
629
+ # roi = frame[y:y+h, x:x+w]
630
+ # rois.append(roi)
631
+ # return rois
632
+
633
+ # def save_rois_as_pdf(rois, output_path):
634
+ # pil_imgs = []
635
+ # for roi in rois:
636
+ # rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
637
+ # pil_imgs.append(Image.fromarray(rgb))
638
+ # if pil_imgs:
639
+ # pil_imgs[0].save(output_path, save_all=True, append_images=pil_imgs[1:])
640
+ # print(f"✅ Saved {len(pil_imgs)} edge-region(s) to {output_path}")
641
+ # else:
642
+ # print("⚠️ No edge-based regions detected—PDF not created.")
643
+
644
+ # def main():
645
+ # cap = cv2.VideoCapture(0)
646
+ # all_rois = []
647
+ # print("Press ‘c’ to capture current edge regions, ‘q’ to quit.")
648
+
649
+ # while True:
650
+ # ret, frame = cap.read()
651
+ # if not ret:
652
+ # break
653
+
654
+ # contours, edges = get_edge_contours(frame)
655
+ # cv2.imshow("Live Feed", frame)
656
+ # cv2.imshow("Edges", edges)
657
+
658
+ # key = cv2.waitKey(1) & 0xFF
659
+ # if key == ord('c'):
660
+ # rois = extract_edge_rois(frame, contours)
661
+ # all_rois.extend(rois)
662
+ # print(f"🔄 Captured {len(rois)} edge-region(s). Total: {len(all_rois)}")
663
+ # elif key == ord('q'):
664
+ # break
665
+
666
+ # cap.release()
667
+ # cv2.destroyAllWindows()
668
+
669
+ # if all_rois:
670
+ # save_rois_as_pdf(all_rois, "edge_contours.pdf")
671
+ # else:
672
+ # print("❌ No regions captured.")
673
+
674
+ # if __name__ == "__main__":
675
+ # main()
676
+
677
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
flask
opencv-python
pillow
imutils
unstructured[pdf]
pytesseract
python-dotenv
numpy
templates/index.html ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Real-Time Document Scanner</title>
</head>

<body style="font-family:sans-serif;text-align:center">
    <h1>📄 Real-Time Document Scanner</h1>

    {# Show flashed messages: errors in red, everything else in green. #}
    {% with messages = get_flashed_messages(with_categories=true) %}
    {% for category, message in messages %}
    {% if category == 'error' %}
    <p style="color: red;">{{ message }}</p>
    {% else %}
    <p style="color: green;">{{ message }}</p>
    {% endif %}
    {% endfor %}
    {% endwith %}

    {# Resolve the URL from the view name so it follows any route change. #}
    <form action="{{ url_for('scan_document') }}">
        <button type="submit" style="padding:15px 30px;font-size:18px">📷 Scan Document</button>
    </form>
</body>

</html>