"""Flask app that streams a camera feed, outlines a detected document and scans it.

Routes:
    /            -- page hosting the live stream.
    /video_feed  -- MJPEG stream of the annotated camera frames.
    /capture     -- POST; warps the detected document, saves it as JPEG and PDF,
                    then extracts embedded images from the PDF.
"""

import base64
import io
import json
import os

import cv2
import pytesseract
from dotenv import load_dotenv
from flask import Flask, Response, flash, redirect, render_template, url_for
from imutils.perspective import four_point_transform
from PIL import Image
from unstructured.partition.pdf import partition_pdf

load_dotenv()

app = Flask(__name__)
# flash() stores messages in the session, which requires a secret key. Fall back
# to a random per-process key so a missing SECRET_KEY does not break every
# flash() call (sessions then simply do not survive a restart).
app.secret_key = os.getenv("SECRET_KEY") or os.urandom(32)

# External tool locations. The defaults are the original Windows install paths;
# both can be overridden through the environment (or the .env file).
_tesseract_cmd = os.getenv(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
if os.path.isfile(_tesseract_cmd):
    # Only override pytesseract's default when the binary is really there, so
    # hosts without this path keep using whatever pytesseract finds by default.
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
poppler_path = os.getenv("POPPLER_PATH", r"C:\poppler-23.11.0\Library\bin")

count = 0

OUTPUT_FOLDER = "OUTPUTS"
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "DETECTED_IMAGE")
PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

for path in [
    OUTPUT_FOLDER,
    IMAGE_FOLDER_PATH,
    DETECTED_IMAGE_FOLDER_PATH,
    PDF_FOLDER_PATH,
    JSON_FOLDER_PATH,
]:
    os.makedirs(path, exist_ok=True)

# Video source: 0 selects the local webcam. For a CCTV / IP camera pass an RTSP
# URL instead, e.g.
#   cv2.VideoCapture("rtsp://username:password@ip_address:554/"
#                    "user=username_password='password'_channel=channel_number_stream=0.sdp")
# or a public test stream such as
#   cv2.VideoCapture("rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream")
camera = cv2.VideoCapture(0)

# Increase resolution if supported by the webcam.
camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
camera.set(cv2.CAP_PROP_FPS, 30)


# --- FUNCTION: Detect document contour ---
def detect_document_contour(image, min_area=1000):
    """Return the largest four-cornered contour in ``image``, or ``None``.

    The image is converted to grayscale, blurred and Otsu-thresholded; the
    resulting contours are examined from largest to smallest area and the
    first one whose polygon approximation has exactly four vertices wins.

    Args:
        image: BGR image as returned by ``cv2.VideoCapture.read``.
        min_area: Contours whose area is not strictly greater than this value
            are ignored.

    Returns:
        The approximated quadrilateral as produced by ``cv2.approxPolyDP``,
        or ``None`` when the image is empty or no quadrilateral is found.
    """
    if image is None or image.size == 0:
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    for candidate in sorted(contours, key=cv2.contourArea, reverse=True):
        if cv2.contourArea(candidate) <= min_area:
            # Sorted by descending area, so every remaining contour is too small.
            break
        perimeter = cv2.arcLength(candidate, True)
        approx = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(approx) == 4:
            return approx
    return None


# --- FUNCTION: Extract images from saved PDF ---
def extract_images_from_pdf(pdf_path, output_json_path):
    """Partition a PDF, dump its elements to JSON and save embedded images.

    Args:
        pdf_path: Path of the PDF to analyse.
        output_json_path: Where the list of element dicts is written as JSON.

    Returns:
        A list with the paths of the image files that were written. Images go
        to an ``extracted_images`` folder next to ``output_json_path`` and are
        prefixed with the JSON file's stem so separate scans do not overwrite
        each other.
    """
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # put the base64 image in the metadata
    )
    file_elements = [element.to_dict() for element in elements]

    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(file_elements, f, indent=4)

    extracted_images_dir = os.path.join(
        os.path.dirname(output_json_path), "extracted_images"
    )
    os.makedirs(extracted_images_dir, exist_ok=True)
    stem = os.path.splitext(os.path.basename(output_json_path))[0]

    saved_paths = []
    for i, element in enumerate(file_elements, start=1):
        encoded = element.get("metadata", {}).get("image_base64")
        if not encoded:
            continue
        image = Image.open(io.BytesIO(base64.b64decode(encoded)))
        if image.mode not in ("RGB", "RGBA", "L"):
            # Normalise less common modes before writing a PNG.
            image = image.convert("RGB")
        # Persist the image instead of opening a desktop viewer: this function
        # runs inside a web request, where pop-up windows are not wanted.
        image_path = os.path.join(extracted_images_dir, f"{stem}_image_{i}.png")
        image.save(image_path)
        saved_paths.append(image_path)
    return saved_paths


display = None  # latest frame with the detected contour drawn on it
latest_frame = None  # latest raw camera frame; this is what gets scanned
scale = 0.5  # down-scaling factor applied to the streamed preview
contour = None


def gen_frames():
    """Yield the camera feed as multipart JPEG chunks for an MJPEG response.

    Each frame is stored untouched in ``latest_frame``; an annotated copy with
    the detected document outline is stored in ``display``, down-scaled by
    ``scale`` and streamed. The generator stops when the camera stops
    delivering frames.
    """
    global display, latest_frame
    while True:
        success, frame = camera.read()
        if not success:
            break

        latest_frame = frame
        annotated = frame.copy()
        outline = detect_document_contour(annotated)
        if outline is not None:
            cv2.drawContours(annotated, [outline], -1, (0, 255, 0), 3)
        display = annotated

        resized = cv2.resize(
            annotated,
            (int(scale * annotated.shape[1]), int(scale * annotated.shape[0])),
        )
        # No cv2.imshow here: frames are delivered to the browser, and a HighGUI
        # window without a waitKey loop never refreshes (and cannot be created
        # on a host without a display).
        encoded_ok, buffer = cv2.imencode(".jpg", resized)
        if not encoded_ok:
            continue
        yield (
            b"--frame\r\n"
            b"Content-Type: image/jpeg\r\n\r\n" + buffer.tobytes() + b"\r\n"
        )


# --- Route: Scan Document ---
@app.route("/capture", methods=["POST"])
def capture_document():
    """Scan the document visible in the latest camera frame.

    The raw frame (without the green preview outline) is perspective-corrected
    and written as JPEG and PDF, then the PDF is passed to
    ``extract_images_from_pdf``. The outcome is reported through ``flash`` and
    the client is always redirected back to the index page.
    """
    global count

    if latest_frame is None:
        flash("❌ No frame captured!", "error")
        return redirect(url_for("index"))

    frame = latest_frame.copy()
    outline = detect_document_contour(frame)
    if outline is None:
        flash("❌ No document contour found!", "error")
        return redirect(url_for("index"))

    warped = four_point_transform(frame, outline.reshape(4, 2))

    image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
    pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
    json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")

    if not cv2.imwrite(image_path, warped):
        flash("❌ Could not save the scanned image!", "error")
        return redirect(url_for("index"))
    # Advance the counter as soon as a file exists under this index, so a
    # failure further down cannot make the next scan overwrite it.
    count += 1

    with Image.open(image_path) as img:
        img.convert("RGB").save(pdf_path)

    try:
        extract_images_from_pdf(pdf_path, json_path)
    except Exception:
        # Request boundary: keep the saved scan, log the traceback and tell the
        # user instead of answering with a 500 page.
        app.logger.exception("Image extraction failed for %s", pdf_path)
        flash("⚠️ Document saved, but image extraction failed!", "error")
        return redirect(url_for("index"))

    flash("✅ Document scanned and saved!", "success")
    return redirect(url_for("index"))


@app.route("/video_feed")
def video_feed():
    """Video streaming route. Put this in the src attribute of an img tag."""
    return Response(gen_frames(), mimetype="multipart/x-mixed-replace; boundary=frame")


@app.route("/")
def index():
    """Video streaming home page."""
    return render_template("live_streaming_index.html")


if __name__ == "__main__":
    # The reloader re-imports this module in a child process, which would open
    # the camera a second time; keep debug mode but run a single process.
    app.run(debug=True, use_reloader=False)