File size: 5,849 Bytes
d25f03f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
from flask import Flask, render_template, Response, flash, redirect, url_for
import cv2
from unstructured.partition.pdf import partition_pdf
import json, base64, io, os
from PIL import Image
from imutils.perspective import four_point_transform
from dotenv import load_dotenv
import pytesseract

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

app = Flask(__name__)
# flash() stores its messages in the session, and Flask cannot use the session
# without a secret key. Fall back to a random per-process key when SECRET_KEY
# is not configured so flashing still works (sessions then do not survive a
# restart).
app.secret_key = os.getenv("SECRET_KEY") or os.urandom(32)

# Locations of the external tools. The defaults are the original Windows
# install paths; set TESSERACT_CMD / POPPLER_PATH to override them elsewhere.
pytesseract.pytesseract.tesseract_cmd = os.getenv(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
poppler_path = os.getenv("POPPLER_PATH", r"C:\poppler-23.11.0\Library\bin")

# Running index used to name the files produced by each capture.
count = 0
OUTPUT_FOLDER = "OUTPUTS"
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "DETECTED_IMAGE")
PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

# Create the whole output tree up front so later writes never hit a missing
# directory.
for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
    os.makedirs(path, exist_ok=True)

# Video source. Index 0 selects the local webcam. For a network/CCTV camera,
# pass an RTSP URL to cv2.VideoCapture instead, e.g.
#   rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream
#   rtsp://username:password@ip_address:554/user=username_password='password'_channel=channel_number_stream=0.sdp
camera = cv2.VideoCapture(0)

# Request a higher resolution and frame rate if the webcam supports them.
camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
camera.set(cv2.CAP_PROP_FPS, 30)

# --- FUNCTION: Detect document contour ---
def detect_document_contour(image, min_area=1000):
    """Find the largest four-cornered contour in a BGR image.

    The image is converted to grayscale, blurred with a 5x5 Gaussian kernel and
    binarised with Otsu's threshold. Contours are then examined from largest to
    smallest area, and the first one whose polygonal approximation (tolerance:
    2% of its perimeter) has exactly four vertices is returned.

    Args:
        image: BGR image array, or ``None``.
        min_area: Contours whose area is not strictly greater than this value
            are ignored. Defaults to 1000, the previously hard-coded threshold.

    Returns:
        The four-vertex approximation produced by ``cv2.approxPolyDP``, or
        ``None`` when ``image`` is ``None`` or no qualifying contour exists.
    """
    if image is None:
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # findContours returns (contours, hierarchy) on OpenCV 4 and
    # (image, contours, hierarchy) on OpenCV 3; the contours are always the
    # second-to-last item.
    contours = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    for candidate in sorted(contours, key=cv2.contourArea, reverse=True):
        # The list is sorted by descending area, so once one contour is too
        # small every remaining one is too.
        if cv2.contourArea(candidate) <= min_area:
            break
        perimeter = cv2.arcLength(candidate, True)
        approx = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(approx) == 4:
            return approx
    return None

# --- FUNCTION: Extract images from saved PDF ---
def extract_images_from_pdf(pdf_path, output_json_path):
    """Partition a PDF, dump its elements to JSON and export embedded images.

    The PDF is partitioned with the ``hi_res`` strategy, asking for ``Image``
    blocks to be returned as base64 payloads. All elements are written to
    ``output_json_path``. Every element that carries an image payload is then
    saved into an ``extracted_images`` directory next to the JSON file and
    opened with ``Image.show``.

    Args:
        pdf_path: Path of the PDF file to analyse.
        output_json_path: Path of the JSON file to write the elements to.
    """
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # Set to True to get base64 in output
    )
    # Keep the dicts in memory: they are needed again below, so there is no
    # reason to re-read the file that is about to be written.
    element_dicts = [element.to_dict() for element in elements]
    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(element_dicts, f, indent=4)

    extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
    os.makedirs(extracted_images_dir, exist_ok=True)
    # The directory is shared by every JSON file in the same folder, so prefix
    # image names with this JSON file's stem to avoid overwriting other scans.
    json_stem = os.path.splitext(os.path.basename(output_json_path))[0]

    for i, element in enumerate(element_dicts):
        encoded = element.get("metadata", {}).get("image_base64")
        if not encoded:
            continue
        image_data = base64.b64decode(encoded)
        image = Image.open(io.BytesIO(image_data))

        # Persist the payload bytes unchanged, using the detected image format
        # as the file extension (no re-encoding).
        extension = (image.format or "png").lower()
        image_file = os.path.join(extracted_images_dir, f"{json_stem}_image_{i + 1}.{extension}")
        with open(image_file, "wb") as out:
            out.write(image_data)

        image.show(title=f"Extracted Image {i+1}")

# Latest frame held by gen_frames() (assigned there via `global display`) and
# read by capture_document(); stays None until a frame has been read.
display = None 
# Factor applied to the frame's width and height before the preview is encoded.
scale = 0.5
# NOTE(review): gen_frames() and capture_document() each bind their own local
# `contour`, so this module-level name appears unused in this file — confirm.
contour = None

def gen_frames():  # generate frame by frame from camera
    """Yield camera frames as parts of a multipart JPEG stream.

    Each frame read from ``camera`` is stored in the module-level ``display``
    so the capture route can use it. A copy with the detected document contour
    outlined in green is scaled by ``scale``, JPEG-encoded and yielded as one
    ``--frame`` part.

    The overlay is drawn on a copy, not on ``display``, so that saved scans do
    not contain the green outline. No ``cv2.imshow`` window is opened: without
    a ``cv2.waitKey`` loop such a window is never repainted, and the browser
    stream already provides the preview.

    Yields:
        bytes: One multipart chunk containing a JPEG-encoded preview frame.
    """
    global display

    while True:
        success, frame = camera.read()  # read the camera frame
        if not success:
            break

        # Publish the untouched frame before any drawing happens.
        display = frame

        preview = frame.copy()
        contour = detect_document_contour(frame)
        if contour is not None:
            cv2.drawContours(preview, [contour], -1, (0, 255, 0), 3)

        resized = cv2.resize(preview, (int(scale * preview.shape[1]), int(scale * preview.shape[0])))

        encoded_ok, buffer = cv2.imencode('.jpg', resized)
        if not encoded_ok:
            # Skip a frame that could not be encoded instead of streaming garbage.
            continue

        jpeg_bytes = buffer.tobytes()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + jpeg_bytes + b'\r\n')  # concat frame one by one and show result

# --- Route: Scan Document ---
@app.route("/capture", methods=['POST'])
def capture_document():
    """Scan the document visible in the latest camera frame.

    Takes the most recent frame stored in ``display``, locates a four-cornered
    contour, applies a perspective transform to it, and saves the result as a
    JPEG and as a PDF. The PDF is then passed to ``extract_images_from_pdf``.
    The outcome is reported through a flashed message.

    File names use the module-level ``count``. Indices whose files already
    exist on disk are skipped, and the index is reserved before anything is
    written, so neither a restart nor a failed run overwrites an earlier scan.

    Returns:
        A redirect to the index page in every case.
    """
    global count, display 
    
    if display is None:
        flash("❌ No frame captured!", "error")
        return redirect(url_for("index"))
    
    frame = display.copy()
    contour = detect_document_contour(frame)
    
    if contour is None:
        flash("❌ No document contour found!", "error")
        return redirect(url_for("index"))
    
    warped = four_point_transform(frame, contour.reshape(4, 2))

    # Find the first index with no existing output files (count restarts at 0
    # whenever the process restarts).
    while True:
        image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
        pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
        json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")
        if not any(os.path.exists(p) for p in (image_path, pdf_path, json_path)):
            break
        count += 1
    # Reserve this index now so a failure below cannot cause it to be reused.
    count += 1

    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(image_path, warped):
        flash("❌ Failed to save scanned image!", "error")
        return redirect(url_for("index"))

    with Image.open(image_path) as img:
        img.convert("RGB").save(pdf_path)

    try:
        extract_images_from_pdf(pdf_path, json_path)
    except Exception:
        # Request boundary: the scan itself is already saved, so log the
        # failure and tell the user instead of returning a bare 500.
        app.logger.exception("Image extraction failed for %s", pdf_path)
        flash("⚠️ Document saved, but image extraction failed!", "error")
        return redirect(url_for("index"))

    flash("✅ Document scanned and saved!", "success")
    return redirect(url_for("index"))

@app.route('/video_feed')
def video_feed():
    """Stream the frames produced by ``gen_frames`` as a multipart response.

    Intended to be used as the ``src`` attribute of an ``<img>`` tag.
    """
    frame_stream = gen_frames()
    return Response(
        frame_stream,
        mimetype='multipart/x-mixed-replace; boundary=frame',
    )

@app.route('/')
def index():
    """Render the video streaming home page template."""
    page = render_template('live_streaming_index.html')
    return page

if __name__ == '__main__':
    # Start Flask's built-in server with debug mode enabled when this file is
    # run as a script.
    # NOTE(review): debug mode normally also enables the auto-reloader, which
    # would import this module (and open the camera) in a second process —
    # confirm this is intended and keep debug mode to local development.
    app.run(debug=True)