Scratch_Vision_Game_dup

Sleeping

File size: 5,849 Bytes

d25f03f

from flask import Flask, render_template, Response, flash, redirect, url_for
import cv2
from unstructured.partition.pdf import partition_pdf
import json, base64, io, os
from PIL import Image
from imutils.perspective import four_point_transform
from dotenv import load_dotenv
import pytesseract

# Pull settings from a local .env file (if one exists) into the environment.
load_dotenv()

app = Flask(__name__)
# flash() stores its messages in the signed session cookie, and Flask refuses
# to create a session without a secret key. Fall back to a random per-process
# key so the app still works when SECRET_KEY is not configured; with the
# fallback, sessions simply do not survive a restart.
app.secret_key = os.getenv("SECRET_KEY") or os.urandom(24)

# Only override pytesseract's executable when the Windows install path really
# exists; otherwise keep pytesseract's default, which looks up "tesseract" on
# PATH and therefore also works on non-Windows hosts.
_TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _TESSERACT_CMD
# NOTE(review): poppler_path is not referenced anywhere in the visible code.
poppler_path = r"C:\poppler-23.11.0\Library\bin"

# Running index used in the names of the files written for each scan.
count = 0

# Everything the scanner writes lives under OUTPUT_FOLDER, one sub-folder per
# artefact type.
OUTPUT_FOLDER = "OUTPUTS"
(
    IMAGE_FOLDER_PATH,
    DETECTED_IMAGE_FOLDER_PATH,
    PDF_FOLDER_PATH,
    JSON_FOLDER_PATH,
) = (
    os.path.join(OUTPUT_FOLDER, leaf)
    for leaf in ("SCANNED_IMAGE", "DETECTED_IMAGE", "SCANNED_PDF", "EXTRACTED_JSON")
)

# Create the whole folder tree up front so later writes cannot fail on a
# missing directory.
for path in (
    OUTPUT_FOLDER,
    IMAGE_FOLDER_PATH,
    DETECTED_IMAGE_FOLDER_PATH,
    PDF_FOLDER_PATH,
    JSON_FOLDER_PATH,
):
    os.makedirs(path, exist_ok=True)

# Video source: index 0 selects the local webcam.
# A stream URL can be passed to cv2.VideoCapture instead, e.g.
#   'rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream'
# or, for a CCTV camera,
#   'rtsp://username:password@ip_address:554/user=username_password='password'_channel=channel_number_stream=0.sdp'
camera = cv2.VideoCapture(0)

# Ask for a higher resolution and frame rate, if the webcam supports them.
for _prop, _value in (
    (cv2.CAP_PROP_FRAME_WIDTH, 1280),
    (cv2.CAP_PROP_FRAME_HEIGHT, 720),
    (cv2.CAP_PROP_FPS, 30),
):
    camera.set(_prop, _value)

# --- FUNCTION: Detect document contour ---
def detect_document_contour(image, min_area=1000):
    """Return the largest four-cornered contour found in *image*, or None.

    The image is converted to grayscale, blurred and binarised with Otsu's
    threshold. The contours of the binary image are then examined from the
    largest area to the smallest.

    Args:
        image: BGR image array (it is passed to ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``).
        min_area: Contours whose area is not strictly greater than this value
            are ignored. Defaults to 1000, the previously hard-coded limit.

    Returns:
        The ``cv2.approxPolyDP`` approximation of the first qualifying contour
        that has exactly four vertices, or ``None`` when there is none.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the last two items are the same in
    # every version.
    contours, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    for contour in sorted(contours, key=cv2.contourArea, reverse=True):
        if cv2.contourArea(contour) <= min_area:
            # Sorted by descending area: nothing after this one can qualify.
            break
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) == 4:
            return approx
    return None

# --- FUNCTION: Extract images from saved PDF ---
def extract_images_from_pdf(pdf_path, output_json_path, *, show=True):
    """Partition a PDF, dump its elements to JSON and export embedded images.

    The PDF is partitioned with the "hi_res" strategy, asking for "Image"
    blocks to be returned as base64 payloads. All elements are written to
    *output_json_path*. Every element that carries an ``image_base64`` payload
    is decoded and saved into an ``extracted_images`` folder next to the JSON
    file.

    Args:
        pdf_path: Path of the PDF to analyse.
        output_json_path: Where the list of element dicts is written as JSON.
        show: When true (the default, matching the previous behaviour) each
            extracted image is also opened in the system image viewer. Pass
            ``False`` on headless machines.

    Raises:
        Whatever ``partition_pdf``, the file writes, ``base64.b64decode`` or
        ``Image.open`` raise; nothing is swallowed here.
    """
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # Set to True to get base64 in output
    )
    # Keep the dicts in memory: they are reused below, so there is no need to
    # read the JSON file back after writing it.
    element_dicts = [element.to_dict() for element in elements]
    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(element_dicts, f, indent=4)

    extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
    os.makedirs(extracted_images_dir, exist_ok=True)
    # The folder is shared by every JSON file in the same directory, so prefix
    # image names with this JSON file's stem to keep scans from colliding.
    stem = os.path.splitext(os.path.basename(output_json_path))[0]

    for i, element in enumerate(element_dicts, start=1):
        encoded = element.get("metadata", {}).get("image_base64")
        if not encoded:
            continue

        image_data = base64.b64decode(encoded)
        image = Image.open(io.BytesIO(image_data))

        # Write the decoded bytes untouched (no re-encoding); the extension is
        # taken from the format Pillow detected.
        extension = (image.format or "png").lower()
        image_path = os.path.join(extracted_images_dir, f"{stem}_image_{i}.{extension}")
        with open(image_path, "wb") as image_file:
            image_file.write(image_data)

        if show:
            image.show(title=f"Extracted Image {i}")

# Latest camera frame published by gen_frames() and read by capture_document();
# stays None until the first frame has been captured.
display = None
# Factor applied to the frame's width and height before it is streamed.
scale = 0.5
# NOTE(review): this module-level name is not read anywhere in the visible
# code; gen_frames() and capture_document() each assign their own local
# `contour`.
contour = None

def gen_frames():  # generate frame by frame from camera
    """Yield camera frames as parts of a multipart JPEG (MJPEG) stream.

    For every frame read from the module-level ``camera``:

    * the untouched frame is published in the global ``display``;
    * a preview is built with the detected document contour drawn in green,
      scaled by ``scale`` and JPEG-encoded;
    * the encoded bytes are yielded, wrapped in a ``--frame`` multipart
      boundary.

    The generator ends as soon as the camera fails to deliver a frame.

    Yields:
        bytes: one multipart chunk containing a single JPEG image.
    """
    global display

    while True:
        # Capture frame-by-frame
        success, frame = camera.read()  # read the camera frame
        if not success:
            break

        # Publish the clean frame. The overlay below is drawn on a separate
        # copy so the green outline never ends up in a saved scan and never
        # disturbs contour detection at capture time.
        display = frame

        contour = detect_document_contour(frame)
        preview = frame
        if contour is not None:
            preview = frame.copy()
            cv2.drawContours(preview, [contour], -1, (0, 255, 0), 3)

        resized = cv2.resize(
            preview,
            (int(scale * preview.shape[1]), int(scale * preview.shape[0])),
        )

        # No cv2.imshow here: the browser is the viewer. A native window opened
        # from a request thread is never serviced by cv2.waitKey and fails on
        # headless OpenCV builds.
        encoded_ok, buffer = cv2.imencode('.jpg', resized)
        if not encoded_ok:
            # Skip a frame that could not be encoded instead of crashing the stream.
            continue

        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')  # concat frame one by one and show result

# --- Route: Scan Document ---
@app.route("/capture", methods=['POST'])
def capture_document():
    """Scan the document visible in the latest camera frame.

    Takes the most recent frame from the global ``display``, finds the
    document outline, perspective-corrects it and writes it as a JPEG and as a
    PDF (both named with the running ``count``). It then runs image extraction
    on the PDF, which also produces the JSON file. The outcome is reported to
    the user via ``flash`` and the client is redirected to the index page in
    every case.

    Returns:
        A redirect response to the ``index`` view.
    """
    global count

    latest = display
    if latest is None:
        flash("❌ No frame captured!", "error")
        return redirect(url_for("index"))

    frame = latest.copy()
    contour = detect_document_contour(frame)

    if contour is None:
        flash("❌ No document contour found!", "error")
        return redirect(url_for("index"))

    warped = four_point_transform(frame, contour.reshape(4, 2))
    image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
    pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
    json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")

    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(image_path, warped):
        flash("❌ Could not save the scanned image!", "error")
        return redirect(url_for("index"))

    with Image.open(image_path) as img:
        img.convert("RGB").save(pdf_path)

    # The files for this index now exist. Advance the counter before the
    # extraction step so a failure there cannot make the next scan overwrite
    # them.
    count += 1

    try:
        extract_images_from_pdf(pdf_path, json_path)
    except Exception:
        # Request boundary: log the full traceback and tell the user instead
        # of answering with a bare 500 after the scan was already saved.
        app.logger.exception("Image extraction failed for %s", pdf_path)
        flash("❌ Document saved, but image extraction failed!", "error")
        return redirect(url_for("index"))

    flash("✅ Document scanned and saved!", "success")
    return redirect(url_for("index"))

@app.route('/video_feed')
def video_feed():
    """Video streaming route; use its URL as the ``src`` of an ``<img>`` tag.

    Returns:
        A streaming ``Response`` fed by ``gen_frames()`` and served as
        ``multipart/x-mixed-replace`` with the boundary name ``frame``.
    """
    stream = gen_frames()
    return Response(
        stream,
        mimetype='multipart/x-mixed-replace; boundary=frame',
    )

@app.route('/')
def index():
    """Serve the home page, rendered from ``live_streaming_index.html``."""
    page = render_template('live_streaming_index.html')
    return page

if __name__ == '__main__':
    try:
        # Keep the debugger but switch off the auto-reloader: the reloader runs
        # this module in two processes, and each would open the camera at
        # import time and compete for the same device.
        app.run(debug=True, use_reloader=False)
    finally:
        # Hand the capture device back to the OS when the server stops.
        camera.release()