Spaces:
Sleeping
Sleeping
File size: 5,849 Bytes
d25f03f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
from flask import Flask, render_template, Response, flash, redirect, url_for
import cv2
from unstructured.partition.pdf import partition_pdf
import json, base64, io, os
from PIL import Image
from imutils.perspective import four_point_transform
from dotenv import load_dotenv
import pytesseract
# Load variables from a local .env file (if present) before reading any
# configuration from the environment.
load_dotenv()

app = Flask(__name__)
# flash() stores its messages in the session, and Flask refuses to use the
# session without a secret key. Fall back to a random per-process key when
# SECRET_KEY is unset so the routes do not fail; with the fallback, sessions
# simply do not survive a restart.
app.secret_key = os.getenv("SECRET_KEY") or os.urandom(24)

# Location of the Tesseract executable. An explicit TESSERACT_CMD environment
# variable always wins; the Windows install path is only applied when it
# actually exists, so other hosts keep pytesseract's own default command.
_DEFAULT_TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
_tesseract_cmd = os.getenv("TESSERACT_CMD", _DEFAULT_TESSERACT_CMD)
if _tesseract_cmd != _DEFAULT_TESSERACT_CMD or os.path.exists(_tesseract_cmd):
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
poppler_path = r"C:\poppler-23.11.0\Library\bin"

# Running index used to name the files written by capture_document().
count = 0

# Output directory layout; every folder is created up front.
OUTPUT_FOLDER = "OUTPUTS"
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "DETECTED_IMAGE")
PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")
for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
    os.makedirs(path, exist_ok=True)

# Alternative video sources:
#   network stream: cv2.VideoCapture('rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream')
#   CCTV camera:    rtsp://username:password@ip_address:554/user=username_password='password'_channel=channel_number_stream=0.sdp
# The local webcam (device 0) is used here.
camera = cv2.VideoCapture(0)
# Request a higher resolution and frame rate, if the webcam supports them.
camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
camera.set(cv2.CAP_PROP_FPS, 30)
# --- FUNCTION: Detect document contour ---
def detect_document_contour(image, min_area=1000):
    """Return the largest four-cornered contour found in *image*, or None.

    The image is converted to grayscale, blurred and binarised with Otsu's
    threshold. The contours of the binary image are then examined from the
    largest area to the smallest, and the first one whose polygonal
    approximation has exactly four vertices is returned.

    Args:
        image: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.
        min_area: Contours whose area is not strictly greater than this value
            are ignored. Defaults to 1000, the previously hard-coded
            threshold.

    Returns:
        The four-vertex approximation produced by ``cv2.approxPolyDP``, or
        ``None`` when no contour qualifies.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    for contour in sorted(contours, key=cv2.contourArea, reverse=True):
        if cv2.contourArea(contour) <= min_area:
            # Contours are visited in descending area order, so none of the
            # remaining ones can pass the size filter either.
            break
        peri = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
        if len(approx) == 4:
            return approx
    return None
# --- FUNCTION: Extract images from saved PDF ---
def extract_images_from_pdf(pdf_path, output_json_path):
    """Partition a PDF, dump its elements to JSON and extract embedded images.

    The PDF is partitioned with ``partition_pdf`` using the ``hi_res``
    strategy, requesting that ``Image`` blocks be embedded in the element
    payload. All elements are written to *output_json_path* as JSON. Every
    element whose metadata carries an ``image_base64`` entry is decoded,
    stored in an ``extracted_images`` folder next to the JSON file and opened
    with the default image viewer.

    Args:
        pdf_path: Path of the PDF file to partition.
        output_json_path: Path of the JSON file to write.

    Returns:
        None.
    """
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # Set to True to get base64 in output
    )

    # Serialise once and reuse the dictionaries below instead of reading the
    # freshly written JSON file back from disk.
    element_dicts = [element.to_dict() for element in elements]
    with open(output_json_path, "w") as f:
        json.dump(element_dicts, f, indent=4)

    extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
    os.makedirs(extracted_images_dir, exist_ok=True)
    # Prefix image files with the JSON file's stem so that images from
    # different scans sharing this folder do not overwrite each other.
    json_stem = os.path.splitext(os.path.basename(output_json_path))[0]

    for i, element in enumerate(element_dicts):
        encoded = element.get("metadata", {}).get("image_base64")
        if encoded is None:
            continue
        image_data = base64.b64decode(encoded)
        image = Image.open(io.BytesIO(image_data))

        # Persist the decoded bytes unchanged; the extension follows the
        # format Pillow detected, falling back to "png" if it is unknown.
        extension = (image.format or "png").lower()
        image_path = os.path.join(extracted_images_dir, f"{json_stem}_image_{i + 1}.{extension}")
        with open(image_path, "wb") as image_file:
            image_file.write(image_data)

        # Display extracted image
        image.show(title=f"Extracted Image {i+1}")
# Most recent frame copy produced by gen_frames(); capture_document() reads it
# and reports an error while it is still None.
display = None
# Resize factor applied to each frame before it is encoded for the stream.
scale = 0.5
# NOTE(review): never read as a global in the visible code - gen_frames() and
# capture_document() each assign their own local `contour`.
contour = None
def gen_frames():  # generate frame by frame from camera
    """Yield camera frames as the parts of a multipart JPEG stream.

    Each frame read from ``camera`` is stored (unannotated) in the global
    ``display``, outlined in green where a document contour is detected,
    scaled by ``scale``, JPEG-encoded and yielded as one ``--frame`` part.
    The generator ends when the camera stops delivering frames.

    Yields:
        bytes: One multipart chunk containing a single JPEG image.
    """
    global display
    while True:
        # Capture frame-by-frame
        success, frame = camera.read()
        if not success:
            break

        # Keep a clean snapshot for capture_document(): the green overlay drawn
        # below must not end up in the saved scan.
        display = frame.copy()

        contour = detect_document_contour(frame)
        if contour is not None:
            cv2.drawContours(frame, [contour], -1, (0, 255, 0), 3)

        resized = cv2.resize(frame, (int(scale * frame.shape[1]), int(scale * frame.shape[0])))

        # No cv2.imshow() here: the preview is delivered to the browser, and a
        # GUI window opened from this generator is never serviced by
        # cv2.waitKey().
        encoded, buffer = cv2.imencode('.jpg', resized)
        if not encoded:
            # Skip a frame that could not be encoded instead of streaming
            # invalid data.
            continue

        # concat frame one by one and show result
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
# --- Route: Scan Document ---
@app.route("/capture", methods=['POST'])
def capture_document():
    """Scan the document visible in the latest camera frame.

    The most recent frame stored in ``display`` is searched for a document
    contour, perspective-corrected, and saved as a JPEG and as a PDF. The PDF
    is then passed to ``extract_images_from_pdf``. The outcome is reported
    through a flash message.

    Returns:
        A redirect to the index page.
    """
    global count
    if display is None:
        flash("❌ No frame captured!", "error")
        return redirect(url_for("index"))

    frame = display.copy()
    contour = detect_document_contour(frame)
    if contour is None:
        flash("❌ No document contour found!", "error")
        return redirect(url_for("index"))

    warped = four_point_transform(frame, contour.reshape(4, 2))

    # `count` starts again at 0 in every new process; skip indices that are
    # already on disk so earlier scans are not overwritten.
    while os.path.exists(os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")):
        count += 1

    image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
    pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
    json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")

    # cv2.imwrite() reports failure through its return value; stop here rather
    # than failing later when the missing file is opened.
    if not cv2.imwrite(image_path, warped):
        flash("❌ Could not save the scanned image!", "error")
        return redirect(url_for("index"))

    with Image.open(image_path) as img:
        img.convert("RGB").save(pdf_path)

    extract_images_from_pdf(pdf_path, json_path)
    flash("✅ Document scanned and saved!", "success")
    count += 1
    return redirect(url_for("index"))
@app.route('/video_feed')
def video_feed():
    """Video streaming route; use its URL as the src attribute of an img tag."""
    frame_stream = gen_frames()
    return Response(frame_stream, mimetype='multipart/x-mixed-replace; boundary=frame')
@app.route('/')
def index():
    """Render the home page that hosts the live video stream."""
    page = render_template('live_streaming_index.html')
    return page
# Start Flask's development server (debug mode) when run as a script.
if __name__ == '__main__':
    app.run(debug=True)