Scratch_Vision_Game_dup

Sleeping

App Files Files Community

prthm11 committed on Jul 16

Commit

1eb2852

verified ·

1 Parent(s): 3585865

Upload live_streaming_flask.py

Browse files

Files changed (1) hide show

live_streaming_flask.py +259 -151

live_streaming_flask.py CHANGED Viewed

@@ -1,151 +1,259 @@
-from flask import Flask, render_template, Response, flash, redirect, url_for
-import cv2
-from unstructured.partition.pdf import partition_pdf
-import json, base64, io, os
-from PIL import Image
-from imutils.perspective import four_point_transform
-from dotenv import load_dotenv
-import pytesseract
-load_dotenv()
-app = Flask(__name__)
-app.secret_key = os.getenv("SECRET_KEY")
-pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
-poppler_path=r"C:\poppler-23.11.0\Library\bin"
-count = 0
-OUTPUT_FOLDER = "OUTPUTS"
-IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
-DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER,"DETECTED_IMAGE")
-PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
-JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")
-for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
-    os.makedirs(path, exist_ok=True)
-camera = cv2.VideoCapture('rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream')  # use 0 for web camera
-#  for cctv camera use rtsp://username:password@ip_address:554/user=username_password='password'_channel=channel_number_stream=0.sdp' instead of camera
-# for local webcam use
-# camera= cv2.VideoCapture(0)
-# Increase resolution if supported by the webcam
-camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
-camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
-camera.set(cv2.CAP_PROP_FPS, 30)
-# --- FUNCTION: Detect document contour ---
-def detect_document_contour(image):
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    blur = cv2.GaussianBlur(gray, (5, 5), 0)
-    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
-    contours = sorted(contours, key=cv2.contourArea, reverse=True)
-    for contour in contours:
-        area = cv2.contourArea(contour)
-        if area > 1000:
-            peri = cv2.arcLength(contour, True)
-            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
-            if len(approx) == 4:
-                return approx
-    return None
-# --- FUNCTION: Extract images from saved PDF ---
-def extract_images_from_pdf(pdf_path, output_json_path):
-    elements = partition_pdf(
-        filename=pdf_path,
-        strategy="hi_res",
-        extract_image_block_types=["Image"],  # or ["Image", "Table"]
-        extract_image_block_to_payload=True,  # Set to True to get base64 in output
-    )
-    with open(output_json_path, "w") as f:
-        json.dump([element.to_dict() for element in elements], f, indent=4)
-    # Display extracted images
-    with open(output_json_path, 'r') as file:
-        file_elements = json.load(file)
-    extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
-    os.makedirs(extracted_images_dir, exist_ok=True)
-    for i, element in enumerate(file_elements):
-        if "image_base64" in element["metadata"]:
-            image_data = base64.b64decode(element["metadata"]["image_base64"])
-            image = Image.open(io.BytesIO(image_data))
-            image.show(title=f"Extracted Image {i+1}")
-            # image.save(DETECTED_IMAGE_FOLDER_PATH, f"Extracted Image {i+1}.png")
-display = None
-scale = 0.5
-contour = None
-def gen_frames():  # generate frame by frame from camera
-    global display
-    while True:
-        # Capture frame-by-frame
-        success, frame = camera.read()  # read the camera frame
-        if not success:
-            break
-        else:
-            display = frame.copy()
-            contour = detect_document_contour(display)
-            if contour is not None:
-                cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)
-            resized = cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0])))
-            cv2.imshow("📷 Scan Document - Press 's' to Save, ESC to Exit", resized)
-            ret, buffer = cv2.imencode('.jpg', resized)
-            frame = buffer.tobytes()
-            yield (b'--frame\r\n'
-                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')  # concat frame one by one and show result
-# --- Route: Scan Document ---
-@app.route("/capture", methods=['POST'])
-def capture_document():
-    global count, display
-    if display is None:
-        flash("❌ No frame captured!", "error")
-        return redirect(url_for("index"))
-    frame = display.copy()
-    contour = detect_document_contour(frame)
-    if contour is None:
-        flash("❌ No document contour found!", "error")
-        return redirect(url_for("index"))
-    warped = four_point_transform(frame, contour.reshape(4, 2))
-    image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
-    pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
-    json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")
-    # json_path = os.path.join(DETECTED_IMAGE_FOLDER_PATH, f"scanned_{count}.json")
-    cv2.imwrite(image_path, warped)
-    img = Image.open(image_path).convert("RGB")
-    img.save(pdf_path)
-    extract_images_from_pdf(pdf_path, json_path)
-    flash("✅ Document scanned and saved!", "success")
-    count += 1
-    return redirect(url_for("index"))
-@app.route('/video_feed')
-def video_feed():
-    #Video streaming route. Put this in the src attribute of an img tag
-    return Response(gen_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')
-@app.route('/')
-def index():
-    """Video streaming home page."""
-    return render_template('live_streaming_index.html')
-if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=7860, debug=False)

+from flask import Flask, render_template, Response, flash, redirect, url_for
+import cv2
+import numpy as np
+from unstructured.partition.pdf import partition_pdf
+import json, base64, io, os
+from PIL import Image, ImageEnhance, ImageDraw
+from imutils.perspective import four_point_transform
+from dotenv import load_dotenv
+import pytesseract
+from transformers import BlipProcessor, BlipForConditionalGeneration
+# Load variables from a .env file before the os.getenv lookup below.
+load_dotenv()
+app = Flask(__name__)
+# SECRET_KEY comes from the environment; os.getenv returns None when it is unset.
+app.secret_key = os.getenv("SECRET_KEY")
+# Hard-coded Windows install locations. NOTE(review): poppler_path is not referenced
+# anywhere else in the visible code.
+pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+poppler_path=r"C:\poppler-23.11.0\Library\bin"
+# Running index used by capture_document to name each saved scan.
+count = 0
+# Output directory layout; every folder is created at import time if missing.
+OUTPUT_FOLDER = "OUTPUTS"
+IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
+DETECTED_IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER,"DETECTED_IMAGE")
+PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
+JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")
+for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, DETECTED_IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
+    os.makedirs(path, exist_ok=True)
+# Alternative video sources (kept for reference):
+# camera = cv2.VideoCapture('rtsp://freja.hiof.no:1935/rtplive/_definst_/hessdalen03.stream')  # use 0 for web camera
+#  for cctv camera use rtsp://username:password@ip_address:554/user=username_password='password'_channel=channel_number_stream=0.sdp' instead of camera
+# Active source: local webcam at device index 0, opened once at import time and
+# shared by gen_frames.
+camera= cv2.VideoCapture(0)
+# Request 1280x720 at 30 FPS; increases resolution only if supported by the webcam
+camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
+camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
+camera.set(cv2.CAP_PROP_FPS, 30)
+camera.set(cv2.CAP_PROP_AUTOFOCUS, 1)  # Enable autofocus
+# --- FUNCTION: Detect document contour ---
+def detect_document_contour(image):
+    """Return the largest four-vertex contour found in a BGR image, or None.
+
+    The image is converted to grayscale, blurred, and Otsu-thresholded.
+    Contours are then visited from largest to smallest area; the first one
+    whose area exceeds 1000 and whose polygon approximation has exactly
+    four vertices is returned.
+    """
+    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
+    _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+    found, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    for candidate in sorted(found, key=cv2.contourArea, reverse=True):
+        # Skip specks that are too small to be a document
+        if cv2.contourArea(candidate) <= 1000:
+            continue
+        perimeter = cv2.arcLength(candidate, True)
+        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
+        if len(polygon) == 4:
+            return polygon
+    return None
+def load_image(image_path):
+    """Read an image file with OpenCV and return it as an array.
+
+    Args:
+        image_path: Path to a .png, .jpg, .jpeg, .webp or .tiff file
+            (the extension check is case-insensitive).
+
+    Returns:
+        The image as returned by ``cv2.imread``.
+
+    Raises:
+        ValueError: If the extension is not supported, or if OpenCV could
+            not read the file.
+    """
+    ext = os.path.splitext(image_path)[1].lower()
+    if ext not in ('.png', '.jpg', '.jpeg', '.webp', '.tiff'):
+        raise ValueError(f"Unsupported image format: {ext}")
+    image = cv2.imread(image_path)
+    # cv2.imread reports failure by returning None instead of raising, so the
+    # check must come before the image is displayed or otherwise used.
+    if image is None:
+        raise ValueError(f"Failed to load image from {image_path}. The file may be corrupted or unreadable.")
+    cv2.imshow("Original Image", image)
+    print(f"Image : {image}")
+    return image
+# Function for upscaling image using OpenCV's INTER_CUBIC
+def upscale_image(image, scale=2):
+    """Return the image enlarged by ``scale`` in both dimensions using bicubic interpolation."""
+    rows, cols = image.shape[:2]
+    target_size = (cols * scale, rows * scale)  # cv2.resize takes (width, height)
+    upscaled_image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
+    print(f"UPSCALE IMAGE : {upscaled_image}")
+    return upscaled_image
+# Function to denoise the image (reduce noise)
+def reduce_noise(image):
+    """Return a denoised copy of a color image via OpenCV's fastNlMeansDenoisingColored."""
+    denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
+    return denoised
+# Function to sharpen the image
+def sharpen_image(image):
+    """Return the image convolved with a 3x3 sharpening kernel (same depth as the input)."""
+    sharpen_kernel = np.array([
+        [0, -1, 0],
+        [-1, 5, -1],
+        [0, -1, 0],
+    ])
+    return cv2.filter2D(image, -1, sharpen_kernel)
+# Function to increase contrast and enhance details without changing color
+def enhance_image(image):
+    """Return a BGR image with contrast boosted by a factor of 1.5.
+
+    The BGR input is converted to RGB for Pillow's contrast enhancer and
+    converted back to BGR before being returned.
+    """
+    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    boosted = ImageEnhance.Contrast(Image.fromarray(rgb_pixels)).enhance(1.5)
+    return cv2.cvtColor(np.array(boosted), cv2.COLOR_RGB2BGR)
+# Complete function to process image
+def process_image(image_path, scale=2):
+    """Load an image file and run it through the enhancement pipeline.
+
+    The stages run in this order: load, upscale by ``scale``, denoise,
+    sharpen, contrast-enhance. The result is printed, shown in an OpenCV
+    window, and returned.
+    """
+    staged = upscale_image(load_image(image_path), scale)
+    # The remaining stages each take one image and return one image
+    for stage in (reduce_noise, sharpen_image, enhance_image):
+        staged = stage(staged)
+    final_image = staged
+    print(f"FINAL IMAGE : {final_image}")
+    cv2.imshow("Final Image", final_image)
+    return final_image
+# BLIP: Bootstrapping Language-Image Pre-training, used here to caption extracted images.
+# Both objects below are created at import time from the
+# "Salesforce/blip-image-captioning-base" checkpoint.
+# NOTE: the bare triple-quoted strings are expression statements, not docstrings.
+""" BlipProcessor: converts Image into tensor format"""
+blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+# print(f"BLIP Processor: {blip_processor}")
+""" BlipForConditionalGeneration: Generates the Image Caption(text)"""
+# .to("cpu") places the model on the CPU; get_blip_description moves its inputs there too.
+blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cpu")
+# Prints the model object's string representation when the module is imported.
+print(f"BLIP Model: {blip_model}")
+def get_blip_description(image: Image.Image) -> str:
+    """Return a BLIP-generated caption for a PIL image (at most 100 new tokens)."""
+    model_inputs = blip_processor(image, return_tensors="pt").to("cpu")
+    generated_ids = blip_model.generate(**model_inputs, max_new_tokens=100)
+    return blip_processor.decode(generated_ids[0], skip_special_tokens=True)
+# --- FUNCTION: Extract images from saved PDF ---
+def extract_images_from_pdf(pdf_path, output_json_path):
+    """Extract image blocks from a PDF, caption them, and write two JSON files.
+
+    The PDF is partitioned with ``partition_pdf`` and every element is dumped
+    to ``output_json_path``. Each element that carries base64 image data is
+    decoded, shown, captioned with ``get_blip_description``, and recorded as
+    a "Sprite N" entry. The sprite entries are written next to the raw JSON
+    in a file whose name ends in ``_sprites.json``.
+
+    Args:
+        pdf_path: Path of the PDF to analyse.
+        output_json_path: Path the raw element JSON is written to.
+    """
+    elements = partition_pdf(
+        filename=pdf_path,
+        strategy="hi_res",
+        extract_image_block_types=["Image"],  # or ["Image", "Table"]
+        extract_image_block_to_payload=True,  # Set to True to get base64 in output
+    )
+    # Build the dicts once: they are both dumped to disk and scanned for
+    # images, so there is no need to read the file straight back in.
+    element_dicts = [element.to_dict() for element in elements]
+    with open(output_json_path, "w") as f:
+        json.dump(element_dicts, f, indent=4)
+    extracted_images_dir = os.path.join(os.path.dirname(output_json_path), "extracted_images")
+    os.makedirs(extracted_images_dir, exist_ok=True)
+    # Prepare manipulated sprite JSON structure
+    manipulated_json = {}
+    pdf_filename = os.path.basename(pdf_path)
+    pdf_dir_path = os.path.dirname(pdf_path).replace("/", "\\")  # windows-style
+    for i, element in enumerate(element_dicts):
+        if "image_base64" not in element["metadata"]:
+            continue
+        image_b64 = element["metadata"]["image_base64"]
+        image = Image.open(io.BytesIO(base64.b64decode(image_b64))).convert("RGB")
+        image.show(title=f"Extracted Image {i+1}")
+        description = get_blip_description(image)
+        # Sprites are numbered 1, 2, 3... in the order they are found
+        manipulated_json[f"Sprite {len(manipulated_json) + 1}"] = {
+            "name": pdf_filename,
+            "base64": image_b64,
+            "file-path": pdf_dir_path,
+            "description": description,
+        }
+    # Swap only the final extension. str.replace would also rewrite ".json"
+    # inside directory names, and would return the path unchanged when it has
+    # no ".json" at all, making the sprite file overwrite the raw JSON.
+    manipulated_json_path = os.path.splitext(output_json_path)[0] + "_sprites.json"
+    with open(manipulated_json_path, "w") as sprite_file:
+        json.dump(manipulated_json, sprite_file, indent=4)
+    print(f"✅ Manipulated sprite JSON saved: {manipulated_json_path}")
+# Most recent frame; written by gen_frames and read by capture_document.
+# Stays None until the first frame has been read.
+display = None
+# Scale factor applied to each frame before it is streamed by gen_frames.
+scale = 0.5
+# NOTE(review): gen_frames and capture_document each assign a local variable named
+# `contour` without declaring it global, so this module-level value is never updated
+# by the visible code.
+contour = None
+def gen_frames():  # generate frame by frame from camera
+    """Yield multipart JPEG chunks of the camera stream with the document outlined.
+
+    Each iteration reads a frame from the module-level ``camera``, stores an
+    unmodified copy in the global ``display`` for ``capture_document``, draws
+    the detected document outline on the preview, scales it by ``scale``,
+    and yields it as one part of a ``multipart/x-mixed-replace`` response.
+    The generator ends when the camera stops returning frames.
+    """
+    global display
+    while True:
+        # Capture frame-by-frame
+        success, frame = camera.read()  # read the camera frame
+        if not success:
+            break
+        # Keep the clean frame for /capture. The outline is drawn on `frame`
+        # only, so the green contour is not baked into the saved scan.
+        display = frame.copy()
+        outline = detect_document_contour(frame)
+        if outline is not None:
+            cv2.drawContours(frame, [outline], -1, (0, 255, 0), 3)
+        resized = cv2.resize(frame, (int(scale * frame.shape[1]), int(scale * frame.shape[0])))
+        # NOTE(review): this opens a desktop window from the server process and
+        # no key handling exists despite the title; confirm it is still wanted.
+        cv2.imshow("📷 Scan Document - Press 's' to Save, ESC to Exit", resized)
+        encoded_ok, buffer = cv2.imencode('.jpg', resized)
+        if not encoded_ok:
+            # Skip a frame that could not be encoded rather than stream garbage
+            continue
+        jpeg_bytes = buffer.tobytes()
+        yield (b'--frame\r\n'
+               b'Content-Type: image/jpeg\r\n\r\n' + jpeg_bytes + b'\r\n')  # concat frame one by one and show result
+# --- Route: Scan Document ---
+@app.route("/capture", methods=['POST'])
+def capture_document():
+    """Scan the most recent streamed frame and extract images from it.
+
+    Takes a copy of the global ``display`` frame, detects a four-point
+    contour, perspective-warps the frame to that contour and saves it as a
+    JPEG. The saved image is enhanced with ``process_image``, written to
+    ``pdf_path``, and passed to ``extract_images_from_pdf``. Every path ends
+    by flashing a message and redirecting to the index page; ``count`` is
+    incremented only after a successful scan.
+    """
+    global count, display
+    # display is still None if gen_frames has not read a frame yet
+    if display is None:
+        flash("❌ No frame captured!", "error")
+        return redirect(url_for("index"))
+    # Work on a copy so later writes to display by gen_frames do not affect this scan
+    frame = display.copy()
+    contour = detect_document_contour(frame)
+    if contour is None:
+        flash("❌ No document contour found!", "error")
+        return redirect(url_for("index"))
+    # The contour is reshaped to 4 (x, y) points for the perspective transform
+    warped = four_point_transform(frame, contour.reshape(4, 2))
+    # All three output files share the current count in their names
+    image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
+    pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
+    json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")
+    # json_path = os.path.join(DETECTED_IMAGE_FOLDER_PATH, f"scanned_{count}.json")
+    cv2.imwrite(image_path, warped)
+    # img = process_image(image_path)
+    # # img = Image.open(image_path).convert("RGB")
+    # img.save(pdf_path)
+    # process_image returns a BGR array, so convert to RGB before handing it to Pillow
+    img = process_image(image_path)
+    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+    pil_img.save(pdf_path)
+    extract_images_from_pdf(pdf_path, json_path)
+    flash("✅ Document scanned and saved!", "success")
+    count += 1
+    return redirect(url_for("index"))
+@app.route('/video_feed')
+def video_feed():
+    """Stream the frames produced by gen_frames as a multipart/x-mixed-replace response."""
+    # Video streaming route. Put this in the src attribute of an img tag
+    return Response(gen_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')
+@app.route('/')
+def index():
+    """Video streaming home page; renders the live_streaming_index.html template."""
+    return render_template('live_streaming_index.html')
+if __name__ == '__main__':
+    # debug=True also enables the auto-reloader, which re-imports this module
+    # in a child process. That would open the webcam and load the BLIP model
+    # a second time, so the reloader is switched off while debug stays on.
+    app.run(debug=True, use_reloader=False)