Spaces:

Ash2505
/

EEE515-HW3

Sleeping

App Files Files Community

Ash2505 committed on Mar 29

Commit

e28b51d

verified ·

1 Parent(s): d3e5ae0

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -154

app.py CHANGED Viewed

@@ -1,200 +1,212 @@
-import gradio as gr
-from PIL import Image, ImageFilter
-# import matplotlib.pyplot as plt
-import torch
 import cv2
 import numpy as np
 from torchvision import transforms
-from transformers import AutoModelForImageSegmentation, DepthProImageProcessorFast, DepthProForDepthEstimation
-import requests
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-birefnet = AutoModelForImageSegmentation.from_pretrained('ZhengPeng7/BiRefNet', trust_remote_code=True)
-torch.set_float32_matmul_precision(['high', 'highest'][0])
-birefnet.to(device)
-birefnet.eval()
-birefnet.half()
-def extract_object(image, t1, t2):
-    # Data settings
-    imageResized = image.resize((512, 512))
-    image_size = (1024, 1024)
-    transform_image = transforms.Compose([
-        transforms.Resize(image_size),
-        transforms.ToTensor(),
-        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
-    ])
-    # image = Image.open(imagepath)
-    image1 = image.copy()
-    input_images = transform_image(image1).unsqueeze(0).to(device).half()
-    # Prediction
     with torch.no_grad():
-        preds = birefnet(input_images)[-1].sigmoid().cpu()
     pred = preds[0].squeeze()
     pred_pil = transforms.ToPILImage()(pred)
-    mask = pred_pil.resize(image1.size)
-    image1.putalpha(mask)
-    blurredBg = cv2.GaussianBlur(np.array(imageResized), (0, 0), sigmaX=15, sigmaY=15)
-    mask = np.array(result[1].convert("L"))
-    _, maskBinary = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
     img = cv2.cvtColor(np.array(imageResized), cv2.COLOR_RGB2BGR)
     maskInv = cv2.bitwise_not(maskBinary)
     maskInv3 = cv2.cvtColor(maskInv, cv2.COLOR_GRAY2BGR)
     foreground = cv2.bitwise_and(img, cv2.bitwise_not(maskInv3))
     background = cv2.bitwise_and(blurredBg, maskInv3)
     finalImg = cv2.add(cv2.cvtColor(foreground, cv2.COLOR_BGR2RGB), background)
-# plt.figure(figsize=(15, 5))
-    # return image1, mask
-# def depth_estimation():
-    imageProcessor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
-    model = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf").to(device)
-    inputs = imageProcessor(images=imageResized, return_tensors="pt").to(device)
     with torch.no_grad():
-        outputs = model(**inputs)
-    post_processed_output = imageProcessor.post_process_depth_estimation(
-        outputs, target_sizes=[(imageResized.height, imageResized.width)],
     )
-    field_of_view = post_processed_output[0]["field_of_view"]
-    focal_length = post_processed_output[0]["focal_length"]
     depth = post_processed_output[0]["predicted_depth"]
     depth = (depth - depth.min()) / (depth.max() - depth.min())
     depth = depth * 255.
     depth = depth.detach().cpu().numpy()
-    # print(depth)
-    depthImg = Image.fromarray(depth.astype("uint8"))
-    # threshold1 = 255 / 20      # ~85
-    # threshold2 = 2 * 255 / 3   # ~170
-    threshold1 = (t1/10) * 255
-    threshold2 = (t2/10) * 255
-    # Precompute blurred versions for each region
     img_foreground = img.copy()  # No blur for foreground
     img_middleground = cv2.GaussianBlur(img, (0, 0), sigmaX=7, sigmaY=7)
     img_background = cv2.GaussianBlur(img, (0, 0), sigmaX=15, sigmaY=15)
-    # Create masks for each region (as float arrays for proper blending)
-    mask_fg = (depth < threshold1).astype(np.float32)
-    mask_mg = ((depth >= threshold1) & (depth < threshold2)).astype(np.float32)
-    mask_bg = (depth >= threshold2).astype(np.float32)
-    # Expand masks to 3 channels (H, W, 3)
-    mask_fg = np.stack([mask_fg]*3, axis=-1)
-    mask_mg = np.stack([mask_mg]*3, axis=-1)
-    mask_bg = np.stack([mask_bg]*3, axis=-1)
-    # Combine the images using the masks in a vectorized manner.
-    final_img = (img_foreground * mask_fg +
-                 img_middleground * mask_mg +
-                 img_background * mask_bg).astype(np.uint8)
-    # Convert the result back to RGB for display with matplotlib.
     final_img_rgb = cv2.cvtColor(final_img, cv2.COLOR_BGR2RGB)
-    print("BOTH OUTPUT COMPUTED")
-    return image1, final_img
-# Visualization
-# plt.axis("off")
-# subplots for 3 images: original, segmented, mask
-# plt.figure(figsize=(15, 5))
-# image = Image.open('/content/drive/MyDrive/eee515-hw3/hw3-q24.jpg')
-# #resize the image to 512x512
-# imageResized = image.resize((512, 512))
-# result = extract_object(birefnet, imageResized)
-# plt.subplot(1, 3, 1)
-# plt.title("Original Resized Image")
-# plt.imshow(imageResized)
-# plt.subplot(1, 3, 2)
-# plt.title("Segmented Image")
-# plt.imshow(result[0])
-# plt.subplot(1, 3, 3)
-# plt.title("Mask")
-# plt.imshow(result[1], cmap="gray")
-# plt.show()
-# Create a Gradio interface
-def build_interface(image1, image2):
-    """Build UI for gradio app
     """
-    title = "Bokeh and Lens Blur"
-    with gr.Blocks(theme=gr.themes.Soft(), title=title, fill_width=True) as interface:
-        with gr.Row():
-            # with gr.Column(scale=3):
-            #     with gr.Group():
-            #         input_text_box = gr.Textbox(
-            #             value=None,
-            #             label="Prompt",
-            #             lines=2,
-            #         )
-            #         # gr.Markdown("### Set the values for Middleground and Background")
-            #         # fg = gr.Slider(minimum=0, maximum=99, step=1, value=33, label="Middleground")
-            #         # mg = gr.Slider(minimum=0, maximum=99, step=1, value=66, label="Background")
-            #     with gr.Row():
-            #         submit_button = gr.Button("Submit", variant="primary")
-            with gr.Column(scale=3):
-                model3d = gr.Model3D(
-                    label="Output", height="45em", interactive=False
-                )
-            with gr.Column(scale=3):
-                model3d = gr.Model3D(
-                    label="Output", height="45em", interactive=False
-                )
-        submit_button.click(
-            handle_text_prompt,
-            inputs=[
-                input_text_box,
-                variance
-            ],
-            outputs=[
-                model3d
-            ]
-        )
-    return interface
-# demo = gr.Interface(sepia, gr.Image(), "image")
-title = "Gaussian Blur Background App"
 description = (
-    "Upload an image to apply a realistic background blur effect. "
-    "The app segments the foreground using RMBG-2.0 and then applies a Gaussian "
-    "blur (σ=15) to the background, simulating a video conferencing blur effect."
 )
 demo = gr.Interface(
-    fn=extract_object,
-    inputs=[gr.Image(type="pil", label="Input Image"), gr.Slider(minimum=0, maximum=40, step=1, value=33, label="Middleground"), gr.Slider(minimum=40, maximum=99, step=1, value=66, label="Background")],
-    outputs=[gr.Image(type="pil", label="Bokeh Image"), gr.Image(type="pil", label="Lens Blur Image")],
     title=title,
     description=description,
     allow_flagging="never"
 )
-# demo = build_interface()
-# demo.queue(default_concurrency_limit=1)
-demo.launch()

 import cv2
 import numpy as np
+from PIL import Image, ImageFilter
+import torch
+import gradio as gr
 from torchvision import transforms
+from transformers import (
+    AutoModelForImageSegmentation,
+    DepthProImageProcessorFast,
+    DepthProForDepthEstimation,
+)
+# Set device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# -----------------------------
+# Load Segmentation Model (RMBG-2.0 by briaai)
+# -----------------------------
+seg_model = AutoModelForImageSegmentation.from_pretrained(
+    "briaai/RMBG-2.0", trust_remote_code=True
+)
+# Select float32 matmul precision: index 0 picks "high" (faster, may use reduced-precision kernels); use index 1 for "highest" (full float32)
+torch.set_float32_matmul_precision(["high", "highest"][0])
+seg_model.to(device)
+seg_model.eval()
+# Define segmentation image size and transform
+seg_image_size = (1024, 1024)
+seg_transform = transforms.Compose([
+    transforms.Resize(seg_image_size),
+    transforms.ToTensor(),
+    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+# -----------------------------
+# Load Depth Estimation Model (DepthPro by Apple)
+# -----------------------------
+depth_processor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
+depth_model = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf")
+depth_model.to(device)
+depth_model.eval()
+# -----------------------------
+# Define the Segmentation-Based Blur Effect
+# -----------------------------
+def segmentation_blur_effect(input_image: Image.Image):
+    """
+    Blur the background of an image while keeping the segmented foreground sharp.
+
+    The foreground mask is predicted by the module-level ``seg_model``; the
+    background is a Gaussian-blurred (sigma=15) copy of the image. All
+    compositing happens at the input image's own resolution so that the mask
+    and the pixel arrays always share one shape.
+
+    Args:
+        input_image: Image to process. It is converted to RGB before use.
+
+    Returns:
+        A tuple of three PIL images, each the size of ``input_image``:
+          - composite with sharp foreground and blurred background (RGB)
+          - segmentation mask as predicted by the model (before thresholding)
+          - the fully blurred image (RGB)
+    """
+    # Work on an RGB copy at the input's own resolution. seg_transform already
+    # resizes to seg_image_size for the model, so the working image must not be
+    # resized here: the mask, the image array and the blurred array all have to
+    # share one shape for the cv2 bitwise operations below.
+    imageResized = input_image.convert("RGB")
+    input_tensor = seg_transform(imageResized).unsqueeze(0).to(device)
     with torch.no_grad():
+        preds = seg_model(input_tensor)[-1].sigmoid().cpu()
     pred = preds[0].squeeze()
+    # Convert predicted mask to a PIL image and resize it to the working image size
     pred_pil = transforms.ToPILImage()(pred)
+    mask = pred_pil.resize(imageResized.size)
+    # Create a binary mask (convert to grayscale, then threshold)
+    mask_np = np.array(mask.convert("L"))
+    _, maskBinary = cv2.threshold(mask_np, 127, 255, cv2.THRESH_BINARY)
+    # Convert the working image to an OpenCV BGR array
     img = cv2.cvtColor(np.array(imageResized), cv2.COLOR_RGB2BGR)
+    # Apply Gaussian blur (sigmaX=15, sigmaY=15); this array stays in RGB order
+    blurredBg = cv2.GaussianBlur(np.array(imageResized), (0, 0), sigmaX=15, sigmaY=15)
+    # Create the inverse mask and convert to 3 channels
     maskInv = cv2.bitwise_not(maskBinary)
     maskInv3 = cv2.cvtColor(maskInv, cv2.COLOR_GRAY2BGR)
+    # Extract the foreground and background separately
     foreground = cv2.bitwise_and(img, cv2.bitwise_not(maskInv3))
     background = cv2.bitwise_and(blurredBg, maskInv3)
+    # Combine the two components (foreground is converted to RGB to match background)
     finalImg = cv2.add(cv2.cvtColor(foreground, cv2.COLOR_BGR2RGB), background)
+    finalImg_pil = Image.fromarray(finalImg)
+    # blurredBg is already RGB, so it is wrapped directly without a channel swap
+    blurredBg_pil = Image.fromarray(blurredBg)
+    return finalImg_pil, mask, blurredBg_pil
+# -----------------------------
+# Define the Depth-Based Lens Blur Effect
+# -----------------------------
+def lens_blur_effect(
+    input_image: Image.Image,
+    threshold1: float = 255 / 3,
+    threshold2: float = 2 * 255 / 3,
+):
+    """
+    Uses DepthPro to estimate a depth map and applies a depth-dependent blur
+    by precomputing three versions of the image (foreground, middleground,
+    background) with increasing blur. Each pixel takes its value from the
+    version selected by its normalized depth.
+
+    Args:
+        input_image: Image to process. It is converted to RGB before use.
+        threshold1: Normalized depth (0-255) below which a pixel is foreground.
+        threshold2: Normalized depth (0-255) at or above which a pixel is
+            background; values in between are middleground.
+
+    Returns:
+      - Depth map (PIL Image)
+      - Final lens-blurred image (PIL Image)
+      - Foreground mask (PIL Image)
+      - Middleground mask (PIL Image)
+      - Background mask (PIL Image)
+
+    Raises:
+        ValueError: If the thresholds are not ordered within [0, 255].
+    """
+    # Out-of-order thresholds would make the three masks overlap, and the
+    # summed blend below would then overflow when cast to uint8.
+    if not 0 <= threshold1 <= threshold2 <= 255:
+        raise ValueError("Expected 0 <= threshold1 <= threshold2 <= 255")
+    # Normalize to RGB so grayscale or alpha-channel uploads go through the
+    # RGB->BGR conversion below like any other image.
+    input_image = input_image.convert("RGB")
+    # Process the image with the depth estimation model
+    inputs = depth_processor(images=input_image, return_tensors="pt").to(device)
     with torch.no_grad():
+        outputs = depth_model(**inputs)
+    post_processed_output = depth_processor.post_process_depth_estimation(
+        outputs, target_sizes=[(input_image.height, input_image.width)]
     )
     depth = post_processed_output[0]["predicted_depth"]
+    # Normalize depth to [0, 255]. A perfectly flat depth map would divide by
+    # zero, so the range is clamped; such an image normalizes to all zeros.
+    depth_range = (depth.max() - depth.min()).clamp(min=1e-8)
+    depth = (depth - depth.min()) / depth_range
     depth = depth * 255.
     depth = depth.detach().cpu().numpy()
+    depth_map = depth.astype(np.uint8)
+    depthImg = Image.fromarray(depth_map)
+    # Convert input image to OpenCV BGR format
+    img = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
+    # Precompute three blurred versions of the image
     img_foreground = img.copy()  # No blur for foreground
     img_middleground = cv2.GaussianBlur(img, (0, 0), sigmaX=7, sigmaY=7)
     img_background = cv2.GaussianBlur(img, (0, 0), sigmaX=15, sigmaY=15)
+    # Create masks for the three regions based on depth; the masks are
+    # mutually exclusive and together cover every pixel
+    mask_fg = (depth_map < threshold1).astype(np.float32)
+    mask_mg = ((depth_map >= threshold1) & (depth_map < threshold2)).astype(np.float32)
+    mask_bg = (depth_map >= threshold2).astype(np.float32)
+    # Expand masks to 3 channels to match image dimensions
+    mask_fg_3 = np.stack([mask_fg]*3, axis=-1)
+    mask_mg_3 = np.stack([mask_mg]*3, axis=-1)
+    mask_bg_3 = np.stack([mask_bg]*3, axis=-1)
+    # Combine the images using the masks (vectorized selection)
+    final_img = (img_foreground * mask_fg_3 +
+                 img_middleground * mask_mg_3 +
+                 img_background * mask_bg_3).astype(np.uint8)
     final_img_rgb = cv2.cvtColor(final_img, cv2.COLOR_BGR2RGB)
+    lensBlurImage = Image.fromarray(final_img_rgb)
+    # Create mask images (scaled to 0-255)
+    mask_fg_img = Image.fromarray((mask_fg * 255).astype(np.uint8))
+    mask_mg_img = Image.fromarray((mask_mg * 255).astype(np.uint8))
+    mask_bg_img = Image.fromarray((mask_bg * 255).astype(np.uint8))
+    return depthImg, lensBlurImage, mask_fg_img, mask_mg_img, mask_bg_img
+# -----------------------------
+# Gradio App: Process Image and Display Multiple Effects
+# -----------------------------
+def process_image(input_image: Image.Image):
     """
+    Run both blur pipelines on one uploaded image.
+
+    Returns a 7-tuple of images in this order:
+      1. Segmentation-based Gaussian blur effect.
+      2. Segmentation mask.
+      3. Depth map.
+      4. Depth-based lens blur effect.
+      5. Foreground depth mask.
+      6. Middleground depth mask.
+      7. Background depth mask.
+    """
+    # The third value returned by the segmentation step is not passed on.
+    seg_blur, seg_mask, _ = segmentation_blur_effect(input_image)
+    depth_results = lens_blur_effect(input_image)
+    depth_map_img, lens_blur_img, mask_fg_img, mask_mg_img, mask_bg_img = depth_results
+    return seg_blur, seg_mask, depth_map_img, lens_blur_img, mask_fg_img, mask_mg_img, mask_bg_img
+title = "Blur Effects: Gaussian Blur & Depth-Based Lens Blur"
 description = (
+    "Upload an image to apply two distinct effects:\n\n"
+    "1. A segmentation-based Gaussian blur that blurs the background (using RMBG-2.0).\n"
+    "2. A depth-based lens blur effect that simulates realistic lens blur based on depth (using DepthPro).\n\n"
+    "Outputs include the blurred image, segmentation mask, depth map, lens-blurred image, and depth masks."
 )
 demo = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil", label="Input Image"),
+    outputs=[
+        gr.Image(type="pil", label="Segmentation-Based Blur"),
+        gr.Image(type="pil", label="Segmentation Mask"),
+        gr.Image(type="pil", label="Depth Map"),
+        gr.Image(type="pil", label="Depth-Based Lens Blur"),
+        gr.Image(type="pil", label="Foreground Depth Mask"),
+        gr.Image(type="pil", label="Middleground Depth Mask"),
+        gr.Image(type="pil", label="Background Depth Mask")
+    ],
     title=title,
     description=description,
     allow_flagging="never"
 )
+if __name__ == "__main__":
+    demo.launch()