# Spaces: Ash2505 / EEE515-HW3
# Status shown on the Space page: Runtime error
# File size: 6,946 Bytes

import gradio as gr
from PIL import Image, ImageFilter
import matplotlib.pyplot as plt
import torch
import cv2
import numpy as np
from torchvision import transforms
from transformers import AutoModelForImageSegmentation, DepthProImageProcessorFast, DepthProForDepthEstimation
import requests

# Run on the GPU when one is available; otherwise fall back to the CPU so the
# module can still be imported on CPU-only hosts.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: trust_remote_code=True executes Python code shipped in the model
# repository; it is required here because BiRefNet defines its own model class.
birefnet = AutoModelForImageSegmentation.from_pretrained('ZhengPeng7/BiRefNet', trust_remote_code=True)
torch.set_float32_matmul_precision('high')
birefnet.to(device)
birefnet.eval()
# Half precision is only used on CUDA; on the CPU the model stays in float32.
if device.type == "cuda":
    birefnet.half()

# Working resolution; the original notebook driver resized every image to
# 512x512 before processing, and the blur sigmas below are tuned for it.
_WORK_SIZE = (512, 512)
_DEPTH_PRO_ID = "apple/DepthPro-hf"
# Filled on first use so the DepthPro weights are loaded once, not per request.
_depth_pro_cache = {}


def _load_depth_pro():
    """Return the DepthPro (processor, model) pair, loading it on first use."""
    if not _depth_pro_cache:
        _depth_pro_cache["processor"] = DepthProImageProcessorFast.from_pretrained(_DEPTH_PRO_ID)
        _depth_pro_cache["model"] = DepthProForDepthEstimation.from_pretrained(_DEPTH_PRO_ID).to(device)
    return _depth_pro_cache["processor"], _depth_pro_cache["model"]


def _segment_foreground(rgb_image):
    """Return BiRefNet's soft foreground mask as a PIL image sized like rgb_image."""
    preprocess = transforms.Compose([
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # Follow whatever device/dtype the module-level code gave the network
    # instead of assuming CUDA + fp16.
    weights = next(birefnet.parameters())
    batch = preprocess(rgb_image).unsqueeze(0).to(device=weights.device, dtype=weights.dtype)

    with torch.no_grad():
        preds = birefnet(batch)[-1].sigmoid().cpu()
    soft_mask = preds[0].squeeze().float()
    return transforms.ToPILImage()(soft_mask).resize(rgb_image.size)


def _bokeh_blur(rgb, mask, sigma=15):
    """Keep pixels where mask > 127 sharp and Gaussian-blur everything else."""
    blurred = cv2.GaussianBlur(rgb, (0, 0), sigmaX=sigma, sigmaY=sigma)
    _, binary = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
    keep_sharp = binary.astype(bool)[..., None]
    return np.where(keep_sharp, rgb, blurred)


def _estimate_depth(rgb_image):
    """Return DepthPro's depth map for rgb_image, min-max scaled to [0, 255]."""
    processor, model = _load_depth_pro()
    inputs = processor(images=rgb_image, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    post_processed = processor.post_process_depth_estimation(
        outputs, target_sizes=[(rgb_image.height, rgb_image.width)],
    )
    depth = post_processed[0]["predicted_depth"]

    lowest = depth.min()
    span = depth.max() - lowest
    if float(span) > 0:
        depth = (depth - lowest) / span
    else:
        # A perfectly flat depth map would otherwise divide by zero.
        depth = torch.zeros_like(depth)
    return (depth * 255.).detach().cpu().numpy()


def _lens_blur(rgb, depth, t1, t2):
    """Blur rgb in three depth bands split at t1 and t2 percent of the depth range."""
    # t1 / t2 are percentages (the sliders go up to 99), so scale by 100 to
    # land inside the 0..255 depth range.
    near_limit = (t1 / 100) * 255
    far_limit = (t2 / 100) * 255

    middleground = cv2.GaussianBlur(rgb, (0, 0), sigmaX=7, sigmaY=7)
    background = cv2.GaussianBlur(rgb, (0, 0), sigmaX=15, sigmaY=15)

    in_middle = ((depth >= near_limit) & (depth < far_limit))[..., None]
    in_back = (depth >= far_limit)[..., None]
    # Anything closer than near_limit is left unblurred.
    return np.where(in_back, background, np.where(in_middle, middleground, rgb))


def extract_object(image, t1, t2):
    """Produce a bokeh image and a depth-based lens-blur image from one photo.

    The input is converted to RGB and resized to 512x512. BiRefNet segments
    the foreground for the bokeh effect (sharp subject, sigma=15 blurred
    background). DepthPro supplies a depth map for the lens blur, which leaves
    the nearest band sharp and blurs the middle band (sigma=7) and the far
    band (sigma=15).

    Args:
        image: Input PIL image, or None when nothing was uploaded.
        t1: Depth percentage (0-100) where the middleground band starts.
        t2: Depth percentage (0-100) where the background band starts.

    Returns:
        A ``(bokeh, lens_blur)`` pair: ``bokeh`` is an RGB PIL image and
        ``lens_blur`` an RGB uint8 array of shape (512, 512, 3). Returns
        ``(None, None)`` when ``image`` is None.
    """
    if image is None:
        return None, None

    resized = image.convert("RGB").resize(_WORK_SIZE)
    rgb = np.array(resized)

    mask = np.array(_segment_foreground(resized).convert("L"))
    bokeh = _bokeh_blur(rgb, mask)

    depth = _estimate_depth(resized)
    lens_blur = _lens_blur(rgb, depth, t1, t2)

    # Everything stays in RGB order, so no BGR<->RGB conversion is needed.
    return Image.fromarray(bokeh), lens_blur

# Visualization
# plt.axis("off")
# subplots for 3 images: original, segmented, mask

# plt.figure(figsize=(15, 5))

# image = Image.open('/content/drive/MyDrive/eee515-hw3/hw3-q24.jpg')
# #resize the image to 512x512
# imageResized = image.resize((512, 512))

# result = extract_object(birefnet, imageResized)
# plt.subplot(1, 3, 1)
# plt.title("Original Resized Image")
# plt.imshow(imageResized)

# plt.subplot(1, 3, 2)
# plt.title("Segmented Image")
# plt.imshow(result[0])

# plt.subplot(1, 3, 3)
# plt.title("Mask")
# plt.imshow(result[1], cmap="gray")
# plt.show()

# Create a Gradio interface


def build_interface(image1=None, image2=None):
    """Build the Gradio Blocks UI for the bokeh / lens-blur app.

    The layout has an input column (image upload, two threshold sliders and a
    submit button) and two output columns. Clicking the button calls
    ``extract_object(image, middleground, background)`` and shows its two
    results.

    Args:
        image1: Optional initial value for the "Bokeh Image" output.
        image2: Optional initial value for the "Lens Blur Image" output.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """
    title = "Bokeh and Lens Blur"
    with gr.Blocks(theme=gr.themes.Soft(), title=title, fill_width=True) as interface:
        with gr.Row():
            with gr.Column(scale=3):
                with gr.Group():
                    input_image = gr.Image(type="pil", label="Input Image")
                    gr.Markdown("### Set the values for Middleground and Background")
                    middleground = gr.Slider(
                        minimum=0, maximum=40, step=1, value=33, label="Middleground"
                    )
                    background = gr.Slider(
                        minimum=40, maximum=99, step=1, value=66, label="Background"
                    )
                with gr.Row():
                    submit_button = gr.Button("Submit", variant="primary")

            with gr.Column(scale=3):
                bokeh_output = gr.Image(
                    value=image1, type="pil", label="Bokeh Image", interactive=False
                )

            with gr.Column(scale=3):
                lens_output = gr.Image(
                    value=image2, type="pil", label="Lens Blur Image", interactive=False
                )

        # Event listeners must be registered inside the Blocks context.
        submit_button.click(
            extract_object,
            inputs=[
                input_image,
                middleground,
                background,
            ],
            outputs=[
                bokeh_output,
                lens_output,
            ],
        )

    return interface

# demo = gr.Interface(sepia, gr.Image(), "image")

title = "Gaussian Blur Background App"
# The segmentation model loaded by this file is BiRefNet, so the description
# names it rather than RMBG-2.0.
description = (
    "Upload an image to apply a realistic background blur effect. "
    "The app segments the foreground using BiRefNet and then applies a Gaussian "
    "blur (σ=15) to the background, simulating a video conferencing blur effect. "
    "The Middleground and Background sliders set the depth percentages at which "
    "the lens blur switches to a stronger blur."
)

# extract_object is the function matching this signature: it takes
# (image, middleground, background) and returns the two output images.
iface = gr.Interface(
    fn=extract_object,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(minimum=0, maximum=40, step=1, value=33, label="Middleground"),
        gr.Slider(minimum=40, maximum=99, step=1, value=66, label="Background"),
    ],
    outputs=[
        gr.Image(type="pil", label="Bokeh Image"),
        gr.Image(type="pil", label="Lens Blur Image"),
    ],
    title=title,
    description=description,
    allow_flagging="never"
)

# Serve the fully wired Interface defined above.
demo = iface
# One request at a time: both models share a single device.
demo.queue(default_concurrency_limit=1)

if __name__ == "__main__":
    demo.launch()