gchallar's picture
Update app.py
6ebec3f verified
raw
history blame
5.58 kB
import gradio as gr
from PIL import Image, ImageFilter
import numpy as np
import torch
import cv2
from transformers import AutoImageProcessor, AutoModelForDepthEstimation, OneFormerProcessor, OneFormerForUniversalSegmentation
# Each checkpoint id is named once so a processor and its model are always
# loaded from the same repository.
_DEPTH_CHECKPOINT = "depth-anything/Depth-Anything-V2-Small-hf"
_SEGMENTATION_CHECKPOINT = "shi-labs/oneformer_coco_swin_large"

# Depth estimation: pre-processor and model used by apply_lens_blur.
image_processor = AutoImageProcessor.from_pretrained(_DEPTH_CHECKPOINT)
depth_model = AutoModelForDepthEstimation.from_pretrained(_DEPTH_CHECKPOINT)

# OneFormer: pre/post-processor and model used by apply_gaussian_blur.
processor = OneFormerProcessor.from_pretrained(_SEGMENTATION_CHECKPOINT)
segmentation_model = OneFormerForUniversalSegmentation.from_pretrained(
    _SEGMENTATION_CHECKPOINT
)
def apply_gaussian_blur(image, foreground_label='person'):
    """Blur the background of ``image`` while keeping one semantic class sharp.

    The image is run through the module-level OneFormer ``processor`` and
    ``segmentation_model`` with the "semantic" task. Pixels predicted as
    ``foreground_label`` are copied from the original image; every other
    pixel comes from a Gaussian-blurred copy (radius 15).

    Args:
        image: PIL image to process. It is converted to RGB internally so
            that palette, grayscale and RGBA inputs are handled as well.
        foreground_label: Class name to keep sharp. It is looked up in
            ``segmentation_model.config.id2label``; the comparison ignores
            case and surrounding whitespace.

    Returns:
        A new RGB PIL image of the same size as ``image``. If
        ``foreground_label`` is not one of the model's classes, an error is
        printed and ``image`` itself is returned unchanged.
    """
    # Resolve the label before running the model so an unknown label does
    # not cost a full segmentation pass.
    wanted_label = (foreground_label or "").strip().lower()
    foreground_class_id = next(
        (
            class_id
            for class_id, label in segmentation_model.config.id2label.items()
            if label.strip().lower() == wanted_label
        ),
        None,
    )
    if foreground_class_id is None:
        print(f"Error: Could not find the label '{foreground_label}' in the model's class mapping.")
        return image  # Return original image if foreground label is not found

    rgb_image = image.convert("RGB")

    # Semantic segmentation (inference only, so no gradients are tracked).
    inputs = processor(images=rgb_image, task_inputs=["semantic"], return_tensors="pt")
    with torch.no_grad():
        outputs = segmentation_model(**inputs)

    # target_sizes expects (height, width) while PIL reports (width, height).
    # The returned class map is therefore already at image resolution and
    # needs no further resizing.
    semantic_map = processor.post_process_semantic_segmentation(
        outputs, target_sizes=[rgb_image.size[::-1]]
    )[0]
    foreground_mask = semantic_map.cpu().numpy() == foreground_class_id

    blurred_background = rgb_image.filter(ImageFilter.GaussianBlur(radius=15))

    # The trailing axis lets the (H, W) mask broadcast over the colour
    # channels instead of assuming a fixed channel count.
    composite = np.where(
        foreground_mask[..., np.newaxis],
        np.array(rgb_image),
        np.array(blurred_background),
    )
    return Image.fromarray(composite.astype(np.uint8))
def apply_lens_blur(image):
    """Apply a depth-dependent blur using the module-level depth model.

    The image is resized to 512x512, a depth map is predicted with
    ``image_processor`` / ``depth_model``, and the normalized prediction
    selects, per pixel, one of five progressively blurred copies of the
    image (Gaussian sigma 0, 0.5, 1.0, 1.5, 2.0). Pixels with the largest
    predicted value stay sharp; pixels with the smallest get the strongest
    blur.

    Args:
        image: PIL image to process. It is converted to RGB internally.

    Returns:
        A 512x512 RGB PIL image. The result is NOT resized back to the
        dimensions of ``image``.
    """
    # Resize image to 512x512 for processing
    resized_image = image.convert("RGB").resize((512, 512))
    image_np = np.array(resized_image)

    inputs = image_processor(images=resized_image, return_tensors="pt")
    with torch.no_grad():
        predicted_depth = depth_model(**inputs).predicted_depth

    # Bring the prediction to the resolution of the resized image;
    # interpolate expects (height, width), PIL reports (width, height).
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=resized_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    depth_map = prediction.cpu().numpy()

    # Normalize to 0-1. A constant depth map would divide by zero and yield
    # NaN indices, so it is treated as "everything in focus" instead.
    depth_min = depth_map.min()
    depth_range = depth_map.max() - depth_min
    if depth_range > 0:
        depth_norm = (depth_map - depth_min) / depth_range
    else:
        depth_norm = np.ones_like(depth_map)

    # Level 0 is the untouched image; level k is blurred with sigma 0.5 * k.
    num_blur_levels = 5
    blurred_layers = [image_np]
    for level in range(1, num_blur_levels):
        sigma = level * 0.5
        blurred_layers.append(
            cv2.GaussianBlur(
                image_np,
                (15, 15),
                sigmaX=sigma,
                sigmaY=sigma,
                borderType=cv2.BORDER_REPLICATE,
            )
        )

    # NOTE: the cast truncates toward zero, so the strongest level is chosen
    # only where the normalized value is exactly 0.
    depth_indices = ((1 - depth_norm) * (num_blur_levels - 1)).astype(np.uint8)

    # Start from the sharp image and overwrite each level's pixels in one
    # masked assignment instead of visiting all pixels in a Python loop.
    final_blurred_image = image_np.copy()
    for level in range(1, num_blur_levels):
        selected = depth_indices == level
        final_blurred_image[selected] = blurred_layers[level][selected]

    return Image.fromarray(final_blurred_image)
def process_image(image, blur_type, foreground_label='person'):
    """Dispatch ``image`` to the blur effect selected by ``blur_type``.

    Args:
        image: PIL image to process, or ``None`` when no image was supplied
            (e.g. the UI was submitted without an upload).
        blur_type: ``"Gaussian Blur"`` selects ``apply_gaussian_blur``; any
            other value selects ``apply_lens_blur``.
        foreground_label: Class name forwarded to ``apply_gaussian_blur``;
            ignored for the lens blur.

    Returns:
        The processed PIL image, or ``None`` if ``image`` is ``None``.
    """
    # Without this guard both helpers fail on attribute access of None.
    if image is None:
        return None

    if blur_type == "Gaussian Blur":
        return apply_gaussian_blur(image, foreground_label=foreground_label)
    return apply_lens_blur(image)
# Gradio UI: an image upload, a choice of blur effect and the foreground
# class name used by the Gaussian (segmentation-based) blur.
interface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Radio(["Gaussian Blur", "Lens Blur"], label="Choose Blur Effect"),
        # `value` sets the initial text; the older `default` keyword is not
        # a parameter of current Gradio components.
        gr.Textbox(label="Foreground Label (for Gaussian Blur)", value="person"),
    ],
    # process_image returns a single image, so exactly one output component
    # is declared; a second one makes Gradio report a return-count mismatch.
    outputs=gr.Image(type="pil"),
    title="Gaussian & Lens Blur Effects",
    description="Upload an image and select either Gaussian blur (with foreground segmentation) or depth-based lens blur.",
)

if __name__ == "__main__":
    interface.launch()