"""Gradio demo that applies background blur or depth-aware blur to an image."""

import functools

import gradio as gr
import torch
import numpy as np
from PIL import Image
from scipy.ndimage import gaussian_filter
from transformers import pipeline

# Every image is stretched to this (width, height) before processing.
_TARGET_SIZE = (512, 512)

# Labels shown in the dropdown; process_image dispatches on these exact strings.
_GAUSSIAN_BACKGROUND_BLUR = "Gaussian Background Blur"
_DEPTH_AWARE_LENS_BLUR = "Depth-Aware Lens Blur"


def preprocess_image(image):
    """Convert the input to a 512x512 RGB PIL image.

    Args:
        image: A PIL image or a numpy array accepted by ``Image.fromarray``.

    Returns:
        PIL.Image: RGB image resized to exactly 512x512. The aspect ratio
        is NOT preserved; non-square inputs are stretched.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    # The blur code indexes exactly three colour channels, so normalise
    # grayscale / RGBA / palette inputs up front.
    image = image.convert("RGB")
    return image.resize(_TARGET_SIZE)


@functools.lru_cache(maxsize=2)
def _load_segmentation_model(model_name):
    """Load a YOLO segmentation model once per model name."""
    # Imported lazily so ultralytics is only required when segmentation runs.
    from ultralytics import YOLO

    return YOLO(model_name)


def segment_image(image, model_name="yolov8n-seg"):
    """Build a binary foreground mask from YOLO segmentation results.

    Args:
        image (PIL.Image): Input image.
        model_name (str): Name of the YOLO segmentation model.

    Returns:
        numpy.ndarray: ``uint8`` array of shape (height, width) holding 1
        where any detected object's mask covers the pixel and 0 elsewhere.
        Individual instances are merged, not labelled.
    """
    model = _load_segmentation_model(model_name)
    results = model(image)

    width, height = image.size
    mask = np.zeros((height, width), dtype=np.uint8)

    for result in results:
        masks = result.masks
        if masks is None:
            # Nothing detected in this result.
            continue
        for single_mask in masks:
            mask_array = single_mask.data.cpu().numpy().squeeze()
            mask_array = (mask_array > 0.5).astype(np.uint8)

            if mask_array.shape != mask.shape:
                # PIL's resize takes (width, height), unlike numpy's
                # (rows, cols) shape ordering.
                mask_array = np.array(
                    Image.fromarray(mask_array).resize(
                        (width, height), Image.NEAREST
                    )
                )

            # Union of all object masks.
            mask = np.maximum(mask, mask_array)

    return mask


def _blur_channels(image_array, sigma):
    """Gaussian-blur an (H, W, C) array spatially, leaving channels unmixed."""
    # A zero sigma on the last axis makes scipy skip that axis, so each
    # channel is filtered independently.
    return gaussian_filter(image_array, sigma=(sigma, sigma, 0))


def process_image(image, blur_type, sigma=15):
    """Apply the selected blur effect to an image.

    Args:
        image: PIL image or numpy array, or ``None`` when nothing was uploaded.
        blur_type (str): One of the dropdown labels; any other value returns
            the preprocessed image without blur.
        sigma (float): Blur strength (the maximum sigma for depth-aware blur).

    Returns:
        PIL.Image or None: The processed image, or ``None`` if ``image`` is
        ``None``.
    """
    if image is None:
        # Gradio passes None when the button is clicked without an image.
        return None

    pil_image = preprocess_image(image)

    if blur_type == _GAUSSIAN_BACKGROUND_BLUR:
        return apply_gaussian_blur(pil_image, sigma=sigma)
    if blur_type == _DEPTH_AWARE_LENS_BLUR:
        return apply_depth_aware_blur(pil_image, max_sigma=sigma)
    return pil_image


def apply_gaussian_blur(image, sigma=15):
    """Blur the background while keeping segmented objects sharp.

    Args:
        image (PIL.Image): Input image.
        sigma (float): Standard deviation of the Gaussian kernel.

    Returns:
        PIL.Image: RGB image where pixels covered by the segmentation mask
        are unchanged and all other pixels are blurred.
    """
    rgb_image = image.convert("RGB")
    image_array = np.asarray(rgb_image)

    # segment_image already returns a binary 0/1 foreground mask, so it is
    # used directly rather than compared against a class id.
    foreground_mask = segment_image(rgb_image)

    blurred = _blur_channels(image_array, sigma)
    keep = foreground_mask.astype(bool)[:, :, np.newaxis]
    result = np.where(keep, image_array, blurred)
    return Image.fromarray(result.astype(np.uint8))


@functools.lru_cache(maxsize=2)
def _load_depth_estimator(model_name):
    """Build the depth-estimation pipeline once per model name."""
    use_cuda = torch.cuda.is_available()
    return pipeline(
        task="depth-estimation",
        model=model_name,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        # Request the GPU explicitly so the float16 weights chosen above
        # are not left on the CPU.
        device=0 if use_cuda else -1,
    )


def estimate_depth(image, model_name="depth-anything/Depth-Anything-V2-Small-hf"):
    """Estimate a depth map normalised to the range [0, 1].

    Args:
        image (PIL.Image): Input image.
        model_name (str): Hugging Face model id for the depth pipeline.

    Returns:
        numpy.ndarray: Float array built from the pipeline's ``"depth"``
        output, min-max normalised. A constant depth map yields all zeros.
    """
    depth_output = _load_depth_estimator(model_name)(image)
    depth_map = np.array(depth_output["depth"], dtype=np.float64)

    depth_min = depth_map.min()
    depth_range = depth_map.max() - depth_min
    if depth_range == 0:
        # Avoid 0/0 -> NaN when the model returns a flat depth map.
        return np.zeros_like(depth_map)
    return (depth_map - depth_min) / depth_range


def apply_depth_aware_blur(image, max_sigma=10, min_sigma=0):
    """Blur each pixel by an amount that grows with its depth-map value.

    Args:
        image (PIL.Image): Input image.
        max_sigma (float): Sigma applied where the normalised depth is 1.
        min_sigma (float): Sigma applied where the normalised depth is 0.

    Returns:
        PIL.Image: RGB image with per-pixel varying blur.
    """
    rgb_image = image.convert("RGB")
    depth_map = estimate_depth(rgb_image)
    image_array = np.asarray(rgb_image, dtype=np.float32)

    # The depth map is used without inversion: larger normalised values get
    # more blur.
    # NOTE(review): whether a larger value means "farther" depends on the
    # depth model's output convention - confirm for the configured model.
    sigmas = np.interp(depth_map, [0, 1], [min_sigma, max_sigma])

    blurred = np.zeros_like(image_array)
    # Each pixel has exactly one sigma, so every layer can be computed and
    # copied in turn instead of keeping all blurred layers in memory.
    for sigma in np.unique(sigmas):
        selected = sigmas == sigma
        if sigma > 0:
            layer = _blur_channels(image_array, sigma)
        else:
            # Non-positive sigma means no blur for these pixels.
            layer = image_array
        blurred[selected] = layer[selected]

    return Image.fromarray(np.clip(blurred, 0, 255).astype(np.uint8))


# Gradio Interface
def create_blur_app():
    """Build the Gradio Blocks UI that wires the controls to process_image.

    Returns:
        gr.Blocks: The assembled (not yet launched) application.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Image Blur Effects")

        with gr.Row():
            input_image = gr.Image(label="Input Image", type="pil")
            output_image = gr.Image(label="Processed Image")

        with gr.Row():
            blur_type = gr.Dropdown(
                choices=[
                    _GAUSSIAN_BACKGROUND_BLUR,
                    _DEPTH_AWARE_LENS_BLUR,
                ],
                # Default selection so the first click applies an effect
                # instead of sending blur_type=None.
                value=_GAUSSIAN_BACKGROUND_BLUR,
                label="Blur Type",
            )
            sigma = gr.Slider(
                minimum=0,
                maximum=30,
                value=15,
                label="Blur Intensity",
            )

        process_btn = gr.Button("Apply Blur Effect")
        process_btn.click(
            fn=process_image,
            inputs=[input_image, blur_type, sigma],
            outputs=output_image,
        )

    return demo


# Launch the app
if __name__ == "__main__":
    demo = create_blur_app()
    demo.launch()