Spaces:

sjagird1
/

Vision

Sleeping

File size: 6,910 Bytes

import gradio as gr
import torch
import numpy as np
from PIL import Image
from scipy.ndimage import gaussian_filter
from transformers import pipeline

def preprocess_image(image):
    """Convert the input to a 512x512 RGB PIL image.

    Args:
        image (PIL.Image.Image | numpy.ndarray): Input picture. NumPy arrays
            are wrapped with ``Image.fromarray`` first.

    Returns:
        PIL.Image.Image: RGB image resized to exactly 512x512 pixels.

    Note:
        The resize stretches the picture to a square; the aspect ratio of
        the input is NOT preserved.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # The blur routines in this file index exactly three colour channels,
    # so grayscale, palette and RGBA inputs are normalised to RGB here.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Fixed working resolution (stretches non-square inputs).
    return image.resize((512, 512))

def segment_image(image, model_name="yolov8n-seg"):
    """
    Build a binary foreground mask from YOLO instance segmentation.

    Every instance mask returned by the model is thresholded at 0.5 and
    merged (element-wise maximum) into one mask covering all detections.

    Args:
        image (PIL.Image): Input image
        model_name (str): Name of the YOLO segmentation model

    Returns:
        numpy.ndarray: uint8 array of shape (height, width) holding 1 where
        at least one instance was detected and 0 elsewhere.
    """
    # Imported inside the function so ultralytics is only required when
    # segmentation is actually requested.
    from ultralytics import YOLO

    # Load the YOLO segmentation model and run inference
    model = YOLO(model_name)
    results = model(image)

    # PIL reports size as (width, height); NumPy arrays are (rows, cols).
    width, height = image.size
    mask = np.zeros((height, width), dtype=np.uint8)

    for result in results:
        masks = result.masks
        if masks is None:
            # Nothing was detected in this result
            continue

        for single_mask in masks:
            mask_array = single_mask.data.cpu().numpy().squeeze()
            mask_array = (mask_array > 0.5).astype(np.uint8)

            # The model may return masks at its own working resolution.
            if mask_array.shape != mask.shape:
                # Image.resize expects (width, height). Passing the reversed
                # size, as the code did before, transposes the target shape
                # for any non-square image.
                mask_array = np.array(
                    Image.fromarray(mask_array).resize(
                        (width, height),
                        Image.NEAREST,
                    )
                )

            # Union of this instance with everything found so far
            mask = np.maximum(mask, mask_array)

    return mask

def process_image(image, blur_type, sigma=15):
    """Apply the selected blur effect to an image.

    Args:
        image (PIL.Image.Image | numpy.ndarray | None): Picture supplied by
            the UI. ``None`` means nothing was uploaded.
        blur_type (str): ``"Gaussian Background Blur"`` or
            ``"Depth-Aware Lens Blur"``. Any other value (including ``None``)
            returns the preprocessed image without blur.
        sigma (float): Gaussian sigma for the background blur, or the
            maximum sigma for the depth-aware blur.

    Returns:
        PIL.Image.Image: The processed image.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an internal AttributeError.
    """
    if image is None:
        raise gr.Error("Please provide an input image first.")

    # Preprocess image
    pil_image = preprocess_image(image)

    if blur_type == "Gaussian Background Blur":
        # Non-zero mask pixels mark detected objects that must stay sharp.
        foreground = segment_image(pil_image).astype(bool)

        image_array = np.array(pil_image)
        # Sigma 0 on the last axis keeps colour channels independent, which
        # is the same as filtering each channel separately.
        blurred = gaussian_filter(image_array, sigma=(sigma, sigma, 0))

        # Broadcast the 2-D mask over the channel axis: original pixels on
        # the foreground, blurred pixels everywhere else.
        result = np.where(foreground[:, :, np.newaxis], image_array, blurred)
        result = Image.fromarray(result.astype(np.uint8))

    elif blur_type == "Depth-Aware Lens Blur":
        result = apply_depth_aware_blur(pil_image, max_sigma=sigma)
    else:
        result = pil_image

    return result

def apply_gaussian_blur(image, sigma=15):
    """Blur the background while keeping segmented objects sharp.

    Args:
        image (PIL.Image.Image): RGB input image.
        sigma (float): Standard deviation of the Gaussian kernel.

    Returns:
        PIL.Image.Image: Image whose non-object pixels are blurred.
    """
    image_array = np.array(image)

    # segment_image returns a binary union mask (1 = detected object), not
    # class IDs. The earlier `== 24` comparison therefore never matched and
    # the whole image was blurred.
    foreground_mask = segment_image(image) > 0

    # Sigma 0 on the channel axis filters every colour channel independently.
    blurred = gaussian_filter(image_array, sigma=(sigma, sigma, 0))

    # Keep original pixels on the foreground, blurred pixels elsewhere.
    result = np.where(foreground_mask[:, :, np.newaxis], image_array, blurred)

    return Image.fromarray(result.astype(np.uint8))

# Depth-estimation pipelines keyed by model name, so the model is loaded once
# per process instead of on every request.
_DEPTH_ESTIMATORS = {}


def estimate_depth(image, model_name="depth-anything/Depth-Anything-V2-Small-hf"):
    """Estimate a normalized depth map for the image.

    Args:
        image (PIL.Image.Image): Input image.
        model_name (str): Hugging Face model id used for the
            "depth-estimation" pipeline.

    Returns:
        numpy.ndarray: float64 array with the model's depth output rescaled
        to [0, 1]. If the output is constant, an all-zero map is returned
        instead of dividing by zero.
    """
    depth_estimator = _DEPTH_ESTIMATORS.get(model_name)
    if depth_estimator is None:
        use_cuda = torch.cuda.is_available()
        depth_estimator = pipeline(
            task="depth-estimation",
            model=model_name,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            # Place the model on the GPU whenever half precision is chosen;
            # -1 selects the CPU.
            device=0 if use_cuda else -1,
        )
        _DEPTH_ESTIMATORS[model_name] = depth_estimator

    depth_output = depth_estimator(image)
    depth_map = np.asarray(depth_output["depth"], dtype=np.float64)

    # Normalize depth map to [0, 1]
    lowest = depth_map.min()
    value_range = depth_map.max() - lowest
    if value_range <= 0:
        # Constant depth: normalizing would produce NaN everywhere.
        return np.zeros_like(depth_map)

    return (depth_map - lowest) / value_range

def apply_depth_aware_blur(image, max_sigma=10, min_sigma=0):
    """Apply a blur whose strength grows with the normalized depth value.

    The depth map from ``estimate_depth`` is used directly (no inversion):
    a value of 0 maps to ``min_sigma`` and a value of 1 to ``max_sigma``.

    NOTE(review): whether a high depth value means "far" or "near" depends
    on the output convention of the depth model (several monocular models
    emit inverse depth) - confirm against the model card before relying on
    "farther objects get more blur".

    Args:
        image (PIL.Image.Image): RGB input image.
        max_sigma (float): Gaussian sigma used where the depth value is 1.
        min_sigma (float): Gaussian sigma used where the depth value is 0.

    Returns:
        PIL.Image.Image: Image blurred per pixel according to depth.
    """
    # Estimate depth; assumed to have the same height/width as the image.
    depth_map = estimate_depth(image)

    image_array = np.array(image).astype(np.float32)

    # Per-pixel blur strength, linear in the depth value
    sigmas = np.interp(depth_map, [0, 1], [min_sigma, max_sigma])

    blurred = np.zeros_like(image_array)

    # Blur and blend one sigma level at a time. Each pixel belongs to exactly
    # one level, so only a single blurred layer has to be alive at once
    # instead of a whole stack of full-size layers.
    for sigma in np.unique(sigmas):
        if sigma > 0:
            # Sigma 0 on the channel axis keeps colour channels independent.
            layer = gaussian_filter(image_array, sigma=(sigma, sigma, 0))
        else:
            # Non-positive sigma means "no blur": copy the original pixels.
            layer = image_array

        level_mask = sigmas == sigma
        blurred[level_mask] = layer[level_mask]

    return Image.fromarray(np.clip(blurred, 0, 255).astype(np.uint8))



# Gradio Interface
def create_blur_app():
    """Build the Gradio UI for the blur effects.

    The layout is an input/output image row, a row with the blur-type
    dropdown and the intensity slider, and a button that runs
    ``process_image`` on the current inputs.

    Returns:
        gr.Blocks: The assembled (not yet launched) application.
    """
    blur_choices = [
        "Gaussian Background Blur",
        "Depth-Aware Lens Blur",
    ]

    with gr.Blocks() as demo:
        gr.Markdown("# Image Blur Effects")

        with gr.Row():
            input_image = gr.Image(label="Input Image", type="pil")
            output_image = gr.Image(label="Processed Image")

        with gr.Row():
            blur_type = gr.Dropdown(
                choices=blur_choices,
                # Preselect an effect; with no selection the callback gets
                # None and returns the image without any blur.
                value=blur_choices[0],
                label="Blur Type",
            )
            sigma = gr.Slider(
                minimum=0,
                maximum=30,
                value=15,
                label="Blur Intensity",
            )

        process_btn = gr.Button("Apply Blur Effect")

        process_btn.click(
            fn=process_image,
            inputs=[input_image, blur_type, sigma],
            outputs=output_image,
        )

    return demo

# Launch the app only when this file is executed as a script; importing the
# module merely defines the functions above.
if __name__ == "__main__":
    demo = create_blur_app()
    demo.launch()