import gradio as gr
from PIL import Image, ImageFilter
import numpy as np
import torch
import cv2
from transformers import AutoImageProcessor, AutoModelForDepthEstimation, OneFormerProcessor, OneFormerForUniversalSegmentation

# Load depth estimation model
image_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
depth_model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")

# Load OneFormer processor and model
processor = OneFormerProcessor.from_pretrained("shi-labs/oneformer_coco_swin_large")
segmentation_model = OneFormerForUniversalSegmentation.from_pretrained("shi-labs/oneformer_coco_swin_large")
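
# Optional sketch (assumption: CUDA may be available): running both models on GPU
# speeds inference up considerably, but every `inputs` dict would then also need
# `.to(device)` before the forward pass. Left disabled so the script stays CPU-only.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# depth_model.to(device)
# segmentation_model.to(device)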

def apply_gaussian_blur(image, foreground_label='person'):
    """Applies Gaussian blur to the background based on a segmentation mask for the foreground."""
    
    # Prepare input for semantic segmentation
    inputs = processor(images=image, task_inputs=["semantic"], return_tensors="pt")

    # Semantic segmentation
    with torch.no_grad():
        outputs = segmentation_model(**inputs)

    # Processing semantic segmentation output
    predicted_semantic_map = processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
    segmentation_mask = predicted_semantic_map.cpu().numpy()

    # Get the mapping of class IDs to labels from the processor
    id2label = segmentation_model.config.id2label
    foreground_class_id = None
    for class_id, label in id2label.items():
        if label == foreground_label:
            foreground_class_id = class_id
            break

    if foreground_class_id is None:
        print(f"Error: Could not find the label '{foreground_label}' in the model's class mapping.")
        return image  # Return original image if foreground label is not found

    # Create a black background mask and set the pixels corresponding to the foreground object to white
    output_mask_array = np.zeros(segmentation_mask.shape, dtype=np.uint8)
    output_mask_array[segmentation_mask == foreground_class_id] = 255

    # Convert the output mask to a PIL Image (Grayscale)
    mask_pil = Image.fromarray(output_mask_array, mode='L')
    
    # Resize the mask to match the image size (nearest-neighbour keeps the mask binary)
    mask_pil = mask_pil.resize(image.size, Image.NEAREST)
    output_mask_array = np.array(mask_pil)

    # Create a blurred version of the input image
    blurred_background = image.filter(ImageFilter.GaussianBlur(radius=15))

    # Convert images to NumPy arrays
    img_array = np.array(image)
    blurred_array = np.array(blurred_background)

    # Create a boolean mask (foreground = True, background = False)
    foreground_mask = output_mask_array > 0
    foreground_mask_3d = np.stack([foreground_mask] * 3, axis=-1)

    # Blend the original image with the blurred background
    final_image_array = np.where(foreground_mask_3d, img_array, blurred_array)
    final_image = Image.fromarray(final_image_array.astype(np.uint8))

    return final_image
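
# Minimal usage sketch for the function above ("photo.jpg" is a hypothetical path):
# img = Image.open("photo.jpg").convert("RGB")
# apply_gaussian_blur(img, foreground_label="person").save("portrait_blur.jpg")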

def apply_lens_blur(image):
    """Applies depth-based lens blur using a pre-trained model."""
    
    # Resize image to 512x512 for processing
    resized_image = image.resize((512, 512))
    image_np = np.array(resized_image)

    # Prepare image for the model
    inputs = image_processor(images=resized_image, return_tensors="pt")

    with torch.no_grad():
        outputs = depth_model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Interpolate to the original size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=resized_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

    # Convert prediction to a NumPy array
    depth_map = prediction.cpu().numpy()

    # Normalize the depth map to the range 0-1 (epsilon guards against a constant map)
    depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)

    num_blur_levels = 5
    blurred_layers = []
    
    for i in range(num_blur_levels):
        sigma = i * 0.5
        if sigma == 0:
            blurred = image_np
        else:
            blurred = cv2.GaussianBlur(image_np, (15, 15), sigmaX=sigma, sigmaY=sigma, borderType=cv2.BORDER_REPLICATE)
        blurred_layers.append(blurred)

    # Depth Anything follows the MiDaS convention (larger prediction = closer),
    # so (1 - depth_norm) assigns stronger blur to more distant pixels.
    depth_indices = ((1 - depth_norm) * (num_blur_levels - 1)).astype(np.uint8)

    # Pick, per pixel, the blur layer selected by its depth bucket
    # (vectorized per level rather than looping over every pixel)
    final_blurred_image = np.zeros_like(image_np)
    for level, layer in enumerate(blurred_layers):
        mask = depth_indices == level
        final_blurred_image[mask] = layer[mask]

    # Convert back to a PIL Image and restore the original resolution
    final_blurred_pil_image = Image.fromarray(final_blurred_image).resize(image.size)

    return final_blurred_pil_image
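
# Minimal usage sketch for the lens blur ("photo.jpg" is a hypothetical path):
# img = Image.open("photo.jpg").convert("RGB")
# apply_lens_blur(img).save("lens_blur.jpg")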

def process_image(image, blur_type, foreground_label='person'):
    """Processes the image based on the selected blur type."""
    if blur_type == "Gaussian Blur":
        return apply_gaussian_blur(image, foreground_label=foreground_label)
    else:
        return apply_lens_blur(image)

interface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="Upload an Image"),
        gr.Radio(["Gaussian Blur", "Lens Blur"], label="Choose Blur Effect"),
        gr.Textbox(label="Foreground Label (for Gaussian Blur)", value="person")
    ],
    outputs=gr.Image(type="pil", label="Blurred Output"),
    title="Gaussian & Lens Blur Effects",
    description="Upload an image and select either Gaussian blur (with foreground segmentation) or depth-based lens blur."
)
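
# Optional sketch (assumption: a recent Gradio version): enabling the request queue
# serializes long-running model calls so concurrent users don't hit timeouts.
# interface.queue()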

if __name__ == "__main__":
    interface.launch()