Vision / app.py
sjagird1's picture
Update app.py
c092fae verified
raw
history blame
6.91 kB
import gradio as gr
import torch
import numpy as np
from PIL import Image
from scipy.ndimage import gaussian_filter
from transformers import pipeline
def preprocess_image(image, size=(512, 512)):
    """Convert the input to an RGB PIL image and resize it to a fixed size.

    Args:
        image (PIL.Image.Image | numpy.ndarray): Input image.
        size (tuple[int, int]): Target ``(width, height)`` in pixels.
            Defaults to 512x512.

    Returns:
        PIL.Image.Image: RGB image stretched to exactly ``size``. The aspect
        ratio of the input is NOT preserved.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    # The blur routines in this file index exactly three colour channels, so
    # grayscale / palette / RGBA inputs are normalised to RGB up front.
    if image.mode != "RGB":
        image = image.convert("RGB")
    return image.resize(size)
def segment_image(image, model_name="yolov8n-seg"):
    """Build a binary foreground mask from YOLO instance segmentation.

    Each detected instance mask is thresholded at 0.5 and merged (pixel-wise
    maximum) into one mask covering all detected objects.

    Args:
        image (PIL.Image.Image): Input image.
        model_name (str): Name of the YOLO segmentation model.

    Returns:
        numpy.ndarray: ``uint8`` array of shape ``(height, width)`` that is 1
        where any object was detected and 0 elsewhere.
    """
    # Imported lazily so ultralytics is only needed when segmentation is
    # actually requested.
    from ultralytics import YOLO

    # NOTE(review): the model is re-loaded on every call. Caching it would be
    # faster but needs care if handlers can run in several threads — confirm
    # before changing.
    model = YOLO(model_name)

    width, height = image.size
    mask = np.zeros((height, width), dtype=np.uint8)

    for result in model(image):
        if result.masks is None:
            continue
        for single_mask in result.masks:
            mask_array = single_mask.data.cpu().numpy().squeeze()
            mask_array = (mask_array > 0.5).astype(np.uint8)
            if mask_array.shape != mask.shape:
                # PIL's resize takes (width, height), the reverse of NumPy's
                # (rows, cols) shape used for the comparison above.
                mask_array = np.array(
                    Image.fromarray(mask_array).resize(
                        (width, height),
                        Image.NEAREST,
                    )
                )
            mask = np.maximum(mask, mask_array)
    return mask
def process_image(image, blur_type, sigma=15):
    """Apply the selected blur effect to an image.

    Args:
        image (PIL.Image.Image | numpy.ndarray | None): Input image.
        blur_type (str): ``"Gaussian Background Blur"`` or
            ``"Depth-Aware Lens Blur"``. Any other value returns the
            preprocessed image without blurring.
        sigma (float): Gaussian sigma for the background blur, or the maximum
            sigma for the depth-aware blur.

    Returns:
        PIL.Image.Image | None: The processed image, or ``None`` when no
        image was supplied.
    """
    # Nothing uploaded yet: there is nothing to process.
    if image is None:
        return None

    # The blending below assumes exactly three colour channels.
    pil_image = preprocess_image(image).convert("RGB")

    if blur_type == "Gaussian Background Blur":
        foreground = segment_image(pil_image).astype(bool)
        pixels = np.array(pil_image)
        # A sigma of 0 on the last axis blurs each colour channel
        # independently, without mixing channels.
        blurred = gaussian_filter(pixels, sigma=(sigma, sigma, 0))
        # Keep detected objects sharp and take the blurred pixels elsewhere.
        composite = np.where(foreground[:, :, np.newaxis], pixels, blurred)
        return Image.fromarray(composite.astype(np.uint8))

    if blur_type == "Depth-Aware Lens Blur":
        return apply_depth_aware_blur(pil_image, max_sigma=sigma)

    return pil_image
def apply_gaussian_blur(image, sigma=15):
    """Blur the background of an image while keeping detected objects sharp.

    Args:
        image (PIL.Image.Image): Input image.
        sigma (float): Standard deviation of the Gaussian kernel.

    Returns:
        PIL.Image.Image: RGB image whose non-object pixels are blurred.
    """
    # The blending below assumes exactly three colour channels.
    image = image.convert("RGB")
    image_array = np.array(image)

    # segment_image returns a binary mask (non-zero where an object was
    # detected), not class ids, so every non-zero pixel is foreground.
    foreground = segment_image(image) > 0

    # A sigma of 0 on the last axis blurs each colour channel independently.
    blurred = gaussian_filter(image_array, sigma=(sigma, sigma, 0))

    result = np.where(foreground[:, :, np.newaxis], image_array, blurred)
    return Image.fromarray(result.astype(np.uint8))
# Depth-estimation pipelines keyed by model name. Building a pipeline loads
# the model, so each one is created once and reused by later calls.
_DEPTH_ESTIMATORS = {}


def estimate_depth(image, model_name="depth-anything/Depth-Anything-V2-Small-hf"):
    """Estimate a depth map for the image, normalised to the range [0, 1].

    Args:
        image (PIL.Image.Image): Input image.
        model_name (str): Model identifier passed to the ``transformers``
            depth-estimation pipeline.

    Returns:
        numpy.ndarray: Float array with the minimum mapped to 0 and the
        maximum mapped to 1. All zeros when the model output is constant.
    """
    depth_estimator = _DEPTH_ESTIMATORS.get(model_name)
    if depth_estimator is None:
        use_cuda = torch.cuda.is_available()
        depth_estimator = pipeline(
            task="depth-estimation",
            model=model_name,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            # Half precision is only chosen when CUDA is present, so place
            # the model on the GPU in that case instead of relying on the
            # pipeline's default device.
            device=0 if use_cuda else -1,
        )
        _DEPTH_ESTIMATORS[model_name] = depth_estimator

    depth_output = depth_estimator(image)
    depth_map = np.array(depth_output["depth"], dtype=np.float64)

    depth_min = depth_map.min()
    depth_range = depth_map.max() - depth_min
    # A constant depth map would otherwise divide by zero and yield NaNs.
    if depth_range == 0:
        return np.zeros_like(depth_map)
    return (depth_map - depth_min) / depth_range
def apply_depth_aware_blur(image, max_sigma=10, min_sigma=0):
    """Blur each pixel by an amount that grows with its normalised depth value.

    The depth map from ``estimate_depth`` is mapped linearly onto
    ``[min_sigma, max_sigma]``: depth value 0 gets ``min_sigma`` and depth
    value 1 gets ``max_sigma``. The depth map is used directly, without
    inversion.

    NOTE(review): the original comment says this makes farther objects
    blurrier. Whether larger values mean "farther" depends on the depth
    model's output convention — confirm.

    Args:
        image (PIL.Image.Image): Input image.
        max_sigma (float): Gaussian sigma applied where the depth value is 1.
        min_sigma (float): Gaussian sigma applied where the depth value is 0.

    Returns:
        PIL.Image.Image: RGB image with spatially varying blur.
    """
    # Guard against NaNs in the depth map; they would match no sigma level
    # and leave those pixels black.
    depth_map = np.nan_to_num(estimate_depth(image), nan=0.0)

    # The per-axis sigma below assumes a (height, width, 3) array.
    image_array = np.array(image.convert("RGB"), dtype=np.float32)

    sigmas = np.interp(depth_map, [0, 1], [min_sigma, max_sigma])

    blurred = np.zeros_like(image_array)
    # Every pixel belongs to exactly one sigma level, so each blurred layer is
    # copied into place as soon as it is computed instead of keeping all
    # layers in memory at once.
    for sigma in np.unique(sigmas):
        level_mask = sigmas == sigma
        if sigma > 0:
            # A sigma of 0 on the last axis keeps colour channels separate.
            layer = gaussian_filter(image_array, sigma=(sigma, sigma, 0))
        else:
            # Zero (or negative) sigma means no blur: keep original pixels.
            layer = image_array
        blurred[level_mask] = layer[level_mask]

    # Round and clip before the integer cast so float error cannot wrap.
    return Image.fromarray(np.clip(np.rint(blurred), 0, 255).astype(np.uint8))
# Gradio Interface
def create_blur_app():
    """Build the Gradio Blocks UI for the blur demo.

    Returns:
        gradio.Blocks: App with an input image, an output image, a blur-type
        dropdown, an intensity slider, and a button wired to
        ``process_image``.
    """
    blur_choices = [
        "Gaussian Background Blur",
        "Depth-Aware Lens Blur",
    ]

    with gr.Blocks() as demo:
        gr.Markdown("# Image Blur Effects")
        with gr.Row():
            input_image = gr.Image(label="Input Image", type="pil")
            output_image = gr.Image(label="Processed Image")
        with gr.Row():
            blur_type = gr.Dropdown(
                choices=blur_choices,
                # Pre-select an effect explicitly so that clicking the button
                # without touching the dropdown still applies a blur.
                value=blur_choices[0],
                label="Blur Type",
            )
            sigma = gr.Slider(
                minimum=0,
                maximum=30,
                value=15,
                label="Blur Intensity",
            )
        process_btn = gr.Button("Apply Blur Effect")
        process_btn.click(
            fn=process_image,
            inputs=[input_image, blur_type, sigma],
            outputs=output_image,
        )
    return demo
# Script entry point
if __name__ == "__main__":
    # Build the Blocks UI, then start serving it.
    demo = create_blur_app()
    demo.launch()