Spaces:

gchallar
/

Vision_Transformer

Running

App Files Files Community

Vision_Transformer / app.py

gchallar

Update app.py

6ebec3f verified 4 months ago

raw

history blame

5.58 kB

	import gradio as gr
	from PIL import Image, ImageFilter
	import numpy as np
	import torch
	import cv2
	from transformers import AutoImageProcessor, AutoModelForDepthEstimation, OneFormerProcessor, OneFormerForUniversalSegmentation

	# Hugging Face Hub checkpoints backing the two blur modes.
	DEPTH_CHECKPOINT = "depth-anything/Depth-Anything-V2-Small-hf"
	SEGMENTATION_CHECKPOINT = "shi-labs/oneformer_coco_swin_large"

	# Depth estimation: image preprocessor and model (used by the lens blur).
	image_processor = AutoImageProcessor.from_pretrained(DEPTH_CHECKPOINT)
	depth_model = AutoModelForDepthEstimation.from_pretrained(DEPTH_CHECKPOINT)

	# OneFormer: processor and segmentation model (used by the Gaussian blur).
	processor = OneFormerProcessor.from_pretrained(SEGMENTATION_CHECKPOINT)
	segmentation_model = OneFormerForUniversalSegmentation.from_pretrained(SEGMENTATION_CHECKPOINT)

	def apply_gaussian_blur(image, foreground_label='person'):
	    """Blur the background while keeping one semantic class sharp.

	    Runs OneFormer semantic segmentation on ``image``, selects the pixels
	    whose predicted class matches ``foreground_label`` and composites the
	    original pixels over a Gaussian-blurred copy (radius 15).

	    Args:
	        image: Input PIL image. It is converted to RGB before processing so
	            the per-pixel mask can be broadcast over exactly three channels.
	        foreground_label: Name of the class to keep sharp. It is compared
	            against the model's ``id2label`` table ignoring case and
	            surrounding whitespace.

	    Returns:
	        A new RGB PIL image with the same size as the input, or the input
	        image itself (unchanged) when the label is not in the model's class
	        mapping.
	    """
	    # Resolve the label before running the model: an unknown label means
	    # there is nothing to segment, so inference would be wasted work.
	    wanted_label = (foreground_label or "").strip().lower()
	    id2label = segmentation_model.config.id2label
	    foreground_class_id = next(
	        (
	            class_id
	            for class_id, label in id2label.items()
	            if str(label).strip().lower() == wanted_label
	        ),
	        None,
	    )

	    if foreground_class_id is None:
	        print(f"Error: Could not find the label '{foreground_label}' in the model's class mapping.")
	        return image  # Return original image if foreground label is not found

	    rgb_image = image.convert("RGB")

	    # Semantic segmentation (inference only, so no gradients are tracked).
	    inputs = processor(images=rgb_image, task_inputs=["semantic"], return_tensors="pt")
	    with torch.no_grad():
	        outputs = segmentation_model(**inputs)

	    # PIL reports size as (width, height); target_sizes is given reversed,
	    # i.e. (height, width), so the returned map lines up with the pixel array
	    # and no further mask resizing is needed.
	    predicted_semantic_map = processor.post_process_semantic_segmentation(
	        outputs, target_sizes=[rgb_image.size[::-1]]
	    )[0]
	    foreground_mask = predicted_semantic_map.cpu().numpy() == foreground_class_id

	    blurred_background = rgb_image.filter(ImageFilter.GaussianBlur(radius=15))

	    # Broadcast the (H, W) mask over the colour axis: original pixels where
	    # the foreground class was predicted, blurred pixels everywhere else.
	    final_image_array = np.where(
	        foreground_mask[..., np.newaxis],
	        np.array(rgb_image),
	        np.array(blurred_background),
	    )
	    return Image.fromarray(final_image_array.astype(np.uint8))

	def apply_lens_blur(image):
	    """Apply a depth-dependent blur using the depth-estimation model.

	    The image is resized to 512x512, a depth map is predicted for it, and
	    each pixel is taken from one of five progressively blurred copies of the
	    resized image (Gaussian sigma 0, 0.5, 1.0, 1.5, 2.0 with a 15x15 kernel).
	    Pixels with the largest predicted value stay sharp; pixels with the
	    smallest predicted value get the strongest blur.
	    NOTE(review): this assumes larger model outputs mean "nearer" - confirm
	    against the depth model's documentation.

	    Args:
	        image: Input PIL image.

	    Returns:
	        A 512x512 PIL image (the output is NOT resized back to the input
	        size).
	    """
	    # Resize image to 512x512 for processing
	    resized_image = image.resize((512, 512))
	    image_np = np.array(resized_image)

	    # Depth prediction (inference only, so no gradients are tracked).
	    inputs = image_processor(images=resized_image, return_tensors="pt")
	    with torch.no_grad():
	        outputs = depth_model(**inputs)
	        predicted_depth = outputs.predicted_depth

	    # Bring the prediction to the resized image's (height, width).
	    prediction = torch.nn.functional.interpolate(
	        predicted_depth.unsqueeze(1),
	        size=resized_image.size[::-1],
	        mode="bicubic",
	        align_corners=False,
	    ).squeeze()
	    depth_map = prediction.cpu().numpy()

	    # Normalize to 0-1. A perfectly flat depth map has zero range; dividing
	    # would yield NaNs, so treat that case as "everything in focus".
	    depth_min = np.min(depth_map)
	    depth_range = np.max(depth_map) - depth_min
	    if depth_range > 0:
	        depth_norm = (depth_map - depth_min) / depth_range
	    else:
	        depth_norm = np.ones_like(depth_map)

	    # Level 0 is the untouched image; each further level adds 0.5 sigma.
	    num_blur_levels = 5
	    blurred_layers = [image_np]
	    for level in range(1, num_blur_levels):
	        sigma = level * 0.5
	        blurred_layers.append(
	            cv2.GaussianBlur(
	                image_np, (15, 15), sigmaX=sigma, sigmaY=sigma, borderType=cv2.BORDER_REPLICATE
	            )
	        )

	    # Map normalized depth to a blur level: 1.0 -> level 0, 0.0 -> last level.
	    depth_indices = ((1 - depth_norm) * (num_blur_levels - 1)).astype(np.uint8)

	    # Copy each level's pixels in one masked assignment per level instead of
	    # visiting every pixel in a Python loop; the result is identical.
	    final_blurred_image = np.zeros_like(image_np)
	    for level, layer in enumerate(blurred_layers):
	        level_mask = depth_indices == level
	        final_blurred_image[level_mask] = layer[level_mask]

	    # Convert the final blurred image back to a PIL Image
	    return Image.fromarray(final_blurred_image)

	def process_image(image, blur_type, foreground_label='person'):
	    """Dispatch the uploaded image to the selected blur effect.

	    Args:
	        image: PIL image to process, or ``None`` when nothing was uploaded.
	        blur_type: ``"Gaussian Blur"`` selects the segmentation-based
	            background blur; any other value selects the depth-based lens
	            blur.
	        foreground_label: Class name kept sharp by the Gaussian blur; ignored
	            by the lens blur.

	    Returns:
	        The processed PIL image, or ``None`` when ``image`` is ``None``.
	    """
	    # Without an image there is nothing to process; return None rather than
	    # failing with an AttributeError inside the blur functions.
	    if image is None:
	        return None

	    if blur_type == "Gaussian Blur":
	        return apply_gaussian_blur(image, foreground_label=foreground_label)
	    return apply_lens_blur(image)

	# Gradio UI: image upload, blur-mode selector and the foreground class name.
	interface = gr.Interface(
	    fn=process_image,
	    inputs=[
	        gr.Image(type="pil", label="Upload an Image"),
	        gr.Radio(["Gaussian Blur", "Lens Blur"], label="Choose Blur Effect"),
	        # The initial text is set with `value`; `default` is a legacy keyword.
	        gr.Textbox(label="Foreground Label (for Gaussian Blur)", value="person"),
	    ],
	    # process_image returns a single image, so exactly one output component
	    # is declared; declaring more makes Gradio reject the result.
	    outputs=gr.Image(type="pil"),
	    title="Gaussian & Lens Blur Effects",
	    description="Upload an image and select either Gaussian blur (with foreground segmentation) or depth-based lens blur."
	)

	if __name__ == "__main__":
	    interface.launch()