# EEE515-HW3 / app.py
import gradio as gr
from PIL import Image
import torch
import cv2
import numpy as np
from torchvision import transforms
from transformers import (
    AutoModelForImageSegmentation,
    DepthProImageProcessorFast,
    DepthProForDepthEstimation,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load DepthPro once at startup; re-loading it on every request would be slow
imageProcessor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
depthModel = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf").to(device)
depthModel.eval()

birefnet = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet", trust_remote_code=True
)
torch.set_float32_matmul_precision("high")
birefnet.to(device)
birefnet.eval()
birefnet.half()
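# Assumption: the half-precision weights target a CUDA device. A CPU-only
# fallback sketch (not part of the original app):
#   if device.type != "cuda":
#       birefnet.float()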
def extract_object(image, t1, t2):
    """Segment the foreground with BiRefNet, then build a bokeh image and a
    depth-layered lens-blur image from the input."""
    image = image.convert("RGB")  # Gradio may hand over RGBA or palette images

    # Data settings: BiRefNet expects 1024x1024 ImageNet-normalized input
    image_size = (1024, 1024)
    transform_image = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    image1 = image.copy()
    input_images = transform_image(image1).unsqueeze(0).to(device).half()

    # Prediction: BiRefNet returns multi-scale maps; [-1] is the final refinement
    with torch.no_grad():
        preds = birefnet(input_images)[-1].sigmoid().cpu()
    pred = preds[0].squeeze()
    pred_pil = transforms.ToPILImage()(pred)
    mask = pred_pil.resize(image1.size)
    image1.putalpha(mask)
    # Bokeh: blur the whole frame, then composite the sharp subject back in
    blurredBg = cv2.GaussianBlur(np.array(image), (0, 0), sigmaX=15, sigmaY=15)
    maskArr = np.array(mask.convert("L"))
    _, maskBinary = cv2.threshold(maskArr, 127, 255, cv2.THRESH_BINARY)
    img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    maskInv = cv2.bitwise_not(maskBinary)
    maskInv3 = cv2.cvtColor(maskInv, cv2.COLOR_GRAY2BGR)
    foreground = cv2.bitwise_and(img, cv2.bitwise_not(maskInv3))  # sharp subject (BGR)
    background = cv2.bitwise_and(blurredBg, maskInv3)             # blurred surroundings (RGB)
    finalImg = cv2.add(cv2.cvtColor(foreground, cv2.COLOR_BGR2RGB), background)
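    # Softer alternative (sketch, not used by the app): treat the float matte
    # as an alpha channel and feather the seam instead of hard-thresholding:
    #   alpha = np.array(mask, dtype=np.float32) / 255.0
    #   finalImg = (np.array(image) * alpha[..., None]
    #               + blurredBg * (1.0 - alpha[..., None])).astype(np.uint8)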
    # Depth estimation with Apple DepthPro (loaded once at module level above)
    inputs = imageProcessor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = depthModel(**inputs)
    post_processed_output = imageProcessor.post_process_depth_estimation(
        outputs, target_sizes=[(image.height, image.width)],
    )
    field_of_view = post_processed_output[0]["field_of_view"]  # unused, kept for reference
    focal_length = post_processed_output[0]["focal_length"]    # unused, kept for reference
    depth = post_processed_output[0]["predicted_depth"]
    # Normalize metric depth to [0, 255]; DepthPro predicts distance, so small
    # values are near (foreground) and large values are far (background)
    depth = (depth - depth.min()) / (depth.max() - depth.min())
    depth = depth * 255.
    depth = depth.detach().cpu().numpy()
    depthImg = Image.fromarray(depth.astype("uint8"))  # kept for debugging
    # The sliders are percentages of the normalized depth range, mapped onto
    # [0, 255] (the earlier hard-coded cutoffs were 255/3 and 2*255/3)
    threshold1 = (t1 / 100) * 255
    threshold2 = (t2 / 100) * 255
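    # Worked example with the slider defaults: t1=33 -> 0.33 * 255 ~ 84 and
    # t2=66 -> 0.66 * 255 ~ 168, i.e. roughly the 255/3 and 2*255/3 splits
    # noted above, carving depth into near / middle / far bands.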
    # Precompute blurred versions for each region
    img_foreground = img.copy()  # no blur for the foreground
    img_middleground = cv2.GaussianBlur(img, (0, 0), sigmaX=7, sigmaY=7)
    img_background = cv2.GaussianBlur(img, (0, 0), sigmaX=15, sigmaY=15)

    # Create masks for each region (as float arrays for proper blending);
    # the three masks partition the image, so they sum to 1 at every pixel
    mask_fg = (depth < threshold1).astype(np.float32)
    mask_mg = ((depth >= threshold1) & (depth < threshold2)).astype(np.float32)
    mask_bg = (depth >= threshold2).astype(np.float32)

    # Expand masks to 3 channels (H, W, 3)
    mask_fg = np.stack([mask_fg] * 3, axis=-1)
    mask_mg = np.stack([mask_mg] * 3, axis=-1)
    mask_bg = np.stack([mask_bg] * 3, axis=-1)

    # Combine the three blur levels using the masks in a vectorized blend
    final_img = (img_foreground * mask_fg +
                 img_middleground * mask_mg +
                 img_background * mask_bg).astype(np.uint8)

    # img is BGR (OpenCV convention), so convert the blend back to RGB
    final_img_rgb = cv2.cvtColor(final_img, cv2.COLOR_BGR2RGB)
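    # Seam-softening sketch (assumption, not in the app): Gaussian-blurring the
    # region masks before blending feathers the band boundaries, e.g.
    #   mask_fg = cv2.GaussianBlur(mask_fg, (0, 0), sigmaX=5)  # likewise mg/bg
    #   total = mask_fg + mask_mg + mask_bg
    #   mask_fg, mask_mg, mask_bg = mask_fg / total, mask_mg / total, mask_bg / total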
    # Return the segmentation-based bokeh and the depth-based lens blur
    return Image.fromarray(finalImg), Image.fromarray(final_img_rgb)
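# Local smoke test (sketch; assumes a sample image "test.jpg" beside app.py):
#   from PIL import Image
#   bokeh, lens = extract_object(Image.open("test.jpg"), 33, 66)
#   bokeh.save("bokeh.jpg"); lens.save("lens.jpg")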
# Create a Gradio interface.
# Alternative Blocks-based layout, kept for reference; the gr.Interface
# defined below is the one the app launches.
def build_interface():
    """Build a Blocks UI for the Gradio app."""
    title = "Bokeh and Lens Blur"
    with gr.Blocks(theme=gr.themes.Soft(), title=title, fill_width=True) as interface:
        with gr.Row():
            with gr.Column(scale=3):
                input_image = gr.Image(type="pil", label="Input Image")
                t1 = gr.Slider(minimum=0, maximum=40, step=1, value=33, label="Middleground")
                t2 = gr.Slider(minimum=40, maximum=99, step=1, value=66, label="Background")
                submit_button = gr.Button("Submit", variant="primary")
            with gr.Column(scale=3):
                bokeh_output = gr.Image(type="pil", label="Bokeh Image")
            with gr.Column(scale=3):
                lens_output = gr.Image(type="pil", label="Lens Blur Image")
        submit_button.click(
            extract_object,
            inputs=[input_image, t1, t2],
            outputs=[bokeh_output, lens_output],
        )
    return interface
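# To preview the Blocks layout instead of the gr.Interface below (sketch):
#   build_interface().launch()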
title = "Gaussian Blur Background App"
description = (
    "Upload an image to apply a realistic background blur effect. "
    "The app segments the foreground with BiRefNet and applies a Gaussian "
    "blur (sigma=15) to the background, simulating a video-conferencing bokeh. "
    "It also estimates depth with Apple DepthPro and blurs the middleground "
    "and background at different strengths for a lens-blur effect."
)
iface = gr.Interface(
    fn=extract_object,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(minimum=0, maximum=40, step=1, value=33, label="Middleground"),
        gr.Slider(minimum=40, maximum=99, step=1, value=66, label="Background"),
    ],
    outputs=[
        gr.Image(type="pil", label="Bokeh Image"),
        gr.Image(type="pil", label="Lens Blur Image"),
    ],
    title=title,
    description=description,
    allow_flagging="never",
)
iface.queue(default_concurrency_limit=1)
iface.launch()