import sys
sys.path.append('./')

import gradio as gr
import spaces
import os
import subprocess
import random

import numpy as np
from PIL import Image
import cv2
import torch

# Install the bundled controlnet_aux package before importing its annotators
os.system("pip install -e ./controlnet_aux")
from controlnet_aux import OpenposeDetector  # , CannyDetector
from depth_anything_v2.dpt import DepthAnythingV2

from huggingface_hub import hf_hub_download, login

# Authenticate with the Hugging Face Hub (token provided as a Space secret)
hf_token = os.environ.get("HF_TOKEN")
login(token=hf_token)
MAX_SEED = np.iinfo(np.int32).max
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
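
# Depth Anything V2 encoder configurations; the ViT-L variant is loaded below for the "depth" condition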
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]},
}

ratios_map = {
    0.5: {"width": 704, "height": 1408},
    0.57: {"width": 768, "height": 1344},
    0.68: {"width": 832, "height": 1216},
    0.72: {"width": 832, "height": 1152},
    0.78: {"width": 896, "height": 1152},
    0.82: {"width": 896, "height": 1088},
    0.88: {"width": 960, "height": 1088},
    0.94: {"width": 960, "height": 1024},
    1.00: {"width": 1024, "height": 1024},
    1.13: {"width": 1088, "height": 960},
    1.21: {"width": 1088, "height": 896},
    1.29: {"width": 1152, "height": 896},
    1.38: {"width": 1152, "height": 832},
    1.46: {"width": 1216, "height": 832},
    1.67: {"width": 1280, "height": 768},
    1.75: {"width": 1344, "height": 768},
    2.00: {"width": 1408, "height": 704},
}
ratios = np.array(list(ratios_map.keys()))
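
# Load the Depth Anything V2 ViT-L checkpoint from the Hugging Face Hub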
encoder = 'vitl'
model = DepthAnythingV2(**model_configs[encoder])
filepath = hf_hub_download(repo_id="depth-anything/Depth-Anything-V2-Large", filename="depth_anything_v2_vitl.pth", repo_type="model")
state_dict = torch.load(filepath, map_location="cpu")
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()
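
# BRIA-4B-Adapt base model plus its Union ControlNet, wrapped in a multi-ControlNet container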
from diffusers.utils import load_image
from controlnet_bria import BriaControlNetModel, BriaMultiControlNetModel
from pipeline_bria_controlnet import BriaControlNetPipeline
base_model = 'briaai/BRIA-4B-Adapt'
controlnet_model = 'briaai/BRIA-4B-Adapt-ControlNet-Union'
controlnet = BriaControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
controlnet = BriaMultiControlNetModel([controlnet])
pipe = BriaControlNetPipeline.from_pretrained(base_model, controlnet=controlnet, torch_dtype=torch.bfloat16, trust_remote_code=True)
pipe.to("cuda")
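
# Mode indices expected by the Union ControlNet; strength_mapping is not referenced below (the UI slider sets the strength)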
mode_mapping = {
    "depth": 0,
    "canny": 1,
    "colorgrid": 2,
    "recolor": 3,
    "tile": 4,
    "pose": 5,
}

strength_mapping = {
    "depth": 1.0,
    "canny": 1.0,
    "colorgrid": 1.0,
    "recolor": 1.0,
    "tile": 1.0,
    "pose": 1.0,
}
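
# OpenPose annotator for the "pose" mode; TF32 matmuls and model CPU offload reduce GPU memory pressure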
open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
torch.backends.cuda.matmul.allow_tf32 = True
pipe.enable_model_cpu_offload() # for saving memory
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)


def convert_from_cv2_to_image(img: np.ndarray) -> Image:
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
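
# Run Depth Anything V2 and normalize the prediction to an 8-bit RGB depth map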
def extract_depth(image):
    image = np.asarray(image)
    depth = model.infer_image(image[:, :, ::-1])
    depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
    depth = depth.astype(np.uint8)
    gray_depth = Image.fromarray(depth).convert('RGB')
    return gray_depth
def extract_openpose(img):
    processed_image_open_pose = open_pose(img, hand_and_face=True)
    return processed_image_open_pose
def extract_canny(input_image):
    image = np.array(input_image)
    image = cv2.Canny(image, 100, 200)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    canny_image = Image.fromarray(image)
    return canny_image
def convert_to_grayscale(image):
    image = convert_from_image_to_cv2(image)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Expand back to three channels so the pipeline receives an RGB PIL image
    gray_image = Image.fromarray(cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB))
    return gray_image
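
# Legacy resolution-based tiling helper (not referenced by the current UI)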
def tile_old(input_image, resolution=768):
    input_image = convert_from_image_to_cv2(input_image)
    H, W, C = input_image.shape
    H = float(H)
    W = float(W)
    k = float(resolution) / min(H, W)
    H *= k
    W *= k
    H = int(np.round(H / 16.0)) * 16
    W = int(np.round(W / 16.0)) * 16
    img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
    img = convert_from_cv2_to_image(img)
    return img
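
# Pixelate by downscaling and nearest-neighbor upscaling; used for the "tile" and "colorgrid" conditions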
def tile(downscale_factor, input_image):
    control_image = input_image.resize(
        (input_image.size[0] // downscale_factor, input_image.size[1] // downscale_factor)
    ).resize(input_image.size, Image.NEAREST)
    return control_image
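
# Snap the input image to the closest supported aspect-ratio bucket from ratios_map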
def get_size(init_image):
    w, h = init_image.size
    curr_ratio = w / h
    ind = np.argmin(np.abs(curr_ratio - ratios))
    ratio = ratios[ind]
    chosen_ratio = ratios_map[ratio]
    w, h = chosen_ratio['width'], chosen_ratio['height']
    return w, h
def resize_img(image):
    image = image.convert('RGB')
    w, h = get_size(image)
    resized_image = image.resize((w, h))
    return resized_image
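
# Main generation entry point; runs on ZeroGPU with a 180-second allocation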
@spaces.GPU(duration=180)
def infer(cond_in, image_in, prompt, inference_steps, guidance_scale, control_mode, control_strength, seed, progress=gr.Progress(track_tqdm=True)):
    control_mode_num = mode_mapping[control_mode]

    if cond_in is None:
        if image_in is not None:
            image_in = resize_img(load_image(image_in))
            if control_mode == "canny":
                control_image = extract_canny(image_in)
            elif control_mode == "depth":
                control_image = extract_depth(image_in)
            elif control_mode == "pose":
                control_image = extract_openpose(image_in)
            elif control_mode == "colorgrid":
                control_image = tile(64, image_in)
            elif control_mode == "recolor":
                control_image = convert_to_grayscale(image_in)
            elif control_mode == "tile":
                control_image = tile(16, image_in)
    else:
        control_image = resize_img(load_image(cond_in))

    width, height = control_image.size

    image = pipe(
        prompt,
        control_image=[control_image],
        control_mode=[control_mode_num],
        width=width,
        height=height,
        controlnet_conditioning_scale=[control_strength],
        num_inference_steps=inference_steps,
        guidance_scale=guidance_scale,
        generator=torch.manual_seed(seed),
    ).images[0]
    torch.cuda.empty_cache()
    # The click handler binds two outputs: the generated image and the preprocessed condition
    return image, control_image
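
# Gradio UI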
css="""
#col-container{
margin: 0 auto;
max-width: 1080px;
}
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
# BRIA-4B-Adapt-ControlNet-Union
A unified ControlNet for the BRIA-4B-Adapt model from Bria.ai. BRIA-4B-Adapt improves the generation of humans and illustrations compared to BRIA 2.3 while still being trained on licensed data, and so provides full legal liability coverage for copyright and privacy infringement. Model card: [BRIA-4B-Adapt-ControlNet-Union](https://huggingface.co/briaai/BRIA-4B-Adapt-ControlNet-Union). <br />
""")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Row(equal_height=True):
                        cond_in = gr.Image(label="Upload a processed control image", sources=["upload"], type="filepath")
                        image_in = gr.Image(label="Extract condition from a reference image (Optional)", sources=["upload"], type="filepath")
                    prompt = gr.Textbox(label="Prompt", value="best quality")
                    with gr.Accordion("ControlNet"):
                        control_mode = gr.Radio(
                            ["depth", "canny", "colorgrid", "recolor", "tile", "pose"],
                            label="Mode",
                            value="canny",
                            info="Select the control mode; a single Union ControlNet handles all modes",
                        )
                        control_strength = gr.Slider(
                            label="Control strength",
                            minimum=0,
                            maximum=1.0,
                            step=0.05,
                            value=0.9,
                        )
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=42,
                        )
                        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    with gr.Accordion("Advanced settings", open=False):
                        with gr.Column():
                            with gr.Row():
                                inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=50, step=1, value=24)
                                guidance_scale = gr.Slider(label="Guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=3.5)
                    submit_btn = gr.Button("Submit")
                with gr.Column():
                    result = gr.Image(label="Result")
                    processed_cond = gr.Image(label="Preprocessed Cond")

    submit_btn.click(
        fn=randomize_seed_fn,
        inputs=[seed, randomize_seed],
        outputs=seed,
        queue=False,
        api_name=False,
    ).then(
        fn=infer,
        inputs=[cond_in, image_in, prompt, inference_steps, guidance_scale, control_mode, control_strength, seed],
        outputs=[result, processed_cond],
        show_api=False,
    )
demo.queue(api_open=False)
demo.launch()