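"""GroundBi demo.

Gradio app that runs Stable Diffusion v1.5 with a custom ControlLoRA v3
pipeline: the uploaded image is converted to a Canny edge map, which is
used as an extra condition alongside the text prompt during generation.
"""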
import gradio as gr
import torch
import numpy as np
import cv2
from diffusers import StableDiffusionPipeline, UniPCMultistepScheduler
from model import UNet2DConditionModelEx
from pipeline import StableDiffusionControlLoraV3Pipeline
from PIL import Image
import os
from huggingface_hub import login
# Log in to the Hugging Face Hub if an access token is provided
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
# Initialize the models
base_model = "runwayml/stable-diffusion-v1-5"

# Use float16 on the GPU (e.g. an A100); fall back to float32 on CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
# Load the custom UNet
unet = UNet2DConditionModelEx.from_pretrained(
    base_model,
    subfolder="unet",
    torch_dtype=dtype,
)
# Add conditioning with ow-gbi-control-lora
unet = unet.add_extra_conditions("ow-gbi-control-lora")
# Create the pipeline with the custom UNet
pipe = StableDiffusionControlLoraV3Pipeline.from_pretrained(
    base_model,
    unet=unet,
    torch_dtype=dtype,
)
pipe = pipe.to(device)  # explicit device placement
# Use a faster scheduler
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# Load the ControlLoRA weights
pipe.load_lora_weights(
    "models",
    weight_name="40kHalf.safetensors"
)
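# Note: "models" is assumed to be a directory in this Space's repo (or a Hub
# repo of that name) containing the 40kHalf.safetensors ControlLoRA weights.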
def get_canny_image(image, low_threshold=100, high_threshold=200):
    if isinstance(image, Image.Image):
        image = np.array(image)
    # Drop the alpha channel if present
    if image.ndim == 3 and image.shape[2] == 4:
        image = image[..., :3]
    canny_image = cv2.Canny(image, low_threshold, high_threshold)
    # Replicate the single-channel edge map to 3 channels for the pipeline
    canny_image = np.stack([canny_image] * 3, axis=-1)
    return Image.fromarray(canny_image)
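# Usage sketch (hypothetical file names, for illustration only):
#   edges = get_canny_image(Image.open("input.png"), low_threshold=100, high_threshold=200)
#   edges.save("input_canny.png")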
def generate_image(input_image, prompt, negative_prompt, guidance_scale, steps, low_threshold, high_threshold):
    canny_image = get_canny_image(input_image, low_threshold, high_threshold)

    with torch.no_grad():
        image = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=int(steps),
            guidance_scale=guidance_scale,
            image=canny_image,
            extra_condition_scale=1.0
        ).images[0]

    return canny_image, image
# Create the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="numpy")
            prompt = gr.Textbox(label="Prompt")
            negative_prompt = gr.Textbox(label="Negative Prompt")
            with gr.Row():
                low_threshold = gr.Slider(minimum=1, maximum=255, value=100, label="Canny Low Threshold")
                high_threshold = gr.Slider(minimum=1, maximum=255, value=200, label="Canny High Threshold")
            guidance_scale = gr.Slider(minimum=1, maximum=20, value=7.5, label="Guidance Scale")
            steps = gr.Slider(minimum=1, maximum=100, value=50, label="Steps")
            generate = gr.Button("Generate")

        with gr.Column():
            canny_output = gr.Image(label="Canny Edge Detection")
            result = gr.Image(label="Generated Image")

    generate.click(
        fn=generate_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            guidance_scale,
            steps,
            low_threshold,
            high_threshold
        ],
        outputs=[canny_output, result]
    )
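# The default launch() settings are fine on Hugging Face Spaces; when running
# locally, demo.launch(share=True) can be used instead to get a temporary public link.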
demo.launch()