# imagencpu / controlnet_pipeline.py
# (file-viewer page residue kept for provenance: "ovedrive's picture",
#  "merge controlnet", "0443b19", "raw", "history blame", "2.96 kB")
import torch
import numpy as np
from PIL import Image
from transformers import pipeline
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image
import os
import huggingface_hub
import spaces
import config
class ControlNetPipeline:
    """Depth-guided Stable Diffusion + ControlNet wrapper with lazy model loading.

    Creating an instance loads nothing. The depth estimator, the ControlNet
    weights and the diffusion pipeline are loaded by ``initialize``, which
    ``process_image`` and ``generate`` call on demand the first time they run.
    """

    def __init__(self):
        """Create empty model slots; no model is loaded here."""
        self.depth_estimator = None
        self.pipe = None
        self.controlnet = None
        # Flipped to True only after every model in ``initialize`` has loaded.
        self.is_initialized = False

    @spaces.GPU
    def initialize(self):
        """Load all models once; later calls return immediately.

        Loads the depth-estimation pipeline, the ControlNet model named by
        ``config.CONTROLNET_MODEL`` and the Stable Diffusion pipeline named by
        ``config.BASE_MODEL`` (both in float16, safety checker disabled),
        swaps in the UniPC multistep scheduler, and enables memory
        optimizations.
        """
        if self.is_initialized:
            return

        # No model id is passed, so transformers chooses its default model
        # for the 'depth-estimation' task.
        self.depth_estimator = pipeline('depth-estimation')

        self.controlnet = ControlNetModel.from_pretrained(
            config.CONTROLNET_MODEL,
            torch_dtype=torch.float16,
        )

        self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
            config.BASE_MODEL,
            controlnet=self.controlnet,
            safety_checker=None,
            torch_dtype=torch.float16,
        )

        # Rebuild the scheduler from the pipeline's existing scheduler config.
        self.pipe.scheduler = UniPCMultistepScheduler.from_config(
            self.pipe.scheduler.config
        )

        # xformers attention is optional: fall back to the default attention
        # if enabling it fails. Catch Exception rather than using a bare
        # except so KeyboardInterrupt / SystemExit are not swallowed.
        try:
            self.pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            print("xformers not available, using default attention mechanism")

        self.pipe.enable_model_cpu_offload()
        self.is_initialized = True

    @spaces.GPU
    def process_image(self, image):
        """Return a 3-channel depth map image for ``image``.

        Args:
            image: Input passed straight to the depth-estimation pipeline
                (presumably a PIL image -- confirm against callers).

        Returns:
            A PIL image built from the estimator's ``'depth'`` output with
            its single channel replicated three times.
        """
        if not self.is_initialized:
            self.initialize()

        depth = self.depth_estimator(image)['depth']
        depth_array = np.array(depth)
        # Assumes the depth output is a 2-D (H, W) map; replicate it along a
        # new trailing axis to get (H, W, 3).
        depth_array = np.repeat(depth_array[:, :, None], 3, axis=2)
        return Image.fromarray(depth_array)

    @spaces.GPU
    def generate(self, prompt, image, negative_prompt=None, guidance_scale=7.5, num_inference_steps=20):
        """Generate an image from ``prompt`` conditioned on the depth of ``image``.

        Args:
            prompt: Text prompt forwarded to the diffusion pipeline.
            image: Source image; its depth map is used as the ControlNet input.
            negative_prompt: Optional negative prompt forwarded unchanged.
            guidance_scale: Guidance scale; coerced with ``float()``.
            num_inference_steps: Number of denoising steps; coerced with
                ``int()``.

        Returns:
            The first image of the pipeline output.
        """
        if not self.is_initialized:
            self.initialize()

        depth_image = self.process_image(image)

        output = self.pipe(
            prompt=prompt,
            image=depth_image,
            negative_prompt=negative_prompt,
            guidance_scale=float(guidance_scale),
            num_inference_steps=int(num_inference_steps),
        )
        return output.images[0]