Spaces:
Runtime error
Runtime error
lixiang46
committed on
Commit
·
08f2519
1
Parent(s):
88a3aee
add ipa
Browse files
- app.py +41 -16
- image/bird.png +0 -3
- image/dog.png +0 -3
app.py
CHANGED
|
@@ -23,15 +23,21 @@ device = "cuda"
|
|
| 23 |
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
|
| 24 |
ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
|
| 25 |
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")
|
|
|
|
| 26 |
|
| 27 |
text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
|
| 28 |
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
|
| 29 |
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
|
| 30 |
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
|
| 31 |
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
|
|
|
|
| 32 |
controlnet_depth = ControlNetModel.from_pretrained(f"{ckpt_dir_depth}", revision=None).half().to(device)
|
| 33 |
controlnet_canny = ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None).half().to(device)
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
|
| 36 |
vae=vae,
|
| 37 |
controlnet = controlnet_depth,
|
|
@@ -52,6 +58,14 @@ pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
|
|
| 52 |
force_zeros_for_empty_prompt=False
|
| 53 |
)
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
@spaces.GPU
|
| 56 |
def process_canny_condition(image, canny_threods=[100,200]):
|
| 57 |
np_image = image.copy()
|
|
@@ -77,6 +91,7 @@ MAX_IMAGE_SIZE = 1024
|
|
| 77 |
@spaces.GPU
|
| 78 |
def infer_depth(prompt,
|
| 79 |
image = None,
|
|
|
|
| 80 |
negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
|
| 81 |
seed = 397886929,
|
| 82 |
randomize_seed = False,
|
|
@@ -84,19 +99,22 @@ def infer_depth(prompt,
|
|
| 84 |
num_inference_steps = 50,
|
| 85 |
controlnet_conditioning_scale = 0.7,
|
| 86 |
control_guidance_end = 0.9,
|
| 87 |
-
strength = 1.0
|
|
|
|
| 88 |
):
|
| 89 |
if randomize_seed:
|
| 90 |
seed = random.randint(0, MAX_SEED)
|
| 91 |
generator = torch.Generator().manual_seed(seed)
|
| 92 |
init_image = resize_image(image, MAX_IMAGE_SIZE)
|
| 93 |
-
pipe = pipe_depth.to("cuda")
|
|
|
|
| 94 |
condi_img = process_depth_condition_midas( np.array(init_image), MAX_IMAGE_SIZE)
|
| 95 |
image = pipe(
|
| 96 |
prompt= prompt ,
|
| 97 |
image = init_image,
|
| 98 |
controlnet_conditioning_scale = controlnet_conditioning_scale,
|
| 99 |
control_guidance_end = control_guidance_end,
|
|
|
|
| 100 |
strength= strength ,
|
| 101 |
control_image = condi_img,
|
| 102 |
negative_prompt= negative_prompt ,
|
|
@@ -110,6 +128,7 @@ def infer_depth(prompt,
|
|
| 110 |
@spaces.GPU
|
| 111 |
def infer_canny(prompt,
|
| 112 |
image = None,
|
|
|
|
| 113 |
negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
|
| 114 |
seed = 397886929,
|
| 115 |
randomize_seed = False,
|
|
@@ -117,19 +136,22 @@ def infer_canny(prompt,
|
|
| 117 |
num_inference_steps = 50,
|
| 118 |
controlnet_conditioning_scale = 0.7,
|
| 119 |
control_guidance_end = 0.9,
|
| 120 |
-
strength = 1.0
|
|
|
|
| 121 |
):
|
| 122 |
if randomize_seed:
|
| 123 |
seed = random.randint(0, MAX_SEED)
|
| 124 |
generator = torch.Generator().manual_seed(seed)
|
| 125 |
init_image = resize_image(image, MAX_IMAGE_SIZE)
|
| 126 |
-
pipe = pipe_canny.to("cuda")
|
|
|
|
| 127 |
condi_img = process_canny_condition(np.array(init_image))
|
| 128 |
image = pipe(
|
| 129 |
prompt= prompt ,
|
| 130 |
image = init_image,
|
| 131 |
controlnet_conditioning_scale = controlnet_conditioning_scale,
|
| 132 |
control_guidance_end = control_guidance_end,
|
|
|
|
| 133 |
strength= strength ,
|
| 134 |
control_image = condi_img,
|
| 135 |
negative_prompt= negative_prompt ,
|
|
@@ -141,17 +163,13 @@ def infer_canny(prompt,
|
|
| 141 |
return [condi_img, image], seed
|
| 142 |
|
| 143 |
canny_examples = [
|
| 144 |
-
["
|
| 145 |
-
"image/
|
| 146 |
-
["全景,一只可爱的白色小狗坐在杯子里,看向镜头,动漫风格,3d渲染,辛烷值渲染",
|
| 147 |
-
"image/dog.png"]
|
| 148 |
]
|
| 149 |
|
| 150 |
depth_examples = [
|
| 151 |
-
["
|
| 152 |
-
"image/
|
| 153 |
-
["一只颜色鲜艳的小鸟,高品质,超清晰,色彩鲜艳,超高分辨率,最佳品质,8k,高清,4K",
|
| 154 |
-
"image/bird.png"]
|
| 155 |
]
|
| 156 |
|
| 157 |
css="""
|
|
@@ -239,6 +257,13 @@ with gr.Blocks(css=css) as Kolors:
|
|
| 239 |
step=0.1,
|
| 240 |
value=1.0,
|
| 241 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
with gr.Row():
|
| 243 |
canny_button = gr.Button("Canny", elem_id="button")
|
| 244 |
depth_button = gr.Button("Depth", elem_id="button")
|
|
@@ -251,7 +276,7 @@ with gr.Blocks(css=css) as Kolors:
|
|
| 251 |
gr.Examples(
|
| 252 |
fn = infer_canny,
|
| 253 |
examples = canny_examples,
|
| 254 |
-
inputs = [prompt, image],
|
| 255 |
outputs = [result, seed_used],
|
| 256 |
label = "Canny"
|
| 257 |
)
|
|
@@ -259,20 +284,20 @@ with gr.Blocks(css=css) as Kolors:
|
|
| 259 |
gr.Examples(
|
| 260 |
fn = infer_depth,
|
| 261 |
examples = depth_examples,
|
| 262 |
-
inputs = [prompt, image],
|
| 263 |
outputs = [result, seed_used],
|
| 264 |
label = "Depth"
|
| 265 |
)
|
| 266 |
|
| 267 |
canny_button.click(
|
| 268 |
fn = infer_canny,
|
| 269 |
-
inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
|
| 270 |
outputs = [result, seed_used]
|
| 271 |
)
|
| 272 |
|
| 273 |
depth_button.click(
|
| 274 |
fn = infer_depth,
|
| 275 |
-
inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
|
| 276 |
outputs = [result, seed_used]
|
| 277 |
)
|
| 278 |
|
|
|
|
| 23 |
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
|
| 24 |
ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
|
| 25 |
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")
|
| 26 |
+
ckpt_dir_ipa = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-Plus")
|
| 27 |
|
| 28 |
text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
|
| 29 |
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
|
| 30 |
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
|
| 31 |
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
|
| 32 |
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
|
| 33 |
+
|
| 34 |
controlnet_depth = ControlNetModel.from_pretrained(f"{ckpt_dir_depth}", revision=None).half().to(device)
|
| 35 |
controlnet_canny = ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None).half().to(device)
|
| 36 |
|
| 37 |
+
image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_dir_ipa}/weights/Kolors-IP-Adapter-Plus/image_encoder', ignore_mismatched_sizes=True).to(dtype=torch.float16, device=device)
|
| 38 |
+
ip_img_size = 336
|
| 39 |
+
clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size )
|
| 40 |
+
|
| 41 |
pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
|
| 42 |
vae=vae,
|
| 43 |
controlnet = controlnet_depth,
|
|
|
|
| 58 |
force_zeros_for_empty_prompt=False
|
| 59 |
)
|
| 60 |
|
| 61 |
+
@spaces.GPU
|
| 62 |
+
def load_ipa(pipe):
|
| 63 |
+
if hasattr(pipe.unet, 'encoder_hid_proj'):
|
| 64 |
+
pipe.unet.text_encoder_hid_proj = pipe.unet.encoder_hid_proj
|
| 65 |
+
|
| 66 |
+
pipe.load_ip_adapter( f'{ckpt_dir_ipa}/weights/Kolors-IP-Adapter-Plus' , subfolder="", weight_name=["ip_adapter_plus_general.bin"])
|
| 67 |
+
return pipe
|
| 68 |
+
|
| 69 |
@spaces.GPU
|
| 70 |
def process_canny_condition(image, canny_threods=[100,200]):
|
| 71 |
np_image = image.copy()
|
|
|
|
| 91 |
@spaces.GPU
|
| 92 |
def infer_depth(prompt,
|
| 93 |
image = None,
|
| 94 |
+
ipa_img = None,
|
| 95 |
negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
|
| 96 |
seed = 397886929,
|
| 97 |
randomize_seed = False,
|
|
|
|
| 99 |
num_inference_steps = 50,
|
| 100 |
controlnet_conditioning_scale = 0.7,
|
| 101 |
control_guidance_end = 0.9,
|
| 102 |
+
strength = 1.0,
|
| 103 |
+
ip_scale = 0.5,
|
| 104 |
):
|
| 105 |
if randomize_seed:
|
| 106 |
seed = random.randint(0, MAX_SEED)
|
| 107 |
generator = torch.Generator().manual_seed(seed)
|
| 108 |
init_image = resize_image(image, MAX_IMAGE_SIZE)
|
| 109 |
+
pipe = load_ipa(pipe_depth).to("cuda")
|
| 110 |
+
pipe.set_ip_adapter_scale([ip_scale])
|
| 111 |
condi_img = process_depth_condition_midas( np.array(init_image), MAX_IMAGE_SIZE)
|
| 112 |
image = pipe(
|
| 113 |
prompt= prompt ,
|
| 114 |
image = init_image,
|
| 115 |
controlnet_conditioning_scale = controlnet_conditioning_scale,
|
| 116 |
control_guidance_end = control_guidance_end,
|
| 117 |
+
ip_adapter_image=[ipa_img],
|
| 118 |
strength= strength ,
|
| 119 |
control_image = condi_img,
|
| 120 |
negative_prompt= negative_prompt ,
|
|
|
|
| 128 |
@spaces.GPU
|
| 129 |
def infer_canny(prompt,
|
| 130 |
image = None,
|
| 131 |
+
ipa_img = None,
|
| 132 |
negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
|
| 133 |
seed = 397886929,
|
| 134 |
randomize_seed = False,
|
|
|
|
| 136 |
num_inference_steps = 50,
|
| 137 |
controlnet_conditioning_scale = 0.7,
|
| 138 |
control_guidance_end = 0.9,
|
| 139 |
+
strength = 1.0,
|
| 140 |
+
ip_scale = 0.5,
|
| 141 |
):
|
| 142 |
if randomize_seed:
|
| 143 |
seed = random.randint(0, MAX_SEED)
|
| 144 |
generator = torch.Generator().manual_seed(seed)
|
| 145 |
init_image = resize_image(image, MAX_IMAGE_SIZE)
|
| 146 |
+
pipe = load_ipa(pipe_canny).to("cuda")
|
| 147 |
+
pipe.set_ip_adapter_scale([ip_scale])
|
| 148 |
condi_img = process_canny_condition(np.array(init_image))
|
| 149 |
image = pipe(
|
| 150 |
prompt= prompt ,
|
| 151 |
image = init_image,
|
| 152 |
controlnet_conditioning_scale = controlnet_conditioning_scale,
|
| 153 |
control_guidance_end = control_guidance_end,
|
| 154 |
+
ip_adapter_image=[ipa_img],
|
| 155 |
strength= strength ,
|
| 156 |
control_image = condi_img,
|
| 157 |
negative_prompt= negative_prompt ,
|
|
|
|
| 163 |
return [condi_img, image], seed
|
| 164 |
|
| 165 |
canny_examples = [
|
| 166 |
+
["一个红色头发的女孩,唯美风景,清新明亮,斑驳的光影,最好的质量,超细节,8K画质",
|
| 167 |
+
"image/woman_2.png", "image/2.png"],
|
|
|
|
|
|
|
| 168 |
]
|
| 169 |
|
| 170 |
depth_examples = [
|
| 171 |
+
["一个漂亮的女孩,最好的质量,超细节,8K画质",
|
| 172 |
+
"image/1.png","image/woman_1.png"],
|
|
|
|
|
|
|
| 173 |
]
|
| 174 |
|
| 175 |
css="""
|
|
|
|
| 257 |
step=0.1,
|
| 258 |
value=1.0,
|
| 259 |
)
|
| 260 |
+
ip_scale = gr.Slider(
|
| 261 |
+
label="IP_Scale",
|
| 262 |
+
minimum=0.0,
|
| 263 |
+
maximum=1.0,
|
| 264 |
+
step=0.1,
|
| 265 |
+
value=0.5,
|
| 266 |
+
)
|
| 267 |
with gr.Row():
|
| 268 |
canny_button = gr.Button("Canny", elem_id="button")
|
| 269 |
depth_button = gr.Button("Depth", elem_id="button")
|
|
|
|
| 276 |
gr.Examples(
|
| 277 |
fn = infer_canny,
|
| 278 |
examples = canny_examples,
|
| 279 |
+
inputs = [prompt, image, ipa_image],
|
| 280 |
outputs = [result, seed_used],
|
| 281 |
label = "Canny"
|
| 282 |
)
|
|
|
|
| 284 |
gr.Examples(
|
| 285 |
fn = infer_depth,
|
| 286 |
examples = depth_examples,
|
| 287 |
+
inputs = [prompt, image, ipa_image],
|
| 288 |
outputs = [result, seed_used],
|
| 289 |
label = "Depth"
|
| 290 |
)
|
| 291 |
|
| 292 |
canny_button.click(
|
| 293 |
fn = infer_canny,
|
| 294 |
+
inputs = [prompt, image, ipa_image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength, ip_scale],
|
| 295 |
outputs = [result, seed_used]
|
| 296 |
)
|
| 297 |
|
| 298 |
depth_button.click(
|
| 299 |
fn = infer_depth,
|
| 300 |
+
inputs = [prompt, image, ipa_image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength, ip_scale],
|
| 301 |
outputs = [result, seed_used]
|
| 302 |
)
|
| 303 |
|
image/bird.png
DELETED
Git LFS Details
|
image/dog.png
DELETED
Git LFS Details
|