Lifeinhockey committed on
Commit 6d38fd5 · verified · 1 Parent(s): ad96a1c

Update app.py

Files changed (1)
  1. app.py +83 -707
app.py CHANGED
@@ -1,555 +1,65 @@
  import gradio as gr
  import numpy as np
- import torch
- from diffusers import (
-     StableDiffusionPipeline,
-     ControlNetModel,
-     StableDiffusionControlNetPipeline,
-     StableDiffusionControlNetImg2ImgPipeline,
-     AutoPipelineForImage2Image,
-     DDIMScheduler,
-     UniPCMultistepScheduler)
- from transformers import pipeline
- from diffusers.utils import load_image, make_image_grid
- from peft import PeftModel, LoraConfig
- import os
- from PIL import Image
-
- MAX_SEED = np.iinfo(np.int32).max
- MAX_IMAGE_SIZE = 1024
- IP_ADAPTER = 'h94/IP-Adapter'
- WEIGHT_NAME = "ip-adapter_sd15.bin"
- WEIGHT_NAME_plus = "ip-adapter-plus_sd15.bin"
- WEIGHT_NAME_face = "ip-adapter-full-face_sd15.bin"
-
- model_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
- def get_lora_sd_pipeline(
-     lora_dir='lora_man_animestyle',
-     base_model_name_or_path=None,
-     dtype=torch.float16,
-     adapter_name="default"
- ):
-     unet_sub_dir = os.path.join(lora_dir, "unet")
-     text_encoder_sub_dir = os.path.join(lora_dir, "text_encoder")
-
-     if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
-         config = LoraConfig.from_pretrained(text_encoder_sub_dir)
-         base_model_name_or_path = config.base_model_name_or_path
-
-     if base_model_name_or_path is None:
-         raise ValueError("Specify the base model name or a path to it")

-     pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
-     before_params = pipe.unet.parameters()
-     pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
-     pipe.unet.set_adapter(adapter_name)
-     after_params = pipe.unet.parameters()
-
-     if os.path.exists(text_encoder_sub_dir):
-         pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
-
-     if dtype in (torch.float16, torch.bfloat16):
-         pipe.unet.half()
-         pipe.text_encoder.half()
-
-     return pipe
-
- def long_prompt_encoder(prompt, tokenizer, text_encoder, max_length=77):
-     tokens = tokenizer(prompt, truncation=False, return_tensors="pt")["input_ids"]
-     part_s = [tokens[:, i:i + max_length] for i in range(0, tokens.shape[1], max_length)]
-     with torch.no_grad():
-         embeds = [text_encoder(part.to(text_encoder.device))[0] for part in part_s]
-     return torch.cat(embeds, dim=1)

- def align_embeddings(prompt_embeds, negative_prompt_embeds):
-     max_length = max(prompt_embeds.shape[1], negative_prompt_embeds.shape[1])
-     return torch.nn.functional.pad(prompt_embeds, (0, 0, 0, max_length - prompt_embeds.shape[1])), \
-            torch.nn.functional.pad(negative_prompt_embeds, (0, 0, 0, max_length - negative_prompt_embeds.shape[1]))

- def preprocess_image(image, target_width, target_height, resize_to_224=False):
-     if isinstance(image, np.ndarray):
-         image = Image.fromarray(image)

-     # If resize_to_224=True, resize the image to 224x224
-     if resize_to_224:
-         image = image.resize((224, 224), Image.LANCZOS)
-     else:
-         image = image.resize((target_width, target_height), Image.LANCZOS)

-     image = np.array(image).astype(np.float32) / 255.0  # Normalize to [0, 1]
-     image = image[None].transpose(0, 3, 1, 2)  # Convert to (batch, channels, height, width)
-     image = torch.from_numpy(image).to(device)
-     return image

- def get_depth_map(image, depth_estimator):
-     # Convert the image to PIL if necessary
-     if isinstance(image, np.ndarray):
-         image = Image.fromarray(image)
-     elif isinstance(image, torch.Tensor):
-         image = Image.fromarray(image.cpu().numpy())
-     # Get the depth map
-     depth_map = depth_estimator(image)["depth"]
-     depth_map = np.array(depth_map)
-     depth_map = depth_map[:, :, None]  # Add a third dimension
-     depth_map = np.concatenate([depth_map, depth_map, depth_map], axis=2)  # Expand to 3 channels
-     depth_map = torch.from_numpy(depth_map).float() / 255.0  # Normalize to [0, 1]
-     depth_map = depth_map.permute(2, 0, 1)  # Reorder axes to (C, H, W)
-     return depth_map
-
- pipe_default = get_lora_sd_pipeline(lora_dir='lora_man_animestyle', base_model_name_or_path=model_default, dtype=torch_dtype).to(device)

- # ----------------------------------------------------------------------------------------------------------------------------------------------------
  def infer(
-     prompt,
-     negative_prompt,
-     model=model_default,
-     width=512,
-     height=512,
-     num_inference_steps=50,
-     seed=4,
-     guidance_scale=7.5,
-     lora_scale=0.7,
-     use_control_net=False,              # Whether to enable ControlNet
-     control_mode=None,                  # ControlNet mode selector
-     strength_cn=0.5,                    # ControlNet noising strength
-     control_strength=0.5,               # ControlNet conditioning strength
-     cn_source_image=None,               # ControlNet source image
-     control_image=None,                 # ControlNet control image
-     use_ip_adapter=False,               # Whether to enable IP_adapter
-     ip_adapter_mode=None,               # IP_adapter mode selector
-     strength_ip=0.5,                    # IP_adapter noising strength
-     ip_adapter_strength=0.5,            # IP_adapter conditioning strength
-     controlnet_conditioning_scale=0.5,  # ControlNet conditioning scale
-     ip_source_image=None,               # IP_adapter source image
-     ip_adapter_image=None,              # IP_adapter control image
-     progress=gr.Progress(track_tqdm=True)
  ):

-     # Image generation with IP_Adapter ------------------------------------------------------------------------------------------------------------------
-     if use_ip_adapter and ip_source_image is not None and ip_adapter_image is not None:
-
-         if ip_adapter_mode == "pose_estimation":
-
-             print('ip_adapter_mode = ', ip_adapter_mode)
-
-             # Initialize ControlNet
-             controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
-             controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-             generator = torch.Generator(device).manual_seed(seed)
-
-             pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
-                 model_default,
-                 controlnet=controlnet,
-                 torch_dtype=torch_dtype
-             ).to(device)
-
-             # Load the IP-Adapter
-             pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
-             pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
-
-             # Preprocess the images for the IP-Adapter (224x224)
-             ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
-             ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
-
-             # Build the IP_adapter pipeline with LoRA if it has not been created yet
-             if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
-                 # Load LoRA for the UNet
-                 pipe_ip_adapter.unet = PeftModel.from_pretrained(
-                     pipe_ip_adapter.unet,
-                     'lora_man_animestyle/unet',
-                     adapter_name="default"
-                 )
-                 pipe_ip_adapter.unet.set_adapter("default")
-
-                 # Load LoRA for the text encoder if it exists
-                 text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
-                 if os.path.exists(text_encoder_lora_path):
-                     pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
-                         pipe_ip_adapter.text_encoder,
-                         text_encoder_lora_path,
-                         adapter_name="default"
-                     )
-                     pipe_ip_adapter.text_encoder.set_adapter("default")
-
-                 # Fuse LoRA into the base model
-                 pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
-                 pipe_ip_adapter.lora_loaded = True  # Mark LoRA as loaded
-
-             # Make sure the parameters are floats
-             ip_adapter_strength = float(ip_adapter_strength)
-             controlnet_conditioning_scale = float(controlnet_conditioning_scale)
-
-             # Use the IP-Adapter with LoRA
-             prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
-             negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
-             prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-             image = pipe_ip_adapter(
-                 prompt_embeds=prompt_embeds,
-                 negative_prompt_embeds=negative_prompt_embeds,
-                 image=ip_adapter_image,
-                 ip_adapter_image=ip_source_image,
-                 strength=strength_ip,
-                 width=width,
-                 height=height,
-                 num_inference_steps=num_inference_steps,
-                 guidance_scale=guidance_scale,
-                 controlnet_conditioning_scale=controlnet_conditioning_scale,
-                 generator=generator,
-             ).images[0]
-         else:
-
-             if ip_adapter_mode == "edge_detection":
-
-                 print('ip_adapter_mode = ', ip_adapter_mode)
-
-                 # Initialize ControlNet
-                 controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
-                 controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-                 generator = torch.Generator(device).manual_seed(seed)
-
-                 pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
-                     model_default,
-                     controlnet=controlnet,
-                     torch_dtype=torch_dtype
-                 ).to(device)
-
-                 # Load the IP-Adapter
-                 #pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_face)
-                 pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
-                 pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
-
-                 # Preprocess the images for the IP-Adapter (224x224)
-                 ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
-                 ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
-
-                 # Build the IP_adapter pipeline with LoRA if it has not been created yet
-                 if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
-                     # Load LoRA for the UNet
-                     pipe_ip_adapter.unet = PeftModel.from_pretrained(
-                         pipe_ip_adapter.unet,
-                         'lora_man_animestyle/unet',
-                         adapter_name="default"
-                     )
-                     pipe_ip_adapter.unet.set_adapter("default")
-
-                     # Load LoRA for the text encoder if it exists
-                     text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
-                     if os.path.exists(text_encoder_lora_path):
-                         pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
-                             pipe_ip_adapter.text_encoder,
-                             text_encoder_lora_path,
-                             adapter_name="default"
-                         )
-                         pipe_ip_adapter.text_encoder.set_adapter("default")
-
-                     # Fuse LoRA into the base model
-                     pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
-                     pipe_ip_adapter.lora_loaded = True  # Mark LoRA as loaded
-
-                 # Make sure the parameters are floats
-                 ip_adapter_strength = float(ip_adapter_strength)
-                 controlnet_conditioning_scale = float(controlnet_conditioning_scale)
-
-                 # Use the IP-Adapter with LoRA
-                 prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
-                 negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
-                 prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-
-                 # scale = {  # 0 by default everywhere in the other blocks.
-                 #     "down": {
-                 #         "block_0": [0.0, 1.0],
-                 #         "block_1": [0.0, 1.0],
-                 #     },
-                 #     "up": {
-                 #         "block_0": [0.0, 1.0, 0.0],
-                 #         "block_1": [0.0, 1.0, 0.0],
-                 #     },
-                 # }
-                 # scale = {
-                 #     "down": {"block_2": [0.0, 1.0]},
-                 #     "up": {"block_0": [0.0, 1.0, 0.0]},
-                 # }
-                 # pipe_ip_adapter.set_ip_adapter_scale(scale)
-
-                 image = pipe_ip_adapter(
-                     prompt_embeds=prompt_embeds,
-                     negative_prompt_embeds=negative_prompt_embeds,
-                     image=ip_adapter_image,
-                     ip_adapter_image=ip_source_image,
-                     strength=strength_ip,
-                     width=width,
-                     height=height,
-                     num_inference_steps=num_inference_steps,
-                     guidance_scale=guidance_scale,
-                     controlnet_conditioning_scale=controlnet_conditioning_scale,
-                     generator=generator,
-                 ).images[0]
-             else:
-
-                 if ip_adapter_mode == "depth_map":
-
-                     print('ip_adapter_mode = ', ip_adapter_mode)
-
-                     # Make sure the parameters are floats
-                     controlnet_conditioning_scale = float(controlnet_conditioning_scale)
-
-                     # Initialize ControlNet
-                     controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
-                     controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-                     generator = torch.Generator(device).manual_seed(seed)
-
-                     # Preprocess the images for the IP-Adapter (224x224)
-                     ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
-                     ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
-
-                     pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
-                         model_default,
-                         controlnet=controlnet,
-                         torch_dtype=torch_dtype
-                     ).to(device)
-                     pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME)
-
-                     pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
-                     image = pipe_ip_adapter(
-                         prompt=prompt,
-                         negative_prompt=negative_prompt,
-                         image=ip_source_image,
-                         width=width,
-                         height=height,
-                         ip_adapter_image=ip_adapter_image,
-                         num_inference_steps=num_inference_steps,
-                         strength=strength_ip,
-                         guidance_scale=guidance_scale,
-                         controlnet_conditioning_scale=controlnet_conditioning_scale,
-                         generator=generator,
-                     ).images[0]
-                 else:
-
-                     if ip_adapter_mode == "face_model":
-
-                         print('ip_adapter_mode = ', ip_adapter_mode)
-
-                         # Preprocess the images for the IP-Adapter (224x224)
-                         ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
-                         ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
-
-                         pipe_ip_adapter = StableDiffusionPipeline.from_pretrained(
-                             model_default,
-                             torch_dtype=torch_dtype,
-                         ).to(device)
-
-                         pipe_ip_adapter.scheduler = DDIMScheduler.from_config(pipe_ip_adapter.scheduler.config)
-                         pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_face)
-
-                         generator = torch.Generator(device).manual_seed(seed)
-
-                         pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
-                         image = pipe_ip_adapter(
-                             prompt=prompt,
-                             negative_prompt=negative_prompt,
-                             ip_adapter_image=ip_adapter_image,
-                             width=width,
-                             height=height,
-                             guidance_scale=guidance_scale,
-                             num_inference_steps=num_inference_steps,
-                             generator=generator,
-                         ).images[0]
-     else:
-         # Image generation with ControlNet ----------------------------------------------------------------------------------------------------------------
-
-         if use_control_net and control_image is not None and cn_source_image is not None:
-
-             if control_mode == "pose_estimation":
-
-                 print('control_mode = ', control_mode)
-
-                 # Initialize ControlNet
-                 controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
-                 controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-                 generator = torch.Generator(device).manual_seed(seed)
-
-                 pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-                     model_default,
-                     controlnet=controlnet,
-                     torch_dtype=torch_dtype
-                 ).to(device)
-
-                 # Preprocess the images
-                 cn_source_image = preprocess_image(cn_source_image, width, height)
-                 control_image = preprocess_image(control_image, width, height)
-
-                 # Build the ControlNet pipeline with LoRA if it has not been created yet
-                 if not hasattr(pipe_controlnet, 'lora_loaded') or not pipe_controlnet.lora_loaded:
-                     # Load LoRA for the UNet
-                     pipe_controlnet.unet = PeftModel.from_pretrained(
-                         pipe_controlnet.unet,
-                         'lora_man_animestyle/unet',
-                         adapter_name="default"
-                     )
-                     pipe_controlnet.unet.set_adapter("default")
-
-                     # Load LoRA for the text encoder if it exists
-                     text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
-                     if os.path.exists(text_encoder_lora_path):
-                         pipe_controlnet.text_encoder = PeftModel.from_pretrained(
-                             pipe_controlnet.text_encoder,
-                             text_encoder_lora_path,
-                             adapter_name="default"
-                         )
-                         pipe_controlnet.text_encoder.set_adapter("default")
-
-                     # Fuse LoRA into the base model
-                     pipe_controlnet.fuse_lora(lora_scale=lora_scale)
-                     pipe_controlnet.lora_loaded = True  # Mark LoRA as loaded
-
-                 # Make sure control_strength is a float
-                 control_strength = float(control_strength)
-                 #strength_sn = float(strength_sn)
-
-                 # Use ControlNet with LoRA
-                 prompt_embeds = long_prompt_encoder(prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
-                 negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
-                 prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-                 image = pipe_controlnet(
-                     prompt_embeds=prompt_embeds,
-                     negative_prompt_embeds=negative_prompt_embeds,
-                     image=cn_source_image,
-                     control_image=control_image,
-                     strength=strength_cn,
-                     width=width,
-                     height=height,
-                     num_inference_steps=num_inference_steps,
-                     guidance_scale=guidance_scale,
-                     controlnet_conditioning_scale=control_strength,
-                     generator=generator
-                 ).images[0]
-             else:

-                 if control_mode == "edge_detection":

-                     print('control_mode = ', control_mode)

-                     controlnet_model_path = "lllyasviel/sd-controlnet-canny"
-                     controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype, use_safetensors=True)

-                     generator = torch.Generator(device).manual_seed(seed)
-
-                     pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
-                         model_default,
-                         controlnet=controlnet,
-                         torch_dtype=torch_dtype,
-                         use_safetensors=True
-                     ).to(device)
-
-                     pipe_controlnet.scheduler = UniPCMultistepScheduler.from_config(pipe_controlnet.scheduler.config)
-
-                     # Preprocess the images
-                     cn_source_image = preprocess_image(cn_source_image, width, height)
-                     control_image = preprocess_image(control_image, width, height)
-
-                     image = pipe_controlnet(
-                         prompt=prompt,
-                         negative_prompt=negative_prompt,
-                         image=cn_source_image,
-                         control_image=control_image,
-                         strength=strength_cn,
-                         width=width,
-                         height=height,
-                         num_inference_steps=num_inference_steps,
-                         guidance_scale=guidance_scale,
-                         controlnet_conditioning_scale=control_strength,
-                         generator=generator
-                     ).images[0]
-                 else:
-
-                     if control_mode == "depth_map":
-
-                         print('control_mode = ', control_mode)
-
-                         depth_estimator = pipeline("depth-estimation")
-                         depth_map = get_depth_map(control_image, depth_estimator).unsqueeze(0).half().to(device)
-
-                         controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
-                         controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype, use_safetensors=True)
-
-                         generator = torch.Generator(device).manual_seed(seed)
-
-                         pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-                             model_default,
-                             controlnet=controlnet,
-                             torch_dtype=torch_dtype,
-                             use_safetensors=True
-                         ).to(device)
-
-                         pipe_controlnet.scheduler = UniPCMultistepScheduler.from_config(pipe_controlnet.scheduler.config)
-
-                         image = pipe_controlnet(
-                             prompt=prompt,
-                             negative_prompt=negative_prompt,
-                             image=control_image,
-                             control_image=depth_map,
-                             width=width,
-                             height=height,
-                             num_inference_steps=num_inference_steps,
-                             guidance_scale=guidance_scale,
-                             generator=generator
-                         ).images[0]
-         else:
-             # Image generation with LoRA, without ControlNet and IP_Adapter ---------------------------------------------------------------------------------------------
-
-             # Initialize ControlNet
-             controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
-             controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
-
-             generator = torch.Generator(device).manual_seed(seed)
-
-             if model != model_default:
-                 pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch_dtype).to(device)
-                 prompt_embeds = long_prompt_encoder(prompt, pipe.tokenizer, pipe.text_encoder)
-                 negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe.tokenizer, pipe.text_encoder)
-                 prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-             else:
-                 pipe = pipe_default
-                 prompt_embeds = long_prompt_encoder(prompt, pipe.tokenizer, pipe.text_encoder)
-                 negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe.tokenizer, pipe.text_encoder)
-                 prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
-                 pipe.fuse_lora(lora_scale=lora_scale)
-
-             params = {
-                 'prompt_embeds': prompt_embeds,
-                 'negative_prompt_embeds': negative_prompt_embeds,
-                 'guidance_scale': guidance_scale,
-                 'num_inference_steps': num_inference_steps,
-                 'width': width,
-                 'height': height,
-                 'generator': generator,
-             }
-
-             image = pipe(**params).images[0]
-
-     return image
- # ---------------------------------------------------------------------------------------------------------------------------------------------
 
  examples = [
-     "A young man in anime style. The image is characterized by high definition and resolution. Handsome, thoughtful man, attentive eyes. The man is depicted in the foreground, close-up or in the middle. High-quality images of the face, eyes, nose, lips, hands and clothes. The background and background are blurred and indistinct. The play of light and shadow is visible on the face and clothes.",
-     "A man runs through the park against the background of trees. The man's entire figure, face, arms and legs are visible. Anime style. The best quality.",
-     "The smiling man. His face and hands are visible. Anime style. The best quality.",
-     "The smiling girl. Anime style. Best quality, high quality.",
-     "lego batman and robin. Rich and vibrant colors.",
-     "A photo of Pushkin as a hockey player in uniform with a stick, playing hockey on the ice arena in the NHL and scoring a goal.",
- ]

  examples_negative = [
-     "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image.",
-     "Monochrome, lowres, bad anatomy, worst quality, low quality",
-     "lowres, bad anatomy, worst quality, low quality, black and white image.",
  ]

  css = """
@@ -564,67 +74,54 @@ available_models = [
      "CompVis/stable-diffusion-v1-4",
  ]

- # -------------------------------------------------------------------------------------------------------------------------------------------------
  with gr.Blocks(css=css) as demo:
      with gr.Column(elem_id="col-container"):
          gr.Markdown(" # Text-to-Image Gradio Template from V. Gorsky")

-         with gr.Row():
-             model = gr.Dropdown(
-                 label="Model Selection",
-                 choices=available_models,
-                 value="stable-diffusion-v1-5/stable-diffusion-v1-5",
-                 interactive=True
-             )
-
-         prompt = gr.Textbox(
              label="Prompt",
              max_lines=1,
              placeholder="Enter your prompt",
          )

-         negative_prompt = gr.Textbox(
              label="Negative prompt",
              max_lines=1,
              placeholder="Enter a negative prompt",
          )
-
-         with gr.Row():
-             lora_scale = gr.Slider(
-                 label="LoRA scale",
-                 minimum=0.0,
-                 maximum=1.0,
-                 step=0.01,
-                 value=0.7,
-             )
-
-         with gr.Row():
-             guidance_scale = gr.Slider(
-                 label="Guidance scale",
-                 minimum=0.0,
-                 maximum=10.0,
-                 step=0.01,
-                 value=7.5,
-             )

-         with gr.Row():
-             seed = gr.Slider(
                  label="Seed",
                  minimum=0,
                  maximum=MAX_SEED,
                  step=1,
-                 value=4,
-             )

-         with gr.Row():
-             num_inference_steps = gr.Slider(
-                 label="Number of inference steps",
-                 minimum=1,
-                 maximum=100,
-                 step=1,
-                 value=50,
-             )
-
      with gr.Accordion("Advanced Settings", open=False):
          with gr.Row():
              width = gr.Slider(
@@ -632,160 +129,39 @@ with gr.Blocks(css=css) as demo:
                  minimum=256,
                  maximum=MAX_IMAGE_SIZE,
                  step=32,
-                 value=512,
              )
-
-         with gr.Row():
              height = gr.Slider(
                  label="Height",
                  minimum=256,
                  maximum=MAX_IMAGE_SIZE,
                  step=32,
-                 value=512,
              )

-         # ControlNet -----------------------------------------------------------------------------------------------
-         with gr.Blocks():
-             with gr.Row():
-                 use_control_net = gr.Checkbox(
-                     label="Use ControlNet",
-                     value=False,
-                 )
-
-             with gr.Column(visible=False) as control_net_options:
-                 strength_cn = gr.Slider(
-                     label="Strength",
-                     minimum=0.0,
-                     maximum=1.0,
-                     value=0.5,
-                     step=0.01,
-                     interactive=True,
-                 )
-
-                 control_strength = gr.Slider(
-                     label="Control Strength",
-                     minimum=0.0,
-                     maximum=1.0,
-                     value=0.5,
-                     step=0.01,
-                     interactive=True,
-                 )
-
-                 control_mode = gr.Dropdown(
-                     label="Control Mode",
-                     choices=[
-                         "pose_estimation",
-                         "edge_detection",
-                         "depth_map",
-                     ],
-                     value="pose_estimation",
-                     interactive=True,
-                 )
-
-                 cn_source_image = gr.Image(label="Upload Source Image")
-
-                 control_image = gr.Image(label="Upload Control Net Image")
-
-             use_control_net.change(
-                 fn=lambda x: gr.update(visible=x),
-                 inputs=use_control_net,
-                 outputs=control_net_options
-             )
-
-         # IP_Adapter ------------------------------------------------------------------------------------------------
-         with gr.Blocks():
-             with gr.Row():
-                 use_ip_adapter = gr.Checkbox(
-                     label="Use IP_Adapter",
-                     value=False,
-                 )
-
-             with gr.Column(visible=False) as ip_adapter_options:
-                 strength_ip = gr.Slider(
-                     label="Strength",
-                     minimum=0.0,
-                     maximum=1.0,
-                     value=0.5,
-                     step=0.01,
-                     interactive=True,
-                 )
-
-                 ip_adapter_strength = gr.Slider(
-                     label="IP_Adapter Strength",
-                     minimum=0.0,
-                     maximum=1.0,
-                     value=0.5,
-                     step=0.01,
-                     interactive=True,
-                 )
-
-                 controlnet_conditioning_scale = gr.Slider(
-                     label="Controlnet conditioning scale",
-                     minimum=0.0,
-                     maximum=1.0,
-                     value=0.5,
-                     step=0.01,
-                     interactive=True,
-                 )
-
-                 ip_adapter_mode = gr.Dropdown(
-                     label="Ip_Adapter Mode",
-                     choices=[
-                         "pose_estimation",
-                         "edge_detection",
-                         "depth_map",
-                         "face_model"
-                     ],
-                     value="pose_estimation",
-                     interactive=True,
-                 )
-
-                 ip_source_image = gr.Image(label="Upload Source Image")
-
-                 ip_adapter_image = gr.Image(label="Upload IP_Adapter Image")
-
-             use_ip_adapter.change(
-                 fn=lambda x: gr.update(visible=x),
-                 inputs=use_ip_adapter,
-                 outputs=ip_adapter_options
-             )
-         # ---------------------------------------------------------------------------------------------------------

-         gr.Examples(examples=examples, inputs=[prompt], label="Examples for prompt:")
-         gr.Examples(examples=examples_negative, inputs=[negative_prompt], label="Examples for negative prompt:")

-         run_button = gr.Button("Run", scale=1, variant="primary")
-         result = gr.Image(label="Result", show_label=False)
-
      gr.on(
          triggers=[run_button.click, prompt.submit],
          fn=infer,
          inputs=[
              prompt,
              negative_prompt,
-             model,
              width,
              height,
-             num_inference_steps,
-             seed,
              guidance_scale,
-             lora_scale,
-             use_control_net,                # Whether to enable ControlNet
-             control_mode,                   # ControlNet mode selector
-             strength_cn,                    # ControlNet noising strength
-             control_strength,               # ControlNet conditioning strength
-             cn_source_image,                # ControlNet source image
-             control_image,                  # ControlNet control image
-             use_ip_adapter,                 # Whether to enable IP_adapter
-             ip_adapter_mode,                # IP_adapter mode selector
-             strength_ip,                    # IP_adapter noising strength
-             ip_adapter_strength,            # IP_adapter conditioning strength
-             controlnet_conditioning_scale,  # ControlNet conditioning scale
-             ip_source_image,                # IP_adapter source image
-             ip_adapter_image,               # IP_adapter control image
          ],
-         outputs=[result],
      )

  if __name__ == "__main__":
      demo.launch()
 
 
  import gradio as gr
  import numpy as np
+ import random

+ from diffusers import DiffusionPipeline
+ import torch

+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model_repo_id = "stabilityai/sdxl-turbo"  # Replace with the model you would like to use

+ if torch.cuda.is_available():
+     torch_dtype = torch.float16
+ else:
+     torch_dtype = torch.float32

+ pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
+ pipe = pipe.to(device)

+ MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 1024

+ # @spaces.GPU #[uncomment to use ZeroGPU]
  def infer(
+     model,
+     prompt,
+     negative_prompt,
+     seed,
+     width,
+     height,
+     guidance_scale,
+     num_inference_steps,
+     progress=gr.Progress(track_tqdm=True),
  ):

+     global model_repo_id
+     if model != model_repo_id:
+         print(model, model_repo_id)
+         pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch_dtype)
+         pipe = pipe.to(device)

+     generator = torch.Generator().manual_seed(seed)

+     image = pipe(
+         prompt=prompt,
+         negative_prompt=negative_prompt,
+         guidance_scale=guidance_scale,
+         num_inference_steps=num_inference_steps,
+         width=width,
+         height=height,
+         generator=generator,
+     ).images[0]

+     return image, seed

  examples = [
+     "Young man in anime style. The image is of high sharpness and resolution. A handsome, thoughtful man. The man is depicted in the foreground, close-up or middle plan. The background is blurry, not sharp. The play of light and shadow is visible on the face and clothes."
+ ]

  examples_negative = [
+     "blurred details, low resolution, poor image of a man's face, poor quality, artifacts, black and white image"
  ]

  css = """
 
      "CompVis/stable-diffusion-v1-4",
  ]

  with gr.Blocks(css=css) as demo:
+
      with gr.Column(elem_id="col-container"):
          gr.Markdown(" # Text-to-Image Gradio Template from V. Gorsky")

+         model = gr.Dropdown(
+             label="Model Selection",
+             choices=available_models,
+             value="stable-diffusion-v1-5/stable-diffusion-v1-5",
+             interactive=True
+         )
+         prompt = gr.Text(
              label="Prompt",
+             show_label=False,
              max_lines=1,
              placeholder="Enter your prompt",
+             container=False,
          )

+         negative_prompt = gr.Text(
              label="Negative prompt",
              max_lines=1,
              placeholder="Enter a negative prompt",
+             visible=True,
          )

+         seed = gr.Slider(
              label="Seed",
              minimum=0,
              maximum=MAX_SEED,
              step=1,
+             value=0,
+         )
+         guidance_scale = gr.Slider(
+             label="Guidance scale",
+             minimum=0.0,
+             maximum=10.0,
+             step=0.1,
+             value=7.5,  # Replace with defaults that work for your model
+         )
+         num_inference_steps = gr.Slider(
+             label="Number of inference steps",
+             minimum=1,
+             maximum=100,
+             step=1,
+             value=30,  # Replace with defaults that work for your model
+         )

      with gr.Accordion("Advanced Settings", open=False):
          with gr.Row():
              width = gr.Slider(
                  minimum=256,
                  maximum=MAX_IMAGE_SIZE,
                  step=32,
+                 value=512,  # Replace with defaults that work for your model
              )
+
              height = gr.Slider(
                  label="Height",
                  minimum=256,
                  maximum=MAX_IMAGE_SIZE,
                  step=32,
+                 value=512,  # Replace with defaults that work for your model
              )

+         gr.Examples(examples=examples, inputs=[prompt])
+         gr.Examples(examples=examples_negative, inputs=[negative_prompt])

+         run_button = gr.Button("Run", scale=0, variant="primary")
+         result = gr.Image(label="Result", show_label=False)

      gr.on(
          triggers=[run_button.click, prompt.submit],
          fn=infer,
          inputs=[
+             model,
              prompt,
              negative_prompt,
+             seed,
              width,
              height,
              guidance_scale,
+             num_inference_steps,
          ],
+         outputs=[result, seed],
      )

  if __name__ == "__main__":
      demo.launch()
+
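
For reference, a minimal standalone sketch of the simplified generation path this commit switches to. It mirrors the new app.py; the model id, prompts, seed, and output filename below are illustrative and not part of the commit:

import torch
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the default model exposed in the updated app's dropdown.
pipe = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch_dtype
).to(device)

# Fixed seed, mirroring the Seed slider in the UI (value chosen for illustration).
generator = torch.Generator().manual_seed(42)

image = pipe(
    prompt="Young man in anime style, high sharpness and resolution",
    negative_prompt="blurred details, low resolution, artifacts",
    guidance_scale=7.5,
    num_inference_steps=30,
    width=512,
    height=512,
    generator=generator,
).images[0]
image.save("result.png")  # illustrative output path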