Lifeinhockey committed (verified)
Commit 460d058 · Parent: 051efbb

Update app.py

Files changed (1): app.py (+464 -118)
app.py CHANGED
@@ -1,7 +1,16 @@
 import gradio as gr
 import numpy as np
 import torch
-from diffusers import StableDiffusionPipeline, ControlNetModel, StableDiffusionControlNetPipeline, StableDiffusionControlNetImg2ImgPipeline
 from peft import PeftModel, LoraConfig
 import os
 from PIL import Image
@@ -17,38 +26,35 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

-# Initialize ControlNet
-controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch_dtype)
-
 def get_lora_sd_pipeline(
-        lora_dir='./lora_man_animestyle',
-        base_model_name_or_path=None,
-        dtype=torch.float16,
         adapter_name="default"
         ):
     unet_sub_dir = os.path.join(lora_dir, "unet")
     text_encoder_sub_dir = os.path.join(lora_dir, "text_encoder")
-
     if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
         config = LoraConfig.from_pretrained(text_encoder_sub_dir)
         base_model_name_or_path = config.base_model_name_or_path
-
     if base_model_name_or_path is None:
         raise ValueError("Укажите название базовой модели или путь к ней")  # "Specify the base model name or a path to it"
-
     pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
     before_params = pipe.unet.parameters()
     pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
     pipe.unet.set_adapter(adapter_name)
     after_params = pipe.unet.parameters()
-
     if os.path.exists(text_encoder_sub_dir):
         pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
-
     if dtype in (torch.float16, torch.bfloat16):
         pipe.unet.half()
         pipe.text_encoder.half()
-
     return pipe

 def long_prompt_encoder(prompt, tokenizer, text_encoder, max_length=77):
@@ -66,20 +72,30 @@ def align_embeddings(prompt_embeds, negative_prompt_embeds):
 def preprocess_image(image, target_width, target_height, resize_to_224=False):
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-
     # If resize_to_224=True, resize to 224x224
     if resize_to_224:
         image = image.resize((224, 224), Image.LANCZOS)
     else:
         image = image.resize((target_width, target_height), Image.LANCZOS)
-
     image = np.array(image).astype(np.float32) / 255.0  # Normalize to [0, 1]
     image = image[None].transpose(0, 3, 1, 2)  # Convert to (batch, channels, height, width)
     image = torch.from_numpy(image).to(device)
-    return image

-pipe_default = get_lora_sd_pipeline(lora_dir='./lora_man_animestyle', base_model_name_or_path=model_default, dtype=torch_dtype).to(device)

 def infer(
     prompt,
     negative_prompt,
@@ -104,128 +120,453 @@ def infer(
104
  ):
105
  generator = torch.Generator(device).manual_seed(seed)
106
 
107
- # Генерация с Ip_Adapter ------------------------------------------------------------------------------------------------------------------
108
  if use_ip_adapter and ip_source_image is not None and ip_adapter_image is not None:
109
- #pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
110
- pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
111
- model_default,
112
- controlnet=controlnet,
113
- torch_dtype=torch_dtype
114
- ).to(device)
115
-
116
- # Загрузка IP-Adapter
117
- pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
118
- pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
119
-
120
- # Преобразование изображений для IP-Adapter (размер 224x224)
121
- ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
122
- ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
123
-
124
- # Создаём пайплайн IP_adapter с LoRA, если он ещё не создан
125
- if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
126
- # Загружаем LoRA для UNet
127
- pipe_ip_adapter.unet = PeftModel.from_pretrained(
128
- pipe_ip_adapter.unet,
129
- './lora_man_animestyle/unet',
130
- adapter_name="default"
131
- )
132
- pipe_ip_adapter.unet.set_adapter("default")
133
-
134
- # Загружаем LoRA для Text Encoder, если она существует
135
- text_encoder_lora_path = './lora_man_animestyle/text_encoder'
136
- if os.path.exists(text_encoder_lora_path):
137
- pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
138
- pipe_ip_adapter.text_encoder,
139
- text_encoder_lora_path,
140
- adapter_name="default"
141
- )
142
- pipe_ip_adapter.text_encoder.set_adapter("default")
143
-
144
- # Объединяем LoRA с основной моделью
145
- pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
146
- pipe_ip_adapter.lora_loaded = True # Помечаем, что LoRA загружена
147
-
148
- # Убедимся, что ip_adapter_strength имеет тип float
149
- ip_adapter_strength = float(ip_adapter_strength)
150
- #strength_ip = float(strength_ip)
151
 
152
- # Используем IP-Adapter с LoRA
153
- prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
154
- negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
155
- prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
156
- image = pipe_ip_adapter(
157
- prompt_embeds=prompt_embeds,
158
- negative_prompt_embeds=negative_prompt_embeds,
159
- image=ip_adapter_image, #ip_source_image,
160
- ip_adapter_image=ip_source_image, #ip_adapter_image,
161
- strength=strength_ip, # Коэфф. зашумления, чем больше, тем больше меняется результирующее изображение относитенльно исходного
162
- width=width,
163
- height=height,
164
- num_inference_steps=num_inference_steps,
165
- guidance_scale=guidance_scale,
166
- controlnet_conditioning_scale=1.0,
167
- generator=generator,
168
- ).images[0]
169
- else:
170
- # Генерация с ControlNet ----------------------------------------------------------------------------------------------------------------
171
- if use_control_net and control_image is not None and cn_source_image is not None:
172
- pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
173
  model_default,
174
  controlnet=controlnet,
175
  torch_dtype=torch_dtype
176
  ).to(device)
177
-
178
- # Преобразуем изображения
179
- cn_source_image = preprocess_image(cn_source_image, width, height)
180
- control_image = preprocess_image(control_image, width, height)
181
-
182
- # Создаём пайплайн ControlNet с LoRA, если он ещё не создан
183
- if not hasattr(pipe_controlnet, 'lora_loaded') or not pipe_controlnet.lora_loaded:
184
  # Загружаем LoRA для UNet
185
- pipe_controlnet.unet = PeftModel.from_pretrained(
186
- pipe_controlnet.unet,
187
- './lora_man_animestyle/unet',
188
  adapter_name="default"
189
  )
190
- pipe_controlnet.unet.set_adapter("default")
191
-
192
  # Загружаем LoRA для Text Encoder, если она существует
193
- text_encoder_lora_path = './lora_man_animestyle/text_encoder'
194
  if os.path.exists(text_encoder_lora_path):
195
- pipe_controlnet.text_encoder = PeftModel.from_pretrained(
196
- pipe_controlnet.text_encoder,
197
- text_encoder_lora_path,
198
  adapter_name="default"
199
  )
200
- pipe_controlnet.text_encoder.set_adapter("default")
201
-
202
  # Объединяем LoRA с основной моделью
203
- pipe_controlnet.fuse_lora(lora_scale=lora_scale)
204
- pipe_controlnet.lora_loaded = True # Помечаем, что LoRA загружена
205
-
206
- # Убедимся, что control_strength имеет тип float
207
- control_strength = float(control_strength)
208
- #strength_sn = float(strength_sn)
209
-
210
- # Используем ControlNet с LoRA
211
- prompt_embeds = long_prompt_encoder(prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
212
- negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
213
  prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
214
- image = pipe_controlnet(
215
  prompt_embeds=prompt_embeds,
216
  negative_prompt_embeds=negative_prompt_embeds,
217
- image=cn_source_image,
218
- control_image=control_image,
219
- strength=strength_cn, # Коэфф. зашумления, чем больше, тем больше меняется результирующее изображение относитенльно исходного
220
  width=width,
221
  height=height,
222
  num_inference_steps=num_inference_steps,
223
  guidance_scale=guidance_scale,
224
- controlnet_conditioning_scale=control_strength,
225
- generator=generator
226
  ).images[0]
227
  else:
228
- # Генерация без ControlNet и IP_adapter ---------------------------------------------------------------------------------------------
229
  if model != model_default:
230
  pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch_dtype).to(device)
231
  prompt_embeds = long_prompt_encoder(prompt, pipe.tokenizer, pipe.text_encoder)
@@ -237,7 +578,7 @@ def infer(
237
  negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe.tokenizer, pipe.text_encoder)
238
  prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
239
  pipe.fuse_lora(lora_scale=lora_scale)
240
-
241
  params = {
242
  'prompt_embeds': prompt_embeds,
243
  'negative_prompt_embeds': negative_prompt_embeds,
@@ -247,7 +588,7 @@ def infer(
247
  'height': height,
248
  'generator': generator,
249
  }
250
-
251
  image = pipe(**params).images[0]
252
 
253
  return image
@@ -256,10 +597,14 @@ def infer(
256
  examples = [
257
  "A young man in anime style. The image is characterized by high definition and resolution. Handsome, thoughtful man, attentive eyes. The man is depicted in the foreground, close-up or in the middle. High-quality images of the face, eyes, nose, lips, hands and clothes. The background and background are blurred and indistinct. The play of light and shadow is visible on the face and clothes.",
258
  "A man runs through the park against the background of trees. The man's entire figure, face, arms and legs are visible. Anime style. The best quality.",
259
  ]
260
 
261
  examples_negative = [
262
  "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image.",
 
263
  ]
264
 
265
  css = """
@@ -482,3 +827,4 @@ with gr.Blocks(css=css) as demo:
482
 
483
  if __name__ == "__main__":
484
  demo.launch()
 
 
 import gradio as gr
 import numpy as np
 import torch
+from diffusers import (
+    StableDiffusionPipeline,
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+    StableDiffusionControlNetImg2ImgPipeline,
+    AutoPipelineForImage2Image,
+    DDIMScheduler,
+    UniPCMultistepScheduler)
+from transformers import pipeline
+from diffusers.utils import load_image, make_image_grid
 from peft import PeftModel, LoraConfig
 import os
 from PIL import Image
 
 model_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

 def get_lora_sd_pipeline(
+        lora_dir='lora_man_animestyle',
+        base_model_name_or_path=None,
+        dtype=torch.float16,
         adapter_name="default"
         ):
     unet_sub_dir = os.path.join(lora_dir, "unet")
     text_encoder_sub_dir = os.path.join(lora_dir, "text_encoder")
+
     if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
         config = LoraConfig.from_pretrained(text_encoder_sub_dir)
         base_model_name_or_path = config.base_model_name_or_path
+
     if base_model_name_or_path is None:
         raise ValueError("Укажите название базовой модели или путь к ней")  # "Specify the base model name or a path to it"
+
     pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
     before_params = pipe.unet.parameters()
     pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
     pipe.unet.set_adapter(adapter_name)
     after_params = pipe.unet.parameters()
+
     if os.path.exists(text_encoder_sub_dir):
         pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
+
     if dtype in (torch.float16, torch.bfloat16):
         pipe.unet.half()
         pipe.text_encoder.half()
+
     return pipe

 def long_prompt_encoder(prompt, tokenizer, text_encoder, max_length=77):
 
 def preprocess_image(image, target_width, target_height, resize_to_224=False):
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
+
     # If resize_to_224=True, resize to 224x224
     if resize_to_224:
         image = image.resize((224, 224), Image.LANCZOS)
     else:
         image = image.resize((target_width, target_height), Image.LANCZOS)
+
     image = np.array(image).astype(np.float32) / 255.0  # Normalize to [0, 1]
     image = image[None].transpose(0, 3, 1, 2)  # Convert to (batch, channels, height, width)
     image = torch.from_numpy(image).to(device)
+    return image
+
+def get_depth_map(image, depth_estimator):
+    image = depth_estimator(image)["depth"]
+    image = np.array(image)
+    image = image[:, :, None]
+    image = np.concatenate([image, image, image], axis=2)
+    detected_map = torch.from_numpy(image).float() / 255.0
+    depth_map = detected_map.permute(2, 0, 1)
+    return depth_map
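A short sketch (not part of the commit) of how get_depth_map is typically driven; it mirrors the depth_map branch of infer below, and the image path is a placeholder.

# Hedged sketch: estimate depth with a transformers pipeline and prepare it for ControlNet.
depth_estimator = pipeline("depth-estimation")      # transformers pipeline imported above
control_image = load_image("example.jpeg")          # placeholder path
depth_map = get_depth_map(control_image, depth_estimator).unsqueeze(0).half().to(device)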

+pipe_default = get_lora_sd_pipeline(lora_dir='lora_man_animestyle', base_model_name_or_path=model_default, dtype=torch_dtype).to(device)

+# ----------------------------------------------------------------------------------------------------------------------------------------------------
 def infer(
     prompt,
     negative_prompt,
 
120
  ):
121
  generator = torch.Generator(device).manual_seed(seed)
122
 
123
+ # Генерация изображений с Ip_Adapter ------------------------------------------------------------------------------------------------------------------
124
  if use_ip_adapter and ip_source_image is not None and ip_adapter_image is not None:
 
125
 
126
+ # Режим pose_estimation ---------------------------------------------------------------------------------------------------------------------------
127
+ # prompt = "A man runs through the park against the background of trees. The man's entire figure, face, arms and legs are visible. Anime style. The best quality."
128
+ # negative_prompt = "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image."
129
+
130
+ # seed = 4
131
+ # width = 512
132
+ # height = 512
133
+ # num_inference_steps = 50
134
+ # guidance_scale = 7.5
135
+ # lora_scale = 0.7
136
+
137
+ # strength_ip = 0.9 # Коэфф. зашумления IP_adapter
138
+ # ip_adapter_strength = 0.2 # Сила влияния IP_adapter
139
+ # controlnet_conditioning_scale = 0.99 # Сила влияния ControlNet
140
+
141
+ # use_ip_adapter = True # Параметр для включения IP_adapter
142
+ # ip_source_image = load_image("ControlNet_1.jpeg") # Исходное изображение IP_adapter
143
+ # ip_adapter_image = load_image("Run.jpeg") # Контрольное изображение IP_adapter
144
+
145
+ # #ip_adapter_mode = "pose_estimation" # Режим работы Ip_Adapter
146
+
147
+ if ip_adapter_mode == "pose_estimation":
148
+
149
+ # Инициализация ControlNet
150
+ controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch_dtype)
151
+
152
+ generator = torch.Generator(device).manual_seed(seed)
153
+
154
+ pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
155
  model_default,
156
  controlnet=controlnet,
157
  torch_dtype=torch_dtype
158
  ).to(device)
159
+
160
+ # Загрузка IP-Adapter
161
+ pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
162
+ pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
163
+
164
+ # Преобразование изображений для IP-Adapter (размер 224x224)
165
+ ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
166
+ ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
167
+
168
+ # Создаём пайплайн IP_adapter с LoRA, если он ещё не создан
169
+ if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
170
  # Загружаем LoRA для UNet
171
+ pipe_ip_adapter.unet = PeftModel.from_pretrained(
172
+ pipe_ip_adapter.unet,
173
+ 'lora_man_animestyle/unet',
174
  adapter_name="default"
175
  )
176
+ pipe_ip_adapter.unet.set_adapter("default")
177
+
178
  # Загружаем LoRA для Text Encoder, если она существует
179
+ text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
180
  if os.path.exists(text_encoder_lora_path):
181
+ pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
182
+ pipe_ip_adapter.text_encoder,
183
+ text_encoder_lora_path,
184
  adapter_name="default"
185
  )
186
+ pipe_ip_adapter.text_encoder.set_adapter("default")
187
+
188
  # Объединяем LoRA с основной моделью
189
+ pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
190
+ pipe_ip_adapter.lora_loaded = True # Помечаем, что LoRA загружена
191
+
192
+ # Убедимся, что ip_adapter_strength имеет тип float
193
+ ip_adapter_strength = float(ip_adapter_strength)
194
+
195
+ # Используем IP-Adapter с LoRA
196
+ prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
197
+ negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
 
198
  prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
199
+ image = pipe_ip_adapter(
200
  prompt_embeds=prompt_embeds,
201
  negative_prompt_embeds=negative_prompt_embeds,
202
+ image=ip_adapter_image, #ip_source_image,
203
+ ip_adapter_image=ip_source_image, #ip_adapter_image,
204
+ strength=strength_ip, # Коэфф. зашумления, чем больше, тем больше меняется результирующее изображение относитенльно исходного
205
  width=width,
206
  height=height,
207
  num_inference_steps=num_inference_steps,
208
  guidance_scale=guidance_scale,
209
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
210
+ generator=generator,
211
  ).images[0]
212
  else:
213
+ # Режим edge_detection ---------------------------------------------------------------------------------------------------------------------------
214
+ # prompt = "The smiling man. His face and hands are visible. Anime style. The best quality."
215
+ # negative_prompt = "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image."
216
+
217
+ # seed = 9
218
+ # width = 512
219
+ # height = 512
220
+ # num_inference_steps = 50
221
+ # guidance_scale = 7.5
222
+ # lora_scale = 0.7
223
+
224
+ # strength_ip = 0.5 #0.9 # Коэфф. зашумления IP_adapter
225
+ # ip_adapter_strength = 0.15 #0.1 # Сила влияния IP_adapter
226
+ # controlnet_conditioning_scale = 0.6 # Сила влияния ControlNet
227
+
228
+ # use_ip_adapter = True # Параметр для включения IP_adapter
229
+ # ip_source_image = load_image("005_6.jpeg") # Исходное изображение IP_adapter
230
+ # ip_adapter_image = load_image("edges.jpeg") # Контрольное изображение IP_adapter
231
+
232
+ # #ip_adapter_mode = "edge_detection" # Режим работы Ip_Adapter
233
+
234
+ if ip_adapter_mode == "edge_detection":
235
+
236
+ # Инициализация ControlNet
237
+ controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
238
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch.float16)
239
+
240
+ generator = torch.Generator(device).manual_seed(seed)
241
+
242
+
243
+ pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
244
+ model_default,
245
+ controlnet=controlnet,
246
+ torch_dtype=torch_dtype
247
+ ).to(device)
248
+
249
+ # Загрузка IP-Adapter
250
+ #pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_face)
251
+ pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
252
+ pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
253
+
254
+ # Преобразование изображений для IP-Adapter (размер 224x224)
255
+ ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
256
+ ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
257
+
258
+ # Создаём пайплайн IP_adapter с LoRA, если он ещё не создан
259
+ if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
260
+ # Загружаем LoRA для UNet
261
+ pipe_ip_adapter.unet = PeftModel.from_pretrained(
262
+ pipe_ip_adapter.unet,
263
+ 'lora_man_animestyle/unet',
264
+ adapter_name="default"
265
+ )
266
+ pipe_ip_adapter.unet.set_adapter("default")
267
+
268
+ # Загружаем LoRA для Text Encoder, если она существует
269
+ text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
270
+ if os.path.exists(text_encoder_lora_path):
271
+ pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
272
+ pipe_ip_adapter.text_encoder,
273
+ text_encoder_lora_path,
274
+ adapter_name="default"
275
+ )
276
+ pipe_ip_adapter.text_encoder.set_adapter("default")
277
+
278
+ # Объединяем LoRA с основной моделью
279
+ pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
280
+ pipe_ip_adapter.lora_loaded = True # Помечаем, что LoRA загружена
281
+
282
+ # Убедимся, что ip_adapter_strength имеет тип float
283
+ ip_adapter_strength = float(ip_adapter_strength)
284
+
285
+ # Используем IP-Adapter с LoRA
286
+ prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
287
+ negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
288
+ prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
289
+
290
+ # scale = { # по умолчанию в остальных блоках везде 0.
291
+ # "down": {
292
+ # "block_0": [0.0, 1.0],
293
+ # "block_1": [0.0, 1.0],
294
+ # },
295
+ # "up": {
296
+ # "block_0": [0.0, 1.0, 0.0],
297
+ # "block_1": [0.0, 1.0, 0.0],
298
+ # },
299
+ # }
300
+ # scale = {
301
+ # "down": {"block_2": [0.0, 1.0]},
302
+ # "up": {"block_0": [0.0, 1.0, 0.0]},
303
+ # }
304
+ # pipe_ip_adapter.set_ip_adapter_scale(scale)
305
+
306
+ image = pipe_ip_adapter(
307
+ prompt_embeds=prompt_embeds,
308
+ negative_prompt_embeds=negative_prompt_embeds,
309
+ image=ip_adapter_image, #ip_source_image,
310
+ ip_adapter_image=ip_source_image, #ip_adapter_image,
311
+ strength=strength_ip, # Коэфф. зашумления, чем больше, тем больше меняется результирующее изображение относитенльно исходного
312
+ width=width,
313
+ height=height,
314
+ num_inference_steps=num_inference_steps,
315
+ guidance_scale=guidance_scale,
316
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
317
+ generator=generator,
318
+ ).images[0]
319
+ else:
320
+ # Режим depth_map ---------------------------------------------------------------------------------------------------------------------------
321
+ # prompt = "The smiling girl, best quality, high quality"
322
+ # negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality" #"Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image."
323
+
324
+ # seed = 6
325
+ # num_inference_steps = 50
326
+ # guidance_scale = 7.5
327
+ # lora_scale = 0.7
328
+
329
+ # strength_ip = 0.9 # Коэфф. зашумления IP_adapter
330
+ # ip_adapter_strength = 0.5 # Сила влияния IP_adapter
331
+ # controlnet_conditioning_scale = 0.99 # Сила влияния ControlNet
332
+
333
+ # use_ip_adapter = True # Параметр для включения IP_adapter
334
+ # ip_adapter_image = load_image("032_3.jpeg")
335
+ # depth_map = load_image("depth_map.jpeg")
336
+
337
+ # #ip_adapter_mode = "depth_map" # Режим работы Ip_Adapter
338
+
339
+ if ip_adapter_mode == "depth_map":
340
+
341
+ # Инициализация ControlNet
342
+ controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
343
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch.float16)
344
+
345
+ generator = torch.Generator(device).manual_seed(seed)
346
+
347
+ pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
348
+ model_default,
349
+ controlnet=controlnet,
350
+ torch_dtype=torch_dtype
351
+ ).to(device)
352
+ pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME)
353
+
354
+ pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
355
+ image = pipe_ip_adapter(
356
+ prompt=prompt,
357
+ negative_prompt=negative_prompt,
358
+ image=depth_map,
359
+ ip_adapter_image=ip_adapter_image,
360
+ num_inference_steps=num_inference_steps,
361
+ strength=strength_ip, # Коэфф. зашумления, чем больше, тем больше меняется результирующее изображение относитенльно исходного
362
+ guidance_scale=guidance_scale,
363
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
364
+ generator=generator,
365
+ ).images[0]
366
+ else:
367
+ # Генерация изображений с ControlNet ----------------------------------------------------------------------------------------------------------------
368
+ if use_control_net and control_image is not None and cn_source_image is not None:
369
+
370
+ # Режим pose_estimation ---------------------------------------------------------------------------------------------------------------------------
371
+ # prompt = "A man runs through the park against the background of trees. The man's entire figure, face, arms and legs are visible. Anime style. The best quality."
372
+ # negative_prompt = "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image."
373
+
374
+ # seed = 444
375
+ # width = 512
376
+ # height = 512
377
+ # num_inference_steps = 50
378
+ # guidance_scale = 7.5
379
+ # lora_scale = 0.7
380
+
381
+ # strength_cn = 0.9 # Коэфф. зашумления ControlNet
382
+ # control_strength = 0.92 # Сила влияния ControlNet
383
+
384
+ # use_control_net = True # Параметр для включения ControlNet
385
+ # cn_source_image = load_image("ControlNet_1.jpeg") # Исходное изображение ControlNet
386
+ # control_image = load_image("Run.jpeg") # Контрольное изображение ControlNet
387
+
388
+ # #control_mode = "pose_estimation" # Режим работы ControlNet
389
+
390
+ if control_mode == "pose_estimation":
391
+
392
+ # Инициализация ControlNet
393
+ controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch_dtype)
394
+
395
+ generator = torch.Generator(device).manual_seed(seed)
396
+
397
+ pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
398
+ model_default,
399
+ controlnet=controlnet,
400
+ torch_dtype=torch_dtype
401
+ ).to(device)
402
+
403
+ # Преобразуем изображения
404
+ cn_source_image = preprocess_image(cn_source_image, width, height)
405
+ control_image = preprocess_image(control_image, width, height)
406
+
407
+ # Создаём пайплайн ControlNet с LoRA, если он ещё не создан
408
+ if not hasattr(pipe_controlnet, 'lora_loaded') or not pipe_controlnet.lora_loaded:
409
+ # Загружаем LoRA для UNet
410
+ pipe_controlnet.unet = PeftModel.from_pretrained(
411
+ pipe_controlnet.unet,
412
+ 'lora_man_animestyle/unet',
413
+ adapter_name="default"
414
+ )
415
+ pipe_controlnet.unet.set_adapter("default")
416
+
417
+ # Загружаем LoRA для Text Encoder, если она существует
418
+ text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
419
+ if os.path.exists(text_encoder_lora_path):
420
+ pipe_controlnet.text_encoder = PeftModel.from_pretrained(
421
+ pipe_controlnet.text_encoder,
422
+ text_encoder_lora_path,
423
+ adapter_name="default"
424
+ )
425
+ pipe_controlnet.text_encoder.set_adapter("default")
426
+
427
+ # Объединяем LoRA с основной моделью
428
+ pipe_controlnet.fuse_lora(lora_scale=lora_scale)
429
+ pipe_controlnet.lora_loaded = True # Помечаем, что LoRA загружена
430
+
431
+ # Убедимся, что control_strength имеет тип float
432
+ control_strength = float(control_strength)
433
+ #strength_sn = float(strength_sn)
434
+
435
+ # Используем ControlNet с LoRA
436
+ prompt_embeds = long_prompt_encoder(prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
437
+ negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
438
+ prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
439
+ image = pipe_controlnet(
440
+ prompt_embeds=prompt_embeds,
441
+ negative_prompt_embeds=negative_prompt_embeds,
442
+ image=cn_source_image,
443
+ control_image=control_image,
444
+ strength=strength_cn, # Коэфф. зашумления, чем больше, тем больше меняется результирующее изображение относитенльно исходного
445
+ width=width,
446
+ height=height,
447
+ num_inference_steps=num_inference_steps,
448
+ guidance_scale=guidance_scale,
449
+ controlnet_conditioning_scale=control_strength,
450
+ generator=generator
451
+ ).images[0]
452
+ else:
453
+ # Режим edge_detection ---------------------------------------------------------------------------------------------------------------------------
454
+ # prompt = "The smiling girl, best quality, high quality" # "the mona lisa"
455
+ # negative_prompt = "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image."
456
+
457
+ # seed = 8 # 1 8 12 14 18
458
+ # width = 512
459
+ # height = 512
460
+ # num_inference_steps = 50
461
+ # guidance_scale = 7.5
462
+ # lora_scale = 0.7
463
+
464
+ # strength_cn = 0.2 # Коэфф. зашумления ControlNet
465
+ # control_strength = 0.8 # Сила влияния ControlNet
466
+
467
+ # use_control_net = True # Параметр для включения ControlNet
468
+ # cn_source_image = load_image("edges_w.jpeg") # Исходное изображение ControlNet
469
+ # control_image = load_image("027_0_1.jpeg") # Контрольное изображение ControlNet
470
+
471
+ # #control_mode = "edge_detection" # Режим работы ControlNet
472
+
473
+ if control_mode == "edge_detection":
474
+
475
+ controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16, use_safetensors=True)
476
+
477
+ generator = torch.Generator(device).manual_seed(seed)
478
+
479
+ pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
480
+ "stable-diffusion-v1-5/stable-diffusion-v1-5",
481
+ controlnet=controlnet,
482
+ torch_dtype=torch.float16,
483
+ use_safetensors=True
484
+ ).to(device)
485
+
486
+ pipe_controlnet.scheduler = UniPCMultistepScheduler.from_config(pipe_controlnet.scheduler.config)
487
+
488
+ image = pipe_controlnet(
489
+ prompt_embeds=prompt_embeds,
490
+ negative_prompt_embeds=negative_prompt_embeds,
491
+ image=cn_source_image,
492
+ control_image=control_image,
493
+ strength=strength_cn, # Коэфф. зашумления, чем больше, тем больше меняется результирующее изображение относитенльно исходного
494
+ width=width,
495
+ height=height,
496
+ num_inference_steps=num_inference_steps,
497
+ guidance_scale=guidance_scale,
498
+ controlnet_conditioning_scale=control_strength,
499
+ generator=generator
500
+ ).images[0]
501
+ else:
502
+ # Режим depth_map ---------------------------------------------------------------------------------------------------------------------------
503
+ # prompt = "lego batman and robin" #"Lego Harry Potter and Jean Granger" #"Harry Potter and Hagrid in the lego style" #"lego batman and robin"
504
+ # negative_prompt = "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image."
505
+
506
+ # seed = 8
507
+ # width = 512
508
+ # height = 512
509
+ # num_inference_steps = 50
510
+ # guidance_scale = 7.5
511
+ # lora_scale = 0.7
512
+
513
+ # strength_cn = 1.0 # Коэфф. зашумления ControlNet
514
+ # control_strength = 0.0 # Сила влияния ControlNet
515
+
516
+ # use_control_net = True # Параметр для включения ControlNet
517
+ # cn_source_image = load_image("edges_w.jpeg") # Исходное изображение ControlNet
518
+ # control_image = load_image("014_3.jpeg") # Контрольное изображение ControlNet
519
+
520
+ # #control_mode = "depth_map" # Режим работы ControlNet
521
+
522
+ if control_mode == "depth_map":
523
+
524
+ depth_estimator = pipeline("depth-estimation")
525
+ depth_map = get_depth_map(control_image, depth_estimator).unsqueeze(0).half().to(device)
526
+
527
+ controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11f1p_sd15_depth", torch_dtype=torch.float16, use_safetensors=True)
528
+
529
+ generator = torch.Generator(device).manual_seed(seed)
530
+
531
+ pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
532
+ "stable-diffusion-v1-5/stable-diffusion-v1-5",
533
+ controlnet=controlnet,
534
+ torch_dtype=torch.float16,
535
+ use_safetensors=True
536
+ ).to(device)
537
+
538
+ pipe_controlnet.scheduler = UniPCMultistepScheduler.from_config(pipe_controlnet.scheduler.config)
539
+
540
+ image = pipe_controlnet(
541
+ prompt=prompt,
542
+ negative_prompt=negative_prompt,
543
+ image=control_image,
544
+ control_image=depth_map,
545
+ #strength=strength_cn, # Коэфф. зашумления, чем больше, тем больше меняется результирующее изображение относитенльно исходного
546
+ width=width,
547
+ height=height,
548
+ num_inference_steps=num_inference_steps,
549
+ guidance_scale=guidance_scale,
550
+ #controlnet_conditioning_scale=control_strength,
551
+ generator=generator
552
+ ).images[0]
553
+ else:
554
+ # Генерация изображений с LORA без ControlNet и IP_Adapter ---------------------------------------------------------------------------------------------
555
+ # prompt = "A young man in anime style. The image is characterized by high definition and resolution. Handsome, thoughtful man, attentive eyes. The man is depicted in the foreground, close-up or in the middle. High-quality images of the face, eyes, nose, lips, hands and clothes. The background and background are blurred and indistinct. The play of light and shadow is visible on the face and clothes."
556
+ # negative_prompt = "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image."
557
+
558
+ # seed = 5
559
+ # width = 512
560
+ # height = 512
561
+ # num_inference_steps = 30
562
+ # guidance_scale = 7.5
563
+ # lora_scale = 0.7
564
+
565
+ # Инициализация ControlNet
566
+ controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch_dtype)
567
+
568
+ generator = torch.Generator(device).manual_seed(seed)
569
+
570
  if model != model_default:
571
  pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch_dtype).to(device)
572
  prompt_embeds = long_prompt_encoder(prompt, pipe.tokenizer, pipe.text_encoder)
 
578
  negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe.tokenizer, pipe.text_encoder)
579
  prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
580
  pipe.fuse_lora(lora_scale=lora_scale)
581
+
582
  params = {
583
  'prompt_embeds': prompt_embeds,
584
  'negative_prompt_embeds': negative_prompt_embeds,
 
588
  'height': height,
589
  'generator': generator,
590
  }
591
+
592
  image = pipe(**params).images[0]
593
 
594
  return image
 
597
  examples = [
598
  "A young man in anime style. The image is characterized by high definition and resolution. Handsome, thoughtful man, attentive eyes. The man is depicted in the foreground, close-up or in the middle. High-quality images of the face, eyes, nose, lips, hands and clothes. The background and background are blurred and indistinct. The play of light and shadow is visible on the face and clothes.",
599
  "A man runs through the park against the background of trees. The man's entire figure, face, arms and legs are visible. Anime style. The best quality.",
600
+ "The smiling man. His face and hands are visible. Anime style. The best quality.",
601
+ "The smiling girl, best quality, high quality",
602
+ "lego batman and robin",
603
  ]
604
 
605
  examples_negative = [
606
  "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image.",
607
+ "Monochrome, lowres, bad anatomy, worst quality, low quality",
608
  ]
609
 
610
  css = """
 
827
 
828
  if __name__ == "__main__":
829
  demo.launch()
830
+
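For reference, the IP-Adapter wiring added in this commit reduces to the sketch below (not part of the commit). IP_ADAPTER and WEIGHT_NAME_plus are constants defined earlier in app.py and not shown in this diff, and the image paths are placeholders.

# Hedged sketch: ControlNet (OpenPose) + IP-Adapter on top of the default SD 1.5 checkpoint.
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch_dtype)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    model_default, controlnet=controlnet, torch_dtype=torch_dtype
).to(device)
pipe.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)  # constants defined elsewhere in app.py
pipe.set_ip_adapter_scale(0.2)  # ip_adapter_strength: higher values follow the style reference more closely
image = pipe(
    prompt="A man runs through the park, anime style",
    image=load_image("pose.jpeg"),                   # placeholder: ControlNet conditioning image
    ip_adapter_image=load_image("reference.jpeg"),   # placeholder: IP-Adapter style/content reference
    num_inference_steps=50,
    guidance_scale=7.5,
    generator=torch.Generator(device).manual_seed(4),
).images[0]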