Lifeinhockey committed on
Commit 48ad347 · verified · 1 Parent(s): 9ba7c22

Update app.py

Files changed (1)
  1. app.py +709 -103
app.py CHANGED
@@ -1,74 +1,556 @@
1
  import gradio as gr
2
  import numpy as np
3
- import random
4
- from diffusers import DiffusionPipeline
5
  import torch
6
-
7
- ###########################################
8
  from rembg import remove
9
- from PIL import Image
10
- ###############################################
11
 
12
- device = "cuda" if torch.cuda.is_available() else "cpu"
13
- model_repo_id = "stabilityai/sdxl-turbo"
14
 
15
- if torch.cuda.is_available():
16
- torch_dtype = torch.float16
17
- else:
18
- torch_dtype = torch.float32
19
 
20
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
21
- pipe = pipe.to(device)
22
 
23
- MAX_SEED = np.iinfo(np.int32).max
24
- MAX_IMAGE_SIZE = 1024
27
  def infer(
28
- model,
29
- prompt,
30
- negative_prompt,
31
- seed,
32
- width,
33
- height,
34
- guidance_scale,
35
- num_inference_steps,
36
- remove_bg, #################################################################
37
- progress=gr.Progress(track_tqdm=True),
38
  ):
39
- global model_repo_id
40
- if model != model_repo_id:
41
- print(model, model_repo_id)
42
- pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch_dtype)
43
- pipe = pipe.to(device)
44
-
45
- generator = torch.Generator().manual_seed(seed)
46
-
47
- image = pipe(
48
- prompt=prompt,
49
- negative_prompt=negative_prompt,
50
- guidance_scale=guidance_scale,
51
- num_inference_steps=num_inference_steps,
52
- width=width,
53
- height=height,
54
- generator=generator,
55
- ).images[0]
56
-
57
- #############################################################
58
- # If background removal is selected
59
- if remove_bg:
60
- image = remove(image)
61
- ##############################################################
62
 
63
- return image, seed
64
 
65
 
66
  examples = [
67
- "Young man in anime style. The image is of high sharpness and resolution. A handsome, thoughtful man. The man is depicted in the foreground, close-up or middle plan. The background is blurry, not sharp. The play of light and shadow is visible on the face and clothes."
68
- ]
69
 
70
  examples_negative = [
71
- "blurred details, low resolution, poor image of a man's face, poor quality, artifacts, black and white image"
72
  ]
73
 
74
  css = """
@@ -83,54 +565,67 @@ available_models = [
83
  "CompVis/stable-diffusion-v1-4",
84
  ]
85
 
 
86
  with gr.Blocks(css=css) as demo:
87
-
88
  with gr.Column(elem_id="col-container"):
89
  gr.Markdown(" # Text-to-Image Gradio Template from V. Gorsky")
90
 
91
- model = gr.Dropdown(
92
- label="Model Selection",
93
- choices=available_models,
94
- value="stable-diffusion-v1-5/stable-diffusion-v1-5",
95
- interactive=True
96
- )
97
- prompt = gr.Text(
 
 
98
  label="Prompt",
99
- show_label=False,
100
  max_lines=1,
101
  placeholder="Enter your prompt",
102
- container=False,
103
  )
104
 
105
- negative_prompt = gr.Text(
106
  label="Negative prompt",
107
  max_lines=1,
108
  placeholder="Enter a negative prompt",
109
- visible=True,
110
  )
111
 
112
- seed = gr.Slider(
 
113
  label="Seed",
114
  minimum=0,
115
  maximum=MAX_SEED,
116
  step=1,
117
- value=0,
118
- )
119
- guidance_scale = gr.Slider(
120
- label="Guidance scale",
121
- minimum=0.0,
122
- maximum=10.0,
123
- step=0.1,
124
- value=7.5,
125
- )
126
- num_inference_steps = gr.Slider(
127
- label="Number of inference steps",
128
- minimum=1,
129
- maximum=100,
130
- step=1,
131
- value=30,
132
- )
133
 
134
  with gr.Accordion("Advanced Settings", open=False):
135
  with gr.Row():
136
  width = gr.Slider(
@@ -138,49 +633,160 @@ with gr.Blocks(css=css) as demo:
138
  minimum=256,
139
  maximum=MAX_IMAGE_SIZE,
140
  step=32,
141
- value=512,
142
  )
143
-
 
144
  height = gr.Slider(
145
  label="Height",
146
  minimum=256,
147
  maximum=MAX_IMAGE_SIZE,
148
  step=32,
149
- value=512,
150
  )
151
 
152
- #########################################################
153
- # Add a checkbox for background removal
154
- remove_bg = gr.Checkbox(
155
- label="Remove Background",
156
- value=False,
157
- interactive=True
158
- )
159
- ##########################################################
160
 
161
- gr.Examples(examples=examples, inputs=[prompt])
162
- gr.Examples(examples=examples_negative, inputs=[negative_prompt])
163
 
164
- run_button = gr.Button("Run", scale=0, variant="primary")
165
- result = gr.Image(label="Result", show_label=False)
166
 
167
  gr.on(
168
  triggers=[run_button.click, prompt.submit],
169
  fn=infer,
170
  inputs=[
171
- model,
172
  prompt,
173
  negative_prompt,
174
- seed,
175
  width,
176
  height,
177
- guidance_scale,
178
  num_inference_steps,
179
- remove_bg, # Add remove_bg to inputs
180
  ],
181
- outputs=[result, seed],
182
  )
183
 
184
  if __name__ == "__main__":
185
  demo.launch()
186
-
 
1
  import gradio as gr
2
  import numpy as np
 
 
3
  import torch
4
+ from diffusers import (
5
+ StableDiffusionPipeline,
6
+ ControlNetModel,
7
+ StableDiffusionControlNetPipeline,
8
+ StableDiffusionControlNetImg2ImgPipeline,
9
+ AutoPipelineForImage2Image,
10
+ DDIMScheduler,
11
+ UniPCMultistepScheduler)
12
+ from transformers import pipeline
13
+ from diffusers.utils import load_image, make_image_grid
14
+ from peft import PeftModel, LoraConfig
15
+ import os
16
+ from PIL import Image
17
  from rembg import remove
 
 
18
 
19
+ MAX_SEED = np.iinfo(np.int32).max
20
+ MAX_IMAGE_SIZE = 1024
21
+ IP_ADAPTER = 'h94/IP-Adapter'
22
+ WEIGHT_NAME = "ip-adapter_sd15.bin"
23
+ WEIGHT_NAME_plus = "ip-adapter-plus_sd15.bin"
24
+ WEIGHT_NAME_face = "ip-adapter-full-face_sd15.bin"
25
 
26
+ model_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"
27
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
29
 
30
+ def get_lora_sd_pipeline(
31
+ lora_dir='lora_man_animestyle',
32
+ base_model_name_or_path=None,
33
+ dtype=torch.float16,
34
+ adapter_name="default"
35
+ ):
36
+ unet_sub_dir = os.path.join(lora_dir, "unet")
37
+ text_encoder_sub_dir = os.path.join(lora_dir, "text_encoder")
38
 
39
+ if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
40
+ config = LoraConfig.from_pretrained(text_encoder_sub_dir)
41
+ base_model_name_or_path = config.base_model_name_or_path
42
+
43
+ if base_model_name_or_path is None:
44
+ raise ValueError("Specify the base model name or a path to it")
45
+
46
+ pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype)
47
+ before_params = pipe.unet.parameters()
48
+ pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
49
+ pipe.unet.set_adapter(adapter_name)
50
+ after_params = pipe.unet.parameters()
51
+
52
+ if os.path.exists(text_encoder_sub_dir):
53
+ pipe.text_encoder = PeftModel.from_pretrained(pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name)
54
 
55
+ if dtype in (torch.float16, torch.bfloat16):
56
+ pipe.unet.half()
57
+ pipe.text_encoder.half()
58
 
59
+ return pipe
60
+
61
+ def long_prompt_encoder(prompt, tokenizer, text_encoder, max_length=77):
62
+ tokens = tokenizer(prompt, truncation=False, return_tensors="pt")["input_ids"]
63
+ part_s = [tokens[:, i:i + max_length] for i in range(0, tokens.shape[1], max_length)]
64
+ with torch.no_grad():
65
+ embeds = [text_encoder(part.to(text_encoder.device))[0] for part in part_s]
66
+ return torch.cat(embeds, dim=1)
67
+
68
+ def align_embeddings(prompt_embeds, negative_prompt_embeds):
69
+ max_length = max(prompt_embeds.shape[1], negative_prompt_embeds.shape[1])
70
+ return torch.nn.functional.pad(prompt_embeds, (0, 0, 0, max_length - prompt_embeds.shape[1])), \
71
+ torch.nn.functional.pad(negative_prompt_embeds, (0, 0, 0, max_length - negative_prompt_embeds.shape[1]))
72
+
73
+ def preprocess_image(image, target_width, target_height, resize_to_224=False):
74
+ if isinstance(image, np.ndarray):
75
+ image = Image.fromarray(image)
76
+
77
+ # If resize_to_224=True, resize to 224x224
78
+ if resize_to_224:
79
+ image = image.resize((224, 224), Image.LANCZOS)
80
+ else:
81
+ image = image.resize((target_width, target_height), Image.LANCZOS)
82
+
83
+ image = np.array(image).astype(np.float32) / 255.0 # Normalize to [0, 1]
84
+ image = image[None].transpose(0, 3, 1, 2) # Convert to (batch, channels, height, width)
85
+ image = torch.from_numpy(image).to(device)
86
+ return image
87
+
88
+ def get_depth_map(image, depth_estimator):
89
+ # Convert the image to PIL if necessary
90
+ if isinstance(image, np.ndarray):
91
+ image = Image.fromarray(image)
92
+ elif isinstance(image, torch.Tensor):
93
+ image = Image.fromarray(image.cpu().numpy())
94
+ # Get the depth map
95
+ depth_map = depth_estimator(image)["depth"]
96
+ depth_map = np.array(depth_map)
97
+ depth_map = depth_map[:, :, None] # Add a third dimension
98
+ depth_map = np.concatenate([depth_map, depth_map, depth_map], axis=2) # Replicate to 3 channels
99
+ depth_map = torch.from_numpy(depth_map).float() / 255.0 # Normalize to [0, 1]
100
+ depth_map = depth_map.permute(2, 0, 1) # Reorder axes to (C, H, W)
101
+ return depth_map
102
+
103
+ pipe_default = get_lora_sd_pipeline(lora_dir='lora_man_animestyle', base_model_name_or_path=model_default, dtype=torch_dtype).to(device)
104
+
105
+ # ----------------------------------------------------------------------------------------------------------------------------------------------------
106
  def infer(
107
+ prompt,
108
+ negative_prompt,
109
+ model=model_default,
110
+ width=512,
111
+ height=512,
112
+ num_inference_steps=50,
113
+ seed=4,
114
+ guidance_scale=7.5,
115
+ lora_scale=0.7,
116
+ use_control_net=False, # Whether to use ControlNet
117
+ control_mode=None, # ControlNet mode selection
118
+ strength_cn=0.5, # ControlNet denoising strength
119
+ control_strength=0.5, # ControlNet conditioning strength
120
+ cn_source_image=None, # ControlNet source image
121
+ control_image=None, # ControlNet control image
122
+ use_ip_adapter=False, # Whether to use the IP-Adapter
123
+ ip_adapter_mode=None, # IP-Adapter mode selection
124
+ strength_ip=0.5, # IP-Adapter denoising strength
125
+ ip_adapter_strength=0.5, # IP-Adapter influence strength
126
+ controlnet_conditioning_scale=0.5, # ControlNet conditioning scale
127
+ ip_source_image=None, # IP-Adapter source image
128
+ ip_adapter_image=None, # IP-Adapter reference image
129
+ progress=gr.Progress(track_tqdm=True)
130
  ):
131
+
132
+ # Image generation with the IP-Adapter ------------------------------------------------------------------------------------------------------------------
133
+ if use_ip_adapter and ip_source_image is not None and ip_adapter_image is not None:
134
+
135
+ if ip_adapter_mode == "pose_estimation":
136
+
137
+ print('ip_adapter_mode = ', ip_adapter_mode)
138
+
139
+ # Initialize ControlNet
140
+ controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
141
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
142
+
143
+ generator = torch.Generator(device).manual_seed(seed)
144
+
145
+ pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
146
+ model_default,
147
+ controlnet=controlnet,
148
+ torch_dtype=torch_dtype
149
+ ).to(device)
150
+
151
+ # Load the IP-Adapter
152
+ pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
153
+ pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
154
+
155
+ # Preprocess the images for the IP-Adapter (resized to 224x224)
156
+ ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
157
+ ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
158
+
159
+ # Attach LoRA to the IP-Adapter pipeline if it has not been loaded yet
160
+ if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
161
+ # Load the LoRA weights for the UNet
162
+ pipe_ip_adapter.unet = PeftModel.from_pretrained(
163
+ pipe_ip_adapter.unet,
164
+ 'lora_man_animestyle/unet',
165
+ adapter_name="default"
166
+ )
167
+ pipe_ip_adapter.unet.set_adapter("default")
168
+
169
+ # Load the LoRA weights for the text encoder, if they exist
170
+ text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
171
+ if os.path.exists(text_encoder_lora_path):
172
+ pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
173
+ pipe_ip_adapter.text_encoder,
174
+ text_encoder_lora_path,
175
+ adapter_name="default"
176
+ )
177
+ pipe_ip_adapter.text_encoder.set_adapter("default")
178
+
179
+ # Fuse the LoRA weights into the base model
180
+ pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
181
+ pipe_ip_adapter.lora_loaded = True # Mark LoRA as loaded
182
+
183
+ # Make sure these parameters are floats
184
+ ip_adapter_strength = float(ip_adapter_strength)
185
+ controlnet_conditioning_scale = float(controlnet_conditioning_scale)
186
+
187
+ # Run the IP-Adapter together with LoRA
188
+ prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
189
+ negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
190
+ prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
191
+ image = pipe_ip_adapter(
192
+ prompt_embeds=prompt_embeds,
193
+ negative_prompt_embeds=negative_prompt_embeds,
194
+ image=ip_adapter_image,
195
+ ip_adapter_image=ip_source_image,
196
+ strength=strength_ip,
197
+ width=width,
198
+ height=height,
199
+ num_inference_steps=num_inference_steps,
200
+ guidance_scale=guidance_scale,
201
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
202
+ generator=generator,
203
+ ).images[0]
204
+ else:
205
+
206
+ if ip_adapter_mode == "edge_detection":
207
+
208
+ print('ip_adapter_mode = ', ip_adapter_mode)
209
+
210
+ # Initialize ControlNet
211
+ controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
212
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
213
+
214
+ generator = torch.Generator(device).manual_seed(seed)
215
+
216
+ pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
217
+ model_default,
218
+ controlnet=controlnet,
219
+ torch_dtype=torch_dtype
220
+ ).to(device)
221
+
222
+ # Load the IP-Adapter
223
+ #pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_face)
224
+ pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_plus)
225
+ pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
226
+
227
+ # Preprocess the images for the IP-Adapter (resized to 224x224)
228
+ ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
229
+ ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
230
+
231
+ # Attach LoRA to the IP-Adapter pipeline if it has not been loaded yet
232
+ if not hasattr(pipe_ip_adapter, 'lora_loaded') or not pipe_ip_adapter.lora_loaded:
233
+ # Load the LoRA weights for the UNet
234
+ pipe_ip_adapter.unet = PeftModel.from_pretrained(
235
+ pipe_ip_adapter.unet,
236
+ 'lora_man_animestyle/unet',
237
+ adapter_name="default"
238
+ )
239
+ pipe_ip_adapter.unet.set_adapter("default")
240
+
241
+ # Load the LoRA weights for the text encoder, if they exist
242
+ text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
243
+ if os.path.exists(text_encoder_lora_path):
244
+ pipe_ip_adapter.text_encoder = PeftModel.from_pretrained(
245
+ pipe_ip_adapter.text_encoder,
246
+ text_encoder_lora_path,
247
+ adapter_name="default"
248
+ )
249
+ pipe_ip_adapter.text_encoder.set_adapter("default")
250
+
251
+ # Fuse the LoRA weights into the base model
252
+ pipe_ip_adapter.fuse_lora(lora_scale=lora_scale)
253
+ pipe_ip_adapter.lora_loaded = True # Mark LoRA as loaded
254
+
255
+ # Make sure these parameters are floats
256
+ ip_adapter_strength = float(ip_adapter_strength)
257
+ controlnet_conditioning_scale = float(controlnet_conditioning_scale)
258
+
259
+ # Run the IP-Adapter together with LoRA
260
+ prompt_embeds = long_prompt_encoder(prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
261
+ negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_ip_adapter.tokenizer, pipe_ip_adapter.text_encoder)
262
+ prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
263
+
264
+ # scale = { # defaults to 0 in all other blocks
265
+ # "down": {
266
+ # "block_0": [0.0, 1.0],
267
+ # "block_1": [0.0, 1.0],
268
+ # },
269
+ # "up": {
270
+ # "block_0": [0.0, 1.0, 0.0],
271
+ # "block_1": [0.0, 1.0, 0.0],
272
+ # },
273
+ # }
274
+ # scale = {
275
+ # "down": {"block_2": [0.0, 1.0]},
276
+ # "up": {"block_0": [0.0, 1.0, 0.0]},
277
+ # }
278
+ # pipe_ip_adapter.set_ip_adapter_scale(scale)
279
+
280
+ image = pipe_ip_adapter(
281
+ prompt_embeds=prompt_embeds,
282
+ negative_prompt_embeds=negative_prompt_embeds,
283
+ image=ip_adapter_image,
284
+ ip_adapter_image=ip_source_image,
285
+ strength=strength_ip,
286
+ width=width,
287
+ height=height,
288
+ num_inference_steps=num_inference_steps,
289
+ guidance_scale=guidance_scale,
290
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
291
+ generator=generator,
292
+ ).images[0]
293
+ else:
294
+
295
+ if ip_adapter_mode == "depth_map":
296
+
297
+ print('ip_adapter_mode = ', ip_adapter_mode)
298
+
299
+ # Make sure the parameter is a float
300
+ controlnet_conditioning_scale = float(controlnet_conditioning_scale)
301
+
302
+ # Initialize ControlNet
303
+ controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
304
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
305
+
306
+ generator = torch.Generator(device).manual_seed(seed)
307
+
308
+ # Preprocess the images for the IP-Adapter (resized to 224x224)
309
+ ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
310
+ ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
311
+
312
+ pipe_ip_adapter = StableDiffusionControlNetPipeline.from_pretrained(
313
+ model_default,
314
+ controlnet=controlnet,
315
+ torch_dtype=torch_dtype
316
+ ).to(device)
317
+ pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME)
318
+
319
+ pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
320
+ image = pipe_ip_adapter(
321
+ prompt=prompt,
322
+ negative_prompt=negative_prompt,
323
+ image=ip_source_image,
324
+ width=width,
325
+ height=height,
326
+ ip_adapter_image=ip_adapter_image,
327
+ num_inference_steps=num_inference_steps,
328
+ strength=strength_ip,
329
+ guidance_scale=guidance_scale,
330
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
331
+ generator=generator,
332
+ ).images[0]
333
+ else:
334
+
335
+ if ip_adapter_mode == "face_model":
336
+
337
+ print('ip_adapter_mode = ', ip_adapter_mode)
338
+
339
+ # Preprocess the images for the IP-Adapter (resized to 224x224)
340
+ ip_source_image = preprocess_image(ip_source_image, width, height, resize_to_224=True)
341
+ ip_adapter_image = preprocess_image(ip_adapter_image, width, height, resize_to_224=True)
342
+
343
+ pipe_ip_adapter = StableDiffusionPipeline.from_pretrained(
344
+ model_default,
345
+ torch_dtype=torch_dtype,
346
+ ).to(device)
347
+
348
+ pipe_ip_adapter.scheduler = DDIMScheduler.from_config(pipe_ip_adapter.scheduler.config)
349
+ pipe_ip_adapter.load_ip_adapter(IP_ADAPTER, subfolder="models", weight_name=WEIGHT_NAME_face)
350
+
351
+ generator = torch.Generator(device).manual_seed(seed)
352
+
353
+ pipe_ip_adapter.set_ip_adapter_scale(ip_adapter_strength)
354
+ image = pipe_ip_adapter(
355
+ prompt=prompt,
356
+ negative_prompt=negative_prompt,
357
+ ip_adapter_image=ip_adapter_image,
358
+ width=width,
359
+ height=height,
360
+ guidance_scale=guidance_scale,
361
+ num_inference_steps=num_inference_steps,
362
+ generator=generator,
363
+ ).images[0]
364
+ else:
365
+ # Image generation with ControlNet ----------------------------------------------------------------------------------------------------------------
366
+
367
+ if use_control_net and control_image is not None and cn_source_image is not None:
368
+
369
+ if control_mode == "pose_estimation":
370
+
371
+ print('control_mode = ', control_mode)
372
+
373
+ # Initialize ControlNet
374
+ controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
375
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
376
+
377
+ generator = torch.Generator(device).manual_seed(seed)
378
+
379
+ pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
380
+ model_default,
381
+ controlnet=controlnet,
382
+ torch_dtype=torch_dtype
383
+ ).to(device)
384
+
385
+ # Preprocess the images
386
+ cn_source_image = preprocess_image(cn_source_image, width, height)
387
+ control_image = preprocess_image(control_image, width, height)
388
+
389
+ # Attach LoRA to the ControlNet pipeline if it has not been loaded yet
390
+ if not hasattr(pipe_controlnet, 'lora_loaded') or not pipe_controlnet.lora_loaded:
391
+ # Load the LoRA weights for the UNet
392
+ pipe_controlnet.unet = PeftModel.from_pretrained(
393
+ pipe_controlnet.unet,
394
+ 'lora_man_animestyle/unet',
395
+ adapter_name="default"
396
+ )
397
+ pipe_controlnet.unet.set_adapter("default")
398
+
399
+ # Load the LoRA weights for the text encoder, if they exist
400
+ text_encoder_lora_path = 'lora_man_animestyle/text_encoder'
401
+ if os.path.exists(text_encoder_lora_path):
402
+ pipe_controlnet.text_encoder = PeftModel.from_pretrained(
403
+ pipe_controlnet.text_encoder,
404
+ text_encoder_lora_path,
405
+ adapter_name="default"
406
+ )
407
+ pipe_controlnet.text_encoder.set_adapter("default")
408
+
409
+ # Fuse the LoRA weights into the base model
410
+ pipe_controlnet.fuse_lora(lora_scale=lora_scale)
411
+ pipe_controlnet.lora_loaded = True # Mark LoRA as loaded
412
+
413
+ # Make sure control_strength is a float
414
+ control_strength = float(control_strength)
415
+ #strength_sn = float(strength_sn)
416
+
417
+ # Run ControlNet together with LoRA
418
+ prompt_embeds = long_prompt_encoder(prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
419
+ negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe_controlnet.tokenizer, pipe_controlnet.text_encoder)
420
+ prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
421
+ image = pipe_controlnet(
422
+ prompt_embeds=prompt_embeds,
423
+ negative_prompt_embeds=negative_prompt_embeds,
424
+ image=cn_source_image,
425
+ control_image=control_image,
426
+ strength=strength_cn,
427
+ width=width,
428
+ height=height,
429
+ num_inference_steps=num_inference_steps,
430
+ guidance_scale=guidance_scale,
431
+ controlnet_conditioning_scale=control_strength,
432
+ generator=generator
433
+ ).images[0]
434
+ else:
435
+
436
+ if control_mode == "edge_detection":
437
+
438
+ print('control_mode = ', control_mode)
439
+
440
+ controlnet_model_path = "lllyasviel/sd-controlnet-canny"
441
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype, use_safetensors=True)
442
+
443
+ generator = torch.Generator(device).manual_seed(seed)
444
+
445
+ pipe_controlnet = StableDiffusionControlNetPipeline.from_pretrained(
446
+ model_default,
447
+ controlnet=controlnet,
448
+ torch_dtype=torch_dtype,
449
+ use_safetensors=True
450
+ ).to(device)
451
+
452
+ pipe_controlnet.scheduler = UniPCMultistepScheduler.from_config(pipe_controlnet.scheduler.config)
453
+
454
+ # Preprocess the images
455
+ cn_source_image = preprocess_image(cn_source_image, width, height)
456
+ control_image = preprocess_image(control_image, width, height)
457
+
458
+ image = pipe_controlnet(
459
+ prompt=prompt,
460
+ negative_prompt=negative_prompt,
461
+ image=cn_source_image,
462
+ control_image=control_image,
463
+ strength=strength_cn,
464
+ width=width,
465
+ height=height,
466
+ num_inference_steps=num_inference_steps,
467
+ guidance_scale=guidance_scale,
468
+ controlnet_conditioning_scale=control_strength,
469
+ generator=generator
470
+ ).images[0]
471
+ else:
472
+
473
+ if control_mode == "depth_map":
474
+
475
+ print('control_mode = ', control_mode)
476
+
477
+ depth_estimator = pipeline("depth-estimation")
478
+ depth_map = get_depth_map(control_image, depth_estimator).unsqueeze(0).half().to(device)
479
+
480
+ controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
481
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype, use_safetensors=True)
482
+
483
+ generator = torch.Generator(device).manual_seed(seed)
484
+
485
+ pipe_controlnet = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
486
+ model_default,
487
+ controlnet=controlnet,
488
+ torch_dtype=torch_dtype,
489
+ use_safetensors=True
490
+ ).to(device)
491
+
492
+ pipe_controlnet.scheduler = UniPCMultistepScheduler.from_config(pipe_controlnet.scheduler.config)
493
 
494
+ image = pipe_controlnet(
495
+ prompt=prompt,
496
+ negative_prompt=negative_prompt,
497
+ image=control_image,
498
+ control_image=depth_map,
499
+ width=width,
500
+ height=height,
501
+ num_inference_steps=num_inference_steps,
502
+ guidance_scale=guidance_scale,
503
+ generator=generator
504
+ ).images[0]
505
+ else:
506
+ # Image generation with LoRA, without ControlNet or IP-Adapter ---------------------------------------------------------------------------------------------
507
+
508
+ # Initialize ControlNet
509
+ controlnet_model_path = "lllyasviel/sd-controlnet-openpose"
510
+ controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch_dtype)
511
+
512
+ generator = torch.Generator(device).manual_seed(seed)
513
 
514
+ if model != model_default:
515
+ pipe = StableDiffusionPipeline.from_pretrained(model, torch_dtype=torch_dtype).to(device)
516
+ prompt_embeds = long_prompt_encoder(prompt, pipe.tokenizer, pipe.text_encoder)
517
+ negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe.tokenizer, pipe.text_encoder)
518
+ prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
519
+ else:
520
+ pipe = pipe_default
521
+ prompt_embeds = long_prompt_encoder(prompt, pipe.tokenizer, pipe.text_encoder)
522
+ negative_prompt_embeds = long_prompt_encoder(negative_prompt, pipe.tokenizer, pipe.text_encoder)
523
+ prompt_embeds, negative_prompt_embeds = align_embeddings(prompt_embeds, negative_prompt_embeds)
524
+ pipe.fuse_lora(lora_scale=lora_scale)
525
+
526
+ params = {
527
+ 'prompt_embeds': prompt_embeds,
528
+ 'negative_prompt_embeds': negative_prompt_embeds,
529
+ 'guidance_scale': guidance_scale,
530
+ 'num_inference_steps': num_inference_steps,
531
+ 'width': width,
532
+ 'height': height,
533
+ 'generator': generator,
534
+ }
535
+
536
+ image = pipe(**params).images[0]
537
+
538
+ return image
539
+ # ---------------------------------------------------------------------------------------------------------------------------------------------
540
 
541
  examples = [
542
+ "A young man in anime style. The image is characterized by high definition and resolution. Handsome, thoughtful man, attentive eyes. The man is depicted in the foreground, close-up or in the middle. High-quality images of the face, eyes, nose, lips, hands and clothes. The background and background are blurred and indistinct. The play of light and shadow is visible on the face and clothes.",
543
+ "A man runs through the park against the background of trees. The man's entire figure, face, arms and legs are visible. Anime style. The best quality.",
544
+ "The smiling man. His face and hands are visible. Anime style. The best quality.",
545
+ "The smiling girl. Anime style. Best quality, high quality.",
546
+ "lego batman and robin. Rich and vibrant colors.",
547
+ "A photo of Pushkin as a hockey player in uniform with a stick, playing hockey on the ice arena in the NHL and scoring a goal.",
548
+ ]
549
 
550
  examples_negative = [
551
+ "Blurred details, low resolution, bad anatomy, no face visible, poor image of a man's face, poor quality, artifacts, black and white image.",
552
+ "Monochrome, lowres, bad anatomy, worst quality, low quality",
553
+ "lowres, bad anatomy, worst quality, low quality, black and white image.",
554
  ]
555
 
556
  css = """
 
565
  "CompVis/stable-diffusion-v1-4",
566
  ]
567
 
568
+ # -------------------------------------------------------------------------------------------------------------------------------------------------
569
  with gr.Blocks(css=css) as demo:
 
570
  with gr.Column(elem_id="col-container"):
571
  gr.Markdown(" # Text-to-Image Gradio Template from V. Gorsky")
572
 
573
+ with gr.Row():
574
+ model = gr.Dropdown(
575
+ label="Model Selection",
576
+ choices=available_models,
577
+ value="stable-diffusion-v1-5/stable-diffusion-v1-5",
578
+ interactive=True
579
+ )
580
+
581
+ prompt = gr.Textbox(
582
  label="Prompt",
 
583
  max_lines=1,
584
  placeholder="Enter your prompt",
 
585
  )
586
 
587
+ negative_prompt = gr.Textbox(
588
  label="Negative prompt",
589
  max_lines=1,
590
  placeholder="Enter a negative prompt",
 
591
  )
592
+
593
+ with gr.Row():
594
+ lora_scale = gr.Slider(
595
+ label="LoRA scale",
596
+ minimum=0.0,
597
+ maximum=1.0,
598
+ step=0.01,
599
+ value=0.7,
600
+ )
601
+
602
+ with gr.Row():
603
+ guidance_scale = gr.Slider(
604
+ label="Guidance scale",
605
+ minimum=0.0,
606
+ maximum=10.0,
607
+ step=0.01,
608
+ value=7.5,
609
+ )
610
 
611
+ with gr.Row():
612
+ seed = gr.Slider(
613
  label="Seed",
614
  minimum=0,
615
  maximum=MAX_SEED,
616
  step=1,
617
+ value=4,
618
+ )
619
 
620
+ with gr.Row():
621
+ num_inference_steps = gr.Slider(
622
+ label="Number of inference steps",
623
+ minimum=1,
624
+ maximum=100,
625
+ step=1,
626
+ value=50,
627
+ )
628
+
629
  with gr.Accordion("Advanced Settings", open=False):
630
  with gr.Row():
631
  width = gr.Slider(
 
633
  minimum=256,
634
  maximum=MAX_IMAGE_SIZE,
635
  step=32,
636
+ value=512,
637
  )
638
+
639
+ with gr.Row():
640
  height = gr.Slider(
641
  label="Height",
642
  minimum=256,
643
  maximum=MAX_IMAGE_SIZE,
644
  step=32,
645
+ value=512,
646
  )
647
 
648
+ # ControlNet -----------------------------------------------------------------------------------------------
649
+ with gr.Blocks():
650
+ with gr.Row():
651
+ use_control_net = gr.Checkbox(
652
+ label="Use ControlNet",
653
+ value=False,
654
+ )
 
655
 
656
+ with gr.Column(visible=False) as control_net_options:
657
+ strength_cn = gr.Slider(
658
+ label="Strength",
659
+ minimum=0.0,
660
+ maximum=1.0,
661
+ value=0.5,
662
+ step=0.01,
663
+ interactive=True,
664
+ )
665
+
666
+ control_strength = gr.Slider(
667
+ label="Control Strength",
668
+ minimum=0.0,
669
+ maximum=1.0,
670
+ value=0.5,
671
+ step=0.01,
672
+ interactive=True,
673
+ )
674
+
675
+ control_mode = gr.Dropdown(
676
+ label="Control Mode",
677
+ choices=[
678
+ "pose_estimation",
679
+ "edge_detection",
680
+ "depth_map",
681
+ ],
682
+ value="pose_estimation",
683
+ interactive=True,
684
+ )
685
 
686
+ cn_source_image = gr.Image(label="Upload Source Image")
687
+
688
+ control_image = gr.Image(label="Upload Control Net Image")
689
+
690
+ use_control_net.change(
691
+ fn=lambda x: gr.update(visible=x),
692
+ inputs=use_control_net,
693
+ outputs=control_net_options
694
+ )
695
+
696
+ # IP_Adapter ------------------------------------------------------------------------------------------------
697
+ with gr.Blocks():
698
+ with gr.Row():
699
+ use_ip_adapter = gr.Checkbox(
700
+ label="Use IP_Adapter",
701
+ value=False,
702
+ )
703
+
704
+ with gr.Column(visible=False) as ip_adapter_options:
705
+ strength_ip = gr.Slider(
706
+ label="Strength",
707
+ minimum=0.0,
708
+ maximum=1.0,
709
+ value=0.5,
710
+ step=0.01,
711
+ interactive=True,
712
+ )
713
+
714
+ ip_adapter_strength = gr.Slider(
715
+ label="IP_Adapter Strength",
716
+ minimum=0.0,
717
+ maximum=1.0,
718
+ value=0.5,
719
+ step=0.01,
720
+ interactive=True,
721
+ )
722
+
723
+ controlnet_conditioning_scale = gr.Slider(
724
+ label="Controlnet conditioning scale",
725
+ minimum=0.0,
726
+ maximum=1.0,
727
+ value=0.5,
728
+ step=0.01,
729
+ interactive=True,
730
+ )
731
+
732
+ ip_adapter_mode = gr.Dropdown(
733
+ label="Ip_Adapter Mode",
734
+ choices=[
735
+ "pose_estimation",
736
+ "edge_detection",
737
+ "depth_map",
738
+ "face_model"
739
+ ],
740
+ value="pose_estimation",
741
+ interactive=True,
742
+ )
743
+
744
+ ip_source_image = gr.Image(label="Upload Source Image")
745
+
746
+ ip_adapter_image = gr.Image(label="Upload IP_Adapter Image")
747
+
748
+ use_ip_adapter.change(
749
+ fn=lambda x: gr.update(visible=x),
750
+ inputs=use_ip_adapter,
751
+ outputs=ip_adapter_options
752
+ )
753
+ # ---------------------------------------------------------------------------------------------------------
754
 
755
+ gr.Examples(examples=examples, inputs=[prompt], label="Examples for prompt:")
756
+ gr.Examples(examples=examples_negative, inputs=[negative_prompt], label="Examples for negative prompt:")
757
+
758
+ run_button = gr.Button("Run", scale=1, variant="primary")
759
+ result = gr.Image(label="Result", show_label=False)
760
+
761
  gr.on(
762
  triggers=[run_button.click, prompt.submit],
763
  fn=infer,
764
  inputs=[
 
765
  prompt,
766
  negative_prompt,
767
+ model,
768
  width,
769
  height,
 
770
  num_inference_steps,
771
+ seed,
772
+ guidance_scale,
773
+ lora_scale,
774
+ use_control_net, # Whether to use ControlNet
775
+ control_mode, # ControlNet mode selection
776
+ strength_cn, # ControlNet denoising strength
777
+ control_strength, # ControlNet conditioning strength
778
+ cn_source_image, # ControlNet source image
779
+ control_image, # ControlNet control image
780
+ use_ip_adapter, # Whether to use the IP-Adapter
781
+ ip_adapter_mode, # IP-Adapter mode selection
782
+ strength_ip, # IP-Adapter denoising strength
783
+ ip_adapter_strength, # IP-Adapter influence strength
784
+ controlnet_conditioning_scale, # ControlNet conditioning scale
785
+ ip_source_image, # IP-Adapter source image
786
+ ip_adapter_image, # IP-Adapter reference image
787
  ],
788
+ outputs=[result],
789
  )
790
 
791
  if __name__ == "__main__":
792
  demo.launch()
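
As a quick sanity check of the new code path, here is a minimal, hypothetical sketch of calling the updated infer() directly, outside the Gradio UI. It assumes the app.py from this commit is importable as app and that the lora_man_animestyle weights referenced above are present; only the plain LoRA text-to-image branch is exercised (no ControlNet or IP-Adapter inputs are passed):

# hypothetical smoke test for infer(); not part of the commit
from app import infer

image = infer(
    prompt="A young man in anime style. Best quality.",
    negative_prompt="lowres, bad anatomy, worst quality",
    width=512,
    height=512,
    num_inference_steps=30,  # fewer steps than the UI default of 50, to keep the check fast
    seed=4,
    guidance_scale=7.5,
    lora_scale=0.7,
)
image.save("smoke_test.png")  # infer() returns a PIL image in this branch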