aiqtech committed on
Commit 90b7eda · verified · 1 Parent(s): ed12c55

Update app.py

Files changed (1):
  1. app.py +245 -201
app.py CHANGED
@@ -1,54 +1,40 @@
- import spaces
  import random
  import torch
- import cv2
- import insightface
  import gradio as gr
- import numpy as np
- import os

  from huggingface_hub import snapshot_download, login
  from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
- from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter_FaceID import StableDiffusionXLPipeline
  from kolors.models.modeling_chatglm import ChatGLMModel
  from kolors.models.tokenization_chatglm import ChatGLMTokenizer
- from diffusers import AutoencoderKL
  from kolors.models.unet_2d_condition import UNet2DConditionModel
- from diffusers import EulerDiscreteScheduler
- from PIL import Image
- from insightface.app import FaceAnalysis

- # ---------------------------
- # Runtime / device settings
- # ---------------------------
- HF_TOKEN = os.getenv("HF_TOKEN")
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
- DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

  if HF_TOKEN:
      login(token=HF_TOKEN)
      print("Successfully logged in to Hugging Face Hub")

  print("Downloading models...")
  ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors", token=HF_TOKEN)
- ckpt_dir_faceid = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-FaceID-Plus", token=HF_TOKEN)
-
- print("Loading models on CPU first...")
-
- # ---------------------------
- # ChatGLM tokenizer pad fix
- # ---------------------------
- original_chatglm_pad = ChatGLMTokenizer._pad if hasattr(ChatGLMTokenizer, '_pad') else None
- def fixed_pad(self, *args, **kwargs):
-     kwargs.pop('padding_side', None)
-     if original_chatglm_pad:
-         return original_chatglm_pad(self, *args, **kwargs)
-     else:
-         return super(ChatGLMTokenizer, self)._pad(*args, **kwargs)
- ChatGLMTokenizer._pad = fixed_pad
-
- # ---------------------------
- # Load Kolors components (dtype fp16 on CUDA, fp32 on CPU)
- # ---------------------------
  text_encoder = ChatGLMModel.from_pretrained(
      f"{ckpt_dir}/text_encoder",
      torch_dtype=DTYPE,
@@ -60,234 +46,292 @@ tokenizer = ChatGLMTokenizer.from_pretrained(
  )
  vae = AutoencoderKL.from_pretrained(
      f"{ckpt_dir}/vae",
      torch_dtype=DTYPE
  )
  scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
  unet = UNet2DConditionModel.from_pretrained(
      f"{ckpt_dir}/unet",
      torch_dtype=DTYPE
  )

- # CLIP image encoder + processor
- clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
-     "openai/clip-vit-large-patch14-336",
-     torch_dtype=DTYPE,
-     use_safetensors=True
- )
- clip_image_processor = CLIPImageProcessor.from_pretrained(
-     "openai/clip-vit-large-patch14-336"
- )

- # Create pipeline (initially on CPU to be safe with memory)
  pipe = StableDiffusionXLPipeline(
      vae=vae,
      text_encoder=text_encoder,
      tokenizer=tokenizer,
      unet=unet,
      scheduler=scheduler,
-     face_clip_encoder=clip_image_encoder,
-     face_clip_processor=clip_image_processor,
-     force_zeros_for_empty_prompt=False,
  )

- print("Models loaded successfully!")
-
- # ---------------------------
- # InsightFace helper (force CPU provider to avoid CUDA init errors)
- # ---------------------------
- class FaceInfoGenerator:
-     def __init__(self, root_dir: str = "./.insightface/"):
-         providers = ["CPUExecutionProvider"]  # safe in environments without a GPU
-         self.app = FaceAnalysis(
-             name="antelopev2",
-             root=root_dir,
-             providers=providers
-         )
-         self.app.prepare(ctx_id=0, det_size=(640, 640))
-
-     def get_faceinfo_one_img(self, face_image: Image.Image):
-         if face_image is None:
-             return None
-         # PIL RGB -> OpenCV BGR
-         face_info = self.app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
-         if len(face_info) == 0:
-             return None
-         # Largest face
-         face_info = sorted(
-             face_info,
-             key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1])
-         )[-1]
-         return face_info
-
- def face_bbox_to_square(bbox):
-     l, t, r, b = bbox
-     cent_x = (l + r) / 2
-     cent_y = (t + b) / 2
-     w, h = r - l, b - t
-     rad = max(w, h) / 2
-     return [cent_x - rad, cent_y - rad, cent_x + rad, cent_y + rad]

  MAX_SEED = np.iinfo(np.int32).max
- face_info_generator = FaceInfoGenerator()
-
- # ---------------------------
- # Inference function
- # - No @spaces.GPU decorator (prevents crashes when no GPU is present)
- # - Autocast only on CUDA
- # ---------------------------
- def infer(
-     prompt,
-     image=None,
-     negative_prompt="low quality, blurry, distorted",
-     seed=66,
-     randomize_seed=False,
-     guidance_scale=5.0,
-     num_inference_steps=25
- ):
-     if image is None:
-         gr.Warning("Please upload an image with a face.")
-         return None, 0
-
-     # Detect face (InsightFace on CPU)
-     face_info = face_info_generator.get_faceinfo_one_img(image)
-     if face_info is None:
-         raise gr.Error("No face detected. Please upload an image with a clear face.")
-
-     # Prepare crop for IP-Adapter FaceID
-     face_bbox_square = face_bbox_to_square(face_info["bbox"])
-     crop_image = image.crop(face_bbox_square).resize((336, 336))
-     crop_image = [crop_image]  # pipeline expects list
-     face_embeds = torch.from_numpy(np.array([face_info["embedding"]]))
-
-     # Device move
-     device = torch.device(DEVICE)
-     global pipe
-
-     # Move modules to device with proper dtype
-     pipe.vae = pipe.vae.to(device, dtype=DTYPE)
-     pipe.text_encoder = pipe.text_encoder.to(device, dtype=DTYPE)
-     pipe.unet = pipe.unet.to(device, dtype=DTYPE)
-     pipe.face_clip_encoder = pipe.face_clip_encoder.to(device, dtype=DTYPE)
-     face_embeds = face_embeds.to(device, dtype=DTYPE)
-
-     # Load IP-Adapter weights (FaceID Plus)
-     pipe.load_ip_adapter_faceid_plus(f"{ckpt_dir_faceid}/ipa-faceid-plus.bin", device=device)
-     pipe.set_face_fidelity_scale(0.8)
-
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-     generator = torch.Generator(device=device).manual_seed(seed)

-     # Inference: autocast only on CUDA
      with torch.no_grad():
          if DEVICE == "cuda":
              with torch.autocast(device_type="cuda", dtype=torch.float16):
                  images = pipe(
                      prompt=prompt,
                      negative_prompt=negative_prompt,
-                     height=1024,
-                     width=1024,
                      num_inference_steps=int(num_inference_steps),
                      guidance_scale=float(guidance_scale),
                      num_images_per_prompt=1,
                      generator=generator,
-                     face_crop_image=crop_image,
-                     face_insightface_embeds=face_embeds
                  ).images
          else:
              images = pipe(
                  prompt=prompt,
                  negative_prompt=negative_prompt,
-                 height=1024,
-                 width=1024,
                  num_inference_steps=int(num_inference_steps),
                  guidance_scale=float(guidance_scale),
                  num_images_per_prompt=1,
                  generator=generator,
-                 face_crop_image=crop_image,
-                 face_insightface_embeds=face_embeds
              ).images

-     result = images[0]
-
-     # Offload back to CPU to free GPU memory
-     try:
-         pipe.vae = pipe.vae.to("cpu")
-         pipe.text_encoder = pipe.text_encoder.to("cpu")
-         pipe.unet = pipe.unet.to("cpu")
-         pipe.face_clip_encoder = pipe.face_clip_encoder.to("cpu")
-         if DEVICE == "cuda":
-             torch.cuda.empty_cache()
-     except Exception:
-         pass
-
-     return result, seed

- # If CUDA is available, optionally wrap with spaces.GPU for scheduling
  if torch.cuda.is_available():
-     infer = spaces.GPU(duration=120)(infer)

- # ---------------------------
- # Gradio UI
- # ---------------------------
  css = """
- footer { visibility: hidden; }
- #col-left, #col-right { max-width: 640px; margin: 0 auto; }
- .gr-button { max-width: 100%; }
  """

- with gr.Blocks(theme="soft", css=css) as Kolors:
-     gr.HTML(
-         """
-         <div style='text-align: center;'>
-             <h1>🎨 Kolors Face ID - AI Portrait Generator</h1>
-             <p>Upload a face photo and create stunning AI portraits!</p>
-             <div style='display:flex; justify-content:center; gap:12px; margin-top:20px;'>
-                 <a href="https://huggingface.co/spaces/openfree/Best-AI" target="_blank">
-                     <img src="https://img.shields.io/badge/OpenFree-BEST%20AI-blue?style=for-the-badge" alt="OpenFree">
-                 </a>
-                 <a href="https://discord.gg/openfreeai" target="_blank">
-                     <img src="https://img.shields.io/badge/Discord-OpenFree%20AI-purple?style=for-the-badge&logo=discord" alt="Discord">
-                 </a>
-             </div>
-             <div style='margin-top:8px;font-size:12px;opacity:.7;'>
-                 Device: {device}, DType: {dtype}
-             </div>
-         </div>
-         """.format(device=DEVICE.upper(), dtype=str(DTYPE).replace("torch.", ""))
-     )
-
-     with gr.Row():
-         with gr.Column(elem_id="col-left"):
-             prompt = gr.Textbox(
                  label="Prompt",
-                 placeholder="Describe the portrait style you want...",
-                 lines=3,
-                 value="A professional portrait photo, high quality"
              )
-             image = gr.Image(label="Upload Face Image", type="pil", height=300)
-
-             with gr.Accordion("Advanced Settings", open=False):
-                 negative_prompt = gr.Textbox(
-                     label="Negative prompt",
-                     value="low quality, blurry, distorted"
                  )
-                 seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=66)
-                 randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-                 guidance_scale = gr.Slider(label="Guidance", minimum=1, maximum=10, step=0.5, value=5.0)
-                 num_inference_steps = gr.Slider(label="Steps", minimum=10, maximum=50, step=5, value=25)
-
-             button = gr.Button("🎨 Generate Portrait", variant="primary")
-
-         with gr.Column(elem_id="col-right"):
-             result = gr.Image(label="Generated Portrait")
-             seed_used = gr.Number(label="Seed Used", precision=0)
-
-     button.click(
          fn=infer,
-         inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps],
-         outputs=[result, seed_used]
      )

- if __name__ == "__main__":
-     Kolors.queue(max_size=20).launch(debug=True)
 
+ import os
  import random
+ import numpy as np
  import torch
  import gradio as gr
+ from PIL import Image
+
+ import spaces
+
  from huggingface_hub import snapshot_download, login
+
  from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
+ from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter import StableDiffusionXLPipeline
  from kolors.models.modeling_chatglm import ChatGLMModel
  from kolors.models.tokenization_chatglm import ChatGLMTokenizer
  from kolors.models.unet_2d_condition import UNet2DConditionModel
+ from diffusers import AutoencoderKL, EulerDiscreteScheduler
+
+ # ============= Runtime & Auth =============
+ HF_TOKEN = os.getenv("HF_TOKEN")
  if HF_TOKEN:
      login(token=HF_TOKEN)
      print("Successfully logged in to Hugging Face Hub")

+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
+ print(f"Device: {DEVICE}, DType: {DTYPE}")
+
+ # ============= Weights =============
+ # Follows the original code structure, but reuses the snapshot_download paths as-is.
  print("Downloading models...")
  ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors", token=HF_TOKEN)
+ ckpt_dir_ip = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-Plus", token=HF_TOKEN)
+
+ # ============= Load Models (IP-Adapter, not FaceID) =============
+ # fp16 can produce NaNs on CPU, so every module uses the shared DTYPE.
  text_encoder = ChatGLMModel.from_pretrained(
      f"{ckpt_dir}/text_encoder",
      torch_dtype=DTYPE,
  )
  vae = AutoencoderKL.from_pretrained(
      f"{ckpt_dir}/vae",
+     revision=None,
      torch_dtype=DTYPE
  )
  scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
  unet = UNet2DConditionModel.from_pretrained(
      f"{ckpt_dir}/unet",
+     revision=None,
      torch_dtype=DTYPE
  )

+ # CLIP image encoder for IP-Adapter-Plus
+ image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+     f"{ckpt_dir_ip}/image_encoder",
+     ignore_mismatched_sizes=True
+ ).to(dtype=DTYPE, device=DEVICE)

+ ip_img_size = 336
+ clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size)
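+ # 336 px matches the CLIP ViT-L/14-336 input size that the previous revision
+ # loaded explicitly; assumed here to be what the bundled image_encoder expects.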
+
+ # StableDiffusionXL pipeline with IP-Adapter (image reference)
  pipe = StableDiffusionXLPipeline(
      vae=vae,
      text_encoder=text_encoder,
      tokenizer=tokenizer,
      unet=unet,
      scheduler=scheduler,
+     image_encoder=image_encoder,
+     feature_extractor=clip_image_processor,
+     force_zeros_for_empty_prompt=False
  )

+ # Move core modules to device/dtype
+ pipe.vae = pipe.vae.to(DEVICE, dtype=DTYPE)
+ pipe.text_encoder = pipe.text_encoder.to(DEVICE, dtype=DTYPE)
+ pipe.unet = pipe.unet.to(DEVICE, dtype=DTYPE)
+
+ # Kolors UNet compatibility shim
+ if hasattr(pipe.unet, "encoder_hid_proj"):
+     pipe.unet.text_encoder_hid_proj = pipe.unet.encoder_hid_proj
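+ # (Presumably kept because IP-Adapter loading replaces encoder_hid_proj,
+ # while the Kolors UNet still needs the original text projection available
+ # under text_encoder_hid_proj.)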
+
+ # Load IP-Adapter weights (general)
+ pipe.load_ip_adapter(
+     f"{ckpt_dir_ip}",
+     subfolder="",
+     weight_name=["ip_adapter_plus_general.bin"]
+ )
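+ # The adapter's influence is set per call via set_ip_adapter_scale() in
+ # _generate() below: 0 ignores the reference image, while 1.0 follows it
+ # closely (the UI labels 1.0 as variation mode).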

  MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 1024
+
+
+ def _to_multiple_of_8(x: int) -> int:
+     return int(x // 8 * 8)
+
+
+ def _ensure_even(x: int) -> int:
+     return x if x % 2 == 0 else x - 1
+
+
+ def _prepare_dims(width: int, height: int) -> tuple[int, int]:
+     # SDXL works best with resolutions that are multiples of 8
+     w = _to_multiple_of_8(width)
+     h = _to_multiple_of_8(height)
+     # Keep dimensions even for compatibility with e.g. H.264 video (optional)
+     w = _ensure_even(w)
+     h = _ensure_even(h)
+     return max(256, min(MAX_IMAGE_SIZE, w)), max(256, min(MAX_IMAGE_SIZE, h))
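+ # Sketch of the rounding behavior: _prepare_dims(1000, 513) returns (1000, 512);
+ # 513 floors to 512 (a multiple of 8 is already even), and both axes are then
+ # clamped to [256, MAX_IMAGE_SIZE].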
+
+
+ def _move_to_device():
+     # Make sure everything is on the right device/dtype at call time
+     global pipe, image_encoder
+     pipe.vae = pipe.vae.to(DEVICE, dtype=DTYPE)
+     pipe.text_encoder = pipe.text_encoder.to(DEVICE, dtype=DTYPE)
+     pipe.unet = pipe.unet.to(DEVICE, dtype=DTYPE)
+     image_encoder = image_encoder.to(device=DEVICE, dtype=DTYPE)
+     pipe.image_encoder = image_encoder
+
+
+ def _generate(
+     prompt: str,
+     ip_adapter_image: Image.Image,
+     ip_adapter_scale: float,
+     negative_prompt: str,
+     seed: int,
+     width: int,
+     height: int,
+     guidance_scale: float,
+     num_inference_steps: int,
+ ):
+     _move_to_device()
+     pipe.set_ip_adapter_scale([ip_adapter_scale])
+
+     # Normalize the requested resolution
+     width, height = _prepare_dims(width, height)
+
+     generator = torch.Generator(device=DEVICE).manual_seed(seed)

      with torch.no_grad():
          if DEVICE == "cuda":
              with torch.autocast(device_type="cuda", dtype=torch.float16):
                  images = pipe(
                      prompt=prompt,
+                     ip_adapter_image=[ip_adapter_image],
                      negative_prompt=negative_prompt,
+                     height=height,
+                     width=width,
                      num_inference_steps=int(num_inference_steps),
                      guidance_scale=float(guidance_scale),
                      num_images_per_prompt=1,
                      generator=generator,
                  ).images
          else:
              images = pipe(
                  prompt=prompt,
+                 ip_adapter_image=[ip_adapter_image],
                  negative_prompt=negative_prompt,
+                 height=height,
+                 width=width,
                  num_inference_steps=int(num_inference_steps),
                  guidance_scale=float(guidance_scale),
                  num_images_per_prompt=1,
                  generator=generator,
              ).images

+     return images[0]
+
+
+ # Wrapped with the Spaces GPU scheduler only when CUDA is available (below).
+ def _infer_core(
+     prompt,
+     ip_adapter_image,
+     ip_adapter_scale=0.5,
+     negative_prompt="",
+     seed=100,
+     randomize_seed=False,
+     width=1024,
+     height=1024,
+     guidance_scale=5.0,
+     num_inference_steps=50,
+     progress=gr.Progress(track_tqdm=True),
+ ):
+     if ip_adapter_image is None:
+         gr.Warning("Please upload an IP-Adapter reference image.")
+         return None, 0
+
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+
+     image = _generate(
+         prompt=prompt or "",
+         ip_adapter_image=ip_adapter_image,
+         ip_adapter_scale=float(ip_adapter_scale),
+         negative_prompt=negative_prompt or "",
+         seed=int(seed),
+         width=int(width),
+         height=int(height),
+         guidance_scale=float(guidance_scale),
+         num_inference_steps=int(num_inference_steps),
+     )
+     return image, seed
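+ # Hypothetical local smoke test, bypassing the UI (assumes minta.jpeg from the
+ # examples list exists next to app.py):
+ #   img, used_seed = _infer_core("A dog", Image.open("minta.jpeg"), ip_adapter_scale=0.4)
+ #   img.save("out.png")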

  if torch.cuda.is_available():
+     infer = spaces.GPU(duration=80)(_infer_core)
+ else:
+     infer = _infer_core
+
+
+ examples = [
+     ["A dog", "minta.jpeg", 0.4],
+     ["A capybara", "king-min.png", 0.5],
+     ["A cat", "blue_hair.png", 0.5],
+     ["", "meow.jpeg", 1.0],
+ ]
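+ # Each row is (prompt, reference image, ip_adapter_scale); an empty prompt
+ # with scale 1.0 produces pure image variations.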

  css = """
+ #col-container {
+     margin: 0 auto;
+     max-width: 720px;
+ }
+ #result img {
+     object-position: top;
+ }
+ #result .image-container {
+     height: 100%;
+ }
  """

+ with gr.Blocks(css=css) as demo:
+     with gr.Column(elem_id="col-container"):
+         gr.Markdown("# Kolors IP-Adapter - image reference and variations")
+
+         with gr.Row():
+             prompt = gr.Text(
                  label="Prompt",
+                 show_label=False,
+                 max_lines=1,
+                 placeholder="Enter your prompt",
+                 container=False,
              )
+             run_button = gr.Button("Run", scale=0)
+
+         with gr.Row():
+             with gr.Column():
+                 ip_adapter_image = gr.Image(label="IP-Adapter Image", type="pil")
+                 ip_adapter_scale = gr.Slider(
+                     label="Image influence scale",
+                     info="Use 1 for creating variations",
+                     minimum=0.0,
+                     maximum=1.0,
+                     step=0.05,
+                     value=0.5,
                  )
+             result = gr.Image(label="Result", elem_id="result")
+
+         with gr.Accordion("Advanced Settings", open=False):
+             negative_prompt = gr.Text(
+                 label="Negative prompt",
+                 max_lines=1,
+                 placeholder="Enter a negative prompt",
+             )
+             seed = gr.Slider(
+                 label="Seed",
+                 minimum=0,
+                 maximum=MAX_SEED,
+                 step=1,
+                 value=0,
+             )
+             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+             with gr.Row():
+                 width = gr.Slider(
+                     label="Width",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=32,
+                     value=1024,
+                 )
+                 height = gr.Slider(
+                     label="Height",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=32,
+                     value=1024,
+                 )
+             with gr.Row():
+                 guidance_scale = gr.Slider(
+                     label="Guidance scale",
+                     minimum=0.0,
+                     maximum=10.0,
+                     step=0.1,
+                     value=5.0,
+                 )
+                 num_inference_steps = gr.Slider(
+                     label="Number of inference steps",
+                     minimum=1,
+                     maximum=100,
+                     step=1,
+                     value=25,
+                 )

+         # Example files may not exist locally, so cache_examples="lazy" is kept
+         gr.Examples(
+             examples=examples,
+             fn=infer,
+             inputs=[prompt, ip_adapter_image, ip_adapter_scale],
+             outputs=[result, seed],
+             cache_examples="lazy",
+         )

+     gr.on(
+         triggers=[run_button.click, prompt.submit],
          fn=infer,
+         inputs=[
+             prompt,
+             ip_adapter_image,
+             ip_adapter_scale,
+             negative_prompt,
+             seed,
+             randomize_seed,
+             width,
+             height,
+             guidance_scale,
+             num_inference_steps,
+         ],
+         outputs=[result, seed],
      )

+ demo.queue().launch()