i0switch committed on
Commit
d58abe4
·
verified ·
1 Parent(s): 11db7d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -90
app.py CHANGED
@@ -1,43 +1,31 @@
1
- """InstantID × Beautiful Realistic Asians v7 (ZeroGPUfriendly, persistent cache)
2
-
3
- ポイント
4
- ---------
5
- * **import spaces を最初に**して ZeroGPU パッチを確実に適用。
6
- * グローバル領域では CPU でモデルをロードし、CUDA への移動は
7
- `@spaces.GPU` 関数内で一度だけ実行。
8
- * `.to("cuda")` や `torch.cuda.*` を関数外に置かないことで
9
- `RuntimeError: No CUDA GPUs are available` を回避。
10
  """
11
-
12
- # ---------------------------------------------------------------------------
13
- # 0. 依存ライブラリの読み込み (ZeroGPU パッチ → PyTorch の順)
14
- # ---------------------------------------------------------------------------
15
- import spaces # ⭐ ZeroGPU は torch より前に必須
16
-
17
- # --- ★ Monkey‑Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
18
  import types, sys
19
  from torchvision.transforms import functional as F
20
 
21
  mod = types.ModuleType("torchvision.transforms.functional_tensor")
 
22
  mod.rgb_to_grayscale = F.rgb_to_grayscale
23
  sys.modules["torchvision.transforms.functional_tensor"] = mod
24
  # ---------------------------------------------------------------------------
25
 
26
- import os, subprocess, cv2, torch, gradio as gr, numpy as np
27
  from pathlib import Path
28
  from PIL import Image
29
  from diffusers import (
30
- StableDiffusionPipeline,
31
- ControlNetModel,
32
- DPMSolverMultistepScheduler,
33
- AutoencoderKL,
34
  )
35
  from compel import Compel
36
  from insightface.app import FaceAnalysis
37
 
38
- # ---------------------------------------------------------------------------
39
- # 1. キャッシュ用ディレクトリ
40
- # ---------------------------------------------------------------------------
41
  PERSIST_BASE = Path("/data")
42
  CACHE_ROOT = (
43
  PERSIST_BASE / "instantid_cache"
@@ -53,7 +41,6 @@ UPSCALE_DIR = CACHE_ROOT / "realesrgan"
53
  for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
54
  p.mkdir(parents=True, exist_ok=True)
55
 
56
-
57
  def dl(url: str, dst: Path, attempts: int = 2):
58
  """wget + リトライの簡易ダウンローダ"""
59
  if dst.exists():
@@ -64,26 +51,26 @@ def dl(url: str, dst: Path, attempts: int = 2):
64
  return
65
  raise RuntimeError(f"download failed → {url}")
66
 
67
- # ---------------------------------------------------------------------------
68
- # 2. 必要アセットのダウンロード
69
- # ---------------------------------------------------------------------------
70
  print("— asset check —")
71
 
72
- # 2‑A. ベース checkpoint
73
  BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
74
  dl(
75
  "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
76
  BASE_CKPT,
77
  )
78
 
79
- # 2‑B. FaceID LoRA(Δのみ)
80
  LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
81
  dl(
82
  "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
83
  LORA_FILE,
84
  )
85
 
86
- # 2‑C. textual inversion Embeddings
87
  EMB_URLS = {
88
  "ng_deepnegative_v1_75t.pt": [
89
  "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
@@ -111,7 +98,7 @@ for fname, urls in EMB_URLS.items():
111
  if idx == len(urls): raise
112
  print(" ↳ fallback URL …")
113
 
114
- # 2‑D. RealESRGAN weights (×8)
115
  RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
116
  RRG_URLS = [
117
  "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
@@ -125,71 +112,85 @@ for idx, link in enumerate(RRG_URLS, 1):
125
  if idx == len(RRG_URLS): raise
126
  print(" ↳ fallback URL …")
127
 
128
- # ---------------------------------------------------------------------------
129
- # 3. モデル読み込み (すべて CPU)
130
- # ---------------------------------------------------------------------------
131
-
132
- device: str = "cpu" # グローバルは CPU 固定
133
- dtype = torch.float32 # 後で GPU 化する際に float16 に
134
-
135
- # FaceAnalysis (insightface)
136
- providers = ["CPUExecutionProvider"]
 
 
 
137
  face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
138
- face_app.prepare(ctx_id=-1, det_size=(640, 640))
139
 
140
- # Stable Diffusion Pipeline (CPU)
 
 
 
141
  pipe = StableDiffusionPipeline.from_single_file(
142
  BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
143
  )
144
  pipe.vae = AutoencoderKL.from_pretrained(
145
  "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
146
- )
 
147
  pipe.scheduler = DPMSolverMultistepScheduler.from_config(
148
  pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
149
  )
 
 
150
  pipe.load_ip_adapter(
151
- "h94/IP-Adapter",
152
- subfolder="models",
153
  weight_name="ip-adapter-plus-face_sd15.bin",
154
  )
 
 
 
155
  pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
156
  pipe.set_ip_adapter_scale(0.65)
157
 
158
- # textual inversion
159
  for emb in EMB_DIR.glob("*.*"):
160
  try:
161
  pipe.load_textual_inversion(emb, token=emb.stem)
162
  print("emb loaded →", emb.stem)
163
  except Exception:
164
  print("emb skip →", emb.name)
 
165
 
166
- # Real‑ESRGAN (CPU)
 
 
 
 
 
 
 
 
 
 
167
  try:
168
  from basicsr.archs.rrdb_arch import RRDBNet
169
  try:
170
  from realesrgan import RealESRGAN
171
  except ImportError:
172
  from realesrgan import RealESRGANer as RealESRGAN
173
-
174
  rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
175
- upsampler = RealESRGAN("cpu", rrdb, scale=8)
176
  upsampler.load_weights(str(RRG_WEIGHTS))
177
  UPSCALE_OK = True
178
  except Exception as e:
179
  print("Real-ESRGAN disabled →", e)
180
  UPSCALE_OK = False
181
 
182
- # compel
183
- compel_proc = Compel(
184
- tokenizer=pipe.tokenizer,
185
- text_encoder=pipe.text_encoder,
186
- truncate_long_prompts=False,
187
- )
188
- print("pipeline ready (CPU) ✔")
189
-
190
- # ---------------------------------------------------------------------------
191
- # 4. プロンプト定義
192
- # ---------------------------------------------------------------------------
193
  BASE_PROMPT = (
194
  "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
195
  "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
@@ -204,35 +205,11 @@ NEG_PROMPT = (
204
  "missing arms, missing legs, skin blemishes, acne, age spot"
205
  )
206
 
207
- # ---------------------------------------------------------------------------
208
- # 5. 生成関数 (GPU 処理部)
209
- # ---------------------------------------------------------------------------
210
- GPU_INITIALISED = False # 一度だけ GPU へ移動するためのフラグ
211
-
212
  @spaces.GPU(duration=60)
213
  def generate(
214
  face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
215
  progress=gr.Progress(track_tqdm=True),
216
  ):
217
- global GPU_INITIALISED, device, dtype, pipe, face_app, upsampler
218
-
219
- if not GPU_INITIALISED:
220
- print("\n--- first GPU initialisation ---")
221
- device = "cuda"
222
- dtype = torch.float16
223
-
224
- pipe.to(device)
225
- pipe.vae.to(device)
226
- face_app.prepare(ctx_id=0, det_size=(640, 640))
227
- if UPSCALE_OK:
228
- try:
229
- upsampler.model = upsampler.model.to(device) # RealESRGANer
230
- upsampler.device = device # for newer API
231
- except Exception:
232
- pass
233
- GPU_INITIALISED = True
234
- print("GPU ready ✔")
235
-
236
  if face_np is None or face_np.size == 0:
237
  raise gr.Error("顔画像をアップロードしてください。")
238
 
@@ -244,7 +221,7 @@ def generate(
244
  pipe.set_ip_adapter_scale(ip_scale)
245
  img_in = Image.fromarray(face_np)
246
 
247
- # compel で長さを揃えバッチ化
248
  prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
249
  prompt_embeds = prompt_embeds.unsqueeze(0)
250
  negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)
@@ -253,6 +230,8 @@ def generate(
253
  prompt_embeds=prompt_embeds,
254
  negative_prompt_embeds=negative_prompt_embeds,
255
  ip_adapter_image=img_in,
 
 
256
  num_inference_steps=int(steps) + 5,
257
  guidance_scale=cfg,
258
  width=int(w),
@@ -272,11 +251,11 @@ def generate(
272
  )
273
  return result
274
 
275
- # ---------------------------------------------------------------------------
276
- # 6. Gradio UI
277
- # ---------------------------------------------------------------------------
278
  with gr.Blocks() as demo:
279
- gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU edition)")
280
  with gr.Row():
281
  with gr.Column():
282
  face_in = gr.Image(label="顔写真", type="numpy")
@@ -302,3 +281,4 @@ with gr.Blocks() as demo:
302
  )
303
 
304
  print("launching …")
 
 
1
+ # app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
2
+ """Persistent-cache backend for InstantID portrait generation.
3
+ * 依存モデルは /data が書込可ならそこへ、それ以外は ~/.cache に保存
4
+ * wget を使った簡易リトライ DL
 
 
 
 
 
5
  """
6
+ # --- ★ Monkey-Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
 
 
 
 
 
 
7
  import types, sys
8
  from torchvision.transforms import functional as F
9
 
10
  mod = types.ModuleType("torchvision.transforms.functional_tensor")
11
+ # 必要なのは rgb_to_grayscale だけなのでこれだけエイリアス
12
  mod.rgb_to_grayscale = F.rgb_to_grayscale
13
  sys.modules["torchvision.transforms.functional_tensor"] = mod
14
  # ---------------------------------------------------------------------------
15
 
16
+ import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
17
  from pathlib import Path
18
  from PIL import Image
19
  from diffusers import (
20
+ StableDiffusionPipeline, ControlNetModel,
21
+ DPMSolverMultistepScheduler, AutoencoderKL,
 
 
22
  )
23
  from compel import Compel
24
  from insightface.app import FaceAnalysis
25
 
26
+ ##############################################################################
27
+ # 0. キャッシュ用ディレクトリ
28
+ ##############################################################################
29
  PERSIST_BASE = Path("/data")
30
  CACHE_ROOT = (
31
  PERSIST_BASE / "instantid_cache"
 
41
  for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
42
  p.mkdir(parents=True, exist_ok=True)
43
 
 
44
  def dl(url: str, dst: Path, attempts: int = 2):
45
  """wget + リトライの簡易ダウンローダ"""
46
  if dst.exists():
 
51
  return
52
  raise RuntimeError(f"download failed → {url}")
53
 
54
+ ##############################################################################
55
+ # 1. 必要アセットのダウンロード
56
+ ##############################################################################
57
  print("— asset check —")
58
 
59
+ # 1-A. ベース checkpoint
60
  BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
61
  dl(
62
  "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
63
  BASE_CKPT,
64
  )
65
 
66
+ # 1-B. FaceID LoRA(Δのみ)
67
  LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
68
  dl(
69
  "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
70
  LORA_FILE,
71
  )
72
 
73
+ # 1-C. textual inversion Embeddings
74
  EMB_URLS = {
75
  "ng_deepnegative_v1_75t.pt": [
76
  "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
 
98
  if idx == len(urls): raise
99
  print(" ↳ fallback URL …")
100
 
101
+ # 1-D. Real-ESRGAN weights (×8)
102
  RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
103
  RRG_URLS = [
104
  "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
 
112
  if idx == len(RRG_URLS): raise
113
  print(" ↳ fallback URL …")
114
 
115
+ ##############################################################################
116
+ # 2. ランタイム初期化
117
+ ##############################################################################
118
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
119
+ dtype = torch.float16 if torch.cuda.is_available() else torch.float32
120
+ print("device:", device, "| dtype:", dtype)
121
+
122
+ providers = (
123
+ ["CUDAExecutionProvider", "CPUExecutionProvider"]
124
+ if torch.cuda.is_available()
125
+ else ["CPUExecutionProvider"]
126
+ )
127
  face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
128
+ face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))
129
 
130
+ # ControlNet + SD パイプライン
131
+ #controlnet = ControlNetModel.from_pretrained(
132
+ # "InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype
133
+ #)
134
  pipe = StableDiffusionPipeline.from_single_file(
135
  BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
136
  )
137
  pipe.vae = AutoencoderKL.from_pretrained(
138
  "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
139
+ ).to(device)
140
+ #pipe.controlnet = controlnet
141
  pipe.scheduler = DPMSolverMultistepScheduler.from_config(
142
  pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
143
  )
144
+
145
+ # --- ここが核心:画像エンコーダ込みで公式レポから直接ロード ------------------
146
  pipe.load_ip_adapter(
147
+ "h94/IP-Adapter", # Hugging Face Hub ID
148
+ subfolder="models", # ip-adapter-plus-face_sd15.bin が入っているフォルダ
149
  weight_name="ip-adapter-plus-face_sd15.bin",
150
  )
151
+ # ---------------------------------------------------------------------------
152
+
153
+ # FaceID LoRA(差分 LoRA のみ)
154
  pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
155
  pipe.set_ip_adapter_scale(0.65)
156
 
157
+ # textual inversion 読み込み
158
  for emb in EMB_DIR.glob("*.*"):
159
  try:
160
  pipe.load_textual_inversion(emb, token=emb.stem)
161
  print("emb loaded →", emb.stem)
162
  except Exception:
163
  print("emb skip →", emb.name)
164
+ pipe.to(device)
165
 
166
+ # compel プロセッサを初期化
167
+ compel_proc = Compel(
168
+ tokenizer=pipe.tokenizer,
169
+ text_encoder=pipe.text_encoder,
170
+ truncate_long_prompts=False # 長いプロンプトを切り捨てない
171
+ )
172
+ print("pipeline ready ✔")
173
+
174
+ ##############################################################################
175
+ # 3. アップスケーラ
176
+ ##############################################################################
177
  try:
178
  from basicsr.archs.rrdb_arch import RRDBNet
179
  try:
180
  from realesrgan import RealESRGAN
181
  except ImportError:
182
  from realesrgan import RealESRGANer as RealESRGAN
 
183
  rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
184
+ upsampler = RealESRGAN(device, rrdb, scale=8)
185
  upsampler.load_weights(str(RRG_WEIGHTS))
186
  UPSCALE_OK = True
187
  except Exception as e:
188
  print("Real-ESRGAN disabled →", e)
189
  UPSCALE_OK = False
190
 
191
+ ##############################################################################
192
+ # 4. プロンプト & 生成関数
193
+ ##############################################################################
 
 
 
 
 
 
 
 
194
  BASE_PROMPT = (
195
  "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
196
  "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
 
205
  "missing arms, missing legs, skin blemishes, acne, age spot"
206
  )
207
 
 
 
 
 
 
208
  @spaces.GPU(duration=60)
209
  def generate(
210
  face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
211
  progress=gr.Progress(track_tqdm=True),
212
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  if face_np is None or face_np.size == 0:
214
  raise gr.Error("顔画像をアップロードしてください。")
215
 
 
221
  pipe.set_ip_adapter_scale(ip_scale)
222
  img_in = Image.fromarray(face_np)
223
 
224
+ # compelで長さを揃え、.unsqueeze(0)でバッチ次元を追加する
225
  prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
226
  prompt_embeds = prompt_embeds.unsqueeze(0)
227
  negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)
 
230
  prompt_embeds=prompt_embeds,
231
  negative_prompt_embeds=negative_prompt_embeds,
232
  ip_adapter_image=img_in,
233
+ #image=img_in,
234
+ #controlnet_conditioning_scale=0.9,
235
  num_inference_steps=int(steps) + 5,
236
  guidance_scale=cfg,
237
  width=int(w),
 
251
  )
252
  return result
253
 
254
+ ##############################################################################
255
+ # 5. Gradio UI
256
+ ##############################################################################
257
  with gr.Blocks() as demo:
258
+ gr.Markdown("# InstantID – Beautiful Realistic Asians v7")
259
  with gr.Row():
260
  with gr.Column():
261
  face_in = gr.Image(label="顔写真", type="numpy")
 
281
  )
282
 
283
  print("launching …")
284
+ demo.queue().launch(show_error=True)