i0switch committed on
Commit
f47143a
·
verified ·
1 Parent(s): 4330019

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -70
app.py CHANGED
@@ -1,31 +1,43 @@
1
- # app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
2
- """Persistent-cache backend for InstantID portrait generation.
3
- * 依存モデルは /data が書込可ならそこへ、それ以外は ~/.cache に保存
4
- * wget を使った簡易リトライ DL
 
 
 
 
 
5
  """
6
- # --- ★ Monkey-Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
 
 
 
 
 
 
7
  import types, sys
8
  from torchvision.transforms import functional as F
9
 
10
  mod = types.ModuleType("torchvision.transforms.functional_tensor")
11
- # 必要なのは rgb_to_grayscale だけなのでこれだけエイリアス
12
  mod.rgb_to_grayscale = F.rgb_to_grayscale
13
  sys.modules["torchvision.transforms.functional_tensor"] = mod
14
  # ---------------------------------------------------------------------------
15
 
16
- import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
17
  from pathlib import Path
18
  from PIL import Image
19
  from diffusers import (
20
- StableDiffusionPipeline, ControlNetModel,
21
- DPMSolverMultistepScheduler, AutoencoderKL,
 
 
22
  )
23
  from compel import Compel
24
  from insightface.app import FaceAnalysis
25
 
26
- ##############################################################################
27
- # 0. キャッシュ用ディレクトリ
28
- ##############################################################################
29
  PERSIST_BASE = Path("/data")
30
  CACHE_ROOT = (
31
  PERSIST_BASE / "instantid_cache"
@@ -41,6 +53,7 @@ UPSCALE_DIR = CACHE_ROOT / "realesrgan"
41
  for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
42
  p.mkdir(parents=True, exist_ok=True)
43
 
 
44
  def dl(url: str, dst: Path, attempts: int = 2):
45
  """wget + リトライの簡易ダウンローダ"""
46
  if dst.exists():
@@ -51,26 +64,26 @@ def dl(url: str, dst: Path, attempts: int = 2):
51
  return
52
  raise RuntimeError(f"download failed → {url}")
53
 
54
- ##############################################################################
55
- # 1. 必要アセットのダウンロード
56
- ##############################################################################
57
  print("— asset check —")
58
 
59
- # 1-A. ベース checkpoint
60
  BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
61
  dl(
62
  "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
63
  BASE_CKPT,
64
  )
65
 
66
- # 1-B. FaceID LoRA(Δのみ)
67
  LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
68
  dl(
69
  "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
70
  LORA_FILE,
71
  )
72
 
73
- # 1-C. textual inversion Embeddings
74
  EMB_URLS = {
75
  "ng_deepnegative_v1_75t.pt": [
76
  "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
@@ -98,7 +111,7 @@ for fname, urls in EMB_URLS.items():
98
  if idx == len(urls): raise
99
  print(" ↳ fallback URL …")
100
 
101
- # 1-D. Real-ESRGAN weights (×8)
102
  RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
103
  RRG_URLS = [
104
  "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
@@ -112,85 +125,71 @@ for idx, link in enumerate(RRG_URLS, 1):
112
  if idx == len(RRG_URLS): raise
113
  print(" ↳ fallback URL …")
114
 
115
- ##############################################################################
116
- # 2. ランタイム初期化
117
- ##############################################################################
118
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
119
- dtype = torch.float16 if torch.cuda.is_available() else torch.float32
120
- print("device:", device, "| dtype:", dtype)
121
-
122
- providers = (
123
- ["CUDAExecutionProvider", "CPUExecutionProvider"]
124
- if torch.cuda.is_available()
125
- else ["CPUExecutionProvider"]
126
- )
127
  face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
128
- face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))
129
 
130
- # ControlNet + SD パイプライン
131
- #controlnet = ControlNetModel.from_pretrained(
132
- # "InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype
133
- #)
134
  pipe = StableDiffusionPipeline.from_single_file(
135
  BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
136
  )
137
  pipe.vae = AutoencoderKL.from_pretrained(
138
  "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
139
- ).to(device)
140
- #pipe.controlnet = controlnet
141
  pipe.scheduler = DPMSolverMultistepScheduler.from_config(
142
  pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
143
  )
144
-
145
- # --- ここが核心:画像エンコーダ込みで公式レポから直接ロード ------------------
146
  pipe.load_ip_adapter(
147
- "h94/IP-Adapter", # Hugging Face Hub ID
148
- subfolder="models", # ip-adapter-plus-face_sd15.bin が入っているフォルダ
149
  weight_name="ip-adapter-plus-face_sd15.bin",
150
  )
151
- # ---------------------------------------------------------------------------
152
-
153
- # FaceID LoRA(差分 LoRA のみ)
154
  pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
155
  pipe.set_ip_adapter_scale(0.65)
156
 
157
- # textual inversion 読み込み
158
  for emb in EMB_DIR.glob("*.*"):
159
  try:
160
  pipe.load_textual_inversion(emb, token=emb.stem)
161
  print("emb loaded →", emb.stem)
162
  except Exception:
163
  print("emb skip →", emb.name)
164
- pipe.to(device)
165
 
166
- # compel プロセッサを初期化
167
- compel_proc = Compel(
168
- tokenizer=pipe.tokenizer,
169
- text_encoder=pipe.text_encoder,
170
- truncate_long_prompts=False # 長いプロンプトを切り捨てない
171
- )
172
- print("pipeline ready ✔")
173
-
174
- ##############################################################################
175
- # 3. アップスケーラ
176
- ##############################################################################
177
  try:
178
  from basicsr.archs.rrdb_arch import RRDBNet
179
  try:
180
  from realesrgan import RealESRGAN
181
  except ImportError:
182
  from realesrgan import RealESRGANer as RealESRGAN
 
183
  rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
184
- upsampler = RealESRGAN(device, rrdb, scale=8)
185
  upsampler.load_weights(str(RRG_WEIGHTS))
186
  UPSCALE_OK = True
187
  except Exception as e:
188
  print("Real-ESRGAN disabled →", e)
189
  UPSCALE_OK = False
190
 
191
- ##############################################################################
192
- # 4. プロンプト & 生成関数
193
- ##############################################################################
 
 
 
 
 
 
 
 
194
  BASE_PROMPT = (
195
  "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
196
  "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
@@ -205,11 +204,35 @@ NEG_PROMPT = (
205
  "missing arms, missing legs, skin blemishes, acne, age spot"
206
  )
207
 
 
 
 
 
 
208
  @spaces.GPU(duration=60)
209
  def generate(
210
  face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
211
  progress=gr.Progress(track_tqdm=True),
212
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  if face_np is None or face_np.size == 0:
214
  raise gr.Error("顔画像をアップロードしてください。")
215
 
@@ -221,7 +244,7 @@ def generate(
221
  pipe.set_ip_adapter_scale(ip_scale)
222
  img_in = Image.fromarray(face_np)
223
 
224
- # compelで長さを揃え、.unsqueeze(0)でバッチ次元を追加する
225
  prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
226
  prompt_embeds = prompt_embeds.unsqueeze(0)
227
  negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)
@@ -230,8 +253,6 @@ def generate(
230
  prompt_embeds=prompt_embeds,
231
  negative_prompt_embeds=negative_prompt_embeds,
232
  ip_adapter_image=img_in,
233
- #image=img_in,
234
- #controlnet_conditioning_scale=0.9,
235
  num_inference_steps=int(steps) + 5,
236
  guidance_scale=cfg,
237
  width=int(w),
@@ -251,11 +272,11 @@ def generate(
251
  )
252
  return result
253
 
254
- ##############################################################################
255
- # 5. Gradio UI
256
- ##############################################################################
257
  with gr.Blocks() as demo:
258
- gr.Markdown("# InstantID – Beautiful Realistic Asians v7")
259
  with gr.Row():
260
  with gr.Column():
261
  face_in = gr.Image(label="顔写真", type="numpy")
@@ -281,4 +302,3 @@ with gr.Blocks() as demo:
281
  )
282
 
283
  print("launching …")
284
- demo.queue().launch(show_error=True)
 
1
+ """InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
2
+
3
+ ポイント
4
+ ---------
5
+ * **import spaces を最初に**して ZeroGPU パッチを確実に適用。
6
+ * グローバル領域では CPU でモデルをロードし、CUDA への移動は
7
+ `@spaces.GPU` 関数内で一度だけ実行。
8
+ * `.to("cuda")` や `torch.cuda.*` を関数外に置かないことで
9
+ `RuntimeError: No CUDA GPUs are available` を回避。
10
  """
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # 0. 依存ライブラリの読み込み (ZeroGPU パッチ → PyTorch の順)
14
+ # ---------------------------------------------------------------------------
15
+ import spaces # ⭐ ZeroGPU は torch より前に必須
16
+
17
+ # --- ★ Monkey‑Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
18
  import types, sys
19
  from torchvision.transforms import functional as F
20
 
21
  mod = types.ModuleType("torchvision.transforms.functional_tensor")
 
22
  mod.rgb_to_grayscale = F.rgb_to_grayscale
23
  sys.modules["torchvision.transforms.functional_tensor"] = mod
24
  # ---------------------------------------------------------------------------
25
 
26
+ import os, subprocess, cv2, torch, gradio as gr, numpy as np
27
  from pathlib import Path
28
  from PIL import Image
29
  from diffusers import (
30
+ StableDiffusionPipeline,
31
+ ControlNetModel,
32
+ DPMSolverMultistepScheduler,
33
+ AutoencoderKL,
34
  )
35
  from compel import Compel
36
  from insightface.app import FaceAnalysis
37
 
38
+ # ---------------------------------------------------------------------------
39
+ # 1. キャッシュ用ディレクトリ
40
+ # ---------------------------------------------------------------------------
41
  PERSIST_BASE = Path("/data")
42
  CACHE_ROOT = (
43
  PERSIST_BASE / "instantid_cache"
 
53
  for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
54
  p.mkdir(parents=True, exist_ok=True)
55
 
56
+
57
  def dl(url: str, dst: Path, attempts: int = 2):
58
  """wget + リトライの簡易ダウンローダ"""
59
  if dst.exists():
 
64
  return
65
  raise RuntimeError(f"download failed → {url}")
66
 
67
+ # ---------------------------------------------------------------------------
68
+ # 2. 必要アセットのダウンロード
69
+ # ---------------------------------------------------------------------------
70
  print("— asset check —")
71
 
72
+ # 2‑A. ベース checkpoint
73
  BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
74
  dl(
75
  "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
76
  BASE_CKPT,
77
  )
78
 
79
+ # 2‑B. FaceID LoRA(Δのみ)
80
  LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
81
  dl(
82
  "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
83
  LORA_FILE,
84
  )
85
 
86
+ # 2‑C. textual inversion Embeddings
87
  EMB_URLS = {
88
  "ng_deepnegative_v1_75t.pt": [
89
  "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
 
111
  if idx == len(urls): raise
112
  print(" ↳ fallback URL …")
113
 
114
+ # 2‑D. RealESRGAN weights (×8)
115
  RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
116
  RRG_URLS = [
117
  "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
 
125
  if idx == len(RRG_URLS): raise
126
  print(" ↳ fallback URL …")
127
 
128
+ # ---------------------------------------------------------------------------
129
+ # 3. モデル読み込み (すべて CPU)
130
+ # ---------------------------------------------------------------------------
131
+
132
+ device: str = "cpu" # グローバルは CPU 固定
133
+ dtype = torch.float32 # 後で GPU 化する際に float16 に
134
+
135
+ # FaceAnalysis (insightface)
136
+ providers = ["CPUExecutionProvider"]
 
 
 
137
  face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
138
+ face_app.prepare(ctx_id=-1, det_size=(640, 640))
139
 
140
+ # Stable Diffusion Pipeline (CPU)
 
 
 
141
  pipe = StableDiffusionPipeline.from_single_file(
142
  BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
143
  )
144
  pipe.vae = AutoencoderKL.from_pretrained(
145
  "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
146
+ )
 
147
  pipe.scheduler = DPMSolverMultistepScheduler.from_config(
148
  pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
149
  )
 
 
150
  pipe.load_ip_adapter(
151
+ "h94/IP-Adapter",
152
+ subfolder="models",
153
  weight_name="ip-adapter-plus-face_sd15.bin",
154
  )
 
 
 
155
  pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
156
  pipe.set_ip_adapter_scale(0.65)
157
 
158
+ # textual inversion
159
  for emb in EMB_DIR.glob("*.*"):
160
  try:
161
  pipe.load_textual_inversion(emb, token=emb.stem)
162
  print("emb loaded →", emb.stem)
163
  except Exception:
164
  print("emb skip →", emb.name)
 
165
 
166
+ # Real‑ESRGAN (CPU)
 
 
 
 
 
 
 
 
 
 
167
  try:
168
  from basicsr.archs.rrdb_arch import RRDBNet
169
  try:
170
  from realesrgan import RealESRGAN
171
  except ImportError:
172
  from realesrgan import RealESRGANer as RealESRGAN
173
+
174
  rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
175
+ upsampler = RealESRGAN("cpu", rrdb, scale=8)
176
  upsampler.load_weights(str(RRG_WEIGHTS))
177
  UPSCALE_OK = True
178
  except Exception as e:
179
  print("Real-ESRGAN disabled →", e)
180
  UPSCALE_OK = False
181
 
182
+ # compel
183
+ compel_proc = Compel(
184
+ tokenizer=pipe.tokenizer,
185
+ text_encoder=pipe.text_encoder,
186
+ truncate_long_prompts=False,
187
+ )
188
+ print("pipeline ready (CPU) ✔")
189
+
190
+ # ---------------------------------------------------------------------------
191
+ # 4. プロンプト定義
192
+ # ---------------------------------------------------------------------------
193
  BASE_PROMPT = (
194
  "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
195
  "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
 
204
  "missing arms, missing legs, skin blemishes, acne, age spot"
205
  )
206
 
207
+ # ---------------------------------------------------------------------------
208
+ # 5. 生成関数 (GPU 処理部)
209
+ # ---------------------------------------------------------------------------
210
+ GPU_INITIALISED = False # 一度だけ GPU へ移動するためのフラグ
211
+
212
  @spaces.GPU(duration=60)
213
  def generate(
214
  face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
215
  progress=gr.Progress(track_tqdm=True),
216
  ):
217
+ global GPU_INITIALISED, device, dtype, pipe, face_app, upsampler
218
+
219
+ if not GPU_INITIALISED:
220
+ print("\n--- first GPU initialisation ---")
221
+ device = "cuda"
222
+ dtype = torch.float16
223
+
224
+ pipe.to(device)
225
+ pipe.vae.to(device)
226
+ face_app.prepare(ctx_id=0, det_size=(640, 640))
227
+ if UPSCALE_OK:
228
+ try:
229
+ upsampler.model = upsampler.model.to(device) # RealESRGANer
230
+ upsampler.device = device # for newer API
231
+ except Exception:
232
+ pass
233
+ GPU_INITIALISED = True
234
+ print("GPU ready ✔")
235
+
236
  if face_np is None or face_np.size == 0:
237
  raise gr.Error("顔画像をアップロードしてください。")
238
 
 
244
  pipe.set_ip_adapter_scale(ip_scale)
245
  img_in = Image.fromarray(face_np)
246
 
247
+ # compel で長さを揃えバッチ化
248
  prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
249
  prompt_embeds = prompt_embeds.unsqueeze(0)
250
  negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)
 
253
  prompt_embeds=prompt_embeds,
254
  negative_prompt_embeds=negative_prompt_embeds,
255
  ip_adapter_image=img_in,
 
 
256
  num_inference_steps=int(steps) + 5,
257
  guidance_scale=cfg,
258
  width=int(w),
 
272
  )
273
  return result
274
 
275
+ # ---------------------------------------------------------------------------
276
+ # 6. Gradio UI
277
+ # ---------------------------------------------------------------------------
278
  with gr.Blocks() as demo:
279
+ gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU edition)")
280
  with gr.Row():
281
  with gr.Column():
282
  face_in = gr.Image(label="顔写真", type="numpy")
 
302
  )
303
 
304
  print("launching …")