Spaces:

i0switch
/

my-image-generator

Running on Zero

App Files Files Community

i0switch committed on 20 days ago

Commit

f47143a

verified ·

1 Parent(s): 4330019

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -70

app.py CHANGED Viewed

@@ -1,31 +1,43 @@
-# app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
-"""Persistent-cache backend for InstantID portrait generation.
-   * 依存モデルは /data が書込可ならそこへ、それ以外は ~/.cache に保存
-   * wget を使った簡易リトライ DL
 """
-# --- ★ Monkey-Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
 import types, sys
 from torchvision.transforms import functional as F
 mod = types.ModuleType("torchvision.transforms.functional_tensor")
-# 必要なのは rgb_to_grayscale だけなのでこれだけエイリアス
 mod.rgb_to_grayscale = F.rgb_to_grayscale
 sys.modules["torchvision.transforms.functional_tensor"] = mod
 # ---------------------------------------------------------------------------
-import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
 from pathlib import Path
 from PIL import Image
 from diffusers import (
-    StableDiffusionPipeline, ControlNetModel,
-    DPMSolverMultistepScheduler, AutoencoderKL,
 )
 from compel import Compel
 from insightface.app import FaceAnalysis
-##############################################################################
-# 0. キャッシュ用ディレクトリ
-##############################################################################
 PERSIST_BASE = Path("/data")
 CACHE_ROOT = (
     PERSIST_BASE / "instantid_cache"
@@ -41,6 +53,7 @@ UPSCALE_DIR = CACHE_ROOT / "realesrgan"
 for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
     p.mkdir(parents=True, exist_ok=True)
 def dl(url: str, dst: Path, attempts: int = 2):
     """wget + リトライの簡易ダウンローダ"""
     if dst.exists():
@@ -51,26 +64,26 @@ def dl(url: str, dst: Path, attempts: int = 2):
             return
     raise RuntimeError(f"download failed → {url}")
-##############################################################################
-# 1. 必要アセットのダウンロード
-##############################################################################
 print("— asset check —")
-# 1-A. ベース checkpoint
 BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
 dl(
     "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
     BASE_CKPT,
 )
-# 1-B. FaceID LoRA（Δのみ）
 LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
 dl(
     "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
     LORA_FILE,
 )
-# 1-C. textual inversion Embeddings
 EMB_URLS = {
     "ng_deepnegative_v1_75t.pt": [
         "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
@@ -98,7 +111,7 @@ for fname, urls in EMB_URLS.items():
             if idx == len(urls): raise
             print("    ↳ fallback URL …")
-# 1-D. Real-ESRGAN weights (×8)
 RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
 RRG_URLS = [
     "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
@@ -112,85 +125,71 @@ for idx, link in enumerate(RRG_URLS, 1):
         if idx == len(RRG_URLS): raise
         print("    ↳ fallback URL …")
-##############################################################################
-# 2. ランタイム初期化
-##############################################################################
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-dtype  = torch.float16 if torch.cuda.is_available() else torch.float32
-print("device:", device, "| dtype:", dtype)
-providers = (
-    ["CUDAExecutionProvider", "CPUExecutionProvider"]
-    if torch.cuda.is_available()
-    else ["CPUExecutionProvider"]
-)
 face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
-face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))
-# ControlNet + SD パイプライン
-#controlnet = ControlNetModel.from_pretrained(
-#    "InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype
-#)
 pipe = StableDiffusionPipeline.from_single_file(
     BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
 )
 pipe.vae = AutoencoderKL.from_pretrained(
     "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
-).to(device)
-#pipe.controlnet = controlnet
 pipe.scheduler = DPMSolverMultistepScheduler.from_config(
     pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
 )
-# --- ここが核心：画像エンコーダ込みで公式レポから直接ロード ------------------
 pipe.load_ip_adapter(
-    "h94/IP-Adapter",               # Hugging Face Hub ID
-    subfolder="models",             # ip-adapter-plus-face_sd15.bin が入っているフォルダ
     weight_name="ip-adapter-plus-face_sd15.bin",
 )
-# ---------------------------------------------------------------------------
-# FaceID LoRA（差分 LoRA のみ）
 pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
 pipe.set_ip_adapter_scale(0.65)
-# textual inversion 読み込み
 for emb in EMB_DIR.glob("*.*"):
     try:
         pipe.load_textual_inversion(emb, token=emb.stem)
         print("emb loaded →", emb.stem)
     except Exception:
         print("emb skip →", emb.name)
-pipe.to(device)
-# compel プロセッサを初期化
-compel_proc = Compel(
-    tokenizer=pipe.tokenizer,
-    text_encoder=pipe.text_encoder,
-    truncate_long_prompts=False  # 長いプロンプトを切り捨てない
-)
-print("pipeline ready ✔")
-##############################################################################
-# 3. アップスケーラ
-##############################################################################
 try:
     from basicsr.archs.rrdb_arch import RRDBNet
     try:
         from realesrgan import RealESRGAN
     except ImportError:
         from realesrgan import RealESRGANer as RealESRGAN
     rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
-    upsampler = RealESRGAN(device, rrdb, scale=8)
     upsampler.load_weights(str(RRG_WEIGHTS))
     UPSCALE_OK = True
 except Exception as e:
     print("Real-ESRGAN disabled →", e)
     UPSCALE_OK = False
-##############################################################################
-# 4. プロンプト & 生成関数
-##############################################################################
 BASE_PROMPT = (
     "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
     "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
@@ -205,11 +204,35 @@ NEG_PROMPT = (
     "missing arms, missing legs, skin blemishes, acne, age spot"
 )
 @spaces.GPU(duration=60)
 def generate(
     face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
     progress=gr.Progress(track_tqdm=True),
 ):
     if face_np is None or face_np.size == 0:
         raise gr.Error("顔画像をアップロードしてください。")
@@ -221,7 +244,7 @@ def generate(
     pipe.set_ip_adapter_scale(ip_scale)
     img_in = Image.fromarray(face_np)
-    # compelで長さを揃え、.unsqueeze(0)でバッチ次元を追加する
     prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
     prompt_embeds = prompt_embeds.unsqueeze(0)
     negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)
@@ -230,8 +253,6 @@ def generate(
         prompt_embeds=prompt_embeds,
         negative_prompt_embeds=negative_prompt_embeds,
         ip_adapter_image=img_in,
-        #image=img_in,
-        #controlnet_conditioning_scale=0.9,
         num_inference_steps=int(steps) + 5,
         guidance_scale=cfg,
         width=int(w),
@@ -251,11 +272,11 @@ def generate(
             )
     return result
-##############################################################################
-# 5. Gradio UI
-##############################################################################
 with gr.Blocks() as demo:
-    gr.Markdown("# InstantID – Beautiful Realistic Asians v7")
     with gr.Row():
         with gr.Column():
             face_in   = gr.Image(label="顔写真", type="numpy")
@@ -281,4 +302,3 @@ with gr.Blocks() as demo:
     )
 print("launching …")
-demo.queue().launch(show_error=True)

+"""InstantID × Beautiful Realistic Asians v7 (ZeroGPU‑friendly, persistent cache)
+ポイント
+---------
+* **import spaces を最初に**して ZeroGPU パッチを確実に適用。
+* グローバル領域では CPU でモデルをロードし、CUDA への移動は
+  `@spaces.GPU` 関数内で一度だけ実行。
+* `.to("cuda")` や `torch.cuda.*` を関数外に置かないことで
+  `RuntimeError: No CUDA GPUs are available` を回避。
 """
+# ---------------------------------------------------------------------------
+# 0. 依存ライブラリの読み込み (ZeroGPU パッチ → PyTorch の順)
+# ---------------------------------------------------------------------------
+import spaces  # ⭐ ZeroGPU は torch より前に必須
+# --- ★ Monkey‑Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
 import types, sys
 from torchvision.transforms import functional as F
 mod = types.ModuleType("torchvision.transforms.functional_tensor")
 mod.rgb_to_grayscale = F.rgb_to_grayscale
 sys.modules["torchvision.transforms.functional_tensor"] = mod
 # ---------------------------------------------------------------------------
+import os, subprocess, cv2, torch, gradio as gr, numpy as np
 from pathlib import Path
 from PIL import Image
 from diffusers import (
+    StableDiffusionPipeline,
+    ControlNetModel,
+    DPMSolverMultistepScheduler,
+    AutoencoderKL,
 )
 from compel import Compel
 from insightface.app import FaceAnalysis
+# ---------------------------------------------------------------------------
+# 1. キャッシュ用ディレクトリ
+# ---------------------------------------------------------------------------
 PERSIST_BASE = Path("/data")
 CACHE_ROOT = (
     PERSIST_BASE / "instantid_cache"
 for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
     p.mkdir(parents=True, exist_ok=True)
 def dl(url: str, dst: Path, attempts: int = 2):
     """wget + リトライの簡易ダウンローダ"""
     if dst.exists():
             return
     raise RuntimeError(f"download failed → {url}")
+# ---------------------------------------------------------------------------
+# 2. 必要アセットのダウンロード
+# ---------------------------------------------------------------------------
 print("— asset check —")
+# 2‑A. ベース checkpoint
 BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
 dl(
     "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
     BASE_CKPT,
 )
+# 2‑B. FaceID LoRA（Δのみ）
 LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
 dl(
     "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
     LORA_FILE,
 )
+# 2‑C. textual inversion Embeddings
 EMB_URLS = {
     "ng_deepnegative_v1_75t.pt": [
         "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
             if idx == len(urls): raise
             print("    ↳ fallback URL …")
+# 2‑D. Real‑ESRGAN weights (×8)
 RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
 RRG_URLS = [
     "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
         if idx == len(RRG_URLS): raise
         print("    ↳ fallback URL …")
+# ---------------------------------------------------------------------------
+# 3. モデル読み込み (すべて CPU)
+# ---------------------------------------------------------------------------
+device: str = "cpu"      # グローバルは CPU 固定
+dtype  = torch.float32    # 後で GPU 化する際に float16 に
+# FaceAnalysis (insightface)
+providers = ["CPUExecutionProvider"]
 face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
+face_app.prepare(ctx_id=-1, det_size=(640, 640))
+# Stable Diffusion Pipeline (CPU)
 pipe = StableDiffusionPipeline.from_single_file(
     BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
 )
 pipe.vae = AutoencoderKL.from_pretrained(
     "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
+)
 pipe.scheduler = DPMSolverMultistepScheduler.from_config(
     pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
 )
 pipe.load_ip_adapter(
+    "h94/IP-Adapter",
+    subfolder="models",
     weight_name="ip-adapter-plus-face_sd15.bin",
 )
 pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
 pipe.set_ip_adapter_scale(0.65)
+# textual inversion
 for emb in EMB_DIR.glob("*.*"):
     try:
         pipe.load_textual_inversion(emb, token=emb.stem)
         print("emb loaded →", emb.stem)
     except Exception:
         print("emb skip →", emb.name)
+# Real‑ESRGAN (CPU)
 try:
     from basicsr.archs.rrdb_arch import RRDBNet
     try:
         from realesrgan import RealESRGAN
     except ImportError:
         from realesrgan import RealESRGANer as RealESRGAN
     rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
+    upsampler = RealESRGAN("cpu", rrdb, scale=8)
     upsampler.load_weights(str(RRG_WEIGHTS))
     UPSCALE_OK = True
 except Exception as e:
     print("Real-ESRGAN disabled →", e)
     UPSCALE_OK = False
+# compel
+compel_proc = Compel(
+    tokenizer=pipe.tokenizer,
+    text_encoder=pipe.text_encoder,
+    truncate_long_prompts=False,
+)
+print("pipeline ready (CPU) ✔")
+# ---------------------------------------------------------------------------
+# 4. プロンプト定義
+# ---------------------------------------------------------------------------
 BASE_PROMPT = (
     "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
     "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
     "missing arms, missing legs, skin blemishes, acne, age spot"
 )
+# ---------------------------------------------------------------------------
+# 5. 生成関数 (GPU 処理部)
+# ---------------------------------------------------------------------------
+GPU_INITIALISED = False  # 一度だけ GPU へ移動するためのフラグ
 @spaces.GPU(duration=60)
 def generate(
     face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
     progress=gr.Progress(track_tqdm=True),
 ):
+    global GPU_INITIALISED, device, dtype, pipe, face_app, upsampler
+    if not GPU_INITIALISED:
+        print("\n--- first GPU initialisation ---")
+        device = "cuda"
+        dtype  = torch.float16
+        pipe.to(device)
+        pipe.vae.to(device)
+        face_app.prepare(ctx_id=0, det_size=(640, 640))
+        if UPSCALE_OK:
+            try:
+                upsampler.model = upsampler.model.to(device)  # RealESRGANer
+                upsampler.device = device                    # for newer API
+            except Exception:
+                pass
+        GPU_INITIALISED = True
+        print("GPU ready ✔")
     if face_np is None or face_np.size == 0:
         raise gr.Error("顔画像をアップロードしてください。")
     pipe.set_ip_adapter_scale(ip_scale)
     img_in = Image.fromarray(face_np)
+    # compel で長さを揃えバッチ化
     prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
     prompt_embeds = prompt_embeds.unsqueeze(0)
     negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)
         prompt_embeds=prompt_embeds,
         negative_prompt_embeds=negative_prompt_embeds,
         ip_adapter_image=img_in,
         num_inference_steps=int(steps) + 5,
         guidance_scale=cfg,
         width=int(w),
             )
     return result
+# ---------------------------------------------------------------------------
+# 6. Gradio UI
+# ---------------------------------------------------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU edition)")
     with gr.Row():
         with gr.Column():
             face_in   = gr.Image(label="顔写真", type="numpy")
     )
 print("launching …")