Spaces:

i0switch
/

my-image-generator

Running on Zero

App Files Files Community

i0switch committed 18 days ago

Commit

d58abe4

verified ·

1 Parent(s): 11db7d9

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -90

app.py CHANGED Viewed

@@ -1,43 +1,31 @@
-"""InstantID × Beautiful Realistic Asians v7 (ZeroGPU‑friendly, persistent cache)
-ポイント
----------
-* **import spaces を最初に**して ZeroGPU パッチを確実に適用。
-* グローバル領域では CPU でモデルをロードし、CUDA への移動は
-  `@spaces.GPU` 関数内で一度だけ実行。
-* `.to("cuda")` や `torch.cuda.*` を関数外に置かないことで
-  `RuntimeError: No CUDA GPUs are available` を回避。
 """
-# ---------------------------------------------------------------------------
-# 0. 依存ライブラリの読み込み (ZeroGPU パッチ → PyTorch の順)
-# ---------------------------------------------------------------------------
-import spaces  # ⭐ ZeroGPU は torch より前に必須
-# --- ★ Monkey‑Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
 import types, sys
 from torchvision.transforms import functional as F
 mod = types.ModuleType("torchvision.transforms.functional_tensor")
 mod.rgb_to_grayscale = F.rgb_to_grayscale
 sys.modules["torchvision.transforms.functional_tensor"] = mod
 # ---------------------------------------------------------------------------
-import os, subprocess, cv2, torch, gradio as gr, numpy as np
 from pathlib import Path
 from PIL import Image
 from diffusers import (
-    StableDiffusionPipeline,
-    ControlNetModel,
-    DPMSolverMultistepScheduler,
-    AutoencoderKL,
 )
 from compel import Compel
 from insightface.app import FaceAnalysis
-# ---------------------------------------------------------------------------
-# 1. キャッシュ用ディレクトリ
-# ---------------------------------------------------------------------------
 PERSIST_BASE = Path("/data")
 CACHE_ROOT = (
     PERSIST_BASE / "instantid_cache"
@@ -53,7 +41,6 @@ UPSCALE_DIR = CACHE_ROOT / "realesrgan"
 for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
     p.mkdir(parents=True, exist_ok=True)
 def dl(url: str, dst: Path, attempts: int = 2):
     """wget + リトライの簡易ダウンローダ"""
     if dst.exists():
@@ -64,26 +51,26 @@ def dl(url: str, dst: Path, attempts: int = 2):
             return
     raise RuntimeError(f"download failed → {url}")
-# ---------------------------------------------------------------------------
-# 2. 必要アセットのダウンロード
-# ---------------------------------------------------------------------------
 print("— asset check —")
-# 2‑A. ベース checkpoint
 BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
 dl(
     "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
     BASE_CKPT,
 )
-# 2‑B. FaceID LoRA（Δのみ）
 LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
 dl(
     "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
     LORA_FILE,
 )
-# 2‑C. textual inversion Embeddings
 EMB_URLS = {
     "ng_deepnegative_v1_75t.pt": [
         "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
@@ -111,7 +98,7 @@ for fname, urls in EMB_URLS.items():
             if idx == len(urls): raise
             print("    ↳ fallback URL …")
-# 2‑D. Real‑ESRGAN weights (×8)
 RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
 RRG_URLS = [
     "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
@@ -125,71 +112,85 @@ for idx, link in enumerate(RRG_URLS, 1):
         if idx == len(RRG_URLS): raise
         print("    ↳ fallback URL …")
-# ---------------------------------------------------------------------------
-# 3. モデル読み込み (すべて CPU)
-# ---------------------------------------------------------------------------
-device: str = "cpu"      # グローバルは CPU 固定
-dtype  = torch.float32    # 後で GPU 化する際に float16 に
-# FaceAnalysis (insightface)
-providers = ["CPUExecutionProvider"]
 face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
-face_app.prepare(ctx_id=-1, det_size=(640, 640))
-# Stable Diffusion Pipeline (CPU)
 pipe = StableDiffusionPipeline.from_single_file(
     BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
 )
 pipe.vae = AutoencoderKL.from_pretrained(
     "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
-)
 pipe.scheduler = DPMSolverMultistepScheduler.from_config(
     pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
 )
 pipe.load_ip_adapter(
-    "h94/IP-Adapter",
-    subfolder="models",
     weight_name="ip-adapter-plus-face_sd15.bin",
 )
 pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
 pipe.set_ip_adapter_scale(0.65)
-# textual inversion
 for emb in EMB_DIR.glob("*.*"):
     try:
         pipe.load_textual_inversion(emb, token=emb.stem)
         print("emb loaded →", emb.stem)
     except Exception:
         print("emb skip →", emb.name)
-# Real‑ESRGAN (CPU)
 try:
     from basicsr.archs.rrdb_arch import RRDBNet
     try:
         from realesrgan import RealESRGAN
     except ImportError:
         from realesrgan import RealESRGANer as RealESRGAN
     rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
-    upsampler = RealESRGAN("cpu", rrdb, scale=8)
     upsampler.load_weights(str(RRG_WEIGHTS))
     UPSCALE_OK = True
 except Exception as e:
     print("Real-ESRGAN disabled →", e)
     UPSCALE_OK = False
-# compel
-compel_proc = Compel(
-    tokenizer=pipe.tokenizer,
-    text_encoder=pipe.text_encoder,
-    truncate_long_prompts=False,
-)
-print("pipeline ready (CPU) ✔")
-# ---------------------------------------------------------------------------
-# 4. プロンプト定義
-# ---------------------------------------------------------------------------
 BASE_PROMPT = (
     "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
     "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
@@ -204,35 +205,11 @@ NEG_PROMPT = (
     "missing arms, missing legs, skin blemishes, acne, age spot"
 )
-# ---------------------------------------------------------------------------
-# 5. 生成関数 (GPU 処理部)
-# ---------------------------------------------------------------------------
-GPU_INITIALISED = False  # 一度だけ GPU へ移動するためのフラグ
 @spaces.GPU(duration=60)
 def generate(
     face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
     progress=gr.Progress(track_tqdm=True),
 ):
-    global GPU_INITIALISED, device, dtype, pipe, face_app, upsampler
-    if not GPU_INITIALISED:
-        print("\n--- first GPU initialisation ---")
-        device = "cuda"
-        dtype  = torch.float16
-        pipe.to(device)
-        pipe.vae.to(device)
-        face_app.prepare(ctx_id=0, det_size=(640, 640))
-        if UPSCALE_OK:
-            try:
-                upsampler.model = upsampler.model.to(device)  # RealESRGANer
-                upsampler.device = device                    # for newer API
-            except Exception:
-                pass
-        GPU_INITIALISED = True
-        print("GPU ready ✔")
     if face_np is None or face_np.size == 0:
         raise gr.Error("顔画像をアップロードしてください。")
@@ -244,7 +221,7 @@ def generate(
     pipe.set_ip_adapter_scale(ip_scale)
     img_in = Image.fromarray(face_np)
-    # compel で長さを揃えバッチ化
     prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
     prompt_embeds = prompt_embeds.unsqueeze(0)
     negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)
@@ -253,6 +230,8 @@ def generate(
         prompt_embeds=prompt_embeds,
         negative_prompt_embeds=negative_prompt_embeds,
         ip_adapter_image=img_in,
         num_inference_steps=int(steps) + 5,
         guidance_scale=cfg,
         width=int(w),
@@ -272,11 +251,11 @@ def generate(
             )
     return result
-# ---------------------------------------------------------------------------
-# 6. Gradio UI
-# ---------------------------------------------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU edition)")
     with gr.Row():
         with gr.Column():
             face_in   = gr.Image(label="顔写真", type="numpy")
@@ -302,3 +281,4 @@ with gr.Blocks() as demo:
     )
 print("launching …")

+# app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
+"""Persistent-cache backend for InstantID portrait generation.
+   * 依存モデルは /data が書込可ならそこへ、それ以外は ~/.cache に保存
+   * wget を使った簡易リトライ DL
 """
+# --- ★ Monkey-Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
 import types, sys
 from torchvision.transforms import functional as F
 mod = types.ModuleType("torchvision.transforms.functional_tensor")
+# 必要なのは rgb_to_grayscale だけなのでこれだけエイリアス
 mod.rgb_to_grayscale = F.rgb_to_grayscale
 sys.modules["torchvision.transforms.functional_tensor"] = mod
 # ---------------------------------------------------------------------------
+import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
 from pathlib import Path
 from PIL import Image
 from diffusers import (
+    StableDiffusionPipeline, ControlNetModel,
+    DPMSolverMultistepScheduler, AutoencoderKL,
 )
 from compel import Compel
 from insightface.app import FaceAnalysis
+##############################################################################
+# 0. キャッシュ用ディレクトリ
+##############################################################################
 PERSIST_BASE = Path("/data")
 CACHE_ROOT = (
     PERSIST_BASE / "instantid_cache"
 for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
     p.mkdir(parents=True, exist_ok=True)
 def dl(url: str, dst: Path, attempts: int = 2):
     """wget + リトライの簡易ダウンローダ"""
     if dst.exists():
             return
     raise RuntimeError(f"download failed → {url}")
+##############################################################################
+# 1. 必要アセットのダウンロード
+##############################################################################
 print("— asset check —")
+# 1-A. ベース checkpoint
 BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
 dl(
     "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
     BASE_CKPT,
 )
+# 1-B. FaceID LoRA（Δのみ）
 LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
 dl(
     "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
     LORA_FILE,
 )
+# 1-C. textual inversion Embeddings
 EMB_URLS = {
     "ng_deepnegative_v1_75t.pt": [
         "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
             if idx == len(urls): raise
             print("    ↳ fallback URL …")
+# 1-D. Real-ESRGAN weights (×8)
 RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
 RRG_URLS = [
     "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
         if idx == len(RRG_URLS): raise
         print("    ↳ fallback URL …")
+##############################################################################
+# 2. ランタイム初期化
+##############################################################################
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+dtype  = torch.float16 if torch.cuda.is_available() else torch.float32
+print("device:", device, "| dtype:", dtype)
+providers = (
+    ["CUDAExecutionProvider", "CPUExecutionProvider"]
+    if torch.cuda.is_available()
+    else ["CPUExecutionProvider"]
+)
 face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
+face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))
+# SD pipeline (the ControlNet load below is commented out, so no ControlNet is attached)
+#controlnet = ControlNetModel.from_pretrained(
+#    "InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype
+#)
 pipe = StableDiffusionPipeline.from_single_file(
     BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
 )
 pipe.vae = AutoencoderKL.from_pretrained(
     "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
+).to(device)
+#pipe.controlnet = controlnet
 pipe.scheduler = DPMSolverMultistepScheduler.from_config(
     pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
 )
+# --- ここが核心：画像エンコーダ込みで公式レポから直接ロード ------------------
 pipe.load_ip_adapter(
+    "h94/IP-Adapter",               # Hugging Face Hub ID
+    subfolder="models",             # ip-adapter-plus-face_sd15.bin が入っているフォルダ
     weight_name="ip-adapter-plus-face_sd15.bin",
 )
+# ---------------------------------------------------------------------------
+# FaceID LoRA（差分 LoRA のみ）
 pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
 pipe.set_ip_adapter_scale(0.65)
+# textual inversion 読み込み
 for emb in EMB_DIR.glob("*.*"):
     try:
         pipe.load_textual_inversion(emb, token=emb.stem)
         print("emb loaded →", emb.stem)
     except Exception:
         print("emb skip →", emb.name)
+pipe.to(device)
+# compel プロセッサを初期化
+compel_proc = Compel(
+    tokenizer=pipe.tokenizer,
+    text_encoder=pipe.text_encoder,
+    truncate_long_prompts=False  # 長いプロンプトを切り捨てない
+)
+print("pipeline ready ✔")
+##############################################################################
+# 3. アップスケーラ
+##############################################################################
 try:
     from basicsr.archs.rrdb_arch import RRDBNet
     try:
         from realesrgan import RealESRGAN
     except ImportError:
         from realesrgan import RealESRGANer as RealESRGAN
     rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
+    upsampler = RealESRGAN(device, rrdb, scale=8)
     upsampler.load_weights(str(RRG_WEIGHTS))
     UPSCALE_OK = True
 except Exception as e:
     print("Real-ESRGAN disabled →", e)
     UPSCALE_OK = False
+##############################################################################
+# 4. プロンプト & 生成関数
+##############################################################################
 BASE_PROMPT = (
     "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
     "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
     "missing arms, missing legs, skin blemishes, acne, age spot"
 )
 @spaces.GPU(duration=60)
 def generate(
     face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
     progress=gr.Progress(track_tqdm=True),
 ):
     if face_np is None or face_np.size == 0:
         raise gr.Error("顔画像をアップロードしてください。")
     pipe.set_ip_adapter_scale(ip_scale)
     img_in = Image.fromarray(face_np)
+    # compelで長さを揃え、.unsqueeze(0)でバッチ次元を追加する
     prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
     prompt_embeds = prompt_embeds.unsqueeze(0)
     negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)
         prompt_embeds=prompt_embeds,
         negative_prompt_embeds=negative_prompt_embeds,
         ip_adapter_image=img_in,
+        #image=img_in,
+        #controlnet_conditioning_scale=0.9,
         num_inference_steps=int(steps) + 5,
         guidance_scale=cfg,
         width=int(w),
             )
     return result
+##############################################################################
+# 5. Gradio UI
+##############################################################################
 with gr.Blocks() as demo:
+    gr.Markdown("# InstantID – Beautiful Realistic Asians v7")
     with gr.Row():
         with gr.Column():
             face_in   = gr.Image(label="顔写真", type="numpy")
     )
 print("launching …")
+demo.queue().launch(show_error=True)