"""InstantID × Beautiful Realistic Asians v7 (ZeroGPU‑friendly, persistent cache) ポイント --------- * **import spaces を最初に**して ZeroGPU パッチを確実に適用。 * グローバル領域では CPU でモデルをロードし、CUDA への移動は `@spaces.GPU` 関数内で一度だけ実行。 * `.to("cuda")` や `torch.cuda.*` を関数外に置かないことで `RuntimeError: No CUDA GPUs are available` を回避。 """ # --------------------------------------------------------------------------- # 0. 依存ライブラリの読み込み (ZeroGPU パッチ → PyTorch の順) # --------------------------------------------------------------------------- import spaces # ⭐ ZeroGPU は torch より前に必須 # --- ★ Monkey‑Patch: torchvision 0.17+ で消えた functional_tensor を補完 --- import types, sys from torchvision.transforms import functional as F mod = types.ModuleType("torchvision.transforms.functional_tensor") mod.rgb_to_grayscale = F.rgb_to_grayscale sys.modules["torchvision.transforms.functional_tensor"] = mod # --------------------------------------------------------------------------- import os, subprocess, cv2, torch, gradio as gr, numpy as np from pathlib import Path from PIL import Image from diffusers import ( StableDiffusionPipeline, ControlNetModel, DPMSolverMultistepScheduler, AutoencoderKL, ) from compel import Compel from insightface.app import FaceAnalysis # --------------------------------------------------------------------------- # 1. キャッシュ用ディレクトリ # --------------------------------------------------------------------------- PERSIST_BASE = Path("/data") CACHE_ROOT = ( PERSIST_BASE / "instantid_cache" if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK) else Path.home() / ".cache" / "instantid_cache" ) print("cache →", CACHE_ROOT) MODELS_DIR = CACHE_ROOT / "models" LORA_DIR = MODELS_DIR / "Lora" # FaceID LoRA などを置く EMB_DIR = CACHE_ROOT / "embeddings" UPSCALE_DIR = CACHE_ROOT / "realesrgan" for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR): p.mkdir(parents=True, exist_ok=True) def dl(url: str, dst: Path, attempts: int = 2): """wget + リトライの簡易ダウンローダ""" if dst.exists(): print("✓", dst.relative_to(CACHE_ROOT)); return for i in range(1, attempts + 1): print(f"⬇ {dst.name} (try {i}/{attempts})") if subprocess.call(["wget", "-q", "-O", str(dst), url]) == 0: return raise RuntimeError(f"download failed → {url}") # --------------------------------------------------------------------------- # 2. 必要アセットのダウンロード # --------------------------------------------------------------------------- print("— asset check —") # 2‑A. ベース checkpoint BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors" dl( "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16", BASE_CKPT, ) # 2‑B. FaceID LoRA(Δのみ) LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors" dl( "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors", LORA_FILE, ) # 2‑C. 
EMB_URLS = {
    "ng_deepnegative_v1_75t.pt": [
        "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
        "https://huggingface.co/mrpxl2/animetarotV51.safetensors/raw/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt",
    ],
    "badhandv4.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt",
        "https://huggingface.co/nolanaatama/embeddings/raw/main/badhandv4.pt",
    ],
    "CyberRealistic_Negative-neg.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt",
        "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info",
    ],
    "UnrealisticDream.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt",
        "https://huggingface.co/imagepipeline/UnrealisticDream/raw/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt",
    ],
}

for fname, urls in EMB_URLS.items():
    dst = EMB_DIR / fname
    for idx, u in enumerate(urls, 1):
        try:
            dl(u, dst)
            break
        except RuntimeError:
            if idx == len(urls):
                raise
            print(" ↳ fallback URL …")

# 2-D. Real-ESRGAN weights (x8)
RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
RRG_URLS = [
    "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
    "https://huggingface.co/ai-forever/Real-ESRGAN/raw/main/RealESRGAN_x8.pth",
    "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth",
]
for idx, link in enumerate(RRG_URLS, 1):
    try:
        dl(link, RRG_WEIGHTS)
        break
    except RuntimeError:
        if idx == len(RRG_URLS):
            raise
        print(" ↳ fallback URL …")

# ---------------------------------------------------------------------------
# 3. Model loading (everything on the CPU)
# ---------------------------------------------------------------------------
device: str = "cpu"    # module-level code stays on the CPU
dtype = torch.float32  # switched to float16 once the pipeline moves to the GPU

# FaceAnalysis (insightface)
providers = ["CPUExecutionProvider"]
face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
face_app.prepare(ctx_id=-1, det_size=(640, 640))

# Stable Diffusion pipeline (CPU)
pipe = StableDiffusionPipeline.from_single_file(
    BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
)
pipe.vae = AutoencoderKL.from_pretrained(
    "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
)
pipe.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="models",
    weight_name="ip-adapter-plus-face_sd15.bin",
)
pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
pipe.set_ip_adapter_scale(0.65)

# Textual inversion embeddings
for emb in EMB_DIR.glob("*.*"):
    try:
        pipe.load_textual_inversion(emb, token=emb.stem)
        print("emb loaded →", emb.stem)
    except Exception:
        print("emb skip →", emb.name)

# Real-ESRGAN (CPU) — RealESRGANer loads the weights from model_path itself
try:
    from basicsr.archs.rrdb_arch import RRDBNet
    from realesrgan import RealESRGANer

    rrdb = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=8)
    upsampler = RealESRGANer(
        scale=8,
        model_path=str(RRG_WEIGHTS),
        model=rrdb,
        half=False,
        device="cpu",
    )
    UPSCALE_OK = True
except Exception as e:
    print("Real-ESRGAN disabled →", e)
    UPSCALE_OK = False

# compel (prompt weighting / long-prompt handling)
compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    truncate_long_prompts=False,
)
print("pipeline ready (CPU) ✔")

# ---------------------------------------------------------------------------
# 4. Prompt definitions
# ---------------------------------------------------------------------------
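# BASE_PROMPT is a template: {subject} is filled in by generate() from the UI
# textbox (falling back to "a beautiful 20yo woman"). NEG_PROMPT references
# several of the textual-inversion tokens downloaded in section 2-C.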
BASE_PROMPT = (
    "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
    "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
    "Canon EOS R5, 85 mm, f/1.4, ISO 200, 1/160 s, RAW"
)
NEG_PROMPT = (
    "ng_deepnegative_v1_75t, BadDream:0.6, UnrealisticDream:0.8, badhandv4:0.9, "
    "(worst quality:2), (low quality:1.8), lowres, blurry, jpeg artifacts, "
    "painting, sketch, illustration, cartoon, anime, cgi, render, 3d, "
    "monochrome, grayscale, text, logo, watermark, signature, username, "
    "bad anatomy, malformed, deformed, extra limbs, fused fingers, missing fingers, "
    "missing arms, missing legs, skin blemishes, acne, age spot"
)

# ---------------------------------------------------------------------------
# 5. Generation function (GPU part)
# ---------------------------------------------------------------------------
GPU_INITIALISED = False  # flag so the move to the GPU happens only once


@spaces.GPU(duration=60)
def generate(
    face_np,
    subject,
    add_prompt,
    add_neg,
    cfg,
    ip_scale,
    steps,
    w,
    h,
    upscale,
    up_factor,
    progress=gr.Progress(track_tqdm=True),
):
    global GPU_INITIALISED, device, dtype, pipe, face_app, upsampler

    if not GPU_INITIALISED:
        print("\n--- first GPU initialisation ---")
        device = "cuda"
        dtype = torch.float16
        pipe.to(device, dtype)       # move the whole pipeline and cast to fp16
        pipe.vae.to(device, dtype)
        face_app.prepare(ctx_id=0, det_size=(640, 640))  # detection still uses the CPU provider set above
        if UPSCALE_OK:
            try:
                upsampler.model = upsampler.model.to(device)  # RealESRGANer
                upsampler.device = device  # for newer API
            except Exception:
                pass
        GPU_INITIALISED = True
        print("GPU ready ✔")

    if face_np is None or face_np.size == 0:
        raise gr.Error("Please upload a face image.")

    prompt = BASE_PROMPT.format(subject=(subject.strip() or "a beautiful 20yo woman"))
    if add_prompt:
        prompt += ", " + add_prompt
    neg = NEG_PROMPT + (", " + add_neg if add_neg else "")

    pipe.set_ip_adapter_scale(ip_scale)
    img_in = Image.fromarray(face_np)

    # compel: encode prompt and negative as one batch so the embeddings share a length
    prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
    prompt_embeds = prompt_embeds.unsqueeze(0)
    negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)

    result = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        ip_adapter_image=img_in,
        num_inference_steps=int(steps) + 5,  # a few extra steps on top of the slider value
        guidance_scale=cfg,
        width=int(w),
        height=int(h),
    ).images[0]

    if upscale:
        if UPSCALE_OK:
            up, _ = upsampler.enhance(
                cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor
            )
            result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
        else:
            result = result.resize(
                (int(result.width * up_factor), int(result.height * up_factor)),
                Image.LANCZOS,
            )
    return result


# ---------------------------------------------------------------------------
# 6. Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU edition)")
    with gr.Row():
        with gr.Column():
            face_in = gr.Image(label="Face photo", type="numpy")
            subj_in = gr.Textbox(
                label="Subject description",
                placeholder="e.g. woman in black suit, smiling",
            )
            add_in = gr.Textbox(label="Additional prompt")
            addneg_in = gr.Textbox(label="Additional negative prompt")
            ip_sld = gr.Slider(0, 1.5, 0.65, step=0.05, label="IP-Adapter scale")
            cfg_sld = gr.Slider(1, 15, 6, step=0.5, label="CFG")
            step_sld = gr.Slider(10, 50, 20, step=1, label="Steps")
            w_sld = gr.Slider(512, 1024, 512, step=64, label="Width")
            h_sld = gr.Slider(512, 1024, 768, step=64, label="Height")
            up_ck = gr.Checkbox(label="Upscale", value=True)
            up_fac = gr.Slider(1, 8, 2, step=1, label="Upscale factor")
            btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            out_img = gr.Image(label="Result")

    # Input order must match generate()'s positional parameters.
    btn.click(
        generate,
        [face_in, subj_in, add_in, addneg_in, cfg_sld, ip_sld, step_sld, w_sld, h_sld, up_ck, up_fac],
        out_img,
        api_name="predict",
    )

print("launching …")
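# Assumed entry point: the section ends right after the "launching …" print, so a
# standard Gradio startup is sketched here (queue() is optional but usual on Spaces).
demo.queue().launch()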