Spaces:

i0switch
/

my-image-generator

Running on Zero

File size: 11,985 Bytes

f47143a
 
 
 
 
 
 
 
 
2da6c3a
f47143a
 
 
 
 
 
 
2da6c3a
 
bebb126
2da6c3a
 
 
 
bebb126
f47143a
bebb126
 
 
f47143a
 
 
 
bebb126
2f16e2f
bebb126
 
f47143a
 
 
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f47143a
2da6c3a
 
 
 
 
 
 
 
 
bebb126
f47143a
 
 
2da6c3a
bebb126
f47143a
2da6c3a
 
 
 
 
bebb126
f47143a
2da6c3a
 
 
 
 
bebb126
f47143a
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bebb126
f47143a
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
bebb126
f47143a
 
 
 
 
 
 
 
 
2da6c3a
f47143a
bebb126
f47143a
2da6c3a
 
 
 
 
f47143a
2da6c3a
 
 
 
f47143a
 
2da6c3a
 
 
 
bebb126
f47143a
2da6c3a
 
 
 
 
 
2f16e2f
f47143a
2da6c3a
 
 
 
 
 
f47143a
2da6c3a
f47143a
2da6c3a
 
 
 
 
bebb126
f47143a
 
 
 
 
 
 
 
 
 
 
2da6c3a
eda7d17
 
 
2da6c3a
 
eda7d17
 
 
2da6c3a
eda7d17
 
2da6c3a
bebb126
f47143a
 
 
 
 
4d7c512
2da6c3a
 
 
 
f47143a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2da6c3a
 
 
 
 
 
 
 
 
 
bebb126
f47143a
7bd2e19
4d7c512
 
2f16e2f
2da6c3a
2f16e2f
 
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bebb126
f47143a
 
 
2da6c3a
f47143a
2da6c3a

"""InstantID × Beautiful Realistic Asians v7 (ZeroGPU‑friendly, persistent cache)

ポイント
---------
* **import spaces を最初に**して ZeroGPU パッチを確実に適用。
* グローバル領域では CPU でモデルをロードし、CUDA への移動は
  `@spaces.GPU` 関数内で一度だけ実行。
* `.to("cuda")` や `torch.cuda.*` を関数外に置かないことで
  `RuntimeError: No CUDA GPUs are available` を回避。
"""

# ---------------------------------------------------------------------------
# 0. 依存ライブラリの読み込み (ZeroGPU パッチ → PyTorch の順)
# ---------------------------------------------------------------------------
import spaces  # ⭐ ZeroGPU は torch より前に必須

# --- ★ Monkey‑Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
import types, sys
from torchvision.transforms import functional as F

# Install the compatibility shim only when the real module cannot be imported.
# Registering it unconditionally would replace a genuine
# torchvision.transforms.functional_tensor (still shipped by older torchvision
# releases) with a stub that exposes a single function.
try:
    import torchvision.transforms.functional_tensor  # noqa: F401
except ImportError:
    mod = types.ModuleType("torchvision.transforms.functional_tensor")
    # The stub re-exports only rgb_to_grayscale from transforms.functional.
    mod.rgb_to_grayscale = F.rgb_to_grayscale
    sys.modules["torchvision.transforms.functional_tensor"] = mod
# ---------------------------------------------------------------------------

import os, subprocess, cv2, torch, gradio as gr, numpy as np
from pathlib import Path
from PIL import Image
from diffusers import (
    StableDiffusionPipeline,
    ControlNetModel,
    DPMSolverMultistepScheduler,
    AutoencoderKL,
)
from compel import Compel
from insightface.app import FaceAnalysis

# ---------------------------------------------------------------------------
# 1. キャッシュ用ディレクトリ
# ---------------------------------------------------------------------------
# Prefer the persistent volume when it exists and is writable; otherwise fall
# back to a per-user cache directory under the home folder.
PERSIST_BASE = Path("/data")
if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK):
    CACHE_ROOT = PERSIST_BASE / "instantid_cache"
else:
    CACHE_ROOT = Path.home() / ".cache" / "instantid_cache"
print("cache →", CACHE_ROOT)

# Sub-directories used by the rest of the script.
MODELS_DIR = CACHE_ROOT / "models"
LORA_DIR = MODELS_DIR / "Lora"  # LoRA weight files (e.g. the FaceID LoRA) live here
EMB_DIR = CACHE_ROOT / "embeddings"
UPSCALE_DIR = CACHE_ROOT / "realesrgan"

# Create every directory up front; already-existing ones are left alone.
for cache_dir in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
    cache_dir.mkdir(parents=True, exist_ok=True)


def dl(url: str, dst: Path, attempts: int = 2) -> None:
    """Download ``url`` to ``dst`` with wget, retrying on failure.

    The download is written to a sibling ``<name>.part`` file and renamed onto
    ``dst`` only after wget exits successfully. ``wget -O`` creates (or
    truncates) its output file even when the transfer fails, so writing to
    ``dst`` directly would leave an empty or partial file behind that later
    calls would mistake for a cached asset.

    Args:
        url: Source URL handed to wget.
        dst: Final destination path.
        attempts: Number of wget invocations to try before giving up.

    Raises:
        RuntimeError: If every attempt exits with a non-zero status.
    """
    # A zero-byte file is treated as "not downloaded" so that leftovers from
    # an earlier failed run get replaced instead of being reused.
    if dst.exists() and dst.stat().st_size > 0:
        try:
            shown = dst.relative_to(CACHE_ROOT)
        except ValueError:
            # dst is outside the cache root; show the full path instead.
            shown = dst
        print("✓", shown)
        return

    tmp = dst.with_name(dst.name + ".part")
    try:
        for i in range(1, attempts + 1):
            print(f"⬇ {dst.name} (try {i}/{attempts})")
            if subprocess.call(["wget", "-q", "-O", str(tmp), url]) == 0:
                tmp.replace(dst)
                return
    finally:
        # Never leave a partial download behind (no-op after a successful rename).
        tmp.unlink(missing_ok=True)
    raise RuntimeError(f"download failed → {url}")

# ---------------------------------------------------------------------------
# 2. 必要アセットのダウンロード
# ---------------------------------------------------------------------------
print("— asset check —")


def _dl_first_available(urls, dst: Path) -> None:
    """Download ``dst`` from the first URL in ``urls`` that succeeds.

    After a failed attempt, whatever is left at ``dst`` is removed before the
    next URL is tried. ``dl`` only raises when ``dst`` was not already cached,
    so anything found there after a failure is junk from the failed transfer
    and would otherwise make the next ``dl`` call report the asset as present.

    Raises:
        RuntimeError: Re-raised from ``dl`` when the last URL also fails.
    """
    last = len(urls)
    for idx, url in enumerate(urls, 1):
        try:
            dl(url, dst)
            return
        except RuntimeError:
            dst.unlink(missing_ok=True)
            if idx == last:
                raise
            print("    ↳ fallback URL …")


# 2-A. Base checkpoint
BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
_dl_first_available(
    ["https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16"],
    BASE_CKPT,
)

# 2-B. FaceID LoRA (delta weights only)
LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
_dl_first_available(
    ["https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors"],
    LORA_FILE,
)

# 2-C. Textual-inversion embeddings: file name -> candidate URLs, tried in order.
# Hugging Face fallbacks use /resolve/ rather than /raw/: for Git-LFS-tracked
# files /raw/ serves the small LFS pointer text, not the binary.
EMB_URLS = {
    "ng_deepnegative_v1_75t.pt": [
        "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
        "https://huggingface.co/mrpxl2/animetarotV51.safetensors/resolve/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt",
    ],
    "badhandv4.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt",
        "https://huggingface.co/nolanaatama/embeddings/resolve/main/badhandv4.pt",
    ],
    "CyberRealistic_Negative-neg.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt",
        # NOTE(review): this fallback points at a ".civitai.info" file, which
        # looks like metadata rather than an embedding — confirm and replace
        # with the actual weights URL.
        "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info",
    ],
    "UnrealisticDream.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt",
        "https://huggingface.co/imagepipeline/UnrealisticDream/resolve/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt",
    ],
}
for fname, urls in EMB_URLS.items():
    _dl_first_available(urls, EMB_DIR / fname)

# 2-D. Real-ESRGAN weights (x8)
RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
RRG_URLS = [
    "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
    "https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x8.pth",
    "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth",
]
_dl_first_available(RRG_URLS, RRG_WEIGHTS)

# ---------------------------------------------------------------------------
# 3. モデル読み込み (すべて CPU)
# ---------------------------------------------------------------------------

device: str = "cpu"      # everything at module scope is loaded on the CPU
dtype  = torch.float32    # generate() rebinds this to float16 on first GPU use

# FaceAnalysis (insightface), CPU execution provider only
providers = ["CPUExecutionProvider"]
face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
face_app.prepare(ctx_id=-1, det_size=(640, 640))

# Stable Diffusion pipeline (CPU). Paths are passed as str, matching the
# other loader calls in this file.
# NOTE(review): clip_skip is passed to the loader here; confirm it has any
# effect at load time for the installed diffusers version.
pipe = StableDiffusionPipeline.from_single_file(
    str(BASE_CKPT), torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
)
# Replace the checkpoint's VAE with the sd-vae-ft-mse weights.
pipe.vae = AutoencoderKL.from_pretrained(
    "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
)
# NOTE(review): the adapter weights are "ip-adapter-plus-face" while the LoRA
# loaded below is the "faceid-plusv2" one — confirm this pairing is intended.
pipe.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="models",
    weight_name="ip-adapter-plus-face_sd15.bin",
)
pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
pipe.set_ip_adapter_scale(0.65)

# Textual inversion: every file in EMB_DIR is registered under its stem.
# Sorted so the load order is the same on every start; a file that fails to
# load is skipped, with the reason printed.
for emb in sorted(EMB_DIR.glob("*.*")):
    try:
        pipe.load_textual_inversion(str(emb), token=emb.stem)
        print("emb loaded →", emb.stem)
    except Exception as exc:
        print("emb skip →", emb.name, "|", exc)

# Real-ESRGAN (CPU). Any failure here disables the upscaler instead of
# aborting start-up; generate() then falls back to a plain resize.
# NOTE(review): RealESRGAN and RealESRGANer presumably come from different
# packages with different constructor signatures — confirm this call matches
# the installed one.
try:
    from basicsr.archs.rrdb_arch import RRDBNet
    try:
        from realesrgan import RealESRGAN
    except ImportError:
        from realesrgan import RealESRGANer as RealESRGAN

    rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
    upsampler = RealESRGAN("cpu", rrdb, scale=8)
    upsampler.load_weights(str(RRG_WEIGHTS))
    UPSCALE_OK = True
except Exception as e:
    print("Real-ESRGAN disabled →", e)
    UPSCALE_OK = False

# compel: builds prompt embeddings from the pipeline's tokenizer/text encoder
# without truncating long prompts.
compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    truncate_long_prompts=False,
)
print("pipeline ready (CPU) ✔")

# ---------------------------------------------------------------------------
# 4. プロンプト定義
# ---------------------------------------------------------------------------
# Positive prompt template; "{subject}" is filled in with str.format().
BASE_PROMPT = "".join(
    [
        # overall look and the subject slot
        "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, ",
        # rendering details
        "natural skin texture, bokeh, ",
        # pose and framing
        "standing, front view, full body shot, thighs, ",
        # camera settings
        "Canon EOS R5, 85 mm, f/1.4, ISO 200, 1/160 s, RAW",
    ]
)

# Negative prompt.
# NOTE(review): "BadDream" is referenced here, but no file of that name
# appears among the embeddings downloaded by this script — confirm.
NEG_PROMPT = "".join(
    [
        # embedding tokens
        "ng_deepnegative_v1_75t, BadDream:0.6, UnrealisticDream:0.8, badhandv4:0.9, ",
        # image quality
        "(worst quality:2), (low quality:1.8), lowres, blurry, jpeg artifacts, ",
        # non-photographic styles
        "painting, sketch, illustration, cartoon, anime, cgi, render, 3d, monochrome, grayscale, ",
        # overlays
        "text, logo, watermark, signature, username, ",
        # anatomy
        "bad anatomy, malformed, deformed, extra limbs, fused fingers, missing fingers, ",
        "missing arms, missing legs, ",
        # skin
        "skin blemishes, acne, age spot",
    ]
)

# ---------------------------------------------------------------------------
# 5. 生成関数 (GPU 処理部)
# ---------------------------------------------------------------------------
GPU_INITIALISED = False  # set to True once the models have been moved to the GPU

@spaces.GPU(duration=60)
def generate(
    face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image conditioned on a face photo and a text prompt.

    On the first call the pipeline (and, when available, the Real-ESRGAN
    model) is moved to CUDA; later calls reuse that state.

    Args:
        face_np: Face photo as a numpy array (the UI passes ``type="numpy"``).
        subject: Text substituted into ``BASE_PROMPT``; a default subject is
            used when empty or ``None``.
        add_prompt: Optional text appended to the positive prompt.
        add_neg: Optional text appended to ``NEG_PROMPT``.
        cfg: Guidance scale passed to the pipeline.
        ip_scale: Value passed to ``pipe.set_ip_adapter_scale``.
        steps: Requested step count; 5 is added before calling the pipeline.
        w: Output width, converted to int.
        h: Output height, converted to int.
        upscale: Whether to enlarge the result.
        up_factor: Enlargement factor.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        The generated (and optionally enlarged) ``PIL.Image``.

    Raises:
        gr.Error: If no face image was supplied.
    """
    global GPU_INITIALISED, device, dtype, pipe, face_app, upsampler

    # Validate the input before the (expensive) one-time GPU initialisation.
    if face_np is None or face_np.size == 0:
        raise gr.Error("顔画像をアップロードしてください。")

    if not GPU_INITIALISED:
        print("\n--- first GPU initialisation ---")
        device = "cuda"
        dtype  = torch.float16

        # Apply the dtype together with the device: previously float16 was
        # assigned to `dtype` but never used, so the pipeline stayed in fp32.
        pipe.to(device, dtype)
        pipe.vae.to(device)
        # NOTE(review): face_app is created with only the CPU execution
        # provider and is not used elsewhere in this function — confirm this
        # re-prepare is still needed.
        face_app.prepare(ctx_id=0, det_size=(640, 640))
        if UPSCALE_OK:
            try:
                upsampler.model = upsampler.model.to(device)  # RealESRGANer
                upsampler.device = device                    # for newer API
            except Exception as exc:
                # Best effort: keep going, but report why the move failed
                # instead of swallowing the error silently.
                print("Real-ESRGAN not moved to GPU →", exc)
        GPU_INITIALISED = True
        print("GPU ready ✔")

    # `subject` may be None when the function is called through the API.
    subject_text = (subject or "").strip() or "a beautiful 20yo woman"
    prompt = BASE_PROMPT.format(subject=subject_text)
    if add_prompt:
        prompt += ", " + add_prompt
    neg = NEG_PROMPT + (", " + add_neg if add_neg else "")

    pipe.set_ip_adapter_scale(ip_scale)
    img_in = Image.fromarray(face_np)

    # Encode both prompts in one compel call, then give each embedding a
    # leading batch dimension for the pipeline.
    prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
    prompt_embeds = prompt_embeds.unsqueeze(0)
    negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)

    result = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        ip_adapter_image=img_in,
        # NOTE(review): 5 extra steps are added on top of the UI value —
        # confirm this offset is intentional.
        num_inference_steps=int(steps) + 5,
        guidance_scale=cfg,
        width=int(w),
        height=int(h),
    ).images[0]

    if upscale:
        if UPSCALE_OK:
            # The upsampler is fed a BGR array; convert to BGR and back.
            up, _ = upsampler.enhance(
                cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor
            )
            result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
        else:
            # Real-ESRGAN unavailable: fall back to Lanczos resampling.
            result = result.resize(
                (int(result.width * up_factor), int(result.height * up_factor)),
                Image.LANCZOS,
            )
    return result

# ---------------------------------------------------------------------------
# 6. Gradio UI
# ---------------------------------------------------------------------------
# Two-column layout: inputs and the generate button on the left, the result
# image on the right.
with gr.Blocks() as demo:
    gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU edition)")
    with gr.Row():
        with gr.Column():
            # Input widgets; each one feeds one parameter of generate().
            face_in   = gr.Image(label="顔写真", type="numpy")
            subj_in   = gr.Textbox(label="被写体説明", placeholder="e.g. woman in black suit, smiling")
            add_in    = gr.Textbox(label="追加プロンプト")
            addneg_in = gr.Textbox(label="追加ネガティブ")
            ip_sld    = gr.Slider(0, 1.5, 0.65, step=0.05, label="IP-Adapter scale")
            cfg_sld   = gr.Slider(1, 15, 6, step=0.5, label="CFG")
            step_sld  = gr.Slider(10, 50, 20, step=1, label="Steps")
            w_sld     = gr.Slider(512, 1024, 512, step=64, label="幅")
            h_sld     = gr.Slider(512, 1024, 768, step=64, label="高さ")
            up_ck     = gr.Checkbox(label="アップスケール", value=True)
            up_fac    = gr.Slider(1, 8, 2, step=1, label="倍率")
            btn       = gr.Button("生成", variant="primary")
        with gr.Column():
            out_img = gr.Image(label="結果")

    # The input list must stay in the same order as generate()'s positional
    # parameters (note: cfg comes before ip_scale).
    btn.click(
        generate,
        [face_in, subj_in, add_in, addneg_in, cfg_sld, ip_sld, step_sld, w_sld, h_sld, up_ck, up_fac],
        out_img,
        api_name="predict",
    )

# NOTE(review): no launch() call is visible up to this line — presumably it
# follows below; confirm.
print("launching …")