File size: 11,985 Bytes
f47143a
 
 
 
 
 
 
 
 
2da6c3a
f47143a
 
 
 
 
 
 
2da6c3a
 
bebb126
2da6c3a
 
 
 
bebb126
f47143a
bebb126
 
 
f47143a
 
 
 
bebb126
2f16e2f
bebb126
 
f47143a
 
 
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f47143a
2da6c3a
 
 
 
 
 
 
 
 
bebb126
f47143a
 
 
2da6c3a
bebb126
f47143a
2da6c3a
 
 
 
 
bebb126
f47143a
2da6c3a
 
 
 
 
bebb126
f47143a
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bebb126
f47143a
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
bebb126
f47143a
 
 
 
 
 
 
 
 
2da6c3a
f47143a
bebb126
f47143a
2da6c3a
 
 
 
 
f47143a
2da6c3a
 
 
 
f47143a
 
2da6c3a
 
 
 
bebb126
f47143a
2da6c3a
 
 
 
 
 
2f16e2f
f47143a
2da6c3a
 
 
 
 
 
f47143a
2da6c3a
f47143a
2da6c3a
 
 
 
 
bebb126
f47143a
 
 
 
 
 
 
 
 
 
 
2da6c3a
eda7d17
 
 
2da6c3a
 
eda7d17
 
 
2da6c3a
eda7d17
 
2da6c3a
bebb126
f47143a
 
 
 
 
4d7c512
2da6c3a
 
 
 
f47143a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2da6c3a
 
 
 
 
 
 
 
 
 
bebb126
f47143a
7bd2e19
4d7c512
 
2f16e2f
2da6c3a
2f16e2f
 
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bebb126
f47143a
 
 
2da6c3a
f47143a
2da6c3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
"""InstantID × Beautiful Realistic Asians v7 (ZeroGPU‑friendly, persistent cache)

ポイント
---------
* **import spaces を最初に**して ZeroGPU パッチを確実に適用。
* グローバル領域では CPU でモデルをロードし、CUDA への移動は
  `@spaces.GPU` 関数内で一度だけ実行。
* `.to("cuda")` や `torch.cuda.*` を関数外に置かないことで
  `RuntimeError: No CUDA GPUs are available` を回避。
"""

# ---------------------------------------------------------------------------
# 0. 依存ライブラリの読み込み (ZeroGPU パッチ → PyTorch の順)
# ---------------------------------------------------------------------------
import spaces  # ⭐ ZeroGPU は torch より前に必須

# --- ★ Monkey‑Patch: torchvision 0.17+ で消えた functional_tensor を補完 ---
import types, sys
from torchvision.transforms import functional as F

# Compatibility shim for code that still imports
# `torchvision.transforms.functional_tensor`.  The stub is installed only when
# the real module cannot be imported, so a torchvision release that still
# ships it keeps its complete module instead of this one-function stand-in.
try:
    import torchvision.transforms.functional_tensor  # noqa: F401
except ImportError:
    mod = types.ModuleType("torchvision.transforms.functional_tensor")
    # Only `rgb_to_grayscale` is re-exported; it is the single symbol the
    # stub has ever provided in this file.
    mod.rgb_to_grayscale = F.rgb_to_grayscale
    sys.modules["torchvision.transforms.functional_tensor"] = mod
# ---------------------------------------------------------------------------

import os, subprocess, cv2, torch, gradio as gr, numpy as np
from pathlib import Path
from PIL import Image
from diffusers import (
    StableDiffusionPipeline,
    ControlNetModel,
    DPMSolverMultistepScheduler,
    AutoencoderKL,
)
from compel import Compel
from insightface.app import FaceAnalysis

# ---------------------------------------------------------------------------
# 1. キャッシュ用ディレクトリ
# ---------------------------------------------------------------------------
# Use /data as the cache base when it exists and is writable; otherwise fall
# back to a per-user cache directory under the home folder.
PERSIST_BASE = Path("/data")
if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK):
    CACHE_ROOT = PERSIST_BASE / "instantid_cache"
else:
    CACHE_ROOT = Path.home() / ".cache" / "instantid_cache"
print("cache →", CACHE_ROOT)

# Layout of the cache tree.
MODELS_DIR = CACHE_ROOT / "models"
LORA_DIR = MODELS_DIR / "Lora"  # FaceID LoRA and similar files live here
EMB_DIR = CACHE_ROOT / "embeddings"
UPSCALE_DIR = CACHE_ROOT / "realesrgan"

# Create every directory up front so the download step can write into them.
for p in [MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR]:
    os.makedirs(p, exist_ok=True)


def dl(url: str, dst: Path, attempts: int = 2):
    """Download *url* to *dst* with ``wget``, retrying on failure.

    An existing *dst* is treated as cached and left untouched, except for a
    zero-byte regular file, which is treated as the leftover of an earlier
    failed download and fetched again.

    The download is written to a sibling ``<name>.part`` file and renamed onto
    *dst* only after ``wget`` exits successfully with a non-empty file.  This
    matters because ``wget -O`` creates its output file before any data
    arrives: writing straight to *dst* would leave an empty file behind on
    failure, and the next call would report it as cached.

    Args:
        url: Source URL handed to ``wget``.
        dst: Final location of the downloaded file.
        attempts: How many times ``wget`` is invoked before giving up.

    Raises:
        RuntimeError: If every attempt fails.
    """
    if dst.exists():
        is_empty_leftover = dst.is_file() and dst.stat().st_size == 0
        if not is_empty_leftover:
            print("✓", dst.relative_to(CACHE_ROOT))
            return

    tmp = dst.with_name(dst.name + ".part")
    for i in range(1, attempts + 1):
        print(f"⬇ {dst.name} (try {i}/{attempts})")
        ok = subprocess.call(["wget", "-q", "-O", str(tmp), url]) == 0
        if ok and tmp.is_file() and tmp.stat().st_size > 0:
            tmp.replace(dst)  # publish the finished file in a single rename
            return
        # Drop whatever the failed attempt left behind before retrying.
        try:
            tmp.unlink()
        except FileNotFoundError:
            pass
    raise RuntimeError(f"download failed → {url}")

# ---------------------------------------------------------------------------
# 2. 必要アセットのダウンロード
# ---------------------------------------------------------------------------
print("— asset check —")


def _dl_first(urls, dst):
    """Download *dst* from the first URL in *urls* that succeeds.

    Each URL is handed to ``dl`` in order.  A ``RuntimeError`` from any URL
    but the last moves on to the next one; the last failure is re-raised.
    """
    for idx, u in enumerate(urls, 1):
        try:
            dl(u, dst)
            return
        except RuntimeError:
            if idx == len(urls):
                raise
            print("    ↳ fallback URL …")


# 2-A. Base checkpoint
BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
dl(
    "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
    BASE_CKPT,
)

# 2-B. FaceID LoRA
LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
dl(
    "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
    LORA_FILE,
)

# 2-C. Textual-inversion embeddings: file name -> candidate URLs, in order.
# Hugging Face fallbacks use `/resolve/` rather than `/raw/`: for files stored
# with Git LFS, `/raw/` returns the small pointer file, not the weights.
EMB_URLS = {
    "ng_deepnegative_v1_75t.pt": [
        "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
        "https://huggingface.co/mrpxl2/animetarotV51.safetensors/resolve/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt",
    ],
    "badhandv4.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt",
        "https://huggingface.co/nolanaatama/embeddings/resolve/main/badhandv4.pt",
    ],
    "CyberRealistic_Negative-neg.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt",
        # NOTE(review): this fallback points at a `.civitai.info` file, which
        # by its name is metadata rather than the embedding itself, yet it is
        # saved as the `.pt` file.  Left unchanged because the correct URL is
        # not known from this file — replace it with the real embedding.
        "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info",
    ],
    "UnrealisticDream.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt",
        "https://huggingface.co/imagepipeline/UnrealisticDream/resolve/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt",
    ],
}
for fname, urls in EMB_URLS.items():
    _dl_first(urls, EMB_DIR / fname)

# 2-D. Real-ESRGAN weights (x8)
# NOTE(review): the three candidates are differently named weight files;
# confirm they are interchangeable for the upscaler built later in this file.
RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
RRG_URLS = [
    "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
    "https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x8.pth",
    "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth",
]
_dl_first(RRG_URLS, RRG_WEIGHTS)

# ---------------------------------------------------------------------------
# 3. モデル読み込み (すべて CPU)
# ---------------------------------------------------------------------------

device: str = "cpu"      # module-level code always stays on the CPU
dtype  = torch.float32    # generate() rebinds this to float16 on first GPU use

# FaceAnalysis (insightface), restricted to the CPU execution provider and
# rooted at CACHE_ROOT.
# NOTE(review): apart from prepare() here and in generate(), nothing visible in
# this file calls face_app on an image — confirm it is still needed.
providers = ["CPUExecutionProvider"]
face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
face_app.prepare(ctx_id=-1, det_size=(640, 640))

# Stable Diffusion pipeline, built on the CPU from the single-file base
# checkpoint with the safety checker disabled.
pipe = StableDiffusionPipeline.from_single_file(
    BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
)
# Replace the checkpoint's VAE with stabilityai/sd-vae-ft-mse.
pipe.vae = AutoencoderKL.from_pretrained(
    "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
)
# Swap in a DPM-Solver multistep scheduler (SDE DPM-Solver++, Karras sigmas),
# reusing the remaining settings from the pipeline's original scheduler config.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
)
# IP-Adapter weights; generate() feeds the uploaded image to the pipeline
# through `ip_adapter_image`.
pipe.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="models",
    weight_name="ip-adapter-plus-face_sd15.bin",
)
# FaceID LoRA file that the download step placed in LORA_DIR.
pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
# Start-up default only; generate() sets the scale again on every request.
pipe.set_ip_adapter_scale(0.65)

# Textual inversion: register every file in EMB_DIR whose name contains a dot,
# using the file stem as its prompt token.  A file that fails to load is
# reported by name and skipped so start-up continues.
for emb in EMB_DIR.glob("*.*"):
    try:
        pipe.load_textual_inversion(emb, token=emb.stem)
        print("emb loaded →", emb.stem)
    except Exception:
        print("emb skip →", emb.name)

# Real-ESRGAN (CPU).  Optional: any failure in this section (import, model
# construction, weight loading) sets UPSCALE_OK to False, and generate() then
# upscales with a plain Lanczos resize instead.
# NOTE(review): `RealESRGAN(device, model, scale=...)` + `load_weights()` here
# and `upsampler.enhance(...)` in generate() look like the call shapes of two
# different Real-ESRGAN packages — confirm which one is installed.
try:
    from basicsr.archs.rrdb_arch import RRDBNet
    try:
        from realesrgan import RealESRGAN
    except ImportError:
        # Fall back to the class name exported by the other package variant.
        from realesrgan import RealESRGANer as RealESRGAN

    rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
    upsampler = RealESRGAN("cpu", rrdb, scale=8)
    upsampler.load_weights(str(RRG_WEIGHTS))
    UPSCALE_OK = True
except Exception as e:
    print("Real-ESRGAN disabled →", e)
    UPSCALE_OK = False

# Compel turns prompt strings into embeddings using the pipeline's own
# tokenizer and text encoder; long prompts are not truncated.
compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    truncate_long_prompts=False,
)
print("pipeline ready (CPU) ✔")

# ---------------------------------------------------------------------------
# 4. プロンプト定義
# ---------------------------------------------------------------------------
# Positive prompt template.  `{subject}` is filled in by generate(); the terms
# are joined with ", " to form the final prompt string.
_BASE_PROMPT_TERMS = (
    "Cinematic photo",
    "(best quality:1.1)",
    "ultra-realistic",
    "photorealistic of {subject}",
    "natural skin texture",
    "bokeh",
    "standing",
    "front view",
    "full body shot",
    "thighs",
    "Canon EOS R5",
    "85 mm",
    "f/1.4",
    "ISO 200",
    "1/160 s",
    "RAW",
)
BASE_PROMPT = ", ".join(_BASE_PROMPT_TERMS)

# Negative prompt: embedding tokens first, then generic quality/anatomy terms.
# NOTE(review): `BadDream` has no matching entry in EMB_URLS, and the
# downloaded `CyberRealistic_Negative-neg` embedding is not referenced here —
# confirm which of the two was intended.
_NEG_PROMPT_TERMS = (
    "ng_deepnegative_v1_75t",
    "BadDream:0.6",
    "UnrealisticDream:0.8",
    "badhandv4:0.9",
    "(worst quality:2)",
    "(low quality:1.8)",
    "lowres",
    "blurry",
    "jpeg artifacts",
    "painting",
    "sketch",
    "illustration",
    "cartoon",
    "anime",
    "cgi",
    "render",
    "3d",
    "monochrome",
    "grayscale",
    "text",
    "logo",
    "watermark",
    "signature",
    "username",
    "bad anatomy",
    "malformed",
    "deformed",
    "extra limbs",
    "fused fingers",
    "missing fingers",
    "missing arms",
    "missing legs",
    "skin blemishes",
    "acne",
    "age spot",
)
NEG_PROMPT = ", ".join(_NEG_PROMPT_TERMS)

# ---------------------------------------------------------------------------
# 5. 生成関数 (GPU 処理部)
# ---------------------------------------------------------------------------
GPU_INITIALISED = False  # set to True once the models have been moved to CUDA

@spaces.GPU(duration=60)
def generate(
    face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image conditioned on an uploaded face and text prompts.

    On the first valid call the pipeline (and the upscaler, when available) is
    moved to CUDA; later calls reuse that state via ``GPU_INITIALISED``.

    Args:
        face_np: Uploaded image as a NumPy array; passed to the pipeline as the
            IP-Adapter reference image.
        subject: Text substituted for ``{subject}`` in ``BASE_PROMPT``.  Blank
            or ``None`` falls back to ``"a beautiful 20yo woman"``.
        add_prompt: Optional text appended to the positive prompt.
        add_neg: Optional text appended to ``NEG_PROMPT``.
        cfg: Guidance scale passed to the pipeline.
        ip_scale: IP-Adapter scale applied for this request.
        steps: Requested number of inference steps (see note in the body).
        w: Output width in pixels.
        h: Output height in pixels.
        upscale: Whether to enlarge the generated image.
        up_factor: Enlargement factor used when ``upscale`` is true.
        progress: Not referenced in the body; presumably lets Gradio track
            tqdm progress for this call — confirm against Gradio docs.

    Returns:
        The generated (and optionally upscaled) PIL image.

    Raises:
        gr.Error: If no face image was supplied.
    """
    global GPU_INITIALISED, device, dtype, pipe, face_app, upsampler

    # Validate before touching the GPU so a missing image fails fast and does
    # not trigger the one-off model transfer.
    if face_np is None or face_np.size == 0:
        raise gr.Error("顔画像をアップロードしてください。")

    if not GPU_INITIALISED:
        print("\n--- first GPU initialisation ---")
        device = "cuda"
        # NOTE(review): dtype is recorded as float16, but the pipeline is moved
        # without a dtype cast, so its weights keep the dtype they were loaded
        # with — confirm whether half precision was intended.
        dtype  = torch.float16

        pipe.to(device)
        pipe.vae.to(device)
        face_app.prepare(ctx_id=0, det_size=(640, 640))
        if UPSCALE_OK:
            try:
                upsampler.model = upsampler.model.to(device)
                upsampler.device = device
            except Exception as e:
                # Best effort: the upscaler keeps whatever device it is on,
                # but the reason is reported instead of silently dropped.
                print("Real-ESRGAN could not be moved to GPU →", e)
        GPU_INITIALISED = True
        print("GPU ready ✔")

    # `subject` may be None when the function is called without the textbox.
    subject_text = (subject or "").strip() or "a beautiful 20yo woman"
    prompt = BASE_PROMPT.format(subject=subject_text)
    if add_prompt:
        prompt += ", " + add_prompt
    neg = NEG_PROMPT + (", " + add_neg if add_neg else "")

    pipe.set_ip_adapter_scale(ip_scale)
    img_in = Image.fromarray(face_np)

    # Encode both prompts in one Compel call, then give each its own leading
    # batch dimension for the pipeline.
    prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
    prompt_embeds = prompt_embeds.unsqueeze(0)
    negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)

    result = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        ip_adapter_image=img_in,
        # NOTE(review): five steps are added on top of the requested value;
        # the reason is not evident from this file.
        num_inference_steps=int(steps) + 5,
        guidance_scale=cfg,
        width=int(w),
        height=int(h),
    ).images[0]

    if upscale:
        upscaled = None
        if UPSCALE_OK:
            try:
                # The upscaler is fed a BGR array; convert back to RGB after.
                up, _ = upsampler.enhance(
                    cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor
                )
                upscaled = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
            except Exception as e:
                # Use the same Lanczos path as when the upscaler is disabled,
                # rather than discarding an already generated image.
                print("Real-ESRGAN failed, falling back to Lanczos →", e)
        if upscaled is None:
            upscaled = result.resize(
                (int(result.width * up_factor), int(result.height * up_factor)),
                Image.LANCZOS,
            )
        result = upscaled
    return result

# ---------------------------------------------------------------------------
# 6. Gradio UI
# ---------------------------------------------------------------------------
# Two-column layout: every input control on the left, the result on the right.
with gr.Blocks() as demo:
    gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU edition)")
    with gr.Row():
        with gr.Column():
            face_in = gr.Image(label="顔写真", type="numpy")
            subj_in = gr.Textbox(
                label="被写体説明",
                placeholder="e.g. woman in black suit, smiling",
            )
            add_in = gr.Textbox(label="追加プロンプト")
            addneg_in = gr.Textbox(label="追加ネガティブ")
            ip_sld = gr.Slider(
                minimum=0, maximum=1.5, value=0.65, step=0.05, label="IP-Adapter scale"
            )
            cfg_sld = gr.Slider(minimum=1, maximum=15, value=6, step=0.5, label="CFG")
            step_sld = gr.Slider(minimum=10, maximum=50, value=20, step=1, label="Steps")
            w_sld = gr.Slider(minimum=512, maximum=1024, value=512, step=64, label="幅")
            h_sld = gr.Slider(minimum=512, maximum=1024, value=768, step=64, label="高さ")
            up_ck = gr.Checkbox(label="アップスケール", value=True)
            up_fac = gr.Slider(minimum=1, maximum=8, value=2, step=1, label="倍率")
            btn = gr.Button("生成", variant="primary")
        with gr.Column():
            out_img = gr.Image(label="結果")

    # The list order must match generate()'s positional parameters
    # (cfg comes before ip_scale).
    _generate_inputs = [
        face_in,
        subj_in,
        add_in,
        addneg_in,
        cfg_sld,
        ip_sld,
        step_sld,
        w_sld,
        h_sld,
        up_ck,
        up_fac,
    ]
    btn.click(
        fn=generate,
        inputs=_generate_inputs,
        outputs=out_img,
        api_name="predict",
    )

print("launching …")