i0switch committed
Commit 4330019 · verified · 1 parent: eda7d17

Create oldapp.py

Files changed (1)
  1. oldapp.py +284 -0
oldapp.py ADDED
@@ -0,0 +1,284 @@
# app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
"""Persistent-cache backend for InstantID portrait generation.
* Model assets are stored under /data when it is writable, otherwise under ~/.cache.
* Downloads use a simple wget helper with retries.
"""
# --- ★ Monkey-patch: shim functional_tensor, which disappeared in torchvision 0.17+ ---
import types, sys
from torchvision.transforms import functional as F

mod = types.ModuleType("torchvision.transforms.functional_tensor")
# Only rgb_to_grayscale is needed, so alias just that function.
mod.rgb_to_grayscale = F.rgb_to_grayscale
sys.modules["torchvision.transforms.functional_tensor"] = mod
# ---------------------------------------------------------------------------

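# NOTE: this shim exists because basicsr (pulled in by Real-ESRGAN in section 3)
# still imports rgb_to_grayscale from torchvision.transforms.functional_tensor,
# a module that torchvision 0.17 removed.
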
import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
from pathlib import Path
from PIL import Image
from diffusers import (
    StableDiffusionPipeline, ControlNetModel,
    DPMSolverMultistepScheduler, AutoencoderKL,
)
from compel import Compel
from insightface.app import FaceAnalysis

##############################################################################
# 0. Cache directories
##############################################################################
PERSIST_BASE = Path("/data")
CACHE_ROOT = (
    PERSIST_BASE / "instantid_cache"
    if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK)
    else Path.home() / ".cache" / "instantid_cache"
)
print("cache →", CACHE_ROOT)

MODELS_DIR = CACHE_ROOT / "models"
LORA_DIR = MODELS_DIR / "Lora"  # FaceID LoRA and similar files go here
EMB_DIR = CACHE_ROOT / "embeddings"
UPSCALE_DIR = CACHE_ROOT / "realesrgan"
for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
    p.mkdir(parents=True, exist_ok=True)

def dl(url: str, dst: Path, attempts: int = 2):
    """Simple downloader: wget with retries."""
    if dst.exists():
        print("✓", dst.relative_to(CACHE_ROOT))
        return
    for i in range(1, attempts + 1):
        print(f"⬇ {dst.name} (try {i}/{attempts})")
        if subprocess.call(["wget", "-q", "-O", str(dst), url]) == 0:
            return
    raise RuntimeError(f"download failed → {url}")

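# dl() leaves already-cached files untouched, retries wget up to `attempts`
# times, and raises RuntimeError on failure so the loops below can move on
# to the next mirror URL.
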
##############################################################################
# 1. Download required assets
##############################################################################
print("— asset check —")

# 1-A. Base checkpoint
BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
dl(
    "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
    BASE_CKPT,
)

# 1-B. FaceID LoRA (delta weights only)
LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
dl(
    "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
    LORA_FILE,
)

# 1-C. Textual inversion embeddings
EMB_URLS = {
    "ng_deepnegative_v1_75t.pt": [
        "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
        "https://huggingface.co/mrpxl2/animetarotV51.safetensors/raw/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt",
    ],
    "badhandv4.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt",
        "https://huggingface.co/nolanaatama/embeddings/raw/main/badhandv4.pt",
    ],
    "CyberRealistic_Negative-neg.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt",
        "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info",
    ],
    "UnrealisticDream.pt": [
        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt",
        "https://huggingface.co/imagepipeline/UnrealisticDream/raw/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt",
    ],
}
for fname, urls in EMB_URLS.items():
    dst = EMB_DIR / fname
    for idx, u in enumerate(urls, 1):
        try:
            dl(u, dst)
            break
        except RuntimeError:
            if idx == len(urls):
                raise
            print(" ↳ fallback URL …")

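# Each EMB_URLS entry maps a local filename to an ordered list of mirrors:
# the first URL that downloads wins, a RuntimeError moves on to the next
# mirror, and only the last failure is allowed to propagate.
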
# 1-D. Real-ESRGAN weights (×8)
RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
RRG_URLS = [
    "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
    "https://huggingface.co/ai-forever/Real-ESRGAN/raw/main/RealESRGAN_x8.pth",
    "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth",
]
for idx, link in enumerate(RRG_URLS, 1):
    try:
        dl(link, RRG_WEIGHTS)
        break
    except RuntimeError:
        if idx == len(RRG_URLS):
            raise
        print(" ↳ fallback URL …")

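# NOTE: judging by their filenames, the three mirrors above do not all ship the
# same checkpoint (x8plus vs. x8 vs. NMKD Superscale), yet whichever succeeds is
# saved as RealESRGAN_x8plus.pth. If the downloaded weights do not match the
# RRDBNet configuration in section 3, loading fails there and the app falls
# back to plain resizing.
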
##############################################################################
# 2. Runtime initialization
##############################################################################
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
print("device:", device, "| dtype:", dtype)

providers = (
    ["CUDAExecutionProvider", "CPUExecutionProvider"]
    if torch.cuda.is_available()
    else ["CPUExecutionProvider"]
)
face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))

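# face_app (InsightFace) is set up for face detection/embedding, but with the
# InstantID ControlNet path commented out below, its output is not currently
# wired into the pipeline; identity guidance comes from the IP-Adapter image
# input alone.
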
# ControlNet + SD pipeline
# controlnet = ControlNetModel.from_pretrained(
#     "InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype
# )
pipe = StableDiffusionPipeline.from_single_file(
    BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
)
pipe.vae = AutoencoderKL.from_pretrained(
    "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
).to(device)
# pipe.controlnet = controlnet
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
)

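# The scheduler config above corresponds roughly to "DPM++ 2M SDE Karras".
# Note that clip_skip is normally a __call__-time argument in diffusers;
# whether from_single_file honours it here depends on the installed
# diffusers version.
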
# --- Key step: load the IP-Adapter, image encoder included, straight from the official repo ---
pipe.load_ip_adapter(
    "h94/IP-Adapter",  # Hugging Face Hub ID
    subfolder="models",  # folder that contains ip-adapter-plus-face_sd15.bin
    weight_name="ip-adapter-plus-face_sd15.bin",
)
# ---------------------------------------------------------------------------

# FaceID LoRA (delta LoRA only)
pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
pipe.set_ip_adapter_scale(0.65)

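# The 0.65 default matches the UI slider below and is overwritten on every
# request by generate() via pipe.set_ip_adapter_scale(ip_scale).
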
# Load textual inversion embeddings
for emb in EMB_DIR.glob("*.*"):
    try:
        pipe.load_textual_inversion(emb, token=emb.stem)
        print("emb loaded →", emb.stem)
    except Exception:
        print("emb skip →", emb.name)
pipe.to(device)

# Initialize the compel prompt processor
compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    truncate_long_prompts=False,  # do not truncate long prompts
)
print("pipeline ready ✔")

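# With truncate_long_prompts=False, compel stitches prompts longer than CLIP's
# 77-token window into multiple chunks. If the positive and negative prompts
# end up with different lengths, compel's pad_conditioning_tensors_to_same_length()
# can be used to align them before calling the pipeline.
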
##############################################################################
# 3. Upscaler
##############################################################################
try:
    from basicsr.archs.rrdb_arch import RRDBNet
    try:
        from realesrgan import RealESRGAN
    except ImportError:
        from realesrgan import RealESRGANer as RealESRGAN
    rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
    upsampler = RealESRGAN(device, rrdb, scale=8)
    upsampler.load_weights(str(RRG_WEIGHTS))
    UPSCALE_OK = True
except Exception as e:
    print("Real-ESRGAN disabled →", e)
    UPSCALE_OK = False

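# The two import candidates come from different projects with different
# constructors and APIs (RealESRGAN vs. realesrgan's RealESRGANer), so the call
# signature above may not match the installed package; any mismatch is caught
# by the except clause and upscaling is simply disabled (UPSCALE_OK = False).
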
##############################################################################
# 4. Prompts & generation function
##############################################################################
BASE_PROMPT = (
    "Cinematic photo, (best quality:1.1), ultra-realistic, photorealistic of {subject}, "
    "natural skin texture, bokeh, standing, front view, full body shot, thighs, "
    "Canon EOS R5, 85 mm, f/1.4, ISO 200, 1/160 s, RAW"
)
NEG_PROMPT = (
    "ng_deepnegative_v1_75t, BadDream:0.6, UnrealisticDream:0.8, badhandv4:0.9, "
    "(worst quality:2), (low quality:1.8), lowres, blurry, jpeg artifacts, "
    "painting, sketch, illustration, cartoon, anime, cgi, render, 3d, "
    "monochrome, grayscale, text, logo, watermark, signature, username, "
    "bad anatomy, malformed, deformed, extra limbs, fused fingers, missing fingers, "
    "missing arms, missing legs, skin blemishes, acne, age spot"
)

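# NEG_PROMPT references textual-inversion tokens by the file stems loaded above
# (ng_deepnegative_v1_75t, UnrealisticDream, badhandv4). "BadDream" is not in
# EMB_URLS, so it is treated as ordinary text unless that embedding is added,
# while CyberRealistic_Negative-neg is downloaded but never referenced here.
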
@spaces.GPU(duration=60)
def generate(
    face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
    progress=gr.Progress(track_tqdm=True),
):
    if face_np is None or face_np.size == 0:
        raise gr.Error("Please upload a face image.")

    prompt = BASE_PROMPT.format(subject=(subject.strip() or "a beautiful 20yo woman"))
    if add_prompt:
        prompt += ", " + add_prompt
    neg = NEG_PROMPT + (", " + add_neg if add_neg else "")

    pipe.set_ip_adapter_scale(ip_scale)
    img_in = Image.fromarray(face_np)

    # Build length-matched embeddings with compel, then add the batch dim with .unsqueeze(0)
    prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
    prompt_embeds = prompt_embeds.unsqueeze(0)
    negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)

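    # Shape check: compel_proc([prompt, neg]) returns a [2, seq_len, dim]
    # tensor; unpacking yields one [seq_len, dim] tensor per prompt, and
    # unsqueeze(0) restores the batch dimension that prompt_embeds /
    # negative_prompt_embeds expect.
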
    result = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        ip_adapter_image=img_in,
        # image=img_in,
        # controlnet_conditioning_scale=0.9,
        num_inference_steps=int(steps) + 5,
        guidance_scale=cfg,
        width=int(w),
        height=int(h),
    ).images[0]

    if upscale:
        if UPSCALE_OK:
            up, _ = upsampler.enhance(
                cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor
            )
            result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
        else:
            result = result.resize(
                (int(result.width * up_factor), int(result.height * up_factor)),
                Image.LANCZOS,
            )
    return result

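# generate() quietly adds 5 to the requested step count, and when Real-ESRGAN
# is unavailable the "upscale" option degrades to a simple PIL LANCZOS resize.
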
##############################################################################
# 5. Gradio UI
##############################################################################
with gr.Blocks() as demo:
    gr.Markdown("# InstantID – Beautiful Realistic Asians v7")
    with gr.Row():
        with gr.Column():
            face_in = gr.Image(label="Face photo", type="numpy")
            subj_in = gr.Textbox(label="Subject description", placeholder="e.g. woman in black suit, smiling")
            add_in = gr.Textbox(label="Additional prompt")
            addneg_in = gr.Textbox(label="Additional negative prompt")
            ip_sld = gr.Slider(0, 1.5, 0.65, step=0.05, label="IP-Adapter scale")
            cfg_sld = gr.Slider(1, 15, 6, step=0.5, label="CFG")
            step_sld = gr.Slider(10, 50, 20, step=1, label="Steps")
            w_sld = gr.Slider(512, 1024, 512, step=64, label="Width")
            h_sld = gr.Slider(512, 1024, 768, step=64, label="Height")
            up_ck = gr.Checkbox(label="Upscale", value=True)
            up_fac = gr.Slider(1, 8, 2, step=1, label="Upscale factor")
            btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            out_img = gr.Image(label="Result")

    btn.click(
        generate,
        [face_in, subj_in, add_in, addneg_in, cfg_sld, ip_sld, step_sld, w_sld, h_sld, up_ck, up_fac],
        out_img,
        api_name="predict",
    )

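# Minimal client-side sketch (assumptions: the Space id below is a placeholder
# and a recent gradio_client is installed; older versions take a plain file
# path instead of handle_file). Argument order follows the btn.click inputs
# above, and api_name="predict" is exposed to clients as "/predict":
#
#   from gradio_client import Client, handle_file
#   client = Client("user/space-name")        # hypothetical Space id
#   image_path = client.predict(
#       handle_file("face.jpg"),              # face photo
#       "woman in black suit, smiling",       # subject description
#       "", "",                               # additional prompt / negative
#       6, 0.65, 20, 512, 768,                # CFG, IP-Adapter scale, steps, width, height
#       True, 2,                              # upscale, factor
#       api_name="/predict",
#   )
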
print("launching …")
demo.queue().launch(show_error=True)