Spaces:

yongyeol
/

mk3d

Runtime error

App Files Community

yongyeol committed on Jul 7

Commit

7591227

verified ·

1 Parent(s): 42194c1

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -98

app.py CHANGED Viewed

@@ -1,22 +1,8 @@
-# ───────────────────────────────────────────────────────────
-# app.py – Gradio Space: Text ➜ 2D (Kontext) ➜ 3D (Hunyuan3D)
-# -----------------------------------------------------------
-# Requirements (add to requirements.txt):
-#   torch>=2.2.0
-#   diffusers>=0.27.0
-#   hy3dgen   # Hunyuan3D official PyPI after Jan‑2025
-#   trimesh
-#   gradio==4.26.0
-#   pillow
-# -----------------------------------------------------------
-# NOTE: • Set the following secrets in the Space **Settings → Secrets**
-#         HF_TOKEN            – your Hugging Face access token (for gated models)
-#         BFL_API_KEY         – optional, required if using Black‑Forest Labs usage tracking
-#       • GPU (A10G/16 GB↑) is strongly recommended.
-#       • Hunyuan3D installs a CUDA‑based custom rasteriser at runtime; build
-#         wheels are provided on Linux/Windows. See model card instructions.
-# ---------------------------------------------------------------------------
 import os
 import tempfile
 from typing import List, Tuple
@@ -24,138 +10,155 @@ from typing import List, Tuple
 import gradio as gr
 import torch
 from PIL import Image
-from huggingface_hub import login as hf_login
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise RuntimeError(
         "HF_TOKEN이 설정되지 않았습니다. Space Settings → Secrets에서 "
         "HF_TOKEN=your_read_token 을 등록한 뒤 재시작하세요."
     )
-from huggingface_hub import login
 login(token=HF_TOKEN, add_to_git_credential=False)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
-# ─────────── Load FLUX .1 Kontext (2D) ───────────
-from diffusers import FluxKontextPipeline, FluxPipeline  # FluxPipeline = text‑to‑image variant
-print("[+] Loading FLUX.1 Kontext [dev] …")
-kontext_pipe = FluxKontextPipeline.from_pretrained(
-    "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=DTYPE
-).to(DEVICE)
-kontext_pipe.set_progress_bar_config(disable=True)
-print("[+] Loading FLUX.1 [dev] (text‑to‑image) …")
-text2img_pipe = FluxPipeline.from_pretrained(
-    "black-forest-labs/FLUX.1-dev", torch_dtype=DTYPE
-).to(DEVICE)
-text2img_pipe.set_progress_bar_config(disable=True)
-# ─────────── Load Hunyuan3D‑2 (3D) ───────────
-print("[+] Loading Hunyuan3D‑2 shape+texture … (this may take a while)")
-from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
-from hy3dgen.texgen import Hunyuan3DPaintPipeline
-shape_pipe = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
-    "tencent/Hunyuan3D-2", torch_dtype=DTYPE
-).to(DEVICE)
-shape_pipe.set_progress_bar_config(disable=True)
-paint_pipe = Hunyuan3DPaintPipeline.from_pretrained(
-    "tencent/Hunyuan3D-2", torch_dtype=DTYPE
-).to(DEVICE)
-paint_pipe.set_progress_bar_config(disable=True)
 # ───────────────────────────────────────────────
 # Helper functions
 # ───────────────────────────────────────────────
 def generate_single_2d(prompt: str, image: Image.Image | None, guidance_scale: float) -> Image.Image:
-    """Either edit an existing image via Kontext or generate a fresh one via Flux text2img."""
     if image is None:
-        result = text2img_pipe(prompt=prompt, guidance_scale=guidance_scale).images[0]
     else:
-        result = kontext_pipe(image=image, prompt=prompt, guidance_scale=guidance_scale).images[0]
     return result
 def generate_multiview(prompt: str, base_image: Image.Image, guidance_scale: float) -> List[Image.Image]:
-    """Generate four canonical views (front / back / left / right) by re‑prompting Kontext."""
     views = [
-        ("front view", base_image),
-        (
-            "left side view",
-            kontext_pipe(image=base_image, prompt=f"{prompt}, left side view", guidance_scale=guidance_scale).images[0],
-        ),
-        (
-            "right side view",
-            kontext_pipe(image=base_image, prompt=f"{prompt}, right side view", guidance_scale=guidance_scale).images[0],
-        ),
-        (
-            "back view",
-            kontext_pipe(image=base_image, prompt=f"{prompt}, back view", guidance_scale=guidance_scale).images[0],
-        ),
     ]
-    # Return only images, keep order [front, left, right, back]
-    return [v[1] for v in views]
 def build_3d_mesh(prompt: str, images: List[Image.Image]) -> str:
-    """Call Hunyuan3D pipelines to build geometry then paint texture. Returns path to GLB."""
-    # For single‑view use first image; multi‑view (≤6) accepted by Hunyuan3D
     single_or_multi = images if len(images) > 1 else images[0]
-    mesh = shape_pipe(image=single_or_multi, prompt=prompt)[0]
-    mesh = paint_pipe(mesh, image=single_or_multi)
     tmpdir = tempfile.mkdtemp()
     out_path = os.path.join(tmpdir, "mesh.glb")
-    mesh.export(out_path)  # trimesh export
     return out_path
-# ─────────── Gradio interface ───────────
-CSS = """
-footer {visibility: hidden;}
-"""
-def workflow(prompt: str, input_image: Image.Image | None, multiview: bool, guidance_scale: float) -> Tuple[List[Image.Image], str, str]:
-    """Main inference wrapper."""
     if not prompt:
         raise gr.Error("프롬프트(설명)를 입력하세요 📌")
-    # 1️⃣ 2D Generation / Editing
     base_img = generate_single_2d(prompt, input_image, guidance_scale)
-    images = [base_img]
-    if multiview:
-        images = generate_multiview(prompt, base_img, guidance_scale)
-    # 2️⃣ 3D Generation
     model_path = build_3d_mesh(prompt, images)
-    return images, model_path, model_path  # gallery, viewer, file download
 def build_ui():
-    with gr.Blocks(css=CSS, title="Text ➜ 2D ➜ 3D (Kontext × Hunyuan3D)") as demo:
-        gr.Markdown("# 🌀 텍스트 → 2D → 3D 생성기")
-        gr.Markdown(
-            "Kontext로 일관된 2D 이미지를 만든 뒤, Hunyuan3D‑2로 텍스처 3D 메시에스를 생성합니다.\n"
-            "⏱️ 첫 실행은 모델 로딩으로 시간이 걸립니다."
-        )
         with gr.Row():
             with gr.Column():
                 prompt = gr.Textbox(label="프롬프트 / 설명", placeholder="예: 파란 모자를 쓴 귀여운 로봇")
                 input_image = gr.Image(label="(선택) 편집할 참조 이미지", type="pil")
-                multiview = gr.Checkbox(label="멀티뷰(좌/우/후면 포함) 3D 품질 향상", value=True)
-                guidance = gr.Slider(0.5, 7.5, 2.5, step=0.1, label="Guidance Scale (Kontext)")
                 run_btn = gr.Button("🚀 생성하기", variant="primary")
             with gr.Column():
-                gallery = gr.Gallery(label="🎨 2D 결과", show_label=True, columns=2, height="auto")
                 model3d = gr.Model3D(label="🧱 3D 미리보기", clear_color=[1, 1, 1, 0])
                 download = gr.File(label="⬇️ GLB 다운로드")
@@ -167,7 +170,6 @@ def build_ui():
             scroll_to_output=True,
             show_progress="full",
         )
     return demo

+# ────────────────────────────────────────────────────────────────────────────
+# app.py – Text ➜ 2D (FLUX-mini Kontext) ➜ 3D (Hunyuan3D-2)
+#   • Fits into 16 GB system RAM: lightweight models + lazy loading + offload
+#   • Requires: GPU (A10G 24 GB ideal, T4 16 GB OK with fp16)
+# ────────────────────────────────────────────────────────────────────────────
 import os
 import tempfile
 from typing import List, Tuple
 import gradio as gr
 import torch
 from PIL import Image
+from huggingface_hub import login
+# ─────────────────────── Auth ───────────────────────
+# Read the Hub token from the environment and abort at import time when it is
+# missing, so the failure is reported before any model loading is attempted.
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise RuntimeError(
         "HF_TOKEN이 설정되지 않았습니다. Space Settings → Secrets에서 "
         "HF_TOKEN=your_read_token 을 등록한 뒤 재시작하세요."
     )
+# Log in to the Hub with that token; add_to_git_credential=False is passed so
+# the token is not added to the git credential store.
 login(token=HF_TOKEN, add_to_git_credential=False)
+# ─────────────────────── Device & dtype ───────────────────────
+# CUDA with float16 when a GPU is visible, otherwise CPU with float32.
+# NOTE(review): DEVICE is not referenced by the loader functions below, which
+# pass device_map instead — confirm whether it is still needed.
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
+# ─────────────────────── Lazy loaders ───────────────────────
+from diffusers import FluxKontextPipeline, FluxPipeline
+from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+# Global caches
+# Each slot starts as None and is filled by its load_* function on first use,
+# so no model weights are loaded when the module is imported.
+kontext_pipe = None  # type: FluxKontextPipeline | None
+_text2img_pipe = None  # type: FluxPipeline | None
+shape_pipe = None
+paint_pipe = None
+# Hub repository ids passed to from_pretrained by the loaders.
+# NOTE(review): confirm that the two "-mini" repositories exist on the Hub and
+# are readable with the configured token.
+MINI_KONTEXT_REPO = "black-forest-labs/FLUX.1-Kontext-mini"
+MINI_T2I_REPO = "black-forest-labs/FLUX.1-mini"
+HUNYUAN_REPO = "tencent/Hunyuan3D-2"
+def load_kontext() -> FluxKontextPipeline:
+    """Return the shared Kontext image-editing pipeline, creating it on first use.
+
+    The pipeline is kept in the module-level ``kontext_pipe`` so later calls
+    reuse the same instance instead of loading the weights again.
+
+    Returns:
+        The cached ``FluxKontextPipeline`` built from ``MINI_KONTEXT_REPO``.
+    """
+    global kontext_pipe
+    if kontext_pipe is None:
+        print("[+] Loading FLUX.1-Kontext-mini … (low_cpu_mem_usage)")
+        pipe = FluxKontextPipeline.from_pretrained(
+            MINI_KONTEXT_REPO,
+            torch_dtype=DTYPE,
+            # Pipeline-level loading takes a named placement strategy;
+            # "auto" is not one of the strategies diffusers accepts here.
+            device_map="balanced",
+            low_cpu_mem_usage=True,
+        )
+        pipe.set_progress_bar_config(disable=True)
+        # Publish to the cache only once the pipeline is fully configured.
+        kontext_pipe = pipe
+    return kontext_pipe
+def load_text2img() -> FluxPipeline:
+    """Return the shared text-to-image pipeline, loading it only when needed.
+
+    The pipeline is kept in the module-level ``_text2img_pipe`` so later calls
+    reuse the same instance instead of loading the weights again.
+
+    Returns:
+        The cached ``FluxPipeline`` built from ``MINI_T2I_REPO``.
+    """
+    global _text2img_pipe
+    if _text2img_pipe is None:
+        print("[+] Loading FLUX.1-mini (text → image)…")
+        pipe = FluxPipeline.from_pretrained(
+            MINI_T2I_REPO,
+            torch_dtype=DTYPE,
+            # Pipeline-level loading takes a named placement strategy;
+            # "auto" is not one of the strategies diffusers accepts here.
+            device_map="balanced",
+            low_cpu_mem_usage=True,
+        )
+        pipe.set_progress_bar_config(disable=True)
+        # Publish to the cache only once the pipeline is fully configured.
+        _text2img_pipe = pipe
+    return _text2img_pipe
+def load_hunyuan() -> tuple:
+    """Return the cached ``(shape_pipe, paint_pipe)`` pair, loading what is missing.
+
+    The two pipelines are loaded independently: if one is already cached
+    (for example because a previous call failed while loading the other),
+    only the missing one is loaded. The hy3dgen imports stay inside the
+    function so the package is only imported when 3D generation is requested.
+
+    Returns:
+        A 2-tuple ``(shape_pipe, paint_pipe)``.
+    """
+    global shape_pipe, paint_pipe
+    if shape_pipe is None or paint_pipe is None:
+        print("[+] Loading Hunyuan3D-2 (shape & texture)…")
+    # NOTE(review): confirm that the hy3dgen from_pretrained methods accept
+    # torch_dtype / device_map / low_cpu_mem_usage and that the pipelines
+    # expose set_progress_bar_config; the arguments are kept as committed.
+    if shape_pipe is None:
+        from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
+        loaded_shape = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
+            HUNYUAN_REPO,
+            torch_dtype=DTYPE,
+            device_map="auto",
+            low_cpu_mem_usage=True,
+        )
+        loaded_shape.set_progress_bar_config(disable=True)
+        # Cache only after configuration succeeded.
+        shape_pipe = loaded_shape
+    if paint_pipe is None:
+        from hy3dgen.texgen import Hunyuan3DPaintPipeline
+        loaded_paint = Hunyuan3DPaintPipeline.from_pretrained(
+            HUNYUAN_REPO,
+            torch_dtype=DTYPE,
+            device_map="auto",
+            low_cpu_mem_usage=True,
+        )
+        loaded_paint.set_progress_bar_config(disable=True)
+        paint_pipe = loaded_paint
+    return shape_pipe, paint_pipe
 # ───────────────────────────────────────────────
 # Helper functions
 # ───────────────────────────────────────────────
 def generate_single_2d(prompt: str, image: Image.Image | None, guidance_scale: float) -> Image.Image:
+    """Produce the base 2D image for a prompt.
+
+    Args:
+        prompt: Text description passed to the selected pipeline.
+        image: Optional reference image. When given it is edited by the
+            Kontext pipeline; when ``None`` a new image is generated from
+            the prompt alone.
+        guidance_scale: Forwarded unchanged to the selected pipeline.
+
+    Returns:
+        The first image of the pipeline output.
+    """
     if image is None:
+        # Text → image path: only the text-to-image pipeline is needed, so the
+        # Kontext pipeline is not loaded here.
+        t2i = load_text2img()
+        result = t2i(prompt=prompt, guidance_scale=guidance_scale).images[0]
     else:
+        # Load Kontext only when there is a reference image to edit, keeping
+        # the lazy loaders lazy.
+        kontext = load_kontext()
+        result = kontext(image=image, prompt=prompt, guidance_scale=guidance_scale).images[0]
     return result
 def generate_multiview(prompt: str, base_image: Image.Image, guidance_scale: float) -> List[Image.Image]:
+    """Build a four-image view set from one base image.
+
+    The base image is used as the front view. The left, right and back views
+    are produced by running the Kontext pipeline on the base image with the
+    view name appended to the prompt.
+
+    Returns:
+        Images in the order [front, left, right, back].
+    """
+    kontext = load_kontext()
+    view_prompts = (
+        f"{prompt}, left side view",
+        f"{prompt}, right side view",
+        f"{prompt}, back view",
+    )
     views = [
+        base_image,
+        *(
+            kontext(image=base_image, prompt=text, guidance_scale=guidance_scale).images[0]
+            for text in view_prompts
+        ),
     ]
+    return views
 def build_3d_mesh(prompt: str, images: List[Image.Image]) -> str:
+    """Generate a textured mesh from the 2D images and export it as GLB.
+
+    Args:
+        prompt: Forwarded to the shape pipeline.
+        images: One or more images. A single-element list is passed on as
+            the bare image; longer lists are passed on as a list.
+
+    Returns:
+        Path of the exported ``mesh.glb`` inside a new temporary directory.
+        The directory is not removed by this function.
+    """
+    shape_stage, paint_stage = load_hunyuan()
     single_or_multi = images if len(images) > 1 else images[0]
+    # NOTE(review): confirm the shape pipeline accepts a `prompt` keyword.
+    untextured = shape_stage(image=single_or_multi, prompt=prompt)[0]
+    mesh = paint_stage(untextured, image=single_or_multi)
     tmpdir = tempfile.mkdtemp()
     out_path = os.path.join(tmpdir, "mesh.glb")
+    mesh.export(out_path)
     return out_path
+# ──────────────────────────────── UI ────────────────────────────────
+# Stylesheet handed to gr.Blocks; it hides the page footer.
+CSS = """footer {visibility:hidden;}"""
+def workflow(prompt: str, input_image: Image.Image | None, multiview: bool, guidance_scale: float):
+    """Run the full text → 2D → 3D pipeline for one request.
+
+    Args:
+        prompt: Required text description; an empty value raises ``gr.Error``.
+        input_image: Optional reference image for the 2D step.
+        multiview: When true, the side and back views are generated as well.
+        guidance_scale: Forwarded to the 2D generation helpers.
+
+    Returns:
+        ``(images, model_path, model_path)``. The path is returned twice,
+        presumably for the 3D viewer and the download component — the
+        click wiring is outside this view.
+    """
     if not prompt:
         raise gr.Error("프롬프트(설명)를 입력하세요 📌")
     base_img = generate_single_2d(prompt, input_image, guidance_scale)
+    if multiview:
+        images = generate_multiview(prompt, base_img, guidance_scale)
+    else:
+        images = [base_img]
     model_path = build_3d_mesh(prompt, images)
+    return images, model_path, model_path
 def build_ui():
+    with gr.Blocks(css=CSS, title="Text ➜ 2D ➜ 3D (mini)") as demo:
+        gr.Markdown("# 🌀 텍스트 → 2D → 3D 생성기 (경량 버전)")
+        gr.Markdown("Kontext-mini + Hunyuan3D-2. 16 GB RAM에서도 동작합니다.")
         with gr.Row():
             with gr.Column():
                 prompt = gr.Textbox(label="프롬프트 / 설명", placeholder="예: 파란 모자를 쓴 귀여운 로봇")
                 input_image = gr.Image(label="(선택) 편집할 참조 이미지", type="pil")
+                multiview = gr.Checkbox(label="멀티뷰(좌/우/후면 포함)", value=True)
+                guidance = gr.Slider(0.5, 7.5, 2.5, step=0.1, label="Guidance Scale")
                 run_btn = gr.Button("🚀 생성하기", variant="primary")
             with gr.Column():
+                gallery = gr.Gallery(label="🎨 2D 결과", columns=2, height="auto")
                 model3d = gr.Model3D(label="🧱 3D 미리보기", clear_color=[1, 1, 1, 0])
                 download = gr.File(label="⬇️ GLB 다운로드")
             scroll_to_output=True,
             show_progress="full",
         )
     return demo