Spaces:

yongyeol
/

mk3d

Runtime error

File size: 7,787 Bytes

# ────────────────────────────────────────────────────────────────────────────
# app.py – Text ➜ 2D (FLUX-mini Kontext) ➜ 3D (Hunyuan3D-2)
#   • Fits into 16 GB system RAM: lightweight models + lazy loading + offload
#   • Requires: GPU (A10G 24 GB ideal, T4 16 GB OK with fp16)
# ────────────────────────────────────────────────────────────────────────────
import os
import tempfile
from typing import List, Tuple

import gradio as gr
import torch
from PIL import Image
from huggingface_hub import login

# ─────────────────────── Auth ───────────────────────
# Read the Hub access token from the environment; the app refuses to start
# without it.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    # Log in for this process only; the token is not stored as a git credential.
    login(token=HF_TOKEN, add_to_git_credential=False)
else:
    raise RuntimeError(
        "HF_TOKEN이 설정되지 않았습니다. Space Settings → Secrets에서 "
        "HF_TOKEN=your_read_token 을 등록한 뒤 재시작하세요."
    )

# ─────────────────────── Device & dtype ───────────────────────
# Use the GPU with half precision when CUDA is present; otherwise fall back to
# the CPU with full precision.
if torch.cuda.is_available():
    DEVICE, DTYPE = "cuda", torch.float16
else:
    DEVICE, DTYPE = "cpu", torch.float32

# ─────────────────────── Lazy loaders ───────────────────────
from diffusers import FluxKontextPipeline, FluxPipeline
from accelerate import init_empty_weights, load_checkpoint_and_dispatch

# Hugging Face Hub repository ids used by the loaders below.
HUNYUAN_REPO = "tencent/Hunyuan3D-2"
MINI_T2I_REPO = "black-forest-labs/FLUX.1-mini"
MINI_KONTEXT_REPO = "black-forest-labs/FLUX.1-Kontext-mini"

# Process-wide pipeline caches. Each one stays None until the matching
# load_* function is called for the first time.
kontext_pipe: "FluxKontextPipeline | None" = None
_text2img_pipe: "FluxPipeline | None" = None
shape_pipe = None
paint_pipe = None


def load_kontext() -> FluxKontextPipeline:
    """Return the shared FLUX Kontext pipeline, loading it on first use.

    The instance is cached in the module-level ``kontext_pipe`` so the
    weights are loaded at most once per process.

    Returns:
        The cached ``FluxKontextPipeline`` with its progress bar disabled.
    """
    global kontext_pipe
    if kontext_pipe is not None:
        return kontext_pipe

    print("[+] Loading FLUX.1-Kontext-mini … (low_cpu_mem_usage)")
    pipe = FluxKontextPipeline.from_pretrained(
        MINI_KONTEXT_REPO,
        torch_dtype=DTYPE,
        # diffusers pipelines do not accept device_map="auto"; the
        # pipeline-level placement strategy is "balanced". Without a GPU no
        # device map is requested and the pipeline stays on the CPU.
        device_map="balanced" if DEVICE == "cuda" else None,
        low_cpu_mem_usage=True,
    )
    pipe.set_progress_bar_config(disable=True)
    # Publish to the cache only once the pipeline is fully configured.
    kontext_pipe = pipe
    return kontext_pipe


def load_text2img() -> FluxPipeline:
    """Return the shared text-to-image pipeline, loading it only when needed.

    The instance is cached in the module-level ``_text2img_pipe`` so the
    weights are loaded at most once per process.

    Returns:
        The cached ``FluxPipeline`` with its progress bar disabled.
    """
    global _text2img_pipe
    if _text2img_pipe is not None:
        return _text2img_pipe

    print("[+] Loading FLUX.1-mini (text → image)…")
    pipe = FluxPipeline.from_pretrained(
        MINI_T2I_REPO,
        torch_dtype=DTYPE,
        # diffusers pipelines do not accept device_map="auto"; the
        # pipeline-level placement strategy is "balanced". Without a GPU no
        # device map is requested and the pipeline stays on the CPU.
        device_map="balanced" if DEVICE == "cuda" else None,
        low_cpu_mem_usage=True,
    )
    pipe.set_progress_bar_config(disable=True)
    # Publish to the cache only once the pipeline is fully configured.
    _text2img_pipe = pipe
    return _text2img_pipe


def load_hunyuan() -> tuple:
    """Return the cached Hunyuan3D ``(shape, paint)`` pipelines.

    Both pipelines are (re)loaded whenever either module-level cache entry is
    still ``None``. The ``hy3dgen`` imports are deferred to the first call so
    the package is only imported when 3D generation is actually requested.

    Returns:
        A ``(shape_pipe, paint_pipe)`` tuple.
    """
    global shape_pipe, paint_pipe
    if shape_pipe is not None and paint_pipe is not None:
        return shape_pipe, paint_pipe

    print("[+] Loading Hunyuan3D-2 (shape & texture)…")
    from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
    from hy3dgen.texgen import Hunyuan3DPaintPipeline

    # NOTE(review): these keyword arguments mirror the diffusers loaders in
    # this file; confirm that both hy3dgen pipelines accept them and expose
    # set_progress_bar_config.
    load_kwargs = {
        "torch_dtype": DTYPE,
        "device_map": "auto",
        "low_cpu_mem_usage": True,
    }

    shape_pipe = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(HUNYUAN_REPO, **load_kwargs)
    shape_pipe.set_progress_bar_config(disable=True)

    paint_pipe = Hunyuan3DPaintPipeline.from_pretrained(HUNYUAN_REPO, **load_kwargs)
    paint_pipe.set_progress_bar_config(disable=True)

    return shape_pipe, paint_pipe

# ───────────────────────────────────────────────
# Helper functions
# ───────────────────────────────────────────────

def generate_single_2d(prompt: str, image: Image.Image | None, guidance_scale: float) -> Image.Image:
    """Produce one 2D image from a prompt, optionally editing a reference image.

    Only the pipeline that is actually needed is loaded: the text-to-image
    pipeline when no reference image is given, the Kontext pipeline otherwise.
    This avoids loading the Kontext weights on the pure text-to-image path.

    Args:
        prompt: Text description passed to the pipeline.
        image: Optional reference image; when ``None`` the image is generated
            from the prompt alone.
        guidance_scale: Guidance scale forwarded to the pipeline call.

    Returns:
        The first image of the pipeline result.
    """
    if image is None:
        # Text → image: use the text2img pipeline.
        pipe = load_text2img()
        return pipe(prompt=prompt, guidance_scale=guidance_scale).images[0]

    # Reference image supplied: edit it with the Kontext pipeline.
    pipe = load_kontext()
    return pipe(image=image, prompt=prompt, guidance_scale=guidance_scale).images[0]


def generate_multiview(prompt: str, base_image: Image.Image, guidance_scale: float) -> List[Image.Image]:
    """Derive left, right and back views from a base image.

    Each extra view is produced by running the Kontext pipeline on
    ``base_image`` with the prompt extended by a view description.

    Args:
        prompt: Base text description.
        base_image: Image used as the front view and as the edit source.
        guidance_scale: Guidance scale forwarded to every pipeline call.

    Returns:
        Four images ordered ``[front, left, right, back]``; the front view is
        ``base_image`` itself.
    """
    editor = load_kontext()
    rendered = [base_image]
    for direction in ("left side view", "right side view", "back view"):
        outcome = editor(
            image=base_image,
            prompt=f"{prompt}, {direction}",
            guidance_scale=guidance_scale,
        )
        rendered.append(outcome.images[0])
    return rendered


def build_3d_mesh(prompt: str, images: List[Image.Image]) -> str:
    """Generate a textured mesh from the image(s) and export it as a GLB file.

    Args:
        prompt: Text description forwarded to the shape pipeline.
        images: One or more conditioning images. A single-element list is
            unwrapped and passed as a bare image; longer lists are passed
            as-is.

    Returns:
        Path to ``mesh.glb`` inside a freshly created temporary directory.
        The directory is not removed by this function.
    """
    shape_stage, texture_stage = load_hunyuan()

    if len(images) > 1:
        conditioning = images
    else:
        conditioning = images[0]

    # The shape stage returns a sequence; its first item is the mesh to texture.
    untextured = shape_stage(image=conditioning, prompt=prompt)[0]
    textured = texture_stage(untextured, image=conditioning)

    glb_path = os.path.join(tempfile.mkdtemp(), "mesh.glb")
    textured.export(glb_path)
    return glb_path

# ──────────────────────────────── UI ────────────────────────────────
# Custom stylesheet passed to gr.Blocks: hides the page footer.
CSS = "footer {visibility:hidden;}"


def workflow(prompt: str, input_image: Image.Image | None, multiview: bool, guidance_scale: float):
    """Run the full prompt → 2D image(s) → 3D mesh pipeline for one request.

    Args:
        prompt: Text description; must contain non-whitespace characters.
        input_image: Optional reference image to edit instead of generating
            from text alone.
        multiview: When true, additional left/right/back views are generated
            and all views are passed to the 3D stage.
        guidance_scale: Guidance scale forwarded to the 2D pipelines.

    Returns:
        ``(images, model_path, model_path)``: the list of 2D images followed
        by the GLB path twice, once for each output component that uses it.

    Raises:
        gr.Error: If the prompt is empty or consists only of whitespace.
    """
    # Reject blank prompts up front so a whitespace-only entry does not
    # trigger the expensive generation pipeline.
    if not prompt or not prompt.strip():
        raise gr.Error("프롬프트(설명)를 입력하세요 📌")

    base_img = generate_single_2d(prompt, input_image, guidance_scale)
    if multiview:
        images = generate_multiview(prompt, base_img, guidance_scale)
    else:
        images = [base_img]

    model_path = build_3d_mesh(prompt, images)
    return images, model_path, model_path


def build_ui():
    """Assemble the Gradio Blocks interface and return it without launching.

    The layout is a two-column row: inputs (prompt, optional reference image,
    multiview toggle, guidance slider, run button) on the left and outputs
    (2D gallery, 3D preview, GLB download) on the right. Clicking the button
    invokes ``workflow``.
    """
    with gr.Blocks(css=CSS, title="Text ➜ 2D ➜ 3D (mini)") as app:
        gr.Markdown("# 🌀 텍스트 → 2D → 3D 생성기 (경량 버전)")
        gr.Markdown("Kontext-mini + Hunyuan3D-2. 16 GB RAM에서도 동작합니다.")

        with gr.Row():
            # Left column: user inputs.
            with gr.Column():
                prompt_box = gr.Textbox(
                    label="프롬프트 / 설명",
                    placeholder="예: 파란 모자를 쓴 귀여운 로봇",
                )
                reference_img = gr.Image(label="(선택) 편집할 참조 이미지", type="pil")
                multiview_toggle = gr.Checkbox(label="멀티뷰(좌/우/후면 포함)", value=True)
                guidance_slider = gr.Slider(
                    minimum=0.5,
                    maximum=7.5,
                    value=2.5,
                    step=0.1,
                    label="Guidance Scale",
                )
                generate_btn = gr.Button("🚀 생성하기", variant="primary")
            # Right column: generated results.
            with gr.Column():
                image_gallery = gr.Gallery(label="🎨 2D 결과", columns=2, height="auto")
                mesh_viewer = gr.Model3D(label="🧱 3D 미리보기", clear_color=[1, 1, 1, 0])
                glb_file = gr.File(label="⬇️ GLB 다운로드")

        # Order must match workflow's parameters and its returned tuple.
        click_inputs = [prompt_box, reference_img, multiview_toggle, guidance_slider]
        click_outputs = [image_gallery, mesh_viewer, glb_file]

        generate_btn.click(
            fn=workflow,
            inputs=click_inputs,
            outputs=click_outputs,
            api_name="generate",
            scroll_to_output=True,
            show_progress="full",
        )
    return app


if __name__ == "__main__":
    # Keep at most three requests waiting. `concurrency_count` is omitted on
    # purpose: Gradio 4 and later reject it, and the default on both old and
    # new Gradio versions is already one running job at a time.
    build_ui().queue(max_size=3).launch()