import os
import tempfile
from typing import List

import gradio as gr
import torch
from PIL import Image
from huggingface_hub import login


HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN is not set. Register HF_TOKEN=<your_read_token> under "
        "Space Settings → Secrets and restart the Space."
    )
login(token=HF_TOKEN, add_to_git_credential=False)
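# NOTE: FLUX.1-Kontext-dev is a gated repository, so the read token must belong
# to an account that has accepted the model license; FLUX.1-schnell is openly
# licensed, but the same token is passed to every loader below for consistency.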


DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
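# float16 roughly halves weight memory on the GPU; on CPU the code falls back to
# float32 because half-precision inference is poorly supported there.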

from diffusers import FluxKontextPipeline, FluxPipeline


kontext_pipe: FluxKontextPipeline | None = None
_text2img_pipe: FluxPipeline | None = None
shape_pipe = None
paint_pipe = None


MINI_KONTEXT_REPO = "black-forest-labs/FLUX.1-Kontext-dev"
MINI_T2I_REPO = "black-forest-labs/FLUX.1-schnell"
HUNYUAN_REPO = "tencent/Hunyuan3D-2"

DEVICE_MAP_STRATEGY = "balanced"
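# "balanced" asks diffusers/accelerate to spread the pipeline's sub-models across
# the available devices instead of loading everything onto a single GPU.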


def load_kontext() -> FluxKontextPipeline:
    """Lazy-load FLUX.1-Kontext-dev (image-to-image editing)."""
    global kontext_pipe
    if kontext_pipe is None:
        print("[+] Loading FLUX.1-Kontext-dev … (balanced offload)")
        kontext_pipe = FluxKontextPipeline.from_pretrained(
            MINI_KONTEXT_REPO,
            torch_dtype=DTYPE,
            device_map=DEVICE_MAP_STRATEGY,
            low_cpu_mem_usage=True,
            token=HF_TOKEN,
            trust_remote_code=True,
        )
        kontext_pipe.set_progress_bar_config(disable=True)
    return kontext_pipe


def load_text2img() -> FluxPipeline:
    """Lazy-load FLUX.1-schnell (text-to-image)."""
    global _text2img_pipe
    if _text2img_pipe is None:
        print("[+] Loading FLUX.1-schnell (text→image)…")
        _text2img_pipe = FluxPipeline.from_pretrained(
            MINI_T2I_REPO,
            torch_dtype=DTYPE,
            device_map=DEVICE_MAP_STRATEGY,
            low_cpu_mem_usage=True,
            token=HF_TOKEN,
            trust_remote_code=True,
        )
        _text2img_pipe.set_progress_bar_config(disable=True)
    return _text2img_pipe
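
# FLUX.1-schnell is a timestep-distilled model: it is designed for very few
# inference steps (typically around 4) and largely ignores classifier-free
# guidance, so the guidance_scale slider mainly affects the Kontext editing path.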


def load_hunyuan():
    """Lazy-load Hunyuan3D-2 shape & texture pipelines."""
    global shape_pipe, paint_pipe
    if shape_pipe is None or paint_pipe is None:
        print("[+] Loading Hunyuan3D-2 (shape & texture)…")
        from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
        from hy3dgen.texgen import Hunyuan3DPaintPipeline
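        # Two stages: the shape pipeline turns the conditioning image(s) into an
        # untextured mesh, and the paint pipeline textures that mesh from the
        # same image(s).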

        shape_pipe = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
            HUNYUAN_REPO,
            torch_dtype=DTYPE,
            device_map=DEVICE_MAP_STRATEGY,
            low_cpu_mem_usage=True,
            token=HF_TOKEN,
            trust_remote_code=True,
        )
        shape_pipe.set_progress_bar_config(disable=True)

        paint_pipe = Hunyuan3DPaintPipeline.from_pretrained(
            HUNYUAN_REPO,
            torch_dtype=DTYPE,
            device_map=DEVICE_MAP_STRATEGY,
            low_cpu_mem_usage=True,
            token=HF_TOKEN,
            trust_remote_code=True,
        )
        paint_pipe.set_progress_bar_config(disable=True)
    return shape_pipe, paint_pipe


def generate_single_2d(prompt: str, image: Image.Image | None, guidance_scale: float) -> Image.Image:
    """Generate a single 2D image (txt2img or img2img)."""
    if image is None:
        # No reference image: plain text-to-image with FLUX.1-schnell.
        t2i = load_text2img()
        return t2i(prompt=prompt, guidance_scale=guidance_scale).images[0]

    # A reference image was supplied: prompt-guided editing with FLUX.1-Kontext-dev.
    kontext = load_kontext()
    return kontext(image=image, prompt=prompt, guidance_scale=guidance_scale).images[0]


def generate_multiview(prompt: str, base_image: Image.Image, guidance_scale: float) -> List[Image.Image]:
    """Generate 4-view images for better 3D reconstruction."""
    kontext = load_kontext()
    # The front view is the base image itself; Kontext is re-prompted for the other views.
    return [
        base_image,
        kontext(image=base_image, prompt=f"{prompt}, left side view", guidance_scale=guidance_scale).images[0],
        kontext(image=base_image, prompt=f"{prompt}, right side view", guidance_scale=guidance_scale).images[0],
        kontext(image=base_image, prompt=f"{prompt}, back view", guidance_scale=guidance_scale).images[0],
    ]


def build_3d_mesh(prompt: str, images: List[Image.Image]) -> str:
    """Create a GLB mesh from single or multi-view images."""
    shape, paint = load_hunyuan()
    # Pass the full view list when available, otherwise just the single image.
    source = images if len(images) > 1 else images[0]

    mesh = shape(image=source, prompt=prompt)[0]  # geometry
    mesh = paint(mesh, image=source)              # texture

    tmpdir = tempfile.mkdtemp()
    out_path = os.path.join(tmpdir, "mesh.glb")
    mesh.export(out_path)
    return out_path


CSS = """footer {visibility:hidden;}"""


def workflow(prompt: str, input_image: Image.Image | None, multiview: bool, guidance_scale: float):
    if not prompt:
        raise gr.Error("Please enter a prompt (description) 📌")

    base_img = generate_single_2d(prompt, input_image, guidance_scale)
    images = generate_multiview(prompt, base_img, guidance_scale) if multiview else [base_img]

    model_path = build_3d_mesh(prompt, images)
    # The GLB path feeds both the 3D viewer and the download component.
    return images, model_path, model_path


def build_ui():
    with gr.Blocks(css=CSS, title="Text ➜ 2D ➜ 3D (mini)") as demo:
        gr.Markdown("# 🌀 Text → 2D → 3D Generator (lightweight edition)")
        gr.Markdown("Kontext-dev + Hunyuan3D-2. Runs even on 16 GB of RAM.")

        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt / description", placeholder="e.g. a cute robot wearing a blue hat")
                input_image = gr.Image(label="(Optional) reference image to edit", type="pil")
                multiview = gr.Checkbox(label="Multi-view (include left/right/back)", value=True)
                guidance = gr.Slider(0.5, 7.5, 2.5, step=0.1, label="Guidance Scale")
                run_btn = gr.Button("🚀 Generate", variant="primary")
            with gr.Column():
                gallery = gr.Gallery(label="🎨 2D results", columns=2, height="auto")
                model3d = gr.Model3D(label="🧱 3D preview", clear_color=[1, 1, 1, 0])
                download = gr.File(label="⬇️ Download GLB")

        run_btn.click(
            fn=workflow,
            inputs=[prompt, input_image, multiview, guidance],
            outputs=[gallery, model3d, download],
            api_name="generate",
            scroll_to_output=True,
            show_progress="full",
        )
    return demo


if __name__ == "__main__":
    build_ui().queue(max_size=3).launch()
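
# Calling the named API endpoint from outside the UI — a minimal sketch using
# gradio_client; "user/text-2d-3d-mini" is a hypothetical Space id, replace it
# with your own deployment:
#
#   from gradio_client import Client
#   client = Client("user/text-2d-3d-mini")
#   images, model, glb = client.predict(
#       "a cute robot wearing a blue hat",  # prompt
#       None,                               # input_image (optional)
#       True,                               # multiview
#       2.5,                                # guidance_scale
#       api_name="/generate",
#   )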