# ───────────────────────────────────────────────────────────
# app.py – Gradio Space: Text ➜ 2D (Kontext) ➜ 3D (Hunyuan3D)
# -----------------------------------------------------------
# Requirements (add to requirements.txt):
#   torch>=2.2.0
#   diffusers>=0.27.0
#   hy3dgen            # Hunyuan3D official PyPI after Jan-2025
#   trimesh
#   gradio==4.26.0
#   pillow
# -----------------------------------------------------------
# NOTE: • Set the following secrets in the Space **Settings → Secrets**
#           HF_TOKEN    – your Hugging Face access token (for gated models)
#           BFL_API_KEY – optional, required if using Black-Forest Labs usage tracking
#       • GPU (A10G/16 GB↑) is strongly recommended.
#       • Hunyuan3D installs a CUDA-based custom rasteriser at runtime; build
#         wheels are provided on Linux/Windows. See model card instructions.
# NOTE(review): FluxKontextPipeline only exists in recent diffusers releases,
#       so the `diffusers>=0.27.0` floor above is probably too low — confirm
#       the minimum version against the diffusers release notes.
# ---------------------------------------------------------------------------
import os
import tempfile
from typing import List, Tuple

import gradio as gr
import torch
from PIL import Image
from huggingface_hub import login as hf_login

# ─────────── Login / device ───────────
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    # Authenticate only when a token is configured; skipped otherwise.
    hf_login(token=HF_TOKEN, add_to_git_credential=False)

# Half precision on GPU, full precision on CPU.
# NOTE(review): FLUX checkpoints are commonly run in bfloat16 — confirm that
# float16 gives acceptable output before relying on it.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

# ─────────── Load FLUX.1 Kontext (2D) ───────────
# Kept after the login step so the statement order matches the original script.
from diffusers import FluxKontextPipeline, FluxPipeline  # noqa: E402  (FluxPipeline = text-to-image variant)

print("[+] Loading FLUX.1 Kontext [dev] …")
kontext_pipe = FluxKontextPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=DTYPE
).to(DEVICE)
kontext_pipe.set_progress_bar_config(disable=True)

print("[+] Loading FLUX.1 [dev] (text‑to‑image) …")
text2img_pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=DTYPE
).to(DEVICE)
text2img_pipe.set_progress_bar_config(disable=True)

# ─────────── Load Hunyuan3D-2 (3D) ───────────
print("[+] Loading Hunyuan3D‑2 shape+texture … (this may take a while)")
# Imported after the status message so the log line is printed before any
# import-time work hy3dgen may perform.
from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline  # noqa: E402
from hy3dgen.texgen import Hunyuan3DPaintPipeline  # noqa: E402

shape_pipe = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
    "tencent/Hunyuan3D-2", torch_dtype=DTYPE
).to(DEVICE)
shape_pipe.set_progress_bar_config(disable=True)

paint_pipe = Hunyuan3DPaintPipeline.from_pretrained(
    "tencent/Hunyuan3D-2", torch_dtype=DTYPE
).to(DEVICE)
paint_pipe.set_progress_bar_config(disable=True)


# ───────────────────────────────────────────────
# Helper functions
# ───────────────────────────────────────────────
def generate_single_2d(prompt: str, image: Image.Image | None, guidance_scale: float) -> Image.Image:
    """Produce one 2D image for the prompt.

    Args:
        prompt: Text description passed to the pipeline.
        image: Optional reference image. When given, it is edited with the
            Kontext pipeline; when ``None``, a new image is generated with
            the FLUX text-to-image pipeline.
        guidance_scale: Guidance scale forwarded to whichever pipeline runs.

    Returns:
        The first image of the pipeline output.
    """
    if image is None:
        return text2img_pipe(prompt=prompt, guidance_scale=guidance_scale).images[0]
    return kontext_pipe(image=image, prompt=prompt, guidance_scale=guidance_scale).images[0]


def generate_multiview(prompt: str, base_image: Image.Image, guidance_scale: float) -> List[Image.Image]:
    """Generate additional views of ``base_image`` by re-prompting Kontext.

    The base image is used as the front view; the left, right and back views
    are each produced by appending the view name to the prompt.

    Args:
        prompt: Text description shared by all views.
        base_image: Image treated as the front view and used as the
            reference for every Kontext call.
        guidance_scale: Guidance scale forwarded to Kontext.

    Returns:
        Images in the order ``[front, left, right, back]``.
    """
    views = [base_image]
    # Order matters: callers receive [front, left, right, back].
    for view_name in ("left side view", "right side view", "back view"):
        edited = kontext_pipe(
            image=base_image,
            prompt=f"{prompt}, {view_name}",
            guidance_scale=guidance_scale,
        ).images[0]
        views.append(edited)
    return views


def build_3d_mesh(prompt: str, images: List[Image.Image]) -> str:
    """Build geometry with the shape pipeline, texture it, and export a GLB.

    Args:
        prompt: Text description forwarded to the shape pipeline.
        images: One or more input views. A single image is passed on its
            own; several images are passed as a list.

    Returns:
        Path to the exported ``mesh.glb`` inside a fresh temporary directory.

    Raises:
        ValueError: If ``images`` is empty.
    """
    if not images:
        raise ValueError("build_3d_mesh requires at least one input image")

    # Single view -> bare image; multiple views -> the list itself.
    # NOTE(review): confirm against the hy3dgen docs that the shape pipeline
    # accepts a list of views and a `prompt` keyword in this form.
    single_or_multi = images if len(images) > 1 else images[0]

    mesh = shape_pipe(image=single_or_multi, prompt=prompt)[0]
    mesh = paint_pipe(mesh, image=single_or_multi)

    # The directory is intentionally not removed here: the returned path is
    # handed to the UI components, which read the file after this returns.
    tmpdir = tempfile.mkdtemp()
    out_path = os.path.join(tmpdir, "mesh.glb")
    mesh.export(out_path)  # trimesh export
    return out_path


# ─────────── Gradio interface ───────────
CSS = """
footer {visibility: hidden;}
"""


def workflow(
    prompt: str,
    input_image: Image.Image | None,
    multiview: bool,
    guidance_scale: float,
) -> Tuple[List[Image.Image], str, str]:
    """Run the full text → 2D → 3D pipeline for one request.

    Args:
        prompt: Text description; must contain non-whitespace characters.
        input_image: Optional reference image to edit instead of generating
            from scratch.
        multiview: When true, generate left/right/back views in addition to
            the base image before building the mesh.
        guidance_scale: Guidance scale forwarded to the 2D pipelines.

    Returns:
        ``(images, model_path, model_path)`` — the 2D gallery images, then
        the GLB path twice (once for the 3D viewer, once for the download).

    Raises:
        gr.Error: If the prompt is empty or whitespace-only.
    """
    # Treat a whitespace-only prompt the same as an empty one.
    prompt = (prompt or "").strip()
    if not prompt:
        raise gr.Error("프롬프트(설명)를 입력하세요 📌")

    # 1) 2D generation / editing
    base_img = generate_single_2d(prompt, input_image, guidance_scale)
    images = [base_img]
    if multiview:
        images = generate_multiview(prompt, base_img, guidance_scale)

    # 2) 3D generation
    model_path = build_3d_mesh(prompt, images)

    return images, model_path, model_path  # gallery, viewer, file download


def build_ui():
    """Construct the Gradio Blocks app and wire the button to ``workflow``.

    Returns:
        The ``gr.Blocks`` instance (not yet queued or launched).
    """
    with gr.Blocks(css=CSS, title="Text ➜ 2D ➜ 3D (Kontext × Hunyuan3D)") as demo:
        gr.Markdown("# 🌀 텍스트 → 2D → 3D 생성기")
        gr.Markdown(
            "Kontext로 일관된 2D 이미지를 만든 뒤, Hunyuan3D‑2로 텍스처 3D 메시에스를 생성합니다.\n"
            "⏱️ 첫 실행은 모델 로딩으로 시간이 걸립니다."
        )

        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                prompt = gr.Textbox(label="프롬프트 / 설명", placeholder="예: 파란 모자를 쓴 귀여운 로봇")
                input_image = gr.Image(label="(선택) 편집할 참조 이미지", type="pil")
                multiview = gr.Checkbox(label="멀티뷰(좌/우/후면 포함) 3D 품질 향상", value=True)
                guidance = gr.Slider(0.5, 7.5, 2.5, step=0.1, label="Guidance Scale (Kontext)")
                run_btn = gr.Button("🚀 생성하기", variant="primary")
            # Right column: outputs.
            with gr.Column():
                gallery = gr.Gallery(label="🎨 2D 결과", show_label=True, columns=2, height="auto")
                model3d = gr.Model3D(label="🧱 3D 미리보기", clear_color=[1, 1, 1, 0])
                download = gr.File(label="⬇️ GLB 다운로드")

        run_btn.click(
            fn=workflow,
            inputs=[prompt, input_image, multiview, guidance],
            outputs=[gallery, model3d, download],
            api_name="generate",
            scroll_to_output=True,
            show_progress="full",
        )
    return demo


if __name__ == "__main__":
    # Gradio 4.x no longer accepts `concurrency_count` in `queue()` (passing
    # it raises); `default_concurrency_limit` is the replacement and keeps
    # requests running one at a time.
    build_ui().queue(max_size=3, default_concurrency_limit=1).launch()