yongyeol committed
Commit e29d600 · verified · 1 Parent(s): 239eb94

Create app.py

Files changed (1)
  1. app.py +170 -0
app.py ADDED
@@ -0,0 +1,170 @@
+ # ───────────────────────────────────────────────────────────
+ # app.py – Gradio Space: Text ➜ 2D (Kontext) ➜ 3D (Hunyuan3D)
+ # -----------------------------------------------------------
+ # Requirements (add to requirements.txt):
+ #   torch>=2.2.0
+ #   diffusers>=0.27.0
+ #   hy3dgen          # official Hunyuan3D package on PyPI (since Jan 2025)
+ #   trimesh
+ #   gradio==4.26.0
+ #   pillow
+ # -----------------------------------------------------------
+ # NOTE: • Set the following secrets in the Space **Settings → Secrets**:
+ #           HF_TOKEN    – your Hugging Face access token (for gated models)
+ #           BFL_API_KEY – optional, required if using Black Forest Labs usage tracking
+ #       • A GPU (A10G / 16 GB or more) is strongly recommended.
+ #       • Hunyuan3D installs a CUDA-based custom rasteriser at runtime; pre-built
+ #         wheels are provided for Linux/Windows. See the model card instructions.
+ # ---------------------------------------------------------------------------
+
+ import os
+ import tempfile
+ from typing import List, Tuple
+
+ import gradio as gr
+ import torch
+ from PIL import Image
+ from huggingface_hub import login as hf_login
+
+ # ─────────── Login / device ───────────
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ if HF_TOKEN:
+     hf_login(token=HF_TOKEN, add_to_git_credential=False)
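+     # FLUX.1 [dev] and FLUX.1 Kontext [dev] are gated repos, so this token must belong
+     # to an account that has accepted their licences on the Hub.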
+
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
+
+ # ─────────── Load FLUX.1 Kontext (2D) ───────────
+ from diffusers import FluxKontextPipeline, FluxPipeline  # FluxPipeline = text-to-image variant
+
+ print("[+] Loading FLUX.1 Kontext [dev] …")
+ kontext_pipe = FluxKontextPipeline.from_pretrained(
+     "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=DTYPE
+ ).to(DEVICE)
+ kontext_pipe.set_progress_bar_config(disable=True)
+
+ print("[+] Loading FLUX.1 [dev] (text-to-image) …")
+ text2img_pipe = FluxPipeline.from_pretrained(
+     "black-forest-labs/FLUX.1-dev", torch_dtype=DTYPE
+ ).to(DEVICE)
+ text2img_pipe.set_progress_bar_config(disable=True)
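+ # Keeping Kontext, FLUX.1 [dev] and both Hunyuan3D pipelines resident on one GPU is
+ # memory-hungry; if VRAM is tight, calling diffusers' enable_model_cpu_offload() on the
+ # FLUX pipelines (instead of .to(DEVICE)) is one possible mitigation.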
+
+ # ─────────── Load Hunyuan3D-2 (3D) ───────────
+ print("[+] Loading Hunyuan3D-2 shape+texture … (this may take a while)")
+ from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
+ from hy3dgen.texgen import Hunyuan3DPaintPipeline
+
+ shape_pipe = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
+     "tencent/Hunyuan3D-2", torch_dtype=DTYPE
+ ).to(DEVICE)
+ shape_pipe.set_progress_bar_config(disable=True)
+
+ paint_pipe = Hunyuan3DPaintPipeline.from_pretrained(
+     "tencent/Hunyuan3D-2", torch_dtype=DTYPE
+ ).to(DEVICE)
+ paint_pipe.set_progress_bar_config(disable=True)
+
+ # ───────────────────────────────────────────────
+ # Helper functions
+ # ───────────────────────────────────────────────
+
+ def generate_single_2d(prompt: str, image: Image.Image | None, guidance_scale: float) -> Image.Image:
+     """Either edit an existing image via Kontext or generate a fresh one via Flux text2img."""
+     if image is None:
+         result = text2img_pipe(prompt=prompt, guidance_scale=guidance_scale).images[0]
+     else:
+         result = kontext_pipe(image=image, prompt=prompt, guidance_scale=guidance_scale).images[0]
+     return result
+
+
+ def generate_multiview(prompt: str, base_image: Image.Image, guidance_scale: float) -> List[Image.Image]:
+     """Generate four canonical views (front / back / left / right) by re-prompting Kontext."""
+     views = [
+         ("front view", base_image),
+         (
+             "left side view",
+             kontext_pipe(image=base_image, prompt=f"{prompt}, left side view", guidance_scale=guidance_scale).images[0],
+         ),
+         (
+             "right side view",
+             kontext_pipe(image=base_image, prompt=f"{prompt}, right side view", guidance_scale=guidance_scale).images[0],
+         ),
+         (
+             "back view",
+             kontext_pipe(image=base_image, prompt=f"{prompt}, back view", guidance_scale=guidance_scale).images[0],
+         ),
+     ]
+     # Return only images, keep order [front, left, right, back]
+     return [v[1] for v in views]
+
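+ # Note: re-prompting Kontext with "left/right/back view" is a heuristic: each extra view
+ # is still conditioned on the single front image, so strict cross-view consistency is not
+ # guaranteed; the extra renders mainly give Hunyuan3D more appearance cues.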
+
+ def build_3d_mesh(prompt: str, images: List[Image.Image]) -> str:
+     """Call Hunyuan3D pipelines to build geometry then paint texture. Returns path to GLB."""
+     # For single-view use first image; multi-view (≤6) accepted by Hunyuan3D
+     single_or_multi = images if len(images) > 1 else images[0]
+     mesh = shape_pipe(image=single_or_multi, prompt=prompt)[0]
+     mesh = paint_pipe(mesh, image=single_or_multi)
+
+     tmpdir = tempfile.mkdtemp()
+     out_path = os.path.join(tmpdir, "mesh.glb")
+     mesh.export(out_path)  # trimesh export
+     return out_path
+
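+ # mesh.export() infers the GLB format from the ".glb" extension (trimesh). The temporary
+ # directory is never removed here; on a Space this is usually acceptable because the
+ # container filesystem is ephemeral, but long-running deployments may want to clean it up.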
+
+ # ─────────── Gradio interface ───────────
+ CSS = """
+ footer {visibility: hidden;}
+ """
+
+ def workflow(prompt: str, input_image: Image.Image | None, multiview: bool, guidance_scale: float) -> Tuple[List[Image.Image], str, str]:
+     """Main inference wrapper."""
+     if not prompt:
+         raise gr.Error("Please enter a prompt (description) 📌")
+
+     # 1️⃣ 2D generation / editing
+     base_img = generate_single_2d(prompt, input_image, guidance_scale)
+     images = [base_img]
+
+     if multiview:
+         images = generate_multiview(prompt, base_img, guidance_scale)
+
+     # 2️⃣ 3D generation
+     model_path = build_3d_mesh(prompt, images)
+
+     return images, model_path, model_path  # gallery, viewer, file download
+
+
+ def build_ui():
+     with gr.Blocks(css=CSS, title="Text ➜ 2D ➜ 3D (Kontext × Hunyuan3D)") as demo:
+         gr.Markdown("# 🌀 Text → 2D → 3D Generator")
+         gr.Markdown(
+             "Generates consistent 2D images with Kontext, then builds a textured 3D mesh with Hunyuan3D-2.\n"
+             "⏱️ The first run takes a while because the models have to load."
+         )
+
+         with gr.Row():
+             with gr.Column():
+                 prompt = gr.Textbox(label="Prompt / description", placeholder="e.g. a cute robot wearing a blue hat")
+                 input_image = gr.Image(label="(Optional) reference image to edit", type="pil")
+                 multiview = gr.Checkbox(label="Multi-view (incl. left/right/back) for better 3D quality", value=True)
+                 guidance = gr.Slider(0.5, 7.5, 2.5, step=0.1, label="Guidance Scale (Kontext)")
+                 run_btn = gr.Button("🚀 Generate", variant="primary")
+             with gr.Column():
+                 gallery = gr.Gallery(label="🎨 2D results", show_label=True, columns=2, height="auto")
+                 model3d = gr.Model3D(label="🧱 3D preview", clear_color=[1, 1, 1, 0])
+                 download = gr.File(label="⬇️ Download GLB")
+
+         run_btn.click(
+             fn=workflow,
+             inputs=[prompt, input_image, multiview, guidance],
+             outputs=[gallery, model3d, download],
+             api_name="generate",
+             scroll_to_output=True,
+             show_progress="full",
+         )
+
+     return demo
+
+
+ if __name__ == "__main__":
+     # Gradio 4.x: queue() no longer accepts concurrency_count; use default_concurrency_limit
+     build_ui().queue(max_size=3, default_concurrency_limit=1).launch()
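
Since run_btn.click registers the endpoint with api_name="generate", the running Space can also be driven programmatically. A minimal sketch using gradio_client, assuming the Space is public; the Space id "yongyeol/<space-name>" is a placeholder:

    from gradio_client import Client

    client = Client("yongyeol/<space-name>")   # placeholder Space id
    gallery, model_path, glb_file = client.predict(
        "a cute robot wearing a blue hat",     # prompt
        None,                                  # optional reference image
        True,                                  # multiview
        2.5,                                   # guidance scale
        api_name="/generate",
    )
    print(glb_file)                            # local path to the downloaded .glb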