yongyeol commited on
Commit
7591227
·
verified ·
1 Parent(s): 42194c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -98
app.py CHANGED
@@ -1,22 +1,8 @@
1
- # ───────────────────────────────────────────────────────────
2
- # app.py – Gradio Space: Text ➜ 2D (Kontext) ➜ 3D (Hunyuan3D)
3
- # -----------------------------------------------------------
4
- # Requirements (add to requirements.txt):
5
- # torch>=2.2.0
6
- # diffusers>=0.27.0
7
- # hy3dgen # Hunyuan3D official PyPI after Jan‑2025
8
- # trimesh
9
- # gradio==4.26.0
10
- # pillow
11
- # -----------------------------------------------------------
12
- # NOTE: • Set the following secrets in the Space **Settings → Secrets**
13
- # HF_TOKEN – your Hugging Face access token (for gated models)
14
- # BFL_API_KEY – optional, required if using Black‑Forest Labs usage tracking
15
- # • GPU (A10G/16 GB↑) is strongly recommended.
16
- # • Hunyuan3D installs a CUDA‑based custom rasteriser at runtime; build
17
- # wheels are provided on Linux/Windows. See model card instructions.
18
- # ---------------------------------------------------------------------------
19
-
20
  import os
21
  import tempfile
22
  from typing import List, Tuple
@@ -24,138 +10,155 @@ from typing import List, Tuple
24
  import gradio as gr
25
  import torch
26
  from PIL import Image
27
- from huggingface_hub import login as hf_login
28
 
 
29
  HF_TOKEN = os.getenv("HF_TOKEN")
30
  if not HF_TOKEN:
31
  raise RuntimeError(
32
  "HF_TOKEN이 설정되지 않았습니다. Space Settings → Secrets에서 "
33
  "HF_TOKEN=your_read_token 을 등록한 뒤 재시작하세요."
34
  )
35
-
36
- from huggingface_hub import login
37
  login(token=HF_TOKEN, add_to_git_credential=False)
38
 
 
39
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
40
  DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
41
 
42
- # ─────────── Load FLUX .1 Kontext (2D) ───────────
43
- from diffusers import FluxKontextPipeline, FluxPipeline # FluxPipeline = text‑to‑image variant
44
-
45
- print("[+] Loading FLUX.1 Kontext [dev] …")
46
- kontext_pipe = FluxKontextPipeline.from_pretrained(
47
- "black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=DTYPE
48
- ).to(DEVICE)
49
- kontext_pipe.set_progress_bar_config(disable=True)
50
-
51
- print("[+] Loading FLUX.1 [dev] (text‑to‑image) …")
52
- text2img_pipe = FluxPipeline.from_pretrained(
53
- "black-forest-labs/FLUX.1-dev", torch_dtype=DTYPE
54
- ).to(DEVICE)
55
- text2img_pipe.set_progress_bar_config(disable=True)
56
-
57
- # ─────────── Load Hunyuan3D‑2 (3D) ───────────
58
- print("[+] Loading Hunyuan3D‑2 shape+texture … (this may take a while)")
59
- from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
60
- from hy3dgen.texgen import Hunyuan3DPaintPipeline
61
-
62
- shape_pipe = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
63
- "tencent/Hunyuan3D-2", torch_dtype=DTYPE
64
- ).to(DEVICE)
65
- shape_pipe.set_progress_bar_config(disable=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- paint_pipe = Hunyuan3DPaintPipeline.from_pretrained(
68
- "tencent/Hunyuan3D-2", torch_dtype=DTYPE
69
- ).to(DEVICE)
70
- paint_pipe.set_progress_bar_config(disable=True)
 
 
 
 
71
 
72
  # ───────────────────────────────────────────────
73
  # Helper functions
74
  # ───────────────────────────────────────────────
75
 
76
  def generate_single_2d(prompt: str, image: Image.Image | None, guidance_scale: float) -> Image.Image:
77
- """Either edit an existing image via Kontext or generate a fresh one via Flux text2img."""
78
  if image is None:
79
- result = text2img_pipe(prompt=prompt, guidance_scale=guidance_scale).images[0]
 
 
80
  else:
81
- result = kontext_pipe(image=image, prompt=prompt, guidance_scale=guidance_scale).images[0]
82
  return result
83
 
84
 
85
  def generate_multiview(prompt: str, base_image: Image.Image, guidance_scale: float) -> List[Image.Image]:
86
- """Generate four canonical views (front / back / left / right) by re‑prompting Kontext."""
87
  views = [
88
- ("front view", base_image),
89
- (
90
- "left side view",
91
- kontext_pipe(image=base_image, prompt=f"{prompt}, left side view", guidance_scale=guidance_scale).images[0],
92
- ),
93
- (
94
- "right side view",
95
- kontext_pipe(image=base_image, prompt=f"{prompt}, right side view", guidance_scale=guidance_scale).images[0],
96
- ),
97
- (
98
- "back view",
99
- kontext_pipe(image=base_image, prompt=f"{prompt}, back view", guidance_scale=guidance_scale).images[0],
100
- ),
101
  ]
102
- # Return only images, keep order [front, left, right, back]
103
- return [v[1] for v in views]
104
 
105
 
106
  def build_3d_mesh(prompt: str, images: List[Image.Image]) -> str:
107
- """Call Hunyuan3D pipelines to build geometry then paint texture. Returns path to GLB."""
108
- # For single‑view use first image; multi‑view (≤6) accepted by Hunyuan3D
109
  single_or_multi = images if len(images) > 1 else images[0]
110
- mesh = shape_pipe(image=single_or_multi, prompt=prompt)[0]
111
- mesh = paint_pipe(mesh, image=single_or_multi)
112
 
113
  tmpdir = tempfile.mkdtemp()
114
  out_path = os.path.join(tmpdir, "mesh.glb")
115
- mesh.export(out_path) # trimesh export
116
  return out_path
117
 
 
 
118
 
119
- # ─────────── Gradio interface ───────────
120
- CSS = """
121
- footer {visibility: hidden;}
122
- """
123
 
124
def workflow(prompt: str, input_image: Image.Image | None, multiview: bool, guidance_scale: float) -> Tuple[List[Image.Image], str, str]:
    """Run the text -> 2D -> 3D pipeline for one request.

    Args:
        prompt: Text description; must contain at least one non-whitespace
            character.
        input_image: Optional reference image forwarded to
            ``generate_single_2d``.
        multiview: When true, ``generate_multiview`` supplies the image list;
            otherwise the single base image is used.
        guidance_scale: Forwarded unchanged to the 2D generation helpers.

    Returns:
        ``(images, model_path, model_path)`` - the 2D images followed by the
        GLB path twice (gallery, viewer, file download).

    Raises:
        gr.Error: if ``prompt`` is empty or whitespace-only.
    """
    # A whitespace-only prompt is truthy, so check the stripped text too;
    # otherwise the expensive pipelines would run with no real description.
    if not prompt or not prompt.strip():
        raise gr.Error("프롬프트(설명)를 입력하세요 📌")

    # Step 1: 2D generation / editing.
    base_img = generate_single_2d(prompt, input_image, guidance_scale)
    images = generate_multiview(prompt, base_img, guidance_scale) if multiview else [base_img]

    # Step 2: 3D generation.
    model_path = build_3d_mesh(prompt, images)
    return images, model_path, model_path
140
 
141
 
142
  def build_ui():
143
- with gr.Blocks(css=CSS, title="Text ➜ 2D ➜ 3D (Kontext × Hunyuan3D)") as demo:
144
- gr.Markdown("# 🌀 텍스트 → 2D → 3D 생성기")
145
- gr.Markdown(
146
- "Kontext로 일관된 2D 이미지를 만든 뒤, Hunyuan3D‑2로 텍스처 3D 메시에스를 생성합니다.\n"
147
- "⏱️ 첫 실행은 모델 로딩으로 시간이 걸립니다."
148
- )
149
 
150
  with gr.Row():
151
  with gr.Column():
152
  prompt = gr.Textbox(label="프롬프트 / 설명", placeholder="예: 파란 모자를 쓴 귀여운 로봇")
153
  input_image = gr.Image(label="(선택) 편집할 참조 이미지", type="pil")
154
- multiview = gr.Checkbox(label="멀티뷰(좌/우/후면 포함) 3D 품질 향상", value=True)
155
- guidance = gr.Slider(0.5, 7.5, 2.5, step=0.1, label="Guidance Scale (Kontext)")
156
  run_btn = gr.Button("🚀 생성하기", variant="primary")
157
  with gr.Column():
158
- gallery = gr.Gallery(label="🎨 2D 결과", show_label=True, columns=2, height="auto")
159
  model3d = gr.Model3D(label="🧱 3D 미리보기", clear_color=[1, 1, 1, 0])
160
  download = gr.File(label="⬇️ GLB 다운로드")
161
 
@@ -167,7 +170,6 @@ def build_ui():
167
  scroll_to_output=True,
168
  show_progress="full",
169
  )
170
-
171
  return demo
172
 
173
 
 
1
+ # ────────────────────────────────────────────────────────────────────────────
2
+ # app.py – Text ➜ 2D (FLUX-mini Kontext) ➜ 3D (Hunyuan3D-2)
3
+ # Fits into 16 GB system RAM: lightweight models + lazy loading + offload
4
+ # Requires: GPU (A10G 24 GB ideal, T4 16 GB OK with fp16)
5
+ # ────────────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import os
7
  import tempfile
8
  from typing import List, Tuple
 
10
  import gradio as gr
11
  import torch
12
  from PIL import Image
13
+ from huggingface_hub import login
14
 
15
+ # ─────────────────────── Auth ───────────────────────
16
  HF_TOKEN = os.getenv("HF_TOKEN")
17
  if not HF_TOKEN:
18
  raise RuntimeError(
19
  "HF_TOKEN이 설정되지 않았습니다. Space Settings → Secrets에서 "
20
  "HF_TOKEN=your_read_token 을 등록한 뒤 재시작하세요."
21
  )
 
 
22
  login(token=HF_TOKEN, add_to_git_credential=False)
23
 
24
+ # ─────────────────────── Device & dtype ───────────────────────
25
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
26
  DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
27
 
28
+ # ─────────────────────── Lazy loaders ───────────────────────
29
+ from diffusers import FluxKontextPipeline, FluxPipeline
30
+ from accelerate import init_empty_weights, load_checkpoint_and_dispatch
31
+
32
+ # Global caches
33
+ kontext_pipe = None # type: FluxKontextPipeline | None
34
+ _text2img_pipe = None # type: FluxPipeline | None
35
+ shape_pipe = None
36
+ paint_pipe = None
37
+
38
+ MINI_KONTEXT_REPO = "black-forest-labs/FLUX.1-Kontext-mini"
39
+ MINI_T2I_REPO = "black-forest-labs/FLUX.1-mini"
40
+ HUNYUAN_REPO = "tencent/Hunyuan3D-2"
41
+
42
+
43
def load_kontext() -> FluxKontextPipeline:
    """Return the shared Kontext pipeline, loading it on first use.

    The pipeline is cached in the module-level ``kontext_pipe``, so
    ``from_pretrained`` runs at most once per process; later calls return
    the cached object.
    """
    global kontext_pipe
    if kontext_pipe is None:
        print("[+] Loading FLUX.1-Kontext-mini (low_cpu_mem_usage)")
        kontext_pipe = FluxKontextPipeline.from_pretrained(
            MINI_KONTEXT_REPO,
            torch_dtype=DTYPE,
            # Pipeline-level loading in diffusers rejects device_map="auto";
            # "balanced" is the placement strategy its documentation lists.
            device_map="balanced",
            low_cpu_mem_usage=True,
        )
        kontext_pipe.set_progress_bar_config(disable=True)
    return kontext_pipe
55
+
56
+
57
def load_text2img() -> FluxPipeline:
    """Return the shared text-to-image pipeline, loading it only when needed.

    The pipeline is cached in the module-level ``_text2img_pipe``, so
    ``from_pretrained`` runs at most once per process; later calls return
    the cached object.
    """
    global _text2img_pipe
    if _text2img_pipe is None:
        print("[+] Loading FLUX.1-mini (text → image)…")
        _text2img_pipe = FluxPipeline.from_pretrained(
            MINI_T2I_REPO,
            torch_dtype=DTYPE,
            # Pipeline-level loading in diffusers rejects device_map="auto";
            # "balanced" is the placement strategy its documentation lists.
            device_map="balanced",
            low_cpu_mem_usage=True,
        )
        _text2img_pipe.set_progress_bar_config(disable=True)
    return _text2img_pipe
70
+
71
+
72
def load_hunyuan() -> tuple:
    """Return ``(shape_pipe, paint_pipe)``, loading both on first use.

    Both pipelines are cached in module-level globals. If either one is
    still ``None``, both are (re)loaded from ``HUNYUAN_REPO``.
    """
    global shape_pipe, paint_pipe
    if shape_pipe is not None and paint_pipe is not None:
        return shape_pipe, paint_pipe

    print("[+] Loading Hunyuan3D-2 (shape & texture)…")
    # Deferred import: hy3dgen is imported only when the pipelines are loaded.
    from hy3dgen.shapegen import Hunyuan3DDiTFlowMatchingPipeline
    from hy3dgen.texgen import Hunyuan3DPaintPipeline

    # NOTE(review): these keyword names mirror the diffusers loaders; whether
    # the hy3dgen ``from_pretrained`` methods accept them is not visible
    # here - confirm against the hy3dgen API.
    load_options = dict(
        torch_dtype=DTYPE,
        device_map="auto",
        low_cpu_mem_usage=True,
    )

    shape_pipe = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(HUNYUAN_REPO, **load_options)
    shape_pipe.set_progress_bar_config(disable=True)

    paint_pipe = Hunyuan3DPaintPipeline.from_pretrained(HUNYUAN_REPO, **load_options)
    paint_pipe.set_progress_bar_config(disable=True)

    return shape_pipe, paint_pipe
95
 
96
  # ───────────────────────────────────────────────
97
  # Helper functions
98
  # ───────────────────────────────────────────────
99
 
100
  def generate_single_2d(prompt: str, image: Image.Image | None, guidance_scale: float) -> Image.Image:
101
+ kontext = load_kontext()
102
  if image is None:
103
+ # 텍스트→이미지 : 경량 text2img 파이프라인 사용
104
+ t2i = load_text2img()
105
+ result = t2i(prompt=prompt, guidance_scale=guidance_scale).images[0]
106
  else:
107
+ result = kontext(image=image, prompt=prompt, guidance_scale=guidance_scale).images[0]
108
  return result
109
 
110
 
111
  def generate_multiview(prompt: str, base_image: Image.Image, guidance_scale: float) -> List[Image.Image]:
112
+ kontext = load_kontext()
113
  views = [
114
+ base_image,
115
+ kontext(image=base_image, prompt=f"{prompt}, left side view", guidance_scale=guidance_scale).images[0],
116
+ kontext(image=base_image, prompt=f"{prompt}, right side view", guidance_scale=guidance_scale).images[0],
117
+ kontext(image=base_image, prompt=f"{prompt}, back view", guidance_scale=guidance_scale).images[0],
 
 
 
 
 
 
 
 
 
118
  ]
119
+ return views # [front, left, right, back]
 
120
 
121
 
122
  def build_3d_mesh(prompt: str, images: List[Image.Image]) -> str:
123
+ shape, paint = load_hunyuan()
 
124
  single_or_multi = images if len(images) > 1 else images[0]
125
+ mesh = shape(image=single_or_multi, prompt=prompt)[0]
126
+ mesh = paint(mesh, image=single_or_multi)
127
 
128
  tmpdir = tempfile.mkdtemp()
129
  out_path = os.path.join(tmpdir, "mesh.glb")
130
+ mesh.export(out_path)
131
  return out_path
132
 
133
+ # ──────────────────────────────── UI ────────────────────────────────
134
+ CSS = """footer {visibility:hidden;}"""
135
 
 
 
 
 
136
 
137
def workflow(prompt: str, input_image: Image.Image | None, multiview: bool, guidance_scale: float) -> Tuple[List[Image.Image], str, str]:
    """Run the text -> 2D -> 3D pipeline for one request.

    Args:
        prompt: Text description; must contain at least one non-whitespace
            character.
        input_image: Optional reference image forwarded to
            ``generate_single_2d``.
        multiview: When true, ``generate_multiview`` supplies the image list;
            otherwise the single base image is used.
        guidance_scale: Forwarded unchanged to the 2D generation helpers.

    Returns:
        ``(images, model_path, model_path)`` - the 2D images followed by the
        GLB path twice. Presumably these feed the gallery, the 3D viewer and
        the download component; the event wiring is not visible here.

    Raises:
        gr.Error: if ``prompt`` is empty or whitespace-only.
    """
    # A whitespace-only prompt is truthy, so check the stripped text too;
    # otherwise the models would be loaded and run with no real description.
    if not prompt or not prompt.strip():
        raise gr.Error("프롬프트(설명)를 입력하세요 📌")

    base_img = generate_single_2d(prompt, input_image, guidance_scale)
    images = generate_multiview(prompt, base_img, guidance_scale) if multiview else [base_img]

    model_path = build_3d_mesh(prompt, images)
    return images, model_path, model_path
 
146
 
147
 
148
  def build_ui():
149
+ with gr.Blocks(css=CSS, title="Text ➜ 2D ➜ 3D (mini)") as demo:
150
+ gr.Markdown("# 🌀 텍스트 → 2D → 3D 생성기 (경량 버전)")
151
+ gr.Markdown("Kontext-mini + Hunyuan3D-2. 16 GB RAM에서도 동작합니다.")
 
 
 
152
 
153
  with gr.Row():
154
  with gr.Column():
155
  prompt = gr.Textbox(label="프롬프트 / 설명", placeholder="예: 파란 모자를 쓴 귀여운 로봇")
156
  input_image = gr.Image(label="(선택) 편집할 참조 이미지", type="pil")
157
+ multiview = gr.Checkbox(label="멀티뷰(좌/우/후면 포함)", value=True)
158
+ guidance = gr.Slider(0.5, 7.5, 2.5, step=0.1, label="Guidance Scale")
159
  run_btn = gr.Button("🚀 생성하기", variant="primary")
160
  with gr.Column():
161
+ gallery = gr.Gallery(label="🎨 2D 결과", columns=2, height="auto")
162
  model3d = gr.Model3D(label="🧱 3D 미리보기", clear_color=[1, 1, 1, 0])
163
  download = gr.File(label="⬇️ GLB 다운로드")
164
 
 
170
  scroll_to_output=True,
171
  show_progress="full",
172
  )
 
173
  return demo
174
 
175