File size: 12,054 Bytes
b880652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d635e38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16dfcc8
cd41f5f
 
 
 
 
 
 
 
b1b52ab
 
db6a3b7
b1b52ab
db6a3b7
cd41f5f
b1b52ab
258ea5a
b1b52ab
c260ece
a481d7a
b1b52ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a481d7a
b1b52ab
 
a481d7a
b1b52ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db6a3b7
b1b52ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db6a3b7
b1b52ab
 
 
 
 
 
 
 
 
 
 
a481d7a
b1b52ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db6a3b7
d635e38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db6a3b7
 
b1b52ab
 
 
 
 
db6a3b7
 
b1b52ab
c666caf
b1b52ab
 
 
 
 
258ea5a
 
b1b52ab
258ea5a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
import gradio as gr
from gradio_litmodel3d import LitModel3D

import os
import shutil
os.environ['SPCONV_ALGO'] = 'native'
from typing import *
import torch
import numpy as np
import imageio
import uuid
from easydict import EasyDict as edict
from PIL import Image
from trellis.pipelines import TrellisImageTo3DPipeline
from trellis.representations import Gaussian, MeshExtractResult
from trellis.utils import render_utils, postprocessing_utils


# Upper limit for seeds: used as the maximum of the seed slider and as the
# (exclusive) upper bound when a random seed is drawn in get_seed().
MAX_SEED = np.iinfo(np.int32).max
# Root of the per-session scratch directories; lives in a 'tmp' folder next to this file.
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
# Created at import time so that session directories can be placed inside it.
os.makedirs(TMP_DIR, exist_ok=True)


def start_session(req: gr.Request):
    """Create the scratch directory for the session that issued ``req``.

    The directory is ``TMP_DIR/<session_hash>``; creating it is a no-op when
    it already exists.
    """
    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    print(f'Creating user directory: {session_dir}')
    os.makedirs(session_dir, exist_ok=True)
    
def end_session(req: gr.Request):
    """Delete the scratch directory of the session that issued ``req``.

    Args:
        req: Gradio request; its ``session_hash`` names the directory under
            ``TMP_DIR`` that is removed.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    print(f'Removing user directory: {user_dir}')
    # Cleanup is best-effort: the directory may be missing (never created or
    # already removed), and that must not raise from the unload hook.
    shutil.rmtree(user_dir, ignore_errors=True)

def preprocess_image(image: Image.Image) -> Image.Image:
    """Run the pipeline's image preprocessing on ``image``.

    Args:
        image: The image to preprocess.

    Returns:
        Whatever ``pipeline.preprocess_image`` returns for ``image``; it is a
        single value (presumably a PIL image -- confirm against the pipeline
        implementation), not a tuple.
    """
    # `pipeline` is the module-level global assigned in the __main__ section.
    return pipeline.preprocess_image(image)

def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
    """Bundle a generated Gaussian and mesh into a dict of CPU NumPy arrays.

    The result has three keys: ``'gaussian'`` (the Gaussian's ``init_params``
    plus its ``_xyz``, ``_features_dc``, ``_scaling``, ``_rotation`` and
    ``_opacity`` tensors as arrays), ``'mesh'`` (``vertices`` and ``faces``
    as arrays) and ``'trial_id'``.
    """
    # Start from the constructor parameters, then add each tensor moved to CPU.
    gaussian_state = dict(gs.init_params)
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        gaussian_state[attr] = getattr(gs, attr).cpu().numpy()

    mesh_state = {
        'vertices': mesh.vertices.cpu().numpy(),
        'faces': mesh.faces.cpu().numpy(),
    }

    return {
        'gaussian': gaussian_state,
        'mesh': mesh_state,
        'trial_id': trial_id,
    }
    
def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
    """Rebuild the Gaussian and mesh from a dict produced by ``pack_state``.

    Returns:
        ``(gs, mesh, trial_id)`` where ``gs`` is a ``Gaussian`` whose tensors
        are placed on the ``'cuda'`` device, ``mesh`` is an ``edict`` with
        ``vertices`` and ``faces`` tensors on ``'cuda'``, and ``trial_id`` is
        the stored identifier.
    """
    gaussian_state = state['gaussian']

    # Constructor arguments are stored alongside the tensors under 'gaussian'.
    init_keys = (
        'aabb',
        'sh_degree',
        'mininum_kernel_size',
        'scaling_bias',
        'opacity_bias',
        'scaling_activation',
    )
    gs = Gaussian(**{key: gaussian_state[key] for key in init_keys})

    # Restore each stored array as a CUDA tensor on the new instance.
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        setattr(gs, attr, torch.tensor(gaussian_state[attr], device='cuda'))

    mesh_state = state['mesh']
    mesh = edict(
        vertices=torch.tensor(mesh_state['vertices'], device='cuda'),
        faces=torch.tensor(mesh_state['faces'], device='cuda'),
    )

    return gs, mesh, state['trial_id']

def get_seed(randomize_seed: bool, seed: int) -> int:
    """Return the seed to use: ``seed`` itself, or a random one if requested.

    When ``randomize_seed`` is truthy a value is drawn with
    ``np.random.randint(0, MAX_SEED)``; otherwise ``seed`` is returned as is.
    """
    if not randomize_seed:
        return seed
    return np.random.randint(0, MAX_SEED)

def image_to_3d(
    image: Image.Image,
    seed: int,
    ss_guidance_strength: float,
    ss_sampling_steps: int,
    slat_guidance_strength: float,
    slat_sampling_steps: int,
    req: gr.Request,
    progress: gr.Progress = gr.Progress()
) -> Tuple[dict, str, str, str]:
    """
    Convert an image to a 3D model, a preview video and a full-quality GLB.

    Args:
        image: Input image; passed to the pipeline with ``preprocess_image=False``.
        seed: Seed forwarded to ``pipeline.run``.
        ss_guidance_strength: ``cfg_strength`` for the sparse-structure sampler.
        ss_sampling_steps: ``steps`` for the sparse-structure sampler.
        slat_guidance_strength: ``cfg_strength`` for the structured-latent sampler.
        slat_sampling_steps: ``steps`` for the structured-latent sampler.
        req: Gradio request; its ``session_hash`` selects the output directory.
        progress: Gradio progress tracker updated throughout the run.

    Returns:
        ``(state, video_path, glb_path, glb_path)``: the packed state from
        ``pack_state``, the path of the saved MP4 preview, and the path of the
        full-quality GLB (returned twice, once per output component).

    Raises:
        gr.Error: If any step fails; the original exception is chained as the cause.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    progress(0, desc="Initializing...")

    # Clear CUDA cache before starting
    torch.cuda.empty_cache()

    try:
        # The session directory is normally created on page load; make sure it
        # exists so that saving the outputs below cannot fail on a missing folder.
        os.makedirs(user_dir, exist_ok=True)

        # Generate 3D model with progress updates
        progress(0.1, desc="Running 3D generation pipeline...")
        outputs = pipeline.run(
            image,
            seed=seed,
            formats=["gaussian", "mesh"],
            preprocess_image=False,
            sparse_structure_sampler_params={
                "steps": ss_sampling_steps,
                "cfg_strength": ss_guidance_strength,
            },
            slat_sampler_params={
                "steps": slat_sampling_steps,
                "cfg_strength": slat_guidance_strength,
            },
        )
        gaussian = outputs['gaussian'][0]
        mesh = outputs['mesh'][0]

        progress(0.4, desc="Generating video preview...")
        # Generate video frames in batches to manage memory
        batch_size = 30  # Process 30 frames at a time
        num_frames = 120
        video = []
        video_geo = []

        # NOTE(review): this assumes render_utils.render_video honours the
        # `start_frame` keyword and continues the camera path from that frame.
        # Confirm against trellis.utils.render_utils; if the keyword is ignored,
        # every batch would restart the camera path from its beginning.
        for i in range(0, num_frames, batch_size):
            end_idx = min(i + batch_size, num_frames)
            batch_frames = render_utils.render_video(
                gaussian,
                num_frames=end_idx - i,
                start_frame=i
            )['color']
            batch_geo = render_utils.render_video(
                mesh,
                num_frames=end_idx - i,
                start_frame=i
            )['normal']

            video.extend(batch_frames)
            video_geo.extend(batch_geo)

            # Clear cache after each batch
            torch.cuda.empty_cache()
            # Reported after the batch is rendered, so the fraction is based on
            # the number of frames completed (end_idx), ending at 0.7.
            progress(0.4 + (0.3 * end_idx / num_frames), desc=f"Rendering frames {i} to {end_idx}...")

        # Combine video frames: color and normal renders are joined along axis 1
        video = [
            np.concatenate([color_frame, normal_frame], axis=1)
            for color_frame, normal_frame in zip(video, video_geo)
        ]

        # Generate unique ID and save video
        trial_id = str(uuid.uuid4())
        video_path = os.path.join(user_dir, f"{trial_id}.mp4")
        progress(0.7, desc="Saving video...")
        imageio.mimsave(video_path, video, fps=15)

        # Clear video data from memory
        del video
        del video_geo
        torch.cuda.empty_cache()

        # Generate and save full-quality GLB (no simplification, 2048 texture)
        progress(0.8, desc="Generating full-quality GLB...")
        glb = postprocessing_utils.to_glb(
            gaussian,
            mesh,
            simplify=0.0,
            texture_size=2048,
            verbose=False
        )
        glb_path = os.path.join(user_dir, f"{trial_id}_full.glb")
        progress(0.9, desc="Saving GLB file...")
        glb.export(glb_path)

        # Pack state so a reduced GLB can be extracted later
        progress(0.95, desc="Finalizing...")
        state = pack_state(gaussian, mesh, trial_id)

        # Final cleanup
        torch.cuda.empty_cache()
        progress(1.0, desc="Complete!")

        return state, video_path, glb_path, glb_path

    except Exception as e:
        # Clean up on error; chain the cause so the traceback keeps the real failure
        torch.cuda.empty_cache()
        raise gr.Error(f"Processing failed: {str(e)}") from e

def extract_reduced_glb(
    state: dict,
    mesh_simplify: float,
    texture_size: int,
    req: gr.Request,
    progress: gr.Progress = gr.Progress()
) -> Tuple[str, str]:
    """
    Extract a reduced-quality GLB file from a packed state, with progress tracking.

    Args:
        state: Dict produced by ``pack_state``.
        mesh_simplify: Value forwarded as ``simplify`` to ``postprocessing_utils.to_glb``.
        texture_size: Value forwarded as ``texture_size`` to ``postprocessing_utils.to_glb``.
        req: Gradio request; its ``session_hash`` selects the output directory.
        progress: Gradio progress tracker updated throughout the run.

    Returns:
        ``(glb_path, glb_path)``: the path of the saved reduced GLB, returned
        twice (once per output component).

    Raises:
        gr.Error: If any step fails; the original exception is chained as the cause.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))

    try:
        # Make sure the session directory exists before exporting into it.
        os.makedirs(user_dir, exist_ok=True)

        progress(0.1, desc="Unpacking model state...")
        gs, mesh, trial_id = unpack_state(state)

        progress(0.3, desc="Generating reduced GLB...")
        glb = postprocessing_utils.to_glb(
            gs, mesh,
            simplify=mesh_simplify,
            texture_size=texture_size,
            verbose=False
        )

        progress(0.8, desc="Saving reduced GLB...")
        glb_path = os.path.join(user_dir, f"{trial_id}_reduced.glb")
        glb.export(glb_path)

        progress(0.9, desc="Cleaning up...")
        torch.cuda.empty_cache()

        progress(1.0, desc="Complete!")
        return glb_path, glb_path

    except Exception as e:
        # Free cached GPU memory and chain the cause so the real failure is kept
        torch.cuda.empty_cache()
        raise gr.Error(f"GLB reduction failed: {str(e)}") from e

# Build the Gradio UI: inputs and settings in the left column, previews and
# download buttons in the right column, then wire up the event handlers.
with gr.Blocks(delete_cache=(600, 600)) as demo:
    gr.Markdown("""
    ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
    * Upload an image and click "Generate" to create a 3D model
    * You can download either:
        * The full-quality GLB file (larger size, highest quality)
        * A reduced version with customizable quality settings (smaller size)
    """)
    
    with gr.Row():
        # Left column: image input, generation settings and action buttons
        with gr.Column():
            image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=300)
            
            with gr.Accordion(label="Generation Settings", open=False):
                seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                gr.Markdown("Stage 1: Sparse Structure Generation")
                with gr.Row():
                    ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                    ss_sampling_steps = gr.Slider(1, 500, label="Sampling Steps", value=12, step=1)
                gr.Markdown("Stage 2: Structured Latent Generation")
                with gr.Row():
                    slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                    slat_sampling_steps = gr.Slider(1, 500, label="Sampling Steps", value=12, step=1)

            generate_btn = gr.Button("Generate")
            
            with gr.Accordion(label="Reduced GLB Settings", open=False):
                mesh_simplify = gr.Slider(0.0, 0.98, label="Mesh Simplification", value=0.95, step=0.01)
                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
            
            # Starts disabled; enabled by the generate chain below once a model exists
            extract_reduced_btn = gr.Button("Extract Reduced GLB", interactive=False)

        # Right column: video preview, 3D viewer and download buttons
        with gr.Column():
            video_output = gr.Video(label="Generated 3D Asset Preview", autoplay=True, loop=True, height=300)
            model_output = LitModel3D(label="3D Model Preview", exposure=20.0, height=300)
            gr.Markdown("### Download Options")
            with gr.Row():
                # Both download buttons start disabled until a file is available
                download_full = gr.DownloadButton(label="Download Full-Quality GLB", interactive=False)
                download_reduced = gr.DownloadButton(label="Download Reduced GLB", interactive=False)
            
    # Holds the packed state returned by image_to_3d for later reduced-GLB extraction
    output_buf = gr.State()

    # Example images
    # Every file in assets/example_image (path relative to the working directory)
    # is offered as an example; clicking one runs preprocess_image on it.
    with gr.Row():
        examples = gr.Examples(
            examples=[
                f'assets/example_image/{image}'
                for image in os.listdir("assets/example_image")
            ],
            inputs=[image_prompt],
            fn=preprocess_image,
            outputs=[image_prompt],
            run_on_click=True,
            examples_per_page=64,
        )

    # Event handlers
    # Create the session directory on page load and remove it on unload
    demo.load(start_session)
    demo.unload(end_session)
    
    # Replace an uploaded image with its preprocessed version
    image_prompt.upload(
        preprocess_image,
        inputs=[image_prompt],
        outputs=[image_prompt],
    )

    # Generate: resolve the seed, run the generation, then enable the full
    # download and the extract button while disabling the reduced download.
    generate_btn.click(
        get_seed,
        inputs=[randomize_seed, seed],
        outputs=[seed],
    ).then(
        image_to_3d,
        inputs=[image_prompt, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
        outputs=[output_buf, video_output, model_output, download_full],
    ).then(
        lambda: (gr.Button(interactive=True), gr.Button(interactive=True), gr.Button(interactive=False)),
        outputs=[download_full, extract_reduced_btn, download_reduced],
    )

    # Extract: build the reduced GLB from the stored state, show it in the
    # 3D viewer, then enable the reduced download button.
    extract_reduced_btn.click(
        extract_reduced_glb,
        inputs=[output_buf, mesh_simplify, texture_size],
        outputs=[model_output, download_reduced],
    ).then(
        lambda: gr.Button(interactive=True),
        outputs=[download_reduced],
    )

if __name__ == "__main__":
    # Set some CUDA memory management options
    torch.cuda.empty_cache()
    torch.backends.cudnn.benchmark = True

    # Initialize pipeline; `pipeline` is the module-level global that the
    # handlers above (preprocess_image, image_to_3d) read.
    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
    pipeline.cuda()

    try:
        # Warm up preprocessing once with a small dummy image (the original
        # author's note: this preloads rembg with minimal memory usage).
        test_img = np.zeros((256, 256, 3), dtype=np.uint8)  # Smaller test image
        pipeline.preprocess_image(Image.fromarray(test_img))
        del test_img
        torch.cuda.empty_cache()
    except Exception as e:
        # The warm-up is best-effort, so a failure must not stop the app, but
        # report it instead of hiding it. Catching Exception (not a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        print(f'Warm-up preprocessing failed, continuing without it: {e}')

    demo.launch()