Spaces:

cronos3k
/

TRELLIS-Download_Raw_Mesh_Function

Runtime error

File size: 12,054 Bytes

import gradio as gr
from gradio_litmodel3d import LitModel3D

import os
import shutil
os.environ['SPCONV_ALGO'] = 'native'
from typing import *
import torch
import numpy as np
import imageio
import uuid
from easydict import EasyDict as edict
from PIL import Image
from trellis.pipelines import TrellisImageTo3DPipeline
from trellis.representations import Gaussian, MeshExtractResult
from trellis.utils import render_utils, postprocessing_utils


MAX_SEED = np.iinfo(np.int32).max
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
os.makedirs(TMP_DIR, exist_ok=True)


def start_session(req: gr.Request):
    """Create the scratch directory for the session that just connected.

    The directory lives under ``TMP_DIR`` and is named after the request's
    ``session_hash``. An already-existing directory is left as is.
    """
    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    print(f'Creating user directory: {session_dir}')
    os.makedirs(session_dir, exist_ok=True)
    
def end_session(req: gr.Request):
    """Remove the scratch directory of the session that is disconnecting.

    The directory is ``TMP_DIR/<session_hash>``. Removal is best-effort: a
    directory that was never created (or has already been deleted) must not
    turn session teardown into an exception, so errors from ``rmtree`` are
    ignored.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    print(f'Removing user directory: {user_dir}')
    shutil.rmtree(user_dir, ignore_errors=True)

def preprocess_image(image: Image.Image) -> Image.Image:
    """Run the pipeline's image preprocessing on an uploaded image.

    Args:
        image: The image supplied through the image prompt component.

    Returns:
        Whatever ``pipeline.preprocess_image`` returns for ``image``; the
        result is written back to the single image prompt component, so it
        is a single image rather than a tuple.
    """
    processed_image = pipeline.preprocess_image(image)
    return processed_image

def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
    """Serialize a generated model into a plain dictionary.

    The result has three entries:
      * ``'gaussian'``: ``gs.init_params`` plus the ``_xyz``, ``_features_dc``,
        ``_scaling``, ``_rotation`` and ``_opacity`` tensors as NumPy arrays.
      * ``'mesh'``: the mesh ``vertices`` and ``faces`` as NumPy arrays.
      * ``'trial_id'``: the identifier passed in, unchanged.

    Every tensor is moved to the CPU before conversion.
    """
    gaussian_tensor_attrs = ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity')
    mesh_tensor_attrs = ('vertices', 'faces')

    # Start from the constructor parameters, then append the tensors.
    packed_gaussian = {**gs.init_params}
    for attr in gaussian_tensor_attrs:
        packed_gaussian[attr] = getattr(gs, attr).cpu().numpy()

    packed_mesh = {}
    for attr in mesh_tensor_attrs:
        packed_mesh[attr] = getattr(mesh, attr).cpu().numpy()

    return {
        'gaussian': packed_gaussian,
        'mesh': packed_mesh,
        'trial_id': trial_id,
    }
    
def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
    """Rebuild the model objects from a dictionary made by ``pack_state``.

    Returns a tuple ``(gs, mesh, trial_id)`` where ``gs`` is a ``Gaussian``
    constructed from the stored init parameters with its tensors restored,
    ``mesh`` is an ``edict`` holding ``vertices`` and ``faces`` tensors, and
    ``trial_id`` is the stored identifier. All tensors are created on the
    ``'cuda'`` device.
    """
    packed_gs = state['gaussian']
    packed_mesh = state['mesh']

    # Constructor arguments, in the order they are read from the state.
    init_keys = (
        'aabb',
        'sh_degree',
        'mininum_kernel_size',
        'scaling_bias',
        'opacity_bias',
        'scaling_activation',
    )
    gs = Gaussian(**{key: packed_gs[key] for key in init_keys})

    # Restore the tensors that pack_state converted to NumPy arrays.
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        setattr(gs, attr, torch.tensor(packed_gs[attr], device='cuda'))

    mesh = edict(
        vertices=torch.tensor(packed_mesh['vertices'], device='cuda'),
        faces=torch.tensor(packed_mesh['faces'], device='cuda'),
    )

    return gs, mesh, state['trial_id']

def get_seed(randomize_seed: bool, seed: int) -> int:
    """Choose the seed for a generation run.

    When ``randomize_seed`` is falsy the supplied ``seed`` is returned
    unchanged; otherwise a value is drawn with
    ``np.random.randint(0, MAX_SEED)``.
    """
    if not randomize_seed:
        return seed
    return np.random.randint(0, MAX_SEED)

def image_to_3d(
    image: Image.Image,
    seed: int,
    ss_guidance_strength: float,
    ss_sampling_steps: int,
    slat_guidance_strength: float,
    slat_sampling_steps: int,
    req: gr.Request,
    progress: gr.Progress = gr.Progress()
) -> Tuple[dict, str, str, str]:
    """
    Convert an image to a 3D model with improved memory management and progress tracking.

    Args:
        image: Input image; passed to ``pipeline.run`` with
            ``preprocess_image=False``.
        seed: Seed forwarded to ``pipeline.run``.
        ss_guidance_strength: ``cfg_strength`` of the sparse structure sampler.
        ss_sampling_steps: ``steps`` of the sparse structure sampler.
        slat_guidance_strength: ``cfg_strength`` of the structured latent sampler.
        slat_sampling_steps: ``steps`` of the structured latent sampler.
        req: Gradio request; its ``session_hash`` selects the output directory
            under ``TMP_DIR``.
        progress: Gradio progress tracker used for status updates.

    Returns:
        A tuple ``(state, video_path, glb_path, glb_path)``: the packed model
        state, the path of the preview video, and the path of the
        full-quality GLB (returned twice, once per output component).

    Raises:
        gr.Error: If any step fails; the original exception is chained as the
            cause.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    progress(0, desc="Initializing...")

    # Clear CUDA cache before starting
    torch.cuda.empty_cache()

    try:
        # Make sure the output directory exists even if it was not created
        # (or was already removed) for this session.
        os.makedirs(user_dir, exist_ok=True)

        # Generate 3D model with progress updates
        progress(0.1, desc="Running 3D generation pipeline...")
        outputs = pipeline.run(
            image,
            seed=seed,
            formats=["gaussian", "mesh"],
            preprocess_image=False,
            sparse_structure_sampler_params={
                "steps": ss_sampling_steps,
                "cfg_strength": ss_guidance_strength,
            },
            slat_sampler_params={
                "steps": slat_sampling_steps,
                "cfg_strength": slat_guidance_strength,
            },
        )

        progress(0.4, desc="Generating video preview...")
        # Generate video frames in batches to manage memory
        batch_size = 30  # Process 30 frames at a time
        num_frames = 120
        video = []
        video_geo = []

        for i in range(0, num_frames, batch_size):
            end_idx = min(i + batch_size, num_frames)
            # Report before rendering so the message describes the batch that
            # is about to run, not one that has already finished.
            progress(0.4 + (0.3 * i / num_frames), desc=f"Rendering frames {i} to {end_idx}...")

            # NOTE(review): `start_frame` is forwarded to render_video as-is;
            # confirm the installed render_utils honours it. If it is ignored,
            # every batch would render the same camera positions.
            batch_frames = render_utils.render_video(
                outputs['gaussian'][0],
                num_frames=end_idx - i,
                start_frame=i
            )['color']
            batch_geo = render_utils.render_video(
                outputs['mesh'][0],
                num_frames=end_idx - i,
                start_frame=i
            )['normal']

            video.extend(batch_frames)
            video_geo.extend(batch_geo)

            # Clear cache after each batch
            torch.cuda.empty_cache()

        # Combine each color frame with its normal frame along axis 1
        video = [
            np.concatenate([color_frame, normal_frame], axis=1)
            for color_frame, normal_frame in zip(video, video_geo)
        ]

        # Generate unique ID and save video
        trial_id = str(uuid.uuid4())
        video_path = os.path.join(user_dir, f"{trial_id}.mp4")
        progress(0.7, desc="Saving video...")
        imageio.mimsave(video_path, video, fps=15)

        # Clear video data from memory
        del video
        del video_geo
        torch.cuda.empty_cache()

        # Generate and save full-quality GLB
        progress(0.8, desc="Generating full-quality GLB...")
        glb = postprocessing_utils.to_glb(
            outputs['gaussian'][0],
            outputs['mesh'][0],
            simplify=0.0,
            texture_size=2048,
            verbose=False
        )
        glb_path = os.path.join(user_dir, f"{trial_id}_full.glb")
        progress(0.9, desc="Saving GLB file...")
        glb.export(glb_path)

        # Pack state for reduced version
        progress(0.95, desc="Finalizing...")
        state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)

        progress(1.0, desc="Complete!")

        return state, video_path, glb_path, glb_path

    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback
        # attached as the cause.
        raise gr.Error(f"Processing failed: {str(e)}") from e
    finally:
        # Release cached GPU memory on both the success and the error path.
        torch.cuda.empty_cache()

def extract_reduced_glb(
    state: dict,
    mesh_simplify: float,
    texture_size: int,
    req: gr.Request,
    progress: gr.Progress = gr.Progress()
) -> Tuple[str, str]:
    """
    Extract a reduced-quality GLB file with progress tracking.

    Args:
        state: Packed model state as produced by ``pack_state``.
        mesh_simplify: Value passed as ``simplify`` to
            ``postprocessing_utils.to_glb``.
        texture_size: Value passed as ``texture_size`` to
            ``postprocessing_utils.to_glb``.
        req: Gradio request; its ``session_hash`` selects the output directory
            under ``TMP_DIR``.
        progress: Gradio progress tracker used for status updates.

    Returns:
        A tuple ``(glb_path, glb_path)``: the path of the exported GLB,
        returned twice (once per output component).

    Raises:
        gr.Error: If no state is available or any step fails; for failures
            the original exception is chained as the cause.
    """
    # Without a generated model there is nothing to reduce; say so directly
    # instead of failing later with an opaque subscript error.
    if state is None:
        raise gr.Error("No generated model available. Please run \"Generate\" first.")

    user_dir = os.path.join(TMP_DIR, str(req.session_hash))

    try:
        # Make sure the output directory exists before exporting into it.
        os.makedirs(user_dir, exist_ok=True)

        progress(0.1, desc="Unpacking model state...")
        gs, mesh, trial_id = unpack_state(state)

        progress(0.3, desc="Generating reduced GLB...")
        glb = postprocessing_utils.to_glb(
            gs, mesh,
            simplify=mesh_simplify,
            texture_size=texture_size,
            verbose=False
        )

        progress(0.8, desc="Saving reduced GLB...")
        glb_path = os.path.join(user_dir, f"{trial_id}_reduced.glb")
        glb.export(glb_path)

        progress(1.0, desc="Complete!")
        return glb_path, glb_path

    except Exception as e:
        # Keep the original traceback attached as the cause.
        raise gr.Error(f"GLB reduction failed: {str(e)}") from e
    finally:
        # Release cached GPU memory on both the success and the error path.
        torch.cuda.empty_cache()

# UI layout and event wiring for the demo application.
with gr.Blocks(delete_cache=(600, 600)) as demo:
    gr.Markdown("""
    ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
    * Upload an image and click "Generate" to create a 3D model
    * You can download either:
        * The full-quality GLB file (larger size, highest quality)
        * A reduced version with customizable quality settings (smaller size)
    """)

    with gr.Row():
        # Left column: input image, generation settings and action buttons.
        with gr.Column():
            image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=300)

            with gr.Accordion(label="Generation Settings", open=False):
                seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                gr.Markdown("Stage 1: Sparse Structure Generation")
                with gr.Row():
                    ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                    ss_sampling_steps = gr.Slider(1, 500, label="Sampling Steps", value=12, step=1)
                gr.Markdown("Stage 2: Structured Latent Generation")
                with gr.Row():
                    slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                    slat_sampling_steps = gr.Slider(1, 500, label="Sampling Steps", value=12, step=1)

            generate_btn = gr.Button("Generate")

            with gr.Accordion(label="Reduced GLB Settings", open=False):
                mesh_simplify = gr.Slider(0.0, 0.98, label="Mesh Simplification", value=0.95, step=0.01)
                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)

            extract_reduced_btn = gr.Button("Extract Reduced GLB", interactive=False)

        # Right column: previews and download buttons.
        with gr.Column():
            video_output = gr.Video(label="Generated 3D Asset Preview", autoplay=True, loop=True, height=300)
            model_output = LitModel3D(label="3D Model Preview", exposure=20.0, height=300)
            gr.Markdown("### Download Options")
            with gr.Row():
                download_full = gr.DownloadButton(label="Download Full-Quality GLB", interactive=False)
                download_reduced = gr.DownloadButton(label="Download Reduced GLB", interactive=False)

    # Holds the packed model state between generation and GLB extraction.
    output_buf = gr.State()

    # Example images
    with gr.Row():
        examples = gr.Examples(
            examples=[
                f'assets/example_image/{image}'
                # Sorted so the gallery order does not depend on the
                # filesystem's directory listing order.
                for image in sorted(os.listdir("assets/example_image"))
            ],
            inputs=[image_prompt],
            fn=preprocess_image,
            outputs=[image_prompt],
            run_on_click=True,
            examples_per_page=64,
        )

    # Event handlers
    demo.load(start_session)
    demo.unload(end_session)

    image_prompt.upload(
        preprocess_image,
        inputs=[image_prompt],
        outputs=[image_prompt],
    )

    generate_btn.click(
        get_seed,
        inputs=[randomize_seed, seed],
        outputs=[seed],
    ).then(
        image_to_3d,
        inputs=[image_prompt, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
        outputs=[output_buf, video_output, model_output, download_full],
    ).success(
        # Only enable downloads/extraction when generation actually succeeded;
        # a plain .then() would also run after image_to_3d raised.
        lambda: (gr.Button(interactive=True), gr.Button(interactive=True), gr.Button(interactive=False)),
        outputs=[download_full, extract_reduced_btn, download_reduced],
    )

    extract_reduced_btn.click(
        extract_reduced_glb,
        inputs=[output_buf, mesh_simplify, texture_size],
        outputs=[model_output, download_reduced],
    ).success(
        # Enable the reduced download only once a reduced GLB exists.
        lambda: gr.Button(interactive=True),
        outputs=[download_reduced],
    )

# Script entry point: load the pipeline onto the GPU, warm it up, start the UI.
if __name__ == "__main__":
    # Set some CUDA memory management options
    torch.cuda.empty_cache()
    torch.backends.cudnn.benchmark = True

    # Initialize pipeline
    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
    pipeline.cuda()

    try:
        # Preload rembg with minimal memory usage
        test_img = np.zeros((256, 256, 3), dtype=np.uint8)  # Smaller test image
        pipeline.preprocess_image(Image.fromarray(test_img))
        del test_img
        torch.cuda.empty_cache()
    except Exception as e:
        # The warm-up is best-effort: a failure here must not prevent the app
        # from starting, but it should be visible in the logs. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit through.
        print(f'Warm-up preprocessing failed (continuing): {e}')

    demo.launch()