# cronos3k's picture
# Update app.py
# d635e38 verified
# raw
# history blame
# 12.1 kB
import gradio as gr
from gradio_litmodel3d import LitModel3D
import os
import shutil
os.environ['SPCONV_ALGO'] = 'native'
from typing import *
import torch
import numpy as np
import imageio
import uuid
from easydict import EasyDict as edict
from PIL import Image
from trellis.pipelines import TrellisImageTo3DPipeline
from trellis.representations import Gaussian, MeshExtractResult
from trellis.utils import render_utils, postprocessing_utils
MAX_SEED = np.iinfo(np.int32).max
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
os.makedirs(TMP_DIR, exist_ok=True)
def start_session(req: gr.Request):
user_dir = os.path.join(TMP_DIR, str(req.session_hash))
print(f'Creating user directory: {user_dir}')
os.makedirs(user_dir, exist_ok=True)
def end_session(req: gr.Request):
    """Delete the scratch directory of the disconnecting session.

    Removes ``TMP_DIR/<session_hash>`` and everything in it. A directory
    that is already gone is treated as successfully cleaned up, so this
    hook does not raise when there is nothing left to remove.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    print(f'Removing user directory: {user_dir}')
    try:
        shutil.rmtree(user_dir)
    except FileNotFoundError:
        # Nothing to clean up (directory never created or already removed).
        pass
def preprocess_image(image: Image.Image) -> Image.Image:
    """Run the pipeline's image preprocessing on an input image.

    Thin wrapper around ``pipeline.preprocess_image``; the actual processing
    is defined by the pipeline object. A single processed image is returned
    (not a tuple), which is what the one-output Gradio wiring expects.

    Args:
        image: The image supplied by the user or picked from the examples.

    Returns:
        The processed image as returned by the pipeline.
    """
    return pipeline.preprocess_image(image)
def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
    """Snapshot a generated asset as a plain dict of NumPy arrays.

    The Gaussian's ``init_params`` are copied as-is, its tensors and the mesh
    vertices/faces are moved to CPU and converted to NumPy, and the trial id
    is stored alongside so the asset can later be rebuilt by ``unpack_state``.
    """
    tensor_fields = ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity')
    gaussian_arrays = {
        field: getattr(gs, field).cpu().numpy() for field in tensor_fields
    }
    mesh_arrays = {
        'vertices': mesh.vertices.cpu().numpy(),
        'faces': mesh.faces.cpu().numpy(),
    }
    return {
        # Constructor parameters first, then the array fields (same key order
        # and override precedence as a single dict literal).
        'gaussian': {**gs.init_params, **gaussian_arrays},
        'mesh': mesh_arrays,
        'trial_id': trial_id,
    }
def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
    """Rebuild the Gaussian and mesh from a dict produced by ``pack_state``.

    All arrays are turned back into tensors on the ``cuda`` device.

    Returns:
        ``(gaussian, mesh, trial_id)`` where ``mesh`` is an ``edict`` with
        ``vertices`` and ``faces`` tensors.
    """
    packed_gs = state['gaussian']
    packed_mesh = state['mesh']

    # Constructor keywords, read from the packed dict under the same names.
    # 'mininum_kernel_size' is spelled exactly as the constructor keyword is
    # used in this file - do not "correct" it here.
    init_keys = (
        'aabb',
        'sh_degree',
        'mininum_kernel_size',
        'scaling_bias',
        'opacity_bias',
        'scaling_activation',
    )
    gs = Gaussian(**{key: packed_gs[key] for key in init_keys})

    # Restore the tensor attributes on the GPU.
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        setattr(gs, attr, torch.tensor(packed_gs[attr], device='cuda'))

    mesh = edict(
        vertices=torch.tensor(packed_mesh['vertices'], device='cuda'),
        faces=torch.tensor(packed_mesh['faces'], device='cuda'),
    )
    return gs, mesh, state['trial_id']
def get_seed(randomize_seed: bool, seed: int) -> int:
    """Pick the seed for a run: the given one, or a fresh random one.

    When ``randomize_seed`` is set, a value is drawn from ``[0, MAX_SEED)``;
    otherwise ``seed`` is returned unchanged.
    """
    if not randomize_seed:
        return seed
    return np.random.randint(0, MAX_SEED)
def image_to_3d(
    image: Image.Image,
    seed: int,
    ss_guidance_strength: float,
    ss_sampling_steps: int,
    slat_guidance_strength: float,
    slat_sampling_steps: int,
    req: gr.Request,
    progress: gr.Progress = gr.Progress()
) -> Tuple[dict, str, str, str]:
    """
    Convert an image to a 3D model, writing a preview video and a full-quality GLB.

    Args:
        image: Input image. It is passed to the pipeline with
            ``preprocess_image=False``, so it should already be preprocessed.
        seed: Seed forwarded to the pipeline.
        ss_guidance_strength: ``cfg_strength`` for the sparse-structure sampler.
        ss_sampling_steps: ``steps`` for the sparse-structure sampler.
        slat_guidance_strength: ``cfg_strength`` for the structured-latent sampler.
        slat_sampling_steps: ``steps`` for the structured-latent sampler.
        req: Gradio request; its ``session_hash`` selects the output directory.
        progress: Gradio progress tracker updated throughout the run.

    Returns:
        ``(state, video_path, glb_path, glb_path)``: the packed asset state,
        the path of the saved preview video, and the full GLB path twice
        (one value per Gradio output it is wired to).

    Raises:
        gr.Error: If any step fails; the original exception is chained.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    # Make sure the session directory exists before anything is written to it,
    # rather than relying on the session-start hook having run.
    os.makedirs(user_dir, exist_ok=True)

    progress(0, desc="Initializing...")
    # Clear CUDA cache before starting
    torch.cuda.empty_cache()

    try:
        # Generate 3D model with progress updates
        progress(0.1, desc="Running 3D generation pipeline...")
        outputs = pipeline.run(
            image,
            seed=seed,
            formats=["gaussian", "mesh"],
            preprocess_image=False,
            sparse_structure_sampler_params={
                "steps": ss_sampling_steps,
                "cfg_strength": ss_guidance_strength,
            },
            slat_sampler_params={
                "steps": slat_sampling_steps,
                "cfg_strength": slat_guidance_strength,
            },
        )
        gaussian = outputs['gaussian'][0]
        mesh = outputs['mesh'][0]

        progress(0.4, desc="Generating video preview...")
        # Render the preview in batches to bound GPU memory use.
        batch_size = 30  # Process 30 frames at a time
        num_frames = 120
        video = []
        for start in range(0, num_frames, batch_size):
            end_idx = min(start + batch_size, num_frames)
            # NOTE(review): this relies on render_video honouring `start_frame`
            # so that batches continue the camera path - confirm against this
            # project's render_utils; if the argument is ignored, every batch
            # would render the same sweep.
            batch_frames = render_utils.render_video(
                gaussian,
                num_frames=end_idx - start,
                start_frame=start
            )['color']
            batch_geo = render_utils.render_video(
                mesh,
                num_frames=end_idx - start,
                start_frame=start
            )['normal']
            # Place colour and normal renders side by side, frame by frame.
            video.extend(
                np.concatenate([color, normal], axis=1)
                for color, normal in zip(batch_frames, batch_geo)
            )
            del batch_frames, batch_geo
            # Clear cache after each batch
            torch.cuda.empty_cache()
            # Report the fraction actually completed (based on the batch end),
            # so the rendering phase finishes exactly at 0.7.
            progress(0.4 + (0.3 * end_idx / num_frames), desc=f"Rendering frames {start} to {end_idx}...")

        # Generate unique ID and save video
        trial_id = str(uuid.uuid4())
        video_path = os.path.join(user_dir, f"{trial_id}.mp4")
        progress(0.7, desc="Saving video...")
        imageio.mimsave(video_path, video, fps=15)

        # Clear video data from memory
        del video
        torch.cuda.empty_cache()

        # Generate and save full-quality GLB (no simplification, 2048 texture)
        progress(0.8, desc="Generating full-quality GLB...")
        glb = postprocessing_utils.to_glb(
            gaussian,
            mesh,
            simplify=0.0,
            texture_size=2048,
            verbose=False
        )
        glb_path = os.path.join(user_dir, f"{trial_id}_full.glb")
        progress(0.9, desc="Saving GLB file...")
        glb.export(glb_path)

        # Pack state so a reduced GLB can be extracted later
        progress(0.95, desc="Finalizing...")
        state = pack_state(gaussian, mesh, trial_id)

        # Final cleanup
        torch.cuda.empty_cache()
        progress(1.0, desc="Complete!")
        return state, video_path, glb_path, glb_path

    except Exception as e:
        # Clean up on error, then surface the failure in the UI while keeping
        # the original exception attached for debugging.
        torch.cuda.empty_cache()
        raise gr.Error(f"Processing failed: {str(e)}") from e
def extract_reduced_glb(
    state: dict,
    mesh_simplify: float,
    texture_size: int,
    req: gr.Request,
    progress: gr.Progress = gr.Progress()
) -> Tuple[str, str]:
    """
    Extract a reduced-quality GLB file from a packed state, with progress tracking.

    Args:
        state: Dict produced by ``pack_state`` for a previously generated asset.
        mesh_simplify: Passed to ``to_glb`` as ``simplify``.
        texture_size: Passed to ``to_glb`` as ``texture_size``.
        req: Gradio request; its ``session_hash`` selects the output directory.
        progress: Gradio progress tracker updated throughout the run.

    Returns:
        The reduced GLB path twice (one value per Gradio output it is wired to).

    Raises:
        gr.Error: If no state is available or any step fails; for failures
            the original exception is chained.
    """
    if state is None:
        # Give a clear message instead of a TypeError from indexing None.
        raise gr.Error("GLB reduction failed: no generated model available")

    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    # Make sure the session directory exists before the export writes to it.
    os.makedirs(user_dir, exist_ok=True)

    try:
        progress(0.1, desc="Unpacking model state...")
        gs, mesh, trial_id = unpack_state(state)

        progress(0.3, desc="Generating reduced GLB...")
        glb = postprocessing_utils.to_glb(
            gs, mesh,
            simplify=mesh_simplify,
            texture_size=texture_size,
            verbose=False
        )

        progress(0.8, desc="Saving reduced GLB...")
        glb_path = os.path.join(user_dir, f"{trial_id}_reduced.glb")
        glb.export(glb_path)

        progress(0.9, desc="Cleaning up...")
        torch.cuda.empty_cache()

        progress(1.0, desc="Complete!")
        return glb_path, glb_path

    except Exception as e:
        # Free GPU memory, then surface the failure in the UI while keeping
        # the original exception attached for debugging.
        torch.cuda.empty_cache()
        raise gr.Error(f"GLB reduction failed: {str(e)}") from e
# Build the Gradio UI: input image and settings, preview outputs, download
# buttons, and the example gallery. The resulting app object is `demo`.
# NOTE(review): indentation is missing in this copy of the file, so the nesting
# of the `with` containers below cannot be read off the text - restore it from
# the original before running.
# NOTE(review): delete_cache=(600, 600) presumably configures Gradio's cache
# cleanup interval/age in seconds - confirm against the Gradio docs.
with gr.Blocks(delete_cache=(600, 600)) as demo:
# Introductory help text shown at the top of the page.
gr.Markdown("""
## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
* Upload an image and click "Generate" to create a 3D model
* You can download either:
* The full-quality GLB file (larger size, highest quality)
* A reduced version with customizable quality settings (smaller size)
""")
with gr.Row():
with gr.Column():
# Input image, delivered to handlers as an RGBA PIL image.
image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=300)
with gr.Accordion(label="Generation Settings", open=False):
# Seed controls: with "Randomize Seed" ticked (the default), get_seed
# replaces the slider value with a random one before generation.
seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
gr.Markdown("Stage 1: Sparse Structure Generation")
with gr.Row():
# Forwarded by image_to_3d as the sparse-structure sampler's
# cfg_strength and steps.
ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
ss_sampling_steps = gr.Slider(1, 500, label="Sampling Steps", value=12, step=1)
gr.Markdown("Stage 2: Structured Latent Generation")
with gr.Row():
# Forwarded by image_to_3d as the structured-latent sampler's
# cfg_strength and steps.
slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
slat_sampling_steps = gr.Slider(1, 500, label="Sampling Steps", value=12, step=1)
generate_btn = gr.Button("Generate")
with gr.Accordion(label="Reduced GLB Settings", open=False):
# Inputs of extract_reduced_glb (simplify ratio and texture size).
mesh_simplify = gr.Slider(0.0, 0.98, label="Mesh Simplification", value=0.95, step=0.01)
texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
# Starts disabled; enabled by the event wiring after a generation run.
extract_reduced_btn = gr.Button("Extract Reduced GLB", interactive=False)
with gr.Column():
# Outputs: preview video and interactive 3D viewer.
video_output = gr.Video(label="Generated 3D Asset Preview", autoplay=True, loop=True, height=300)
model_output = LitModel3D(label="3D Model Preview", exposure=20.0, height=300)
gr.Markdown("### Download Options")
with gr.Row():
# Both download buttons start disabled; the event wiring enables them
# once the corresponding file has been produced.
download_full = gr.DownloadButton(label="Download Full-Quality GLB", interactive=False)
download_reduced = gr.DownloadButton(label="Download Reduced GLB", interactive=False)
# Holds the packed state returned by image_to_3d, later consumed by
# extract_reduced_glb.
output_buf = gr.State()
# Example images
# One example per entry of assets/example_image (path is relative to the
# working directory). With run_on_click=True and fn=preprocess_image, picking an
# example presumably runs preprocess_image and writes the result back into
# image_prompt - confirm against the Gradio docs.
with gr.Row():
examples = gr.Examples(
examples=[
f'assets/example_image/{image}'
for image in os.listdir("assets/example_image")
],
inputs=[image_prompt],
fn=preprocess_image,
outputs=[image_prompt],
run_on_click=True,
examples_per_page=64,
)
# Event handlers
# NOTE(review): these registrations presumably belong inside the
# `with gr.Blocks(...) as demo:` context - restore the indentation accordingly.

# Create / remove the per-session scratch directory with the page lifecycle.
demo.load(start_session)
demo.unload(end_session)

# Preprocess an uploaded image and show the processed result in the same component.
image_prompt.upload(
    preprocess_image,
    inputs=[image_prompt],
    outputs=[image_prompt],
)

# Generate: resolve the seed, run the generation, then - only if the generation
# succeeded - enable the full download and the reduced-GLB extraction and
# disable the (now stale) reduced download.
generate_btn.click(
    get_seed,
    inputs=[randomize_seed, seed],
    outputs=[seed],
).then(
    image_to_3d,
    inputs=[image_prompt, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
    outputs=[output_buf, video_output, model_output, download_full],
).success(
    # `.success` instead of `.then`: `.then` also runs after a failed step,
    # which would enable buttons that have no file/state behind them.
    lambda: (gr.Button(interactive=True), gr.Button(interactive=True), gr.Button(interactive=False)),
    outputs=[download_full, extract_reduced_btn, download_reduced],
)

# Extract reduced GLB: build the reduced file, then - only on success - enable
# its download button.
extract_reduced_btn.click(
    extract_reduced_glb,
    inputs=[output_buf, mesh_simplify, texture_size],
    outputs=[model_output, download_reduced],
).success(
    lambda: gr.Button(interactive=True),
    outputs=[download_reduced],
)
if __name__ == "__main__":
    # Set some CUDA memory management options
    torch.cuda.empty_cache()
    torch.backends.cudnn.benchmark = True

    # Initialize pipeline. `pipeline` is a module-level global that the
    # handlers above read, so it must be created before the app is launched.
    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
    pipeline.cuda()

    # Warm-up: run preprocessing once on a small blank image so the first real
    # request does not pay the initialization cost. This is best effort - a
    # failure here must not prevent the app from starting.
    try:
        test_img = np.zeros((256, 256, 3), dtype=np.uint8)  # Smaller test image
        pipeline.preprocess_image(Image.fromarray(test_img))
        del test_img
        torch.cuda.empty_cache()
    except Exception as e:
        # `except Exception` (not a bare `except`) so Ctrl-C / SystemExit still
        # propagate, and the failure is reported instead of silently dropped.
        print(f'Warm-up preprocessing failed (continuing): {e}')

    demo.launch()