# Spaces: VAST-AI / SeqTex
# Running on Zero
# File size: 11,778 Bytes
# 1d5bb62

import numpy as np
import torch
from einops import rearrange
from PIL import Image
from utils.image_generation import generate_image_condition
from utils.mesh_utils import Mesh
from utils.render_utils import render_views
from utils.texture_generation import generate_texture

import gradio as gr
from gradio_litmodel3d import LitModel3D

# Pre-configured demo rows. Each row supplies, in order, one value per
# component in the `inputs=` list of the `gr.Examples(...)` call in this file:
#   mesh file, Y→Z, Y→X, Z→X, flip vertically, seed, camera view,
#   AI model, edge refinement, texture description.
EXAMPLES = [
    [
        "examples/birdhouse.glb",  # mesh file
        True,                      # Y → Z transform
        False,                     # Y → X transform
        False,                     # Z → X transform
        False,                     # flip vertically
        42,                        # random seed
        "First View",              # camera view
        "SDXL",                    # AI model
        False,                     # edge refinement
        "A rustic birdhouse featuring a snow-covered roof, wood textures, and two decorative cardinal birds. It has a circular entryway and conveys a winter-themed aesthetic.",
    ],
    [
        "examples/mario.glb",      # mesh file
        False,                     # Y → Z transform
        False,                     # Y → X transform
        False,                     # Z → X transform
        True,                      # flip vertically
        6666,                      # random seed
        "Third View",              # camera view
        "FLUX",                    # AI model
        True,                      # edge refinement
        "Mario, a cartoon character wearing a red cap and blue overalls, with brown hair and a mustache, and white gloves, in a fighting pose. The clothes he wears are not in a reflection mode.",
    ],
]

def _to_channel_last(tensor):
    """Return ``tensor`` with its channel axis last: (..., C, H, W) -> (..., H, W, C)."""
    # Layout heuristic: a trailing axis of size 3 or 4 is taken to be the
    # channel axis already; anything else is treated as channel-first.
    if tensor.shape[-1] in (3, 4):
        return tensor
    return tensor.movedim(-3, -1)


def _align_mask(mask, tensor):
    """Return ``mask`` as one alpha channel matching channel-last ``tensor``."""
    original_shape = tuple(mask.shape)
    expected = tensor.shape[:-1]
    if mask.dim() == tensor.dim() - 1:
        # Mask without a channel axis, e.g. (Nv, H, W) or (H, W).
        mask = mask.unsqueeze(-1)
    elif mask.dim() == tensor.dim() and mask.shape[:-1] != expected:
        # Not channel-last, so try the channel-first layout (..., C, H, W).
        mask = mask.movedim(-3, -1)
    if (
        mask.dim() != tensor.dim()
        or mask.shape[:-1] != expected
        or mask.shape[-1] not in (1, 3)
    ):
        raise ValueError(
            f"Mask shape {original_shape} is incompatible with image shape {tuple(tensor.shape)}"
        )
    # A 3-channel mask is reduced to its first channel so the result stays RGBA.
    # NOTE(review): assumes the three mask channels are replicas — confirm with callers.
    return mask[..., :1]


def tensor_to_pil(tensor, mask=None, normalize: bool = True):
    """
    Convert tensor to PIL Image.

    Multi-view input (4D) is tiled horizontally, one view next to the other.

    :param tensor: torch.Tensor, shape can be (Nv, H, W, C), (Nv, C, H, W), (H, W, C), (C, H, W)
    :param mask: optional torch.Tensor used as alpha channel for a 3-channel tensor.
        May be laid out like ``tensor`` (channel-last or channel-first, with 1 or 3
        channels) or have no channel axis at all, e.g. (Nv, H, W) or (H, W).
    :param normalize: if True, map values from [-1, 1] to [0, 1] before clamping
    :return: PIL.Image in RGB mode (3 channels) or RGBA mode (4 channels)
    :raises ValueError: if the tensor is not 3D/4D, the mask does not match the
        image, or the final channel count is not 3 or 4
    """
    # Detach and move to host memory unconditionally; .cpu() is a no-op for CPU
    # tensors and also covers non-CUDA accelerator devices.
    tensor = tensor.detach().cpu().float()
    if tensor.dim() not in (3, 4):
        raise ValueError(f"Unsupported tensor shape: {tensor.shape}")

    if normalize:
        tensor = (tensor + 1.0) / 2.0
    tensor = torch.clamp(tensor, 0.0, 1.0)

    # Resolve the layout BEFORE attaching the mask, so the alpha channel is
    # always concatenated on the real channel axis (also for channel-first input).
    tensor = _to_channel_last(tensor)
    if mask is not None:
        mask = torch.clamp(mask.detach().cpu().float(), 0.0, 1.0)
        tensor = torch.cat([tensor, _align_mask(mask, tensor)], dim=-1)

    if tensor.shape[-1] not in (3, 4):
        raise ValueError("Only support 3 or 4 channel images.")

    if tensor.dim() == 4:
        # Tile the Nv views side by side along the width axis.
        tensor = rearrange(tensor, 'nv h w c -> h (nv w) c')

    # Convert to numpy
    np_img = (tensor.numpy() * 255).round().astype(np.uint8)

    # Create PIL Image; for uint8 data PIL infers RGB / RGBA from the channel count.
    return Image.fromarray(np_img)
    
# Script entry point: build the three-step Gradio UI (mesh processing, image
# condition generation, texture generation) and launch the server.
if __name__ == '__main__':
    with gr.Blocks() as demo:
        gr.Markdown("# 🎨 SeqTex: Generate Mesh Textures in Video Sequence")
        
        gr.Markdown("""
        ## 🚀 Welcome to SeqTex!
        **SeqTex** is a cutting-edge AI system that generates high-quality textures for 3D meshes using image prompts (here we use image generator to get them from textual prompts). 
        
        Choose to either **try our example models** below or **upload your own 3D mesh** to create stunning textures.
        """)

        gr.Markdown("---")

        gr.Markdown("## 🔧 Step 1: Upload & Process 3D Mesh")
        gr.Markdown("""
        **📋 How to prepare your 3D mesh:**
        - Upload your 3D mesh in **.obj** or **.glb** format
        - **💡 Pro Tip**: 
          - For optimal results, ensure your mesh includes only one part with <span style="color:#e74c3c; font-weight:bold;">UV parameterization</span>
          - Otherwise, we'll combine all parts and generate UV parameterization using *xAtlas* (may take longer for high-poly meshes; may also fail for certain meshes)
        - **⚠️ Important**: We recommend adjusting your model using *Mesh Orientation Adjustments* to be **Z-UP oriented** for best results
        """)
        # State holders filled by Step 1 (Mesh.process) and consumed by the
        # callbacks of Steps 2 and 3.
        position_map_tensor, normal_map_tensor, position_images_tensor, normal_images_tensor, mask_images_tensor, w2cs, mesh, mvp_matrix = gr.State(), gr.State(), gr.State(), gr.State(), gr.State(), gr.State(), gr.State(), gr.State()

        # fixed_texture_map = Image.open("image.webp").convert("RGB")
        # Step 1
        with gr.Row():
            with gr.Column():
                mesh_upload = gr.File(label="📁 Upload 3D Mesh", file_types=[".obj", ".glb"])
                # uv_tool = gr.Radio(["xAtlas", "UVAtlas"], label="UV parameterizer", value="xAtlas")
                
                gr.Markdown("**🔄 Mesh Orientation Adjustments** (if needed):")
                y2z = gr.Checkbox(label="Y → Z Transform", value=False, info="Rotate: Y becomes Z, -Z becomes Y")
                y2x = gr.Checkbox(label="Y → X Transform", value=False, info="Rotate: Y becomes X, -X becomes Y")
                z2x = gr.Checkbox(label="Z → X Transform", value=False, info="Rotate: Z becomes X, -X becomes Z")
                upside_down = gr.Checkbox(label="🔃 Flip Vertically", value=False, info="Fix upside-down mesh orientation")
    
            with gr.Column():
                step1_button = gr.Button("🔄 Process Mesh & Generate Views", variant="primary")
                step1_progress = gr.Textbox(label="📊 Processing Status", interactive=False)
                model_input = gr.Model3D(label="📐 Processed 3D Model", height=500)

        with gr.Row(equal_height=True):
            rgb_views = gr.Image(label="📷 Generated Views (Front, Back, Left, Right)", type="pil", scale=3)
            position_map = gr.Image(label="🗺️ Position Map", type="pil", scale=1)
            normal_map = gr.Image(label="🧭 Normal Map", type="pil", scale=1)

        # Step 1 pipeline: process the mesh (the UV tool is fixed to "xAtlas"),
        # then turn the resulting tensors into preview images and export the
        # processed mesh for the 3D viewer.
        step1_button.click(
            Mesh.process,
            inputs=[mesh_upload, gr.State("xAtlas"), y2z, y2x, z2x, upside_down],
            outputs=[position_map_tensor, normal_map_tensor, position_images_tensor, normal_images_tensor, mask_images_tensor, w2cs, mesh, mvp_matrix, step1_progress]
        ).then(
            # Normal renderings of the views, with the mask passed as alpha channel.
            tensor_to_pil,
            inputs=[normal_images_tensor, mask_images_tensor],
            outputs=[rgb_views]
        ).then(
            tensor_to_pil,
            inputs=[position_map_tensor],
            outputs=[position_map]
        ).then(
            tensor_to_pil,
            inputs=[normal_map_tensor],
            outputs=[normal_map]
        ).then(
            Mesh.export,
            inputs=[mesh],
            outputs=[model_input]
        )

        # Step 2
        gr.Markdown("---")
        gr.Markdown("## 👁️ Step 2: Select View & Generate Image Condition")
        gr.Markdown("""
        **📋 How to generate image condition:**
        - Your mesh will be rendered from **four viewpoints** (front, back, left, right)
        - Choose **one view** as your image condition
        - Enter a **descriptive text prompt** for the desired texture
        - Select your preferred AI model:
          - <span style="color:#27ae60; font-weight:bold;">🎯 SDXL</span>: Fast generation with depth + normal control, better details
          - <span style="color:#3498db; font-weight:bold;">⚡ FLUX</span>: High-quality generation with depth control (slower due to CPU offloading). Better work with **Edge Refinement**
        """)
        with gr.Row():
            with gr.Column():
                img_condition_seed = gr.Number(label="🎲 Random Seed", minimum=0, maximum=9999, step=1, value=42, info="Change for different results")
                selected_view = gr.Radio(["First View", "Second View", "Third View", "Fourth View"], label="📐 Camera View", value="First View", info="Choose which viewpoint to use as reference")
                with gr.Row():
                    model_choice = gr.Radio(["SDXL", "FLUX"], label="🤖 AI Model", value="SDXL", info="SDXL: Fast, depth+normal control | FLUX: High-quality, slower processing")
                    edge_refinement = gr.Checkbox(label="✨ Edge Refinement", value=True, info="Smooth boundary artifacts (recommended for cleaner results)")
                text_prompt = gr.Textbox(label="💬 Texture Description", placeholder="Describe the desired texture appearance (e.g., 'rustic wooden surface with weathered paint')", lines=2)
                step2_button = gr.Button("🎯 Generate Image Condition", variant="primary")
                step2_progress = gr.Textbox(label="📊 Generation Status", interactive=False)
                
            with gr.Column():
                condition_image = gr.Image(label="🖼️ Generated Image Condition", type="pil") # , interactive=False

        # Steps 2 and 3 share the "gpu_intensive" concurrency id, so their
        # heavy callbacks are queued under the same concurrency group.
        step2_button.click(
            generate_image_condition,
            inputs=[position_images_tensor, normal_images_tensor, mask_images_tensor, w2cs, text_prompt, selected_view, img_condition_seed, model_choice, edge_refinement],
            outputs=[condition_image, step2_progress],
            concurrency_id="gpu_intensive"
        )

        # Step 3
        gr.Markdown("---")
        gr.Markdown("## 🎨 Step 3: Generate Final Texture")
        gr.Markdown("""
        **📋 How to generate final texture:**
        - The **SeqTex pipeline** will create a complete texture map for your model
        - View the results from multiple angles and download your textured 3D model (the viewport is a little bit dark)
        """)
        texture_map_tensor, mv_out_tensor = gr.State(), gr.State()
        # Column scales are integers (Gradio expects an integer `scale`);
        # 2 : 4 : 3 keeps the intended 1 : 2 : 1.5 width ratio.
        with gr.Row():
            with gr.Column(scale=2):
                step3_button = gr.Button("🎨 Generate Final Texture", variant="primary")
                step3_progress = gr.Textbox(label="📊 Texture Generation Status", interactive=False)
                texture_map = gr.Image(label="🏆 Generated Texture Map", interactive=False)
            with gr.Column(scale=4):
                rendered_imgs = gr.Image(label="🖼️ Final Rendered Views")
                mv_branch_imgs = gr.Image(label="🖼️ SeqTex Direct Output")
            with gr.Column(scale=3):
                # model_display = gr.Model3D(label="🏆 Final Textured Model", height=500)
                model_display = LitModel3D(label="Model with Texture", 
                                           exposure=30.0, 
                                           height=500)

        # Step 3 pipeline: generate the texture, show the texture map and the
        # direct multi-view output (both converted with mask=None and
        # normalize=False), render the textured mesh and export it for display.
        step3_button.click(
            generate_texture,  
            inputs=[position_map_tensor, normal_map_tensor, position_images_tensor, normal_images_tensor, condition_image, text_prompt, selected_view],
            outputs=[texture_map_tensor, mv_out_tensor, step3_progress],
            concurrency_id="gpu_intensive"
        ).then(
            tensor_to_pil,
            inputs=[texture_map_tensor, gr.State(None), gr.State(False)],
            outputs=[texture_map]
        ).then(
            tensor_to_pil,
            inputs=[mv_out_tensor, gr.State(None), gr.State(False)],
            outputs=[mv_branch_imgs]
        ).then(
            render_views,
            inputs=[mesh, texture_map_tensor, mvp_matrix],
            outputs=[rendered_imgs]
        ).then(
            Mesh.export,
            inputs=[mesh, gr.State(None), texture_map],
            outputs=[model_display]
        )

        # Add example inputs for user convenience
        gr.Markdown("---")
        gr.Markdown("## 🚀 Try Our Examples")
        gr.Markdown("**Quick Start**: Click on any example below to see SeqTex in action with pre-configured settings!")
        # The order of `inputs` must match the column order of each EXAMPLES row.
        gr.Examples(
            examples=EXAMPLES,
            inputs=[mesh_upload, y2z, y2x, z2x, upside_down, img_condition_seed, selected_view, model_choice, edge_refinement, text_prompt],
            cache_examples=False
        )

    # Listen on all interfaces on a fixed port.
    demo.launch(server_name="0.0.0.0", server_port=52424)