import numpy as np import torch from einops import rearrange from PIL import Image from utils.image_generation import generate_image_condition from utils.mesh_utils import Mesh from utils.render_utils import render_views from utils.texture_generation import generate_texture import gradio as gr from gradio_litmodel3d import LitModel3D EXAMPLES = [ ["examples/birdhouse.glb", True, False, False, False, 42, "First View", "SDXL", False, "A rustic birdhouse featuring a snow-covered roof, wood textures, and two decorative cardinal birds. It has a circular entryway and conveys a winter-themed aesthetic."], ["examples/mario.glb", False, False, False, True, 6666, "Third View", "FLUX", True, "Mario, a cartoon character wearing a red cap and blue overalls, with brown hair and a mustache, and white gloves, in a fighting pose. The clothes he wears are not in a reflection mode."], ] def tensor_to_pil(tensor, mask=None, normalize: bool = True): """ Convert tensor to PIL Image. :param tensor: torch.Tensor, shape can be (Nv, H, W, C), (Nv, C, H, W), (H, W, C), (C, H, W) :param mask: torch.Tensor, shape same as tensor, effective when C=3 :return: PIL.Image """ # Move to cpu tensor = tensor.detach() if tensor.is_cuda: tensor = tensor.cpu() if mask is not None and mask.is_cuda: mask = mask.cpu() # Convert to float32 tensor = tensor.float() if mask is not None: mask = mask.float() if normalize: tensor = (tensor + 1.0) / 2.0 tensor = torch.clamp(tensor, 0.0, 1.0) if mask is not None: if mask.shape[-1] not in [1, 3]: mask = mask.unsqueeze(-1) tensor = torch.cat([tensor, mask], dim=-1) shape = tensor.shape # 4D: (Nv, H, W, C) or (Nv, C, H, W) if len(shape) == 4: Nv = shape[0] if shape[-1] in [3, 4]: # (Nv, H, W, C) tensor = rearrange(tensor, 'nv h w c -> h (nv w) c') else: # (Nv, C, H, W) tensor = rearrange(tensor, 'nv c h w -> h (nv w) c') # 3D: (H, W, C) or (C, H, W) elif len(shape) == 3: if shape[-1] in [3, 4]: # (H, W, C) tensor = rearrange(tensor, 'h w c -> h w c') else: # (C, H, W) tensor = rearrange(tensor, 'c h w -> h w c') else: raise ValueError(f"Unsupported tensor shape: {shape}") # Convert to numpy np_img = (tensor.numpy() * 255).round().astype(np.uint8) # Create PIL Image if np_img.shape[2] == 3: return Image.fromarray(np_img, mode="RGB") elif np_img.shape[2] == 4: return Image.fromarray(np_img, mode="RGBA") else: raise ValueError("Only support 3 or 4 channel images.") if __name__ == '__main__': with gr.Blocks() as demo: gr.Markdown("# 🎨 SeqTex: Generate Mesh Textures in Video Sequence") gr.Markdown(""" ## πŸš€ Welcome to SeqTex! **SeqTex** is a cutting-edge AI system that generates high-quality textures for 3D meshes using image prompts (here we use image generator to get them from textual prompts). Choose to either **try our example models** below or **upload your own 3D mesh** to create stunning textures. """) gr.Markdown("---") gr.Markdown("## πŸ”§ Step 1: Upload & Process 3D Mesh") gr.Markdown(""" **πŸ“‹ How to prepare your 3D mesh:** - Upload your 3D mesh in **.obj** or **.glb** format - **πŸ’‘ Pro Tip**: - For optimal results, ensure your mesh includes only one part with UV parameterization - Otherwise, we'll combine all parts and generate UV parameterization using *xAtlas* (may take longer for high-poly meshes; may also fail for certain meshes) - **⚠️ Important**: We recommend adjusting your model using *Mesh Orientation Adjustments* to be **Z-UP oriented** for best results """) position_map_tensor, normal_map_tensor, position_images_tensor, normal_images_tensor, mask_images_tensor, w2cs, mesh, mvp_matrix = gr.State(), gr.State(), gr.State(), gr.State(), gr.State(), gr.State(), gr.State(), gr.State() # fixed_texture_map = Image.open("image.webp").convert("RGB") # Step 1 with gr.Row(): with gr.Column(): mesh_upload = gr.File(label="πŸ“ Upload 3D Mesh", file_types=[".obj", ".glb"]) # uv_tool = gr.Radio(["xAtlas", "UVAtlas"], label="UV parameterizer", value="xAtlas") gr.Markdown("**πŸ”„ Mesh Orientation Adjustments** (if needed):") y2z = gr.Checkbox(label="Y β†’ Z Transform", value=False, info="Rotate: Y becomes Z, -Z becomes Y") y2x = gr.Checkbox(label="Y β†’ X Transform", value=False, info="Rotate: Y becomes X, -X becomes Y") z2x = gr.Checkbox(label="Z β†’ X Transform", value=False, info="Rotate: Z becomes X, -X becomes Z") upside_down = gr.Checkbox(label="πŸ”ƒ Flip Vertically", value=False, info="Fix upside-down mesh orientation") with gr.Column(): step1_button = gr.Button("πŸ”„ Process Mesh & Generate Views", variant="primary") step1_progress = gr.Textbox(label="πŸ“Š Processing Status", interactive=False) model_input = gr.Model3D(label="πŸ“ Processed 3D Model", height=500) with gr.Row(equal_height=True): rgb_views = gr.Image(label="πŸ“· Generated Views (Front, Back, Left, Right)", type="pil", scale=3) position_map = gr.Image(label="πŸ—ΊοΈ Position Map", type="pil", scale=1) normal_map = gr.Image(label="🧭 Normal Map", type="pil", scale=1) step1_button.click( Mesh.process, inputs=[mesh_upload, gr.State("xAtlas"), y2z, y2x, z2x, upside_down], outputs=[position_map_tensor, normal_map_tensor, position_images_tensor, normal_images_tensor, mask_images_tensor, w2cs, mesh, mvp_matrix, step1_progress] ).then( tensor_to_pil, inputs=[normal_images_tensor, mask_images_tensor], outputs=[rgb_views] ).then( tensor_to_pil, inputs=[position_map_tensor], outputs=[position_map] ).then( tensor_to_pil, inputs=[normal_map_tensor], outputs=[normal_map] ).then( Mesh.export, inputs=[mesh], outputs=[model_input] ) # Step 2 gr.Markdown("---") gr.Markdown("## πŸ‘οΈ Step 2: Select View & Generate Image Condition") gr.Markdown(""" **πŸ“‹ How to generate image condition:** - Your mesh will be rendered from **four viewpoints** (front, back, left, right) - Choose **one view** as your image condition - Enter a **descriptive text prompt** for the desired texture - Select your preferred AI model: - 🎯 SDXL: Fast generation with depth + normal control, better details - ⚑ FLUX: High-quality generation with depth control (slower due to CPU offloading). Better work with **Edge Refinement** """) with gr.Row(): with gr.Column(): img_condition_seed = gr.Number(label="🎲 Random Seed", minimum=0, maximum=9999, step=1, value=42, info="Change for different results") selected_view = gr.Radio(["First View", "Second View", "Third View", "Fourth View"], label="πŸ“ Camera View", value="First View", info="Choose which viewpoint to use as reference") with gr.Row(): model_choice = gr.Radio(["SDXL", "FLUX"], label="πŸ€– AI Model", value="SDXL", info="SDXL: Fast, depth+normal control | FLUX: High-quality, slower processing") edge_refinement = gr.Checkbox(label="✨ Edge Refinement", value=True, info="Smooth boundary artifacts (recommended for cleaner results)") text_prompt = gr.Textbox(label="πŸ’¬ Texture Description", placeholder="Describe the desired texture appearance (e.g., 'rustic wooden surface with weathered paint')", lines=2) step2_button = gr.Button("🎯 Generate Image Condition", variant="primary") step2_progress = gr.Textbox(label="πŸ“Š Generation Status", interactive=False) with gr.Column(): condition_image = gr.Image(label="πŸ–ΌοΈ Generated Image Condition", type="pil") # , interactive=False step2_button.click( generate_image_condition, inputs=[position_images_tensor, normal_images_tensor, mask_images_tensor, w2cs, text_prompt, selected_view, img_condition_seed, model_choice, edge_refinement], outputs=[condition_image, step2_progress], concurrency_id="gpu_intensive" ) # Step 3 gr.Markdown("---") gr.Markdown("## 🎨 Step 3: Generate Final Texture") gr.Markdown(""" **πŸ“‹ How to generate final texture:** - The **SeqTex pipeline** will create a complete texture map for your model - View the results from multiple angles and download your textured 3D model (the viewport is a little bit dark) """) texture_map_tensor, mv_out_tensor = gr.State(), gr.State() with gr.Row(): with gr.Column(scale=1): step3_button = gr.Button("🎨 Generate Final Texture", variant="primary") step3_progress = gr.Textbox(label="πŸ“Š Texture Generation Status", interactive=False) texture_map = gr.Image(label="πŸ† Generated Texture Map", interactive=False) with gr.Column(scale=2): rendered_imgs = gr.Image(label="πŸ–ΌοΈ Final Rendered Views") mv_branch_imgs = gr.Image(label="πŸ–ΌοΈ SeqTex Direct Output") with gr.Column(scale=1.5): # model_display = gr.Model3D(label="πŸ† Final Textured Model", height=500) model_display = LitModel3D(label="Model with Texture", exposure=30.0, height=500) step3_button.click( generate_texture, inputs=[position_map_tensor, normal_map_tensor, position_images_tensor, normal_images_tensor, condition_image, text_prompt, selected_view], outputs=[texture_map_tensor, mv_out_tensor, step3_progress], concurrency_id="gpu_intensive" ).then( tensor_to_pil, inputs=[texture_map_tensor, gr.State(None), gr.State(False)], outputs=[texture_map] ).then( tensor_to_pil, inputs=[mv_out_tensor, gr.State(None), gr.State(False)], outputs=[mv_branch_imgs] ).then( render_views, inputs=[mesh, texture_map_tensor, mvp_matrix], outputs=[rendered_imgs] ).then( Mesh.export, inputs=[mesh, gr.State(None), texture_map], outputs=[model_display] ) # Add example inputs for user convenience gr.Markdown("---") gr.Markdown("## πŸš€ Try Our Examples") gr.Markdown("**Quick Start**: Click on any example below to see SeqTex in action with pre-configured settings!") gr.Examples( examples=EXAMPLES, inputs=[mesh_upload, y2z, y2x, z2x, upside_down, img_condition_seed, selected_view, model_choice, edge_refinement, text_prompt], cache_examples=False ) demo.launch(server_name="0.0.0.0", server_port=52424)