ReubenSun committed
Commit 55f226f · 1 Parent(s): bc373eb

texture sync

step1x3d_texture/pipelines/ig2mv_sdxl_pipeline.py CHANGED
@@ -51,6 +51,20 @@ from ..models.attention_processor import (
     DecoupledMVRowSelfAttnProcessor2_0,
     set_unet_2d_condition_attn_processor,
 )
+import random
+from ..texture_sync.project import UVProjection as UVP
+from ..texture_sync.step_sync import step_tex_sync
+from trimesh import Trimesh
+from torchvision.transforms import Compose, Resize, GaussianBlur, InterpolationMode
+from diffusers.utils import (
+    BaseOutput,
+    numpy_to_pil,
+    pt_to_pil,
+    is_accelerate_available,
+    is_accelerate_version,
+    logging,
+    replace_example_docstring
+)
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
@@ -70,6 +84,27 @@ def retrieve_latents(
         raise AttributeError("Could not access latents of provided encoder_output")
 
 
+@torch.no_grad()
+def composite_rendered_view(scheduler, backgrounds, foregrounds, masks, t):
+    composited_images = []
+    for i, (background, foreground, mask) in enumerate(zip(backgrounds, foregrounds, masks)):
+        if t > 0:
+            alphas_cumprod = scheduler.alphas_cumprod[t]
+            noise = torch.normal(0, 1, background.shape, device=background.device)
+            background = (1 - alphas_cumprod) * noise + alphas_cumprod * background
+        composited = foreground * mask + background * (1 - mask)
+        composited_images.append(composited)
+    composited_tensor = torch.stack(composited_images)
+    return composited_tensor
+
+
+@torch.no_grad()
+def encode_latents(vae, imgs):
+    imgs = (imgs - 0.5) * 2
+    latents = vae.encode(imgs).latent_dist.sample()
+    latents = vae.config.scaling_factor * latents
+    return latents
+
 class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
     def __init__(
         self,
@@ -309,6 +344,8 @@ class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
         # Image condition
         reference_image: Optional[PipelineImageInput] = None,
         reference_conditioning_scale: Optional[float] = 1.0,
+        mesh: Optional[Trimesh] = None,
+        texture_sync_config: Optional[dict] = None,
         **kwargs,
     ):
         r"""
@@ -556,6 +593,27 @@ class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
             latents,
         )
 
+        # texture params init
+        texture_size = texture_sync_config["texture_size"]
+        latent_size = texture_sync_config["latent_size"]
+        elevations = texture_sync_config["elevations"]
+        azimuths = texture_sync_config["azimuths"]
+        texture_sync_ratio = texture_sync_config["texture_sync_ratio"]
+        camera_poses = [(elv, azim) for elv, azim in zip(elevations, azimuths)]
+        uvp = UVP(texture_size=texture_size, render_size=latent_size, sampling_mode="nearest", channels=4, device=self._execution_device)
+        uvp.load_mesh(mesh, scale_factor=1.0, autouv=True)
+        uvp.set_cameras_and_render_settings(camera_poses, centers=None, camera_distance=texture_sync_config["camera_distance"], scale=((1.0, 1.0, 1.0),))
+
+        latent_tex = uvp.set_noise_texture()
+        noise_views = uvp.render_textured_views()
+        foregrounds = [view[:-1] for view in noise_views]
+        masks = [view[-1:] for view in noise_views]
+
+        if texture_sync_ratio > 0:
+            composited_tensor = composite_rendered_view(self.scheduler, latents, foregrounds, masks, int(timesteps[0].cpu().item()) + 1)
+            latents = composited_tensor.type(latents.dtype)
+        uvp.to("cpu")
+
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
@@ -709,6 +767,36 @@ class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
         ).to(device=device, dtype=latents.dtype)
 
         self._num_timesteps = len(timesteps)
+
+
+        # texture sync params
+        exp_start = texture_sync_config["exp_start"]
+        exp_end = texture_sync_config["exp_end"]
+        shuffle_background_change = texture_sync_config["shuffle_background_change"]
+        shuffle_background_end = texture_sync_config["shuffle_background_end"]
+        num_timesteps = self.scheduler.config.num_train_timesteps
+
+        uvp.to(self._execution_device)
+        color_constants = {"black": [-1, -1, -1], "white": [1, 1, 1], "maroon": [0, -1, -1],
+                           "red": [1, -1, -1], "olive": [0, 0, -1], "yellow": [1, 1, -1],
+                           "green": [-1, 0, -1], "lime": [-1, 1, -1], "teal": [-1, 0, 0],
+                           "aqua": [-1, 1, 1], "navy": [-1, -1, 0], "blue": [-1, -1, 1],
+                           "purple": [0, -1, 0], "fuchsia": [1, -1, 1]}
+        color_names = list(color_constants.keys())
+        background_colors = [random.choice(list(color_constants.keys())) for i in range(len(camera_poses))]
+        intermediate_results = []
+        self.upcast_vae()
+        self.vae.config.force_upcast = True
+        color_images = torch.FloatTensor([color_constants[name] for name in color_names]).reshape(-1, 3, 1, 1).to(dtype=torch.float32, device=self._execution_device)
+        color_images = torch.ones(
+            (1, 1, latent_size * 8, latent_size * 8),
+            device=self._execution_device,
+            dtype=torch.float32
+        ) * color_images
+        color_images = ((0.5 * color_images) + 0.5)
+        color_latents = encode_latents(self.vae, color_images).to(dtype=self.text_encoder_2.dtype)
+        color_latents = {color[0]: color[1] for color in zip(color_names, [latent for latent in color_latents])}
+
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
                 if self.interrupt:
@@ -768,9 +856,49 @@ class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
 
                 # compute the previous noisy sample x_t -> x_t-1
                 latents_dtype = latents.dtype
-                latents = self.scheduler.step(
-                    noise_pred, t, latents, **extra_step_kwargs, return_dict=False
-                )[0]
+
+                # texture sync
+                current_exp = ((exp_end - exp_start) * i / num_inference_steps) + exp_start
+                if t > (1 - texture_sync_ratio) * num_timesteps:
+                    step_results = step_tex_sync(
+                        scheduler=self.scheduler,
+                        uvp=uvp,
+                        model_output=noise_pred,
+                        timestep=t,
+                        sample=latents,
+                        texture=latent_tex,
+                        return_dict=True,
+                        main_views=[],
+                        exp=current_exp,
+                        **extra_step_kwargs
+                    )
+
+                    pred_original_sample = step_results["pred_original_sample"]
+                    latents = step_results["prev_sample"]
+                    latent_tex = step_results["prev_tex"]
+
+                    # Composite latent foreground with random color background
+                    background_latents = [color_latents[color] for color in background_colors]
+                    composited_tensor = composite_rendered_view(self.scheduler, background_latents, latents, masks, t)
+                    latents = composited_tensor.type(latents.dtype)
+
+                    intermediate_results.append((latents.to("cpu"), pred_original_sample.to("cpu")))
+                else:
+                    step_results = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=True)
+                    pred_original_sample = step_results["pred_original_sample"]
+                    latents = step_results["prev_sample"]
+                    latent_tex = None
+                    intermediate_results.append((latents.to("cpu"), pred_original_sample.to("cpu")))
+
+                # Shuffle background colors; only black and white are used after a certain timestep
+                if (1 - t / num_timesteps) < shuffle_background_change:
+                    background_colors = [random.choice(list(color_constants.keys())) for i in range(len(camera_poses))]
+                elif (1 - t / num_timesteps) < shuffle_background_end:
+                    background_colors = [random.choice(["black", "white"]) for i in range(len(camera_poses))]
+                else:
+                    background_colors = background_colors
+                del noise_pred
+
                 if latents.dtype != latents_dtype:
                     if torch.backends.mps.is_available():
                         # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
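
Note on the hunks above: a denoising step is routed through step_tex_sync only while t > (1 - texture_sync_ratio) * num_train_timesteps, i.e. during the early, high-noise part of the schedule; there the per-view latents are aggregated into the shared UV texture latent_tex and each view is re-composited over a randomly colored background latent. Later steps fall back to the plain scheduler.step call and latent_tex is dropped. A minimal sketch of that cutoff, assuming the scheduler's default of 1000 training timesteps and a hypothetical 20-step inference schedule (illustrative snippet, not part of the commit):

    num_train_timesteps = 1000             # assumed DDPM default
    texture_sync_ratio = 0.5               # default set in texture_sync_config (next file)
    timesteps = list(range(999, -1, -50))  # hypothetical 20-step schedule, high noise first
    synced = [t for t in timesteps if t > (1 - texture_sync_ratio) * num_train_timesteps]
    print(f"{len(synced)} of {len(timesteps)} steps use step_tex_sync")  # 10 of 20
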
step1x3d_texture/pipelines/step1x_3d_texture_synthesis_pipeline.py CHANGED
@@ -24,7 +24,6 @@ import trimesh
 import xatlas
 import scipy.sparse
 from scipy.sparse.linalg import spsolve
-
 from step1x3d_geometry.models.pipelines.pipeline_utils import smart_load_model
 
 
@@ -36,7 +35,7 @@ class Step1X3DTextureConfig:
         self.unet_model = None
         self.lora_model = None
         self.adapter_path = "stepfun-ai/Step1X-3D"
-        self.scheduler = None
+        self.scheduler = "ddpm"
         self.num_views = 6
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.dtype = torch.float16
@@ -61,6 +60,20 @@ class Step1X3DTextureConfig:
         self.bake_exp = 4
         self.merge_method = "fast"
 
+        # texture sync params
+        self.texture_sync_config = {
+            "texture_size": 1536,
+            "latent_size": 768 // 8,
+            "elevations": [0, 0, 0, 0, 90, -90],
+            "azimuths": [0, 90, 180, 270, 0, 0],
+            "texture_sync_ratio": 0.5,
+            "exp_end": 6.0,
+            "exp_start": 0,
+            "shuffle_background_change": 0.4,
+            "shuffle_background_end": 0.99,
+            "camera_distance": 1.8
+        }
+
 
 class Step1X3DTexturePipeline:
     def __init__(self, config):
@@ -120,11 +133,9 @@ class Step1X3DTexturePipeline:
         if unet_model is not None:
             pipe_kwargs["unet"] = UNet2DConditionModel.from_pretrained(unet_model)
 
-        print('VAE Loaded!')
         # Prepare pipeline
         pipe = IG2MVSDXLPipeline.from_pretrained(base_model, **pipe_kwargs)
 
-        print('Base model Loaded!')
         # Load scheduler if provided
         scheduler_class = None
         if scheduler == "ddpm":
@@ -138,14 +149,11 @@ class Step1X3DTexturePipeline:
             shift_scale=8.0,
             scheduler_class=scheduler_class,
         )
-        print('Scheduler Loaded!')
         pipe.init_custom_adapter(
             num_views=num_views,
             self_attn_processor=DecoupledMVRowColSelfAttnProcessor2_0,
         )
-        print(f'Load adapter from {adapter_path}/step1x-3d-ig2v.safetensors')
         pipe.load_custom_adapter(adapter_path, "step1x-3d-ig2v.safetensors")
-        print(f'Load adapter successed!')
         pipe.to(device=device, dtype=dtype)
         pipe.cond_encoder.to(device=device, dtype=dtype)
 
@@ -282,6 +290,7 @@ class Step1X3DTexturePipeline:
             negative_prompt=negative_prompt,
             cross_attention_kwargs={"scale": lora_scale},
             mesh=mesh_bp,
+            texture_sync_config=self.config.texture_sync_config,
             **pipe_kwargs,
         ).images
 
@@ -359,7 +368,7 @@ class Step1X3DTexturePipeline:
             width=768,
             num_inference_steps=self.config.num_inference_steps,
             guidance_scale=self.config.guidance_scale,
-            seed=seed if seed is not None else self.config.seed,
+            seed= seed if seed is not None else self.config.seed,
            lora_scale=self.config.lora_scale,
            reference_conditioning_scale=self.config.reference_conditioning_scale,
            negative_prompt=self.config.negative_prompt,
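
Note on the hunks above: texture synchronization is driven entirely by the new texture_sync_config dict on Step1X3DTextureConfig, which is forwarded to the multi-view pipeline together with mesh. A minimal sketch of overriding it before building the pipeline, assuming construction through the config object as in __init__(self, config) above; the overridden values are illustrative, only the field names come from this diff:

    config = Step1X3DTextureConfig()
    config.texture_sync_config["texture_sync_ratio"] = 0.6   # synchronize a larger share of the denoising steps
    config.texture_sync_config["camera_distance"] = 2.0      # hypothetical camera override
    texture_pipe = Step1X3DTexturePipeline(config)
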
step1x3d_texture/renderer/geometry.py DELETED
@@ -1,151 +0,0 @@
1
- import torch
2
- import pytorch3d
3
- import torch.nn.functional as F
4
-
5
- from pytorch3d.ops import interpolate_face_attributes
6
-
7
- from pytorch3d.renderer import (
8
- look_at_view_transform,
9
- FoVPerspectiveCameras,
10
- AmbientLights,
11
- PointLights,
12
- DirectionalLights,
13
- Materials,
14
- RasterizationSettings,
15
- MeshRenderer,
16
- MeshRasterizer,
17
- SoftPhongShader,
18
- SoftSilhouetteShader,
19
- HardPhongShader,
20
- TexturesVertex,
21
- TexturesUV,
22
- Materials,
23
- )
24
- from pytorch3d.renderer.blending import BlendParams, hard_rgb_blend
25
- from pytorch3d.renderer.utils import convert_to_tensors_and_broadcast, TensorProperties
26
- from pytorch3d.renderer.mesh.shader import ShaderBase
27
-
28
-
29
- def get_cos_angle(points, normals, camera_position):
30
- """
31
- calculate cosine similarity between view->surface and surface normal.
32
- """
33
-
34
- if points.shape != normals.shape:
35
- msg = "Expected points and normals to have the same shape: got %r, %r"
36
- raise ValueError(msg % (points.shape, normals.shape))
37
-
38
- # Ensure all inputs have same batch dimension as points
39
- matched_tensors = convert_to_tensors_and_broadcast(
40
- points, camera_position, device=points.device
41
- )
42
- _, camera_position = matched_tensors
43
-
44
- # Reshape direction and color so they have all the arbitrary intermediate
45
- # dimensions as points. Assume first dim = batch dim and last dim = 3.
46
- points_dims = points.shape[1:-1]
47
- expand_dims = (-1,) + (1,) * len(points_dims)
48
-
49
- if camera_position.shape != normals.shape:
50
- camera_position = camera_position.view(expand_dims + (3,))
51
-
52
- normals = F.normalize(normals, p=2, dim=-1, eps=1e-6)
53
-
54
- # Calculate the cosine value.
55
- view_direction = camera_position - points
56
- view_direction = F.normalize(view_direction, p=2, dim=-1, eps=1e-6)
57
- cos_angle = torch.sum(view_direction * normals, dim=-1, keepdim=True)
58
- cos_angle = cos_angle.clamp(0, 1)
59
-
60
- # Cosine of the angle between the reflected light ray and the viewer
61
- return cos_angle
62
-
63
-
64
- def _geometry_shading_with_pixels(
65
- meshes, fragments, lights, cameras, materials, texels
66
- ):
67
- """
68
- Render pixel space vertex position, normal(world), depth, and cos angle
69
-
70
- Args:
71
- meshes: Batch of meshes
72
- fragments: Fragments named tuple with the outputs of rasterization
73
- lights: Lights class containing a batch of lights
74
- cameras: Cameras class containing a batch of cameras
75
- materials: Materials class containing a batch of material properties
76
- texels: texture per pixel of shape (N, H, W, K, 3)
77
-
78
- Returns:
79
- colors: (N, H, W, K, 3)
80
- pixel_coords: (N, H, W, K, 3), camera coordinates of each intersection.
81
- """
82
- verts = meshes.verts_packed() # (V, 3)
83
- faces = meshes.faces_packed() # (F, 3)
84
- vertex_normals = meshes.verts_normals_packed() # (V, 3)
85
- faces_verts = verts[faces]
86
- faces_normals = vertex_normals[faces]
87
- pixel_coords_in_camera = interpolate_face_attributes(
88
- fragments.pix_to_face, fragments.bary_coords, faces_verts
89
- )
90
- pixel_normals = interpolate_face_attributes(
91
- fragments.pix_to_face, fragments.bary_coords, faces_normals
92
- )
93
-
94
- cos_angles = get_cos_angle(
95
- pixel_coords_in_camera, pixel_normals, cameras.get_camera_center()
96
- )
97
-
98
- return pixel_coords_in_camera, pixel_normals, fragments.zbuf[..., None], cos_angles
99
-
100
-
101
- class HardGeometryShader(ShaderBase):
102
- """
103
- renders common geometric informations.
104
-
105
-
106
- """
107
-
108
- def forward(self, fragments, meshes, **kwargs):
109
- cameras = super()._get_cameras(**kwargs)
110
- texels = self.texel_from_uv(fragments, meshes)
111
-
112
- lights = kwargs.get("lights", self.lights)
113
- materials = kwargs.get("materials", self.materials)
114
- blend_params = kwargs.get("blend_params", self.blend_params)
115
- verts, normals, depths, cos_angles = _geometry_shading_with_pixels(
116
- meshes=meshes,
117
- fragments=fragments,
118
- texels=texels,
119
- lights=lights,
120
- cameras=cameras,
121
- materials=materials,
122
- )
123
- texels = meshes.sample_textures(fragments)
124
- verts = hard_rgb_blend(verts, fragments, blend_params)
125
- normals = hard_rgb_blend(normals, fragments, blend_params)
126
- depths = hard_rgb_blend(depths, fragments, blend_params)
127
- cos_angles = hard_rgb_blend(cos_angles, fragments, blend_params)
128
- from IPython import embed
129
-
130
- embed()
131
- texels = hard_rgb_blend(texels, fragments, blend_params)
132
- return verts, normals, depths, cos_angles, texels, fragments
133
-
134
- def texel_from_uv(self, fragments, meshes):
135
- texture_tmp = meshes.textures
136
- maps_tmp = texture_tmp.maps_padded()
137
- uv_color = [[[1, 0], [1, 1]], [[0, 0], [0, 1]]]
138
- uv_color = (
139
- torch.FloatTensor(uv_color).to(maps_tmp[0].device).type(maps_tmp[0].dtype)
140
- )
141
- uv_texture = TexturesUV(
142
- [uv_color.clone() for t in maps_tmp],
143
- texture_tmp.faces_uvs_padded(),
144
- texture_tmp.verts_uvs_padded(),
145
- sampling_mode="bilinear",
146
- )
147
- meshes.textures = uv_texture
148
- texels = meshes.sample_textures(fragments)
149
- meshes.textures = texture_tmp
150
- texels = torch.cat((texels, texels[..., -1:] * 0), dim=-1)
151
- return texels

step1x3d_texture/renderer/project.py DELETED
@@ -1,875 +0,0 @@
1
- import torch
2
- import pytorch3d
3
-
4
-
5
- from pytorch3d.io import load_objs_as_meshes, load_obj, save_obj, IO
6
-
7
- from pytorch3d.structures import Meshes
8
- from pytorch3d.renderer import (
9
- look_at_view_transform,
10
- FoVPerspectiveCameras,
11
- FoVOrthographicCameras,
12
- AmbientLights,
13
- PointLights,
14
- DirectionalLights,
15
- Materials,
16
- RasterizationSettings,
17
- MeshRenderer,
18
- MeshRasterizer,
19
- TexturesUV,
20
- )
21
-
22
- from .geometry import HardGeometryShader
23
- from .shader import HardNChannelFlatShader
24
- from .voronoi import voronoi_solve
25
- import torch.nn.functional as F
26
- import open3d as o3d
27
- import pdb
28
- import kaolin as kal
29
- import numpy as np
30
-
31
-
32
- import torch
33
- from pytorch3d.renderer.cameras import FoVOrthographicCameras
34
- from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
35
- from pytorch3d.common.datatypes import Device
36
- import math
37
- import torch.nn.functional as F
38
- from trimesh import Trimesh
39
- from pytorch3d.structures import Meshes
40
- import os
41
-
42
- LIST_TYPE = Union[list, np.ndarray, torch.Tensor]
43
-
44
- _R = torch.eye(3)[None] # (1, 3, 3)
45
- _T = torch.zeros(1, 3) # (1, 3)
46
- _BatchFloatType = Union[float, Sequence[float], torch.Tensor]
47
-
48
-
49
- class CustomOrthographicCameras(FoVOrthographicCameras):
50
- def compute_projection_matrix(
51
- self, znear, zfar, max_x, min_x, max_y, min_y, scale_xyz
52
- ) -> torch.Tensor:
53
- """
54
- 自定义正交投影矩阵计算,继承并修改深度通道参数
55
- 参数维度说明:
56
- - znear/zfar: (N,)
57
- - max_x/min_x: (N,)
58
- - max_y/min_y: (N,)
59
- - scale_xyz: (N, 3)
60
- """
61
- K = torch.zeros((self._N, 4, 4), dtype=torch.float32, device=self.device)
62
-
63
- ones = torch.ones((self._N), dtype=torch.float32, device=self.device)
64
- # NOTE: OpenGL flips handedness of coordinate system between camera
65
- # space and NDC space so z sign is -ve. In PyTorch3D we maintain a
66
- # right handed coordinate system throughout.
67
- z_sign = +1.0
68
-
69
- K[:, 0, 0] = (2.0 / (max_x - min_x)) * scale_xyz[:, 0]
70
- K[:, 1, 1] = (2.0 / (max_y - min_y)) * scale_xyz[:, 1]
71
- K[:, 0, 3] = -(max_x + min_x) / (max_x - min_x)
72
- K[:, 1, 3] = -(max_y + min_y) / (max_y - min_y)
73
- K[:, 3, 3] = ones
74
-
75
- # NOTE: This maps the z coordinate to the range [0, 1] and replaces the
76
- # the OpenGL z normalization to [-1, 1]
77
- K[:, 2, 2] = -2 * (1.0 / (zfar - znear)) * scale_xyz[:, 2]
78
- K[:, 2, 3] = -(znear + zfar) / (zfar - znear)
79
-
80
- return K
81
-
82
- def __init__(
83
- self,
84
- znear: _BatchFloatType = 1.0,
85
- zfar: _BatchFloatType = 100.0,
86
- max_y: _BatchFloatType = 1.0,
87
- min_y: _BatchFloatType = -1.0,
88
- max_x: _BatchFloatType = 1.0,
89
- min_x: _BatchFloatType = -1.0,
90
- scale_xyz=((1.0, 1.0, 1.0),), # (N, 3)
91
- R: torch.Tensor = _R,
92
- T: torch.Tensor = _T,
93
- K: Optional[torch.Tensor] = None,
94
- device: Device = "cpu",
95
- ):
96
- # 继承父类初始化逻辑
97
- super().__init__(
98
- znear=znear,
99
- zfar=zfar,
100
- max_y=max_y,
101
- min_y=min_y,
102
- max_x=max_x,
103
- min_x=min_x,
104
- scale_xyz=scale_xyz,
105
- R=R,
106
- T=T,
107
- K=K,
108
- device=device,
109
- )
110
-
111
-
112
- def erode_torch_batch(binary_img_batch, kernel_size):
113
- pad = (kernel_size - 1) // 2
114
- bin_img = F.pad(
115
- binary_img_batch.unsqueeze(1), pad=[pad, pad, pad, pad], mode="reflect"
116
- )
117
- out = -F.max_pool2d(-bin_img, kernel_size=kernel_size, stride=1, padding=0)
118
- out = out.squeeze(1)
119
- return out
120
-
121
-
122
- def dilate_torch_batch(binary_img_batch, kernel_size):
123
- pad = (kernel_size - 1) // 2
124
- bin_img = F.pad(binary_img_batch, pad=[pad, pad, pad, pad], mode="reflect")
125
- out = F.max_pool2d(bin_img, kernel_size=kernel_size, stride=1, padding=0)
126
- out = out.squeeze()
127
- return out
128
-
129
-
130
- # Pytorch3D based renderering functions, managed in a class
131
- # Render size is recommended to be the same as your latent view size
132
- # DO NOT USE "bilinear" sampling when you are handling latents.
133
- # Stable Diffusion has 4 latent channels so use channels=4
134
-
135
-
136
- class UVProjection:
137
- def __init__(
138
- self,
139
- texture_size=96,
140
- render_size=64,
141
- sampling_mode="nearest",
142
- channels=3,
143
- device=None,
144
- ):
145
- self.channels = channels
146
- self.device = device or torch.device("cpu")
147
- self.lights = AmbientLights(
148
- ambient_color=((1.0,) * channels,), device=self.device
149
- )
150
- self.target_size = (texture_size, texture_size)
151
- self.render_size = render_size
152
- self.sampling_mode = sampling_mode
153
-
154
- # Load obj mesh, rescale the mesh to fit into the bounding box
155
- def load_mesh(self, mesh, scale_factor=2.0, auto_center=True, autouv=False):
156
- if isinstance(mesh, Trimesh):
157
- vertices = torch.tensor(mesh.vertices, dtype=torch.float32).to(self.device)
158
- faces = torch.tensor(mesh.faces, dtype=torch.int64).to(self.device)
159
- mesh = Meshes(verts=[vertices], faces=[faces])
160
- verts = mesh.verts_packed()
161
- mesh = mesh.update_padded(verts[None, :, :])
162
- elif isinstance(mesh, str) and os.path.isfile(mesh):
163
- mesh = load_objs_as_meshes([mesh_path], device=self.device)
164
- if auto_center:
165
- verts = mesh.verts_packed()
166
- max_bb = (verts - 0).max(0)[0]
167
- min_bb = (verts - 0).min(0)[0]
168
- scale = (max_bb - min_bb).max() / 2
169
- center = (max_bb + min_bb) / 2
170
- mesh.offset_verts_(-center)
171
- mesh.scale_verts_((scale_factor / float(scale)))
172
- else:
173
- mesh.scale_verts_((scale_factor))
174
-
175
- if autouv or (mesh.textures is None):
176
- mesh = self.uv_unwrap(mesh)
177
- self.mesh = mesh
178
-
179
- def load_glb_mesh(
180
- self, mesh_path, trimesh, scale_factor=1.0, auto_center=True, autouv=False
181
- ):
182
- from pytorch3d.io.experimental_gltf_io import MeshGlbFormat
183
-
184
- io = IO()
185
- io.register_meshes_format(MeshGlbFormat())
186
- with open(mesh_path, "rb") as f:
187
- mesh = io.load_mesh(f, include_textures=True, device=self.device)
188
- if auto_center:
189
- verts = mesh.verts_packed()
190
-
191
- max_bb = (verts - 0).max(0)[0]
192
- min_bb = (verts - 0).min(0)[0]
193
- scale = (max_bb - min_bb).max() / 2
194
- center = (max_bb + min_bb) / 2
195
- mesh.offset_verts_(-center)
196
- mesh.scale_verts_((scale_factor / float(scale)))
197
- verts = mesh.verts_packed()
198
- # T = torch.tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]], device=verts.device, dtype=verts.dtype)
199
- # T = torch.tensor([[0, 0, 1], [0, 1, 0], [-1, 0, 0]], device=verts.device, dtype=verts.dtype)
200
- # verts = verts @ T
201
- mesh = mesh.update_padded(verts[None, :, :])
202
- else:
203
- mesh.scale_verts_((scale_factor))
204
- if autouv or (mesh.textures is None):
205
- mesh = self.uv_unwrap(mesh)
206
- self.mesh = mesh
207
-
208
- # Save obj mesh
209
- def save_mesh(self, mesh_path, texture):
210
- save_obj(
211
- mesh_path,
212
- self.mesh.verts_list()[0],
213
- self.mesh.faces_list()[0],
214
- verts_uvs=self.mesh.textures.verts_uvs_list()[0],
215
- faces_uvs=self.mesh.textures.faces_uvs_list()[0],
216
- texture_map=texture,
217
- )
218
-
219
- # Code referred to TEXTure code (https://github.com/TEXTurePaper/TEXTurePaper.git)
220
- def uv_unwrap(self, mesh):
221
- verts_list = mesh.verts_list()[0]
222
- faces_list = mesh.faces_list()[0]
223
-
224
- import xatlas
225
- import numpy as np
226
-
227
- v_np = verts_list.cpu().numpy()
228
- f_np = faces_list.int().cpu().numpy()
229
- atlas = xatlas.Atlas()
230
- atlas.add_mesh(v_np, f_np)
231
- chart_options = xatlas.ChartOptions()
232
- chart_options.max_iterations = 4
233
- atlas.generate(chart_options=chart_options)
234
- vmapping, ft_np, vt_np = atlas[0] # [N], [M, 3], [N, 2]
235
-
236
- vt = (
237
- torch.from_numpy(vt_np.astype(np.float32))
238
- .type(verts_list.dtype)
239
- .to(mesh.device)
240
- )
241
- ft = (
242
- torch.from_numpy(ft_np.astype(np.int64))
243
- .type(faces_list.dtype)
244
- .to(mesh.device)
245
- )
246
-
247
- new_map = torch.zeros(self.target_size + (self.channels,), device=mesh.device)
248
- new_tex = TexturesUV([new_map], [ft], [vt], sampling_mode=self.sampling_mode)
249
-
250
- mesh.textures = new_tex
251
- return mesh
252
-
253
- """
254
- A functions that disconnect faces in the mesh according to
255
- its UV seams. The number of vertices are made equal to the
256
- number of unique vertices its UV layout, while the faces list
257
- is intact.
258
- """
259
-
260
- def disconnect_faces(self):
261
- mesh = self.mesh
262
- verts_list = mesh.verts_list()
263
- faces_list = mesh.faces_list()
264
- verts_uvs_list = mesh.textures.verts_uvs_list()
265
- faces_uvs_list = mesh.textures.faces_uvs_list()
266
- packed_list = [v[f] for v, f in zip(verts_list, faces_list)]
267
- verts_disconnect_list = [
268
- torch.zeros(
269
- (verts_uvs_list[i].shape[0], 3),
270
- dtype=verts_list[0].dtype,
271
- device=verts_list[0].device,
272
- )
273
- for i in range(len(verts_list))
274
- ]
275
- for i in range(len(verts_list)):
276
- verts_disconnect_list[i][faces_uvs_list] = packed_list[i]
277
- assert not mesh.has_verts_normals(), "Not implemented for vertex normals"
278
- self.mesh_d = Meshes(verts_disconnect_list, faces_uvs_list, mesh.textures)
279
- return self.mesh_d
280
-
281
- """
282
- A function that construct a temp mesh for back-projection.
283
- Take a disconnected mesh and a rasterizer, the function calculates
284
- the projected faces as the UV, as use its original UV with pseudo
285
- z value as world space geometry.
286
- """
287
-
288
- def construct_uv_mesh(self):
289
- mesh = self.mesh_d
290
- verts_list = mesh.verts_list()
291
- verts_uvs_list = mesh.textures.verts_uvs_list()
292
- # faces_list = [torch.flip(faces, [-1]) for faces in mesh.faces_list()]
293
- new_verts_list = []
294
- for i, (verts, verts_uv) in enumerate(zip(verts_list, verts_uvs_list)):
295
- verts = verts.clone()
296
- verts_uv = verts_uv.clone()
297
- verts[..., 0:2] = verts_uv[..., :]
298
- verts = (verts - 0.5) * 2
299
- verts[..., 2] *= 1
300
- new_verts_list.append(verts)
301
- textures_uv = mesh.textures.clone()
302
- self.mesh_uv = Meshes(new_verts_list, mesh.faces_list(), textures_uv)
303
- return self.mesh_uv
304
-
305
- # Set texture for the current mesh.
306
- def set_texture_map(self, texture):
307
- new_map = texture.permute(1, 2, 0)
308
- new_map = new_map.to(self.device)
309
- new_tex = TexturesUV(
310
- [new_map],
311
- self.mesh.textures.faces_uvs_padded(),
312
- self.mesh.textures.verts_uvs_padded(),
313
- sampling_mode=self.sampling_mode,
314
- )
315
- self.mesh.textures = new_tex
316
-
317
- # Set the initial normal noise texture
318
- # No generator here for replication of the experiment result. Add one as you wish
319
- def set_noise_texture(self, channels=None):
320
- if not channels:
321
- channels = self.channels
322
- noise_texture = torch.normal(
323
- 0, 1, (channels,) + self.target_size, device=self.device
324
- )
325
- self.set_texture_map(noise_texture)
326
- return noise_texture
327
-
328
- # Set the cameras given the camera poses and centers
329
- def set_cameras(self, camera_poses, centers=None, camera_distance=2.7, scale=None):
330
- elev = torch.FloatTensor([pose[0] for pose in camera_poses])
331
- azim = torch.FloatTensor([pose[1] for pose in camera_poses])
332
- print("camera_distance:{}".format(camera_distance))
333
- R, T = look_at_view_transform(
334
- dist=camera_distance, elev=elev, azim=azim, at=centers or ((0, 0, 0),)
335
- )
336
- # flip_mat = torch.from_numpy(np.diag([-1.0, 1.0, -1.0]) ).type(torch.FloatTensor).to(R.device)
337
- # R = R@flip_mat
338
- # R = R.permute(0, 2, 1)
339
- # T = T*torch.from_numpy(np.array([-1.0, 1.0, -1.0])).type(torch.FloatTensor).to(R.device)
340
- # print("v R size:{}, v T size:{}".format(R.size(), T.size()))
341
- # c2w = self.get_c2w(elev, [camera_distance]*len(elev), azim)
342
- # w2c = torch.linalg.inv(c2w)
343
- # R, T= w2c[:, :3, :3], w2c[:, :3, 3]
344
- print("R size:{}, T size:{}".format(R.size(), T.size()))
345
- # self.cameras = CustomOrthographicCameras(device=self.device, R=R, T=T, scale_xyz=scale or ((1,1,1),), znear=0.1, min_x=-0.55, max_x=0.55, min_y=-0.55, max_y=0.55)
346
- self.cameras = FoVOrthographicCameras(
347
- device=self.device, R=R, T=T, scale_xyz=scale or ((1, 1, 1),)
348
- )
349
-
350
- # Set all necessary internal data for rendering and texture baking
351
- # Can be used to refresh after changing camera positions
352
- def set_cameras_and_render_settings(
353
- self,
354
- camera_poses,
355
- centers=None,
356
- camera_distance=2.7,
357
- render_size=None,
358
- scale=None,
359
- ):
360
- self.set_cameras(camera_poses, centers, camera_distance, scale=scale)
361
- if render_size is None:
362
- render_size = self.render_size
363
- if not hasattr(self, "renderer"):
364
- self.setup_renderer(size=render_size)
365
- if not hasattr(self, "mesh_d"):
366
- self.disconnect_faces()
367
- if not hasattr(self, "mesh_uv"):
368
- self.construct_uv_mesh()
369
- self.calculate_tex_gradient()
370
- self.calculate_visible_triangle_mask()
371
- _, _, _, cos_maps, _, _ = self.render_geometry()
372
- self.calculate_cos_angle_weights(cos_maps)
373
-
374
- # Setup renderers for rendering
375
- # max faces per bin set to 30000 to avoid overflow in many test cases.
376
- # You can use default value to let pytorch3d handle that for you.
377
- def setup_renderer(
378
- self,
379
- size=64,
380
- blur=0.0,
381
- face_per_pix=1,
382
- perspective_correct=False,
383
- channels=None,
384
- ):
385
- if not channels:
386
- channels = self.channels
387
-
388
- self.raster_settings = RasterizationSettings(
389
- image_size=size,
390
- blur_radius=blur,
391
- faces_per_pixel=face_per_pix,
392
- perspective_correct=perspective_correct,
393
- cull_backfaces=True,
394
- max_faces_per_bin=30000,
395
- )
396
-
397
- self.renderer = MeshRenderer(
398
- rasterizer=MeshRasterizer(
399
- cameras=self.cameras,
400
- raster_settings=self.raster_settings,
401
- ),
402
- shader=HardNChannelFlatShader(
403
- device=self.device,
404
- cameras=self.cameras,
405
- lights=self.lights,
406
- channels=channels,
407
- # materials=materials
408
- ),
409
- )
410
-
411
- # Bake screen-space cosine weights to UV space
412
- # May be able to reimplement using the generic "bake_texture" function, but it works so leave it here for now
413
- @torch.enable_grad()
414
- def calculate_cos_angle_weights(self, cos_angles, fill=True, channels=None):
415
- if not channels:
416
- channels = self.channels
417
- cos_maps = []
418
- tmp_mesh = self.mesh.clone()
419
- for i in range(len(self.cameras)):
420
-
421
- zero_map = torch.zeros(
422
- self.target_size + (channels,), device=self.device, requires_grad=True
423
- )
424
- optimizer = torch.optim.SGD([zero_map], lr=1, momentum=0)
425
- optimizer.zero_grad()
426
- zero_tex = TexturesUV(
427
- [zero_map],
428
- self.mesh.textures.faces_uvs_padded(),
429
- self.mesh.textures.verts_uvs_padded(),
430
- sampling_mode=self.sampling_mode,
431
- )
432
- tmp_mesh.textures = zero_tex
433
-
434
- images_predicted = self.renderer(
435
- tmp_mesh, cameras=self.cameras[i], lights=self.lights
436
- )
437
-
438
- loss = torch.sum((cos_angles[i, :, :, 0:1] ** 1 - images_predicted) ** 2)
439
- loss.backward()
440
- optimizer.step()
441
-
442
- if fill:
443
- zero_map = zero_map.detach() / (self.gradient_maps[i] + 1e-8)
444
- zero_map = voronoi_solve(
445
- zero_map, self.gradient_maps[i][..., 0], self.device
446
- )
447
- else:
448
- zero_map = zero_map.detach() / (self.gradient_maps[i] + 1e-8)
449
- cos_maps.append(zero_map)
450
- self.cos_maps = cos_maps
451
-
452
- # Get geometric info from fragment shader
453
- # Can be used for generating conditioning image and cosine weights
454
- # Returns some information you may not need, remember to release them for memory saving
455
- @torch.no_grad()
456
- def render_geometry(self, image_size=None):
457
- if image_size:
458
- size = self.renderer.rasterizer.raster_settings.image_size
459
- self.renderer.rasterizer.raster_settings.image_size = image_size
460
- shader = self.renderer.shader
461
- self.renderer.shader = HardGeometryShader(
462
- device=self.device, cameras=self.cameras[0], lights=self.lights
463
- )
464
- tmp_mesh = self.mesh.clone()
465
-
466
- verts, normals, depths, cos_angles, texels, fragments = self.renderer(
467
- tmp_mesh.extend(len(self.cameras)), cameras=self.cameras, lights=self.lights
468
- )
469
- self.renderer.shader = shader
470
-
471
- if image_size:
472
- self.renderer.rasterizer.raster_settings.image_size = size
473
-
474
- return verts, normals, depths, cos_angles, texels, fragments
475
-
476
- # Project world normal to view space and normalize
477
- @torch.no_grad()
478
- def decode_view_normal(self, normals):
479
- w2v_mat = self.cameras.get_full_projection_transform()
480
- normals_view = torch.clone(normals)[:, :, :, 0:3]
481
- normals_view = normals_view.reshape(normals_view.shape[0], -1, 3)
482
- normals_view = w2v_mat.transform_normals(normals_view)
483
- normals_view = normals_view.reshape(normals.shape[0:3] + (3,))
484
- normals_view[:, :, :, 2] *= -1
485
- normals = (normals_view[..., 0:3] + 1) * normals[
486
- ..., 3:
487
- ] / 2 + torch.FloatTensor(((((0.5, 0.5, 1))))).to(self.device) * (
488
- 1 - normals[..., 3:]
489
- )
490
- # normals = torch.cat([normal for normal in normals], dim=1)
491
- normals = normals.clamp(0, 1)
492
- return normals
493
-
494
- # Normalize absolute depth to inverse depth
495
- @torch.no_grad()
496
- def decode_normalized_depth(self, depths, batched_norm=False):
497
- view_z, mask = depths.unbind(-1)
498
- view_z = view_z * mask + 100 * (1 - mask)
499
- inv_z = 1 / view_z
500
- inv_z_min = inv_z * mask + 100 * (1 - mask)
501
- if not batched_norm:
502
- max_ = torch.max(inv_z, 1, keepdim=True)
503
- max_ = torch.max(max_[0], 2, keepdim=True)[0]
504
-
505
- min_ = torch.min(inv_z_min, 1, keepdim=True)
506
- min_ = torch.min(min_[0], 2, keepdim=True)[0]
507
- else:
508
- max_ = torch.max(inv_z)
509
- min_ = torch.min(inv_z_min)
510
- inv_z = (inv_z - min_) / (max_ - min_)
511
- inv_z = inv_z.clamp(0, 1)
512
- inv_z = inv_z[..., None].repeat(1, 1, 1, 3)
513
-
514
- return inv_z
515
-
516
- # Multiple screen pixels could pass gradient to a same texel
517
- # We can precalculate this gradient strength and use it to normalize gradients when we bake textures
518
- @torch.enable_grad()
519
- def calculate_tex_gradient(self, channels=None):
520
- if not channels:
521
- channels = self.channels
522
- tmp_mesh = self.mesh.clone()
523
- gradient_maps = []
524
- for i in range(len(self.cameras)):
525
- zero_map = torch.zeros(
526
- self.target_size + (channels,), device=self.device, requires_grad=True
527
- )
528
- optimizer = torch.optim.SGD([zero_map], lr=1, momentum=0)
529
- optimizer.zero_grad()
530
- zero_tex = TexturesUV(
531
- [zero_map],
532
- self.mesh.textures.faces_uvs_padded(),
533
- self.mesh.textures.verts_uvs_padded(),
534
- sampling_mode=self.sampling_mode,
535
- )
536
- tmp_mesh.textures = zero_tex
537
- images_predicted = self.renderer(
538
- tmp_mesh, cameras=self.cameras[i], lights=self.lights
539
- )
540
- loss = torch.sum((1 - images_predicted) ** 2)
541
- loss.backward()
542
- optimizer.step()
543
-
544
- gradient_maps.append(zero_map.detach())
545
-
546
- self.gradient_maps = gradient_maps
547
-
548
- # Get the UV space masks of triangles visible in each view
549
- # First get face ids from each view, then filter pixels on UV space to generate masks
550
-
551
- @torch.no_grad()
552
- def get_c2w(
553
- self,
554
- elevation_deg: LIST_TYPE,
555
- distance: LIST_TYPE,
556
- azimuth_deg: Optional[LIST_TYPE],
557
- num_views: Optional[int] = 1,
558
- device: Optional[str] = None,
559
- ) -> torch.FloatTensor:
560
- if azimuth_deg is None:
561
- assert (
562
- num_views is not None
563
- ), "num_views must be provided if azimuth_deg is None."
564
- azimuth_deg = torch.linspace(
565
- 0, 360, num_views + 1, dtype=torch.float32, device=device
566
- )[:-1]
567
- else:
568
- num_views = len(azimuth_deg)
569
-
570
- def list_to_pt(
571
- x: LIST_TYPE,
572
- dtype: Optional[torch.dtype] = None,
573
- device: Optional[str] = None,
574
- ) -> torch.Tensor:
575
- if isinstance(x, list) or isinstance(x, np.ndarray):
576
- return torch.tensor(x, dtype=dtype, device=device)
577
- return x.to(dtype=dtype)
578
-
579
- azimuth_deg = list_to_pt(azimuth_deg, dtype=torch.float32, device=device)
580
- elevation_deg = list_to_pt(elevation_deg, dtype=torch.float32, device=device)
581
- camera_distances = list_to_pt(distance, dtype=torch.float32, device=device)
582
- elevation = elevation_deg * math.pi / 180
583
- azimuth = azimuth_deg * math.pi / 180
584
- camera_positions = torch.stack(
585
- [
586
- camera_distances * torch.cos(elevation) * torch.cos(azimuth),
587
- camera_distances * torch.cos(elevation) * torch.sin(azimuth),
588
- camera_distances * torch.sin(elevation),
589
- ],
590
- dim=-1,
591
- )
592
- center = torch.zeros_like(camera_positions)
593
- up = torch.tensor([0, 0, 1], dtype=torch.float32, device=device)[
594
- None, :
595
- ].repeat(num_views, 1)
596
- lookat = F.normalize(center - camera_positions, dim=-1)
597
- right = F.normalize(torch.cross(lookat, up, dim=-1), dim=-1)
598
- up = F.normalize(torch.cross(right, lookat, dim=-1), dim=-1)
599
- c2w3x4 = torch.cat(
600
- [torch.stack([right, up, -lookat], dim=-1), camera_positions[:, :, None]],
601
- dim=-1,
602
- )
603
- c2w = torch.cat([c2w3x4, torch.zeros_like(c2w3x4[:, :1])], dim=1)
604
- c2w[:, 3, 3] = 1.0
605
- return c2w
606
-
607
- @torch.no_grad()
608
- def calculate_visible_triangle_mask(self, channels=None, image_size=(512, 512)):
609
- if not channels:
610
- channels = self.channels
611
-
612
- pix2face_list = []
613
- for i in range(len(self.cameras)):
614
- self.renderer.rasterizer.raster_settings.image_size = image_size
615
- pix2face = self.renderer.rasterizer(
616
- self.mesh_d, cameras=self.cameras[i]
617
- ).pix_to_face
618
- self.renderer.rasterizer.raster_settings.image_size = self.render_size
619
- pix2face_list.append(pix2face)
620
-
621
- if not hasattr(self, "mesh_uv"):
622
- self.construct_uv_mesh()
623
-
624
- raster_settings = RasterizationSettings(
625
- image_size=self.target_size,
626
- blur_radius=0,
627
- faces_per_pixel=1,
628
- perspective_correct=False,
629
- cull_backfaces=False,
630
- max_faces_per_bin=30000,
631
- )
632
-
633
- R, T = look_at_view_transform(dist=2, elev=0, azim=0)
634
- # flip_mat = torch.from_numpy(np.diag([-1.0, 1.0, -1.0]) ).type(torch.FloatTensor).to(R.device)
635
- # R = R@flip_mat
636
- # T = T*torch.tensor(np.array([-1.0, 1.0, -1.0])).type(torch.FloatTensor).to(R.device)
637
- # c2w = self.get_c2w([0], [1.8], [0])
638
- # w2c = torch.linalg.inv(c2w)[:, :3,:]
639
- # R, T= w2c[:, :3,:3], w2c[:, :3, 3]
640
- # print("R size:{}, T size:{}".format(R.size(), T.size()))
641
- cameras = FoVOrthographicCameras(device=self.device, R=R, T=T)
642
- # cameras = CustomOrthographicCameras(device=self.device, R=R, T=T)
643
-
644
- # cameras = CustomOrthographicCameras(device=self.device, R=R, T=T, znear=0.1, min_x=-0.55, max_x=0.55, min_y=-0.55, max_y=0.55)
645
-
646
- rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
647
- uv_pix2face = rasterizer(self.mesh_uv).pix_to_face
648
-
649
- visible_triangles = []
650
- for i in range(len(pix2face_list)):
651
- valid_faceid = torch.unique(pix2face_list[i])
652
- valid_faceid = valid_faceid[1:] if valid_faceid[0] == -1 else valid_faceid
653
- mask = torch.isin(uv_pix2face[0], valid_faceid, assume_unique=False)
654
- # uv_pix2face[0][~mask] = -1
655
- triangle_mask = torch.ones(self.target_size + (1,), device=self.device)
656
- triangle_mask[~mask] = 0
657
-
658
- triangle_mask[:, 1:][triangle_mask[:, :-1] > 0] = 1
659
- triangle_mask[:, :-1][triangle_mask[:, 1:] > 0] = 1
660
- triangle_mask[1:, :][triangle_mask[:-1, :] > 0] = 1
661
- triangle_mask[:-1, :][triangle_mask[1:, :] > 0] = 1
662
- visible_triangles.append(triangle_mask)
663
-
664
- self.visible_triangles = visible_triangles
665
-
666
- # Render the current mesh and texture from current cameras
667
- def render_textured_views(self):
668
- meshes = self.mesh.extend(len(self.cameras))
669
- images_predicted = self.renderer(
670
- meshes, cameras=self.cameras, lights=self.lights
671
- )
672
-
673
- return [image.permute(2, 0, 1) for image in images_predicted]
674
-
675
- @torch.no_grad()
676
- def get_point_validation_by_o3d(
677
- self, points, eye_position, hidden_point_removal_radius=200
678
- ):
679
- point_visibility = torch.zeros((points.shape[0]), device=points.device).bool()
680
-
681
- pcd = o3d.geometry.PointCloud(
682
- points=o3d.utility.Vector3dVector(points.cpu().numpy())
683
- )
684
- camera_pose = (
685
- eye_position.get_camera_center().squeeze().cpu().numpy().astype(np.float64)
686
- )
687
- # o3d_camera = [0, 0, diameter]
688
- diameter = np.linalg.norm(
689
- np.asarray(pcd.get_max_bound()) - np.asarray(pcd.get_min_bound())
690
- )
691
- radius = diameter * 200 # The radius of the sperical projection
692
- _, pt_map = pcd.hidden_point_removal(camera_pose, radius)
693
-
694
- visible_point_ids = np.array(pt_map)
695
-
696
- point_visibility[visible_point_ids] = True
697
- return point_visibility
698
-
699
- @torch.no_grad()
700
- def hidden_judge(self, camera, texture_dim):
701
- mesh = self.mesh
702
-
703
- verts = mesh.verts_packed()
704
- faces = mesh.faces_packed()
705
- verts_uv = mesh.textures.verts_uvs_padded()[0]  # padded UV coordinates (V, 2)
706
- faces_uv = mesh.textures.faces_uvs_padded()[0]
707
- uv_face_attr = torch.index_select(
708
- verts_uv, 0, faces_uv.view(-1)
709
- )  # select the UV coordinates of the corresponding vertices
710
- uv_face_attr = uv_face_attr.view(
711
- faces.shape[0], faces_uv.shape[1], 2
712
- ).unsqueeze(0)
713
- x, y, z = verts[:, 0], verts[:, 1], verts[:, 2]
714
- mesh_out_of_range = False
715
- if (
716
- x.min() < -1
717
- or x.max() > 1
718
- or y.min() < -1
719
- or y.max() > 1
720
- or z.min() < -1
721
- or z.max() > 1
722
- ):
723
- mesh_out_of_range = True
724
- face_vertices_world = kal.ops.mesh.index_vertices_by_faces(
725
- verts.unsqueeze(0), faces
726
- )
727
- face_vertices_z = torch.zeros_like(
728
- face_vertices_world[:, :, :, -1], device=verts.device
729
- )
730
- uv_position, face_idx = kal.render.mesh.rasterize(
731
- texture_dim,
732
- texture_dim,
733
- face_vertices_z,
734
- uv_face_attr * 2 - 1,
735
- face_features=face_vertices_world,
736
- )
737
- uv_position = torch.clamp(uv_position, -1, 1)
738
- uv_position[face_idx == -1] = 0
739
-
740
- points = uv_position.reshape(-1, 3)
741
- mask = points[:, 0] != 0
742
- valid_points = points[mask]
743
- # np.save("tmp/pcd.npy", valid_points.cpu().numpy())
744
- # print(camera.get_camera_center())
745
-
746
- points_visibility = self.get_point_validation_by_o3d(
747
- valid_points, camera
748
- ).float()
749
- visibility_map = torch.zeros((texture_dim * texture_dim,)).to(self.device)
750
- visibility_map[mask] = points_visibility
751
- visibility_map = visibility_map.reshape((texture_dim, texture_dim))
752
- return visibility_map
753
-
754
- @torch.enable_grad()
755
- def bake_texture(
756
- self,
757
- views=None,
758
- main_views=[],
759
- cos_weighted=True,
760
- channels=None,
761
- exp=None,
762
- noisy=False,
763
- generator=None,
764
- smooth_colorize=False,
765
- ):
766
- if not exp:
767
- exp = 1
768
- if not channels:
769
- channels = self.channels
770
- views = [view.permute(1, 2, 0) for view in views]
771
-
772
- tmp_mesh = self.mesh
773
- bake_maps = [
774
- torch.zeros(
775
- self.target_size + (views[0].shape[2],),
776
- device=self.device,
777
- requires_grad=True,
778
- )
779
- for view in views
780
- ]
781
- optimizer = torch.optim.SGD(bake_maps, lr=1, momentum=0)
782
- optimizer.zero_grad()
783
- loss = 0
784
- for i in range(len(self.cameras)):
785
- bake_tex = TexturesUV(
786
- [bake_maps[i]],
787
- tmp_mesh.textures.faces_uvs_padded(),
788
- tmp_mesh.textures.verts_uvs_padded(),
789
- sampling_mode=self.sampling_mode,
790
- )
791
- tmp_mesh.textures = bake_tex
792
- images_predicted = self.renderer(
793
- tmp_mesh,
794
- cameras=self.cameras[i],
795
- lights=self.lights,
796
- device=self.device,
797
- )
798
- predicted_rgb = images_predicted[..., :-1]
799
- loss += (((predicted_rgb[...] - views[i])) ** 2).sum()
800
- loss.backward(retain_graph=False)
801
- optimizer.step()
802
-
803
- total_weights = 0
804
- baked = 0
805
- for i in range(len(bake_maps)):
806
- normalized_baked_map = bake_maps[i].detach() / (
807
- self.gradient_maps[i] + 1e-8
808
- )
809
- bake_map = voronoi_solve(
810
- normalized_baked_map, self.gradient_maps[i][..., 0], self.device
811
- )
812
- # bake_map = voronoi_solve(normalized_baked_map, self.visible_triangles[i].squeeze())
813
-
814
- weight = self.visible_triangles[i] * (self.cos_maps[i]) ** exp
815
- if smooth_colorize:
816
- visibility_map = self.hidden_judge(
817
- self.cameras[i], self.target_size[0]
818
- ).unsqueeze(-1)
819
- weight *= visibility_map
820
- if noisy:
821
- noise = (
822
- torch.rand(weight.shape[:-1] + (1,), generator=generator)
823
- .type(weight.dtype)
824
- .to(weight.device)
825
- )
826
- weight *= noise
827
- total_weights += weight
828
-
829
- baked += bake_map * weight
830
- baked /= total_weights + 1e-8
831
-
832
- whole_visible_mask = None
833
- if not smooth_colorize:
834
- baked = voronoi_solve(baked, total_weights[..., 0], self.device)
835
- tmp_mesh.textures = TexturesUV(
836
- [baked],
837
- tmp_mesh.textures.faces_uvs_padded(),
838
- tmp_mesh.textures.verts_uvs_padded(),
839
- sampling_mode=self.sampling_mode,
840
- )
841
- else: # smooth colorize
842
- baked = voronoi_solve(baked, total_weights[..., 0], self.device)
843
- whole_visible_mask = self.visible_triangles[0].to(torch.int32)
844
- for tensor in self.visible_triangles[1:]:
845
- whole_visible_mask = torch.bitwise_or(
846
- whole_visible_mask, tensor.to(torch.int32)
847
- )
848
-
849
- baked *= whole_visible_mask
850
- tmp_mesh.textures = TexturesUV(
851
- [baked],
852
- tmp_mesh.textures.faces_uvs_padded(),
853
- tmp_mesh.textures.verts_uvs_padded(),
854
- sampling_mode=self.sampling_mode,
855
- )
856
-
857
- extended_mesh = tmp_mesh.extend(len(self.cameras))
858
- images_predicted = self.renderer(
859
- extended_mesh, cameras=self.cameras, lights=self.lights
860
- )
861
- learned_views = [image.permute(2, 0, 1) for image in images_predicted]
862
-
863
- return learned_views, baked.permute(2, 0, 1), total_weights.permute(2, 0, 1)
864
-
865
- # Move the internel data to a specific device
866
- def to(self, device):
867
- for mesh_name in ["mesh", "mesh_d", "mesh_uv"]:
868
- if hasattr(self, mesh_name):
869
- mesh = getattr(self, mesh_name)
870
- setattr(self, mesh_name, mesh.to(device))
871
- for list_name in ["visible_triangles", "visibility_maps", "cos_maps"]:
872
- if hasattr(self, list_name):
873
- map_list = getattr(self, list_name)
874
- for i in range(len(map_list)):
875
- map_list[i] = map_list[i].to(device)

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
step1x3d_texture/renderer/shader.py DELETED
@@ -1,127 +0,0 @@
1
- from typing import Optional
2
-
3
- import torch
4
- import pytorch3d
5
-
6
-
7
- from pytorch3d.io import load_objs_as_meshes, load_obj, save_obj
8
- from pytorch3d.ops import interpolate_face_attributes
9
-
10
- from pytorch3d.structures import Meshes
11
- from pytorch3d.renderer import (
12
- look_at_view_transform,
13
- FoVPerspectiveCameras,
14
- AmbientLights,
15
- PointLights,
16
- DirectionalLights,
17
- Materials,
18
- RasterizationSettings,
19
- MeshRenderer,
20
- MeshRasterizer,
21
- SoftPhongShader,
22
- SoftSilhouetteShader,
23
- HardPhongShader,
24
- TexturesVertex,
25
- TexturesUV,
26
- Materials,
27
- )
28
- from pytorch3d.renderer.blending import BlendParams, hard_rgb_blend
29
- from pytorch3d.renderer.utils import convert_to_tensors_and_broadcast, TensorProperties
30
-
31
- from pytorch3d.renderer.lighting import AmbientLights
32
- from pytorch3d.renderer.materials import Materials
33
- from pytorch3d.renderer.mesh.shader import ShaderBase
34
- from pytorch3d.renderer.mesh.shading import _apply_lighting, flat_shading
35
- from pytorch3d.renderer.mesh.rasterizer import Fragments
36
-
37
-
38
- """
39
- Customized the original pytorch3d hard flat shader to support N channel flat shading
40
- """
41
-
42
-
43
- class HardNChannelFlatShader(ShaderBase):
44
- """
45
- Per face lighting - the lighting model is applied using the average face
46
- position and the face normal. The blending function hard assigns
47
- the color of the closest face for each pixel.
48
-
49
- To use the default values, simply initialize the shader with the desired
50
- device e.g.
51
-
52
- .. code-block::
53
-
54
- shader = HardFlatShader(device=torch.device("cuda:0"))
55
- """
56
-
57
- def __init__(
58
- self,
59
- device="cpu",
60
- cameras: Optional[TensorProperties] = None,
61
- lights: Optional[TensorProperties] = None,
62
- materials: Optional[Materials] = None,
63
- blend_params: Optional[BlendParams] = None,
64
- channels: int = 3,
65
- ):
66
- self.channels = channels
67
- ones = ((1.0,) * channels,)
68
- zeros = ((0.0,) * channels,)
69
-
70
- if (
71
- not isinstance(lights, AmbientLights)
72
- or not lights.ambient_color.shape[-1] == channels
73
- ):
74
- lights = AmbientLights(
75
- ambient_color=ones,
76
- device=device,
77
- )
78
-
79
- if not materials or not materials.ambient_color.shape[-1] == channels:
80
- materials = Materials(
81
- device=device,
82
- diffuse_color=zeros,
83
- ambient_color=ones,
84
- specular_color=zeros,
85
- shininess=0.0,
86
- )
87
-
88
- blend_params_new = BlendParams(background_color=(1.0,) * channels)
89
- if not isinstance(blend_params, BlendParams):
90
- blend_params = blend_params_new
91
- else:
92
- background_color_ = blend_params.background_color
93
- if (
94
- isinstance(background_color_, Sequence[float])
95
- and not len(background_color_) == channels
96
- ):
97
- blend_params = blend_params_new
98
- if (
99
- isinstance(background_color_, torch.Tensor)
100
- and not background_color_.shape[-1] == channels
101
- ):
102
- blend_params = blend_params_new
103
-
104
- super().__init__(
105
- device,
106
- cameras,
107
- lights,
108
- materials,
109
- blend_params,
110
- )
111
-
112
- def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
113
- cameras = super()._get_cameras(**kwargs)
114
- texels = meshes.sample_textures(fragments)
115
- lights = kwargs.get("lights", self.lights)
116
- materials = kwargs.get("materials", self.materials)
117
- blend_params = kwargs.get("blend_params", self.blend_params)
118
- colors = flat_shading(
119
- meshes=meshes,
120
- fragments=fragments,
121
- texels=texels,
122
- lights=lights,
123
- cameras=cameras,
124
- materials=materials,
125
- )
126
- images = hard_rgb_blend(colors, fragments, blend_params)
127
- return images

step1x3d_texture/{renderer → texture_sync}/__init__.py RENAMED
File without changes
step1x3d_texture/texture_sync/geometry.py ADDED
@@ -0,0 +1,141 @@
+ import torch
+ import pytorch3d
+ import torch.nn.functional as F
+
+ from pytorch3d.ops import interpolate_face_attributes
+
+ from pytorch3d.renderer import (
+     look_at_view_transform,
+     FoVPerspectiveCameras,
+     AmbientLights,
+     PointLights,
+     DirectionalLights,
+     Materials,
+     RasterizationSettings,
+     MeshRenderer,
+     MeshRasterizer,
+     SoftPhongShader,
+     SoftSilhouetteShader,
+     HardPhongShader,
+     TexturesVertex,
+     TexturesUV,
+     Materials,
+
+ )
+ from pytorch3d.renderer.blending import BlendParams, hard_rgb_blend
+ from pytorch3d.renderer.utils import convert_to_tensors_and_broadcast, TensorProperties
+ from pytorch3d.renderer.mesh.shader import ShaderBase
+
+
+ def get_cos_angle(
+     points, normals, camera_position
+ ):
+     '''
+     Calculate the cosine similarity between the view->surface direction and the surface normal.
+     '''
+
+     if points.shape != normals.shape:
+         msg = "Expected points and normals to have the same shape: got %r, %r"
+         raise ValueError(msg % (points.shape, normals.shape))
+
+     # Ensure all inputs have same batch dimension as points
+     matched_tensors = convert_to_tensors_and_broadcast(
+         points, camera_position, device=points.device
+     )
+     _, camera_position = matched_tensors
+
+     # Reshape direction and color so they have all the arbitrary intermediate
+     # dimensions as points. Assume first dim = batch dim and last dim = 3.
+     points_dims = points.shape[1:-1]
+     expand_dims = (-1,) + (1,) * len(points_dims)
+
+     if camera_position.shape != normals.shape:
+         camera_position = camera_position.view(expand_dims + (3,))
+
+     normals = F.normalize(normals, p=2, dim=-1, eps=1e-6)
+
+     # Calculate the cosine value.
+     view_direction = camera_position - points
+     view_direction = F.normalize(view_direction, p=2, dim=-1, eps=1e-6)
+     cos_angle = torch.sum(view_direction * normals, dim=-1, keepdim=True)
+     cos_angle = cos_angle.clamp(0, 1)
+
+     # Cosine of the angle between the reflected light ray and the viewer
+     return cos_angle
+
+
+ def _geometry_shading_with_pixels(
+     meshes, fragments, lights, cameras, materials, texels
+ ):
+     """
+     Render pixel-space vertex position, normal (world), depth, and cos angle.
+
+     Args:
+         meshes: Batch of meshes
+         fragments: Fragments named tuple with the outputs of rasterization
+         lights: Lights class containing a batch of lights
+         cameras: Cameras class containing a batch of cameras
+         materials: Materials class containing a batch of material properties
+         texels: texture per pixel of shape (N, H, W, K, 3)
+
+     Returns:
+         colors: (N, H, W, K, 3)
+         pixel_coords: (N, H, W, K, 3), camera coordinates of each intersection.
+     """
+     verts = meshes.verts_packed()  # (V, 3)
+     faces = meshes.faces_packed()  # (F, 3)
+     vertex_normals = meshes.verts_normals_packed()  # (V, 3)
+     faces_verts = verts[faces]
+     faces_normals = vertex_normals[faces]
+     pixel_coords_in_camera = interpolate_face_attributes(
+         fragments.pix_to_face, fragments.bary_coords, faces_verts
+     )
+     pixel_normals = interpolate_face_attributes(
+         fragments.pix_to_face, fragments.bary_coords, faces_normals
+     )
+
+     cos_angles = get_cos_angle(pixel_coords_in_camera, pixel_normals, cameras.get_camera_center())
+
+     return pixel_coords_in_camera, pixel_normals, fragments.zbuf[...,None], cos_angles
+
+
+ class HardGeometryShader(ShaderBase):
+     """
+     Renders common geometric information.
+
+
+     """
+
+     def forward(self, fragments, meshes, **kwargs):
+         cameras = super()._get_cameras(**kwargs)
+         texels = self.texel_from_uv(fragments, meshes)
+
+         lights = kwargs.get("lights", self.lights)
+         materials = kwargs.get("materials", self.materials)
+         blend_params = kwargs.get("blend_params", self.blend_params)
+         verts, normals, depths, cos_angles = _geometry_shading_with_pixels(
+             meshes=meshes,
+             fragments=fragments,
+             texels=texels,
+             lights=lights,
+             cameras=cameras,
+             materials=materials,
+         )
+         verts = hard_rgb_blend(verts, fragments, blend_params)
+         normals = hard_rgb_blend(normals, fragments, blend_params)
+         depths = hard_rgb_blend(depths, fragments, blend_params)
+         cos_angles = hard_rgb_blend(cos_angles, fragments, blend_params)
+         texels = hard_rgb_blend(texels, fragments, blend_params)
+         return verts, normals, depths, cos_angles, texels, fragments
+
+     def texel_from_uv(self, fragments, meshes):
+         texture_tmp = meshes.textures
+         maps_tmp = texture_tmp.maps_padded()
+         uv_color = [[[1,0],[1,1]],[[0,0],[0,1]]]
+         uv_color = torch.FloatTensor(uv_color).to(maps_tmp[0].device).type(maps_tmp[0].dtype)
+         uv_texture = TexturesUV([uv_color.clone() for t in maps_tmp], texture_tmp.faces_uvs_padded(), texture_tmp.verts_uvs_padded(), sampling_mode="bilinear")
+         meshes.textures = uv_texture
+         texels = meshes.sample_textures(fragments)
+         meshes.textures = texture_tmp
+         texels = torch.cat((texels, texels[...,-1:]*0), dim=-1)
+         return texels
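
For orientation, here is a minimal, hedged sketch of how HardGeometryShader is meant to be used: it is temporarily swapped into a pytorch3d MeshRenderer to rasterize per-pixel geometry buffers, which is the same pattern UVProjection.render_geometry follows later in this commit. The renderer, mesh, cameras, lights and device names below are assumed to exist already and are illustrative only.

# Hedged usage sketch (assumes an existing MeshRenderer `renderer`, a textured
# Meshes object `mesh`, pytorch3d `cameras`, AmbientLights `lights`, and `device`).
geometry_shader = HardGeometryShader(device=device, cameras=cameras[0], lights=lights)
flat_shader = renderer.shader          # remember the normal shader
renderer.shader = geometry_shader      # swap in the geometry shader
verts, normals, depths, cos_angles, texels, fragments = renderer(
    mesh.extend(len(cameras)), cameras=cameras, lights=lights
)
renderer.shader = flat_shader          # restore the original shader afterwards
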
step1x3d_texture/texture_sync/project.py ADDED
@@ -0,0 +1,521 @@
+ import torch
+ import pytorch3d
+ import os
+
+ from pytorch3d.io import load_objs_as_meshes, load_obj, save_obj, IO
+
+ from pytorch3d.structures import Meshes
+ from pytorch3d.renderer import (
+     look_at_view_transform,
+     FoVPerspectiveCameras,
+     FoVOrthographicCameras,
+     AmbientLights,
+     PointLights,
+     DirectionalLights,
+     Materials,
+     RasterizationSettings,
+     MeshRenderer,
+     MeshRasterizer,
+     TexturesUV
+ )
+
+ from .geometry import HardGeometryShader
+ from .shader import HardNChannelFlatShader
+ from .voronoi import voronoi_solve
+ from trimesh import Trimesh
+
+ # Pytorch3D based rendering functions, managed in a class
+ # Render size is recommended to be the same as your latent view size
+ # DO NOT USE "bilinear" sampling when you are handling latents.
+ # Stable Diffusion has 4 latent channels so use channels=4
+
+ class UVProjection():
+     def __init__(self, texture_size=96, render_size=64, sampling_mode="nearest", channels=3, device=None):
+         self.channels = channels
+         self.device = device or torch.device("cpu")
+         self.lights = AmbientLights(ambient_color=((1.0,)*channels,), device=self.device)
+         self.target_size = (texture_size, texture_size)
+         self.render_size = render_size
+         self.sampling_mode = sampling_mode
+
+
+     # # Load obj mesh, rescale the mesh to fit into the bounding box
+     # def load_mesh(self, mesh_path, scale_factor=2.0, auto_center=True, autouv=False):
+     #     mesh = load_objs_as_meshes([mesh_path], device=self.device)
+     #     if auto_center:
+     #         verts = mesh.verts_packed()
+     #         max_bb = (verts - 0).max(0)[0]
+     #         min_bb = (verts - 0).min(0)[0]
+     #         scale = (max_bb - min_bb).max()/2
+     #         center = (max_bb+min_bb) /2
+     #         mesh.offset_verts_(-center)
+     #         mesh.scale_verts_((scale_factor / float(scale)))
+     #     else:
+     #         mesh.scale_verts_((scale_factor))
+
+     #     if autouv or (mesh.textures is None):
+     #         mesh = self.uv_unwrap(mesh)
+     #     self.mesh = mesh
+     # Load obj mesh, rescale the mesh to fit into the bounding box
+     def load_mesh(self, mesh, scale_factor=2.0, auto_center=True, autouv=False, normals=None):
+         if isinstance(mesh, Trimesh):
+             vertices = torch.tensor(mesh.vertices, dtype=torch.float32).to(self.device)
+             faces = torch.tensor(mesh.faces, dtype=torch.int64).to(self.device)
+             if faces.ndim == 1:
+                 faces = faces.unsqueeze(0)
+             mesh = Meshes(
+                 verts=[vertices],
+                 faces=[faces]
+             )
+             verts = mesh.verts_packed()
+             mesh = mesh.update_padded(verts[None, :, :])
+             # from pytorch3d.renderer.mesh.textures import TexturesVertex
+             # if normals is None:
+             #     normals = mesh.verts_normals_packed()
+             # # set normals as vertex colors
+             # mesh.textures = TexturesVertex(verts_features=[normals / 2 + 0.5])
+         elif isinstance(mesh, str) and os.path.isfile(mesh):
+             mesh = load_objs_as_meshes([mesh], device=self.device)
+         if auto_center:
+             verts = mesh.verts_packed()
+             max_bb = (verts - 0).max(0)[0]
+             min_bb = (verts - 0).min(0)[0]
+             scale = (max_bb - min_bb).max()/2
+             center = (max_bb+min_bb) /2
+             mesh.offset_verts_(-center)
+             mesh.scale_verts_((scale_factor / float(scale)))
+         else:
+             mesh.scale_verts_((scale_factor))
+
+         if autouv or (mesh.textures is None):
+             mesh = self.uv_unwrap(mesh)
+         self.mesh = mesh
+
+     def load_glb_mesh(self, mesh_path, scale_factor=2.0, auto_center=True, autouv=False):
+         from pytorch3d.io.experimental_gltf_io import MeshGlbFormat
+         io = IO()
+         io.register_meshes_format(MeshGlbFormat())
+         with open(mesh_path, "rb") as f:
+             mesh = io.load_mesh(f, include_textures=True, device=self.device)
+         if auto_center:
+             verts = mesh.verts_packed()
+             max_bb = (verts - 0).max(0)[0]
+             min_bb = (verts - 0).min(0)[0]
+             scale = (max_bb - min_bb).max()/2
+             center = (max_bb+min_bb) /2
+             mesh.offset_verts_(-center)
+             mesh.scale_verts_((scale_factor / float(scale)))
+         else:
+             mesh.scale_verts_((scale_factor))
+         if autouv or (mesh.textures is None):
+             mesh = self.uv_unwrap(mesh)
+         self.mesh = mesh
+
+
+     # Save obj mesh
+     def save_mesh(self, mesh_path, texture):
+         save_obj(mesh_path,
+             self.mesh.verts_list()[0],
+             self.mesh.faces_list()[0],
+             verts_uvs=self.mesh.textures.verts_uvs_list()[0],
+             faces_uvs=self.mesh.textures.faces_uvs_list()[0],
+             texture_map=texture)
+
+     # Code referred to TEXTure code (https://github.com/TEXTurePaper/TEXTurePaper.git)
+     def uv_unwrap(self, mesh):
+         verts_list = mesh.verts_list()[0]
+         faces_list = mesh.faces_list()[0]
+
+
+         import xatlas
+         import numpy as np
+         v_np = verts_list.cpu().numpy()
+         f_np = faces_list.int().cpu().numpy()
+         atlas = xatlas.Atlas()
+         atlas.add_mesh(v_np, f_np)
+         chart_options = xatlas.ChartOptions()
+         chart_options.max_iterations = 4
+         atlas.generate(chart_options=chart_options)
+         vmapping, ft_np, vt_np = atlas[0]  # [N], [M, 3], [N, 2]
+
+         vt = torch.from_numpy(vt_np.astype(np.float32)).type(verts_list.dtype).to(mesh.device)
+         ft = torch.from_numpy(ft_np.astype(np.int64)).type(faces_list.dtype).to(mesh.device)
+
+         new_map = torch.zeros(self.target_size+(self.channels,), device=mesh.device)
+         new_tex = TexturesUV(
+             [new_map],
+             [ft],
+             [vt],
+             sampling_mode=self.sampling_mode
+         )
+
+         mesh.textures = new_tex
+         return mesh
+
+
+     '''
+     A function that disconnects faces in the mesh according to
+     its UV seams. The number of vertices is made equal to the
+     number of unique vertices in its UV layout, while the faces
+     list is kept intact.
+     '''
+     def disconnect_faces(self):
+         mesh = self.mesh
+         verts_list = mesh.verts_list()
+         faces_list = mesh.faces_list()
+         verts_uvs_list = mesh.textures.verts_uvs_list()
+         faces_uvs_list = mesh.textures.faces_uvs_list()
+         packed_list = [v[f] for v, f in zip(verts_list, faces_list)]
+         verts_disconnect_list = [
+             torch.zeros(
+                 (verts_uvs_list[i].shape[0], 3),
+                 dtype=verts_list[0].dtype,
+                 device=verts_list[0].device
+             )
+             for i in range(len(verts_list))]
+         for i in range(len(verts_list)):
+             verts_disconnect_list[i][faces_uvs_list[i]] = packed_list[i]
+         assert not mesh.has_verts_normals(), "Not implemented for vertex normals"
+         self.mesh_d = Meshes(verts_disconnect_list, faces_uvs_list, mesh.textures)
+         return self.mesh_d
+
+
+     '''
+     A function that constructs a temp mesh for back-projection.
+     Given the disconnected mesh and a rasterizer, it uses the original
+     UVs (with a pseudo z value) as world-space geometry, so the projected
+     faces can serve as the UV layout.
+     '''
+     def construct_uv_mesh(self):
+         mesh = self.mesh_d
+         verts_list = mesh.verts_list()
+         verts_uvs_list = mesh.textures.verts_uvs_list()
+         # faces_list = [torch.flip(faces, [-1]) for faces in mesh.faces_list()]
+         new_verts_list = []
+         for i, (verts, verts_uv) in enumerate(zip(verts_list, verts_uvs_list)):
+             verts = verts.clone()
+             verts_uv = verts_uv.clone()
+             verts[...,0:2] = verts_uv[...,:]
+             verts = (verts - 0.5) * 2
+             verts[...,2] *= 1
+             new_verts_list.append(verts)
+         textures_uv = mesh.textures.clone()
+         self.mesh_uv = Meshes(new_verts_list, mesh.faces_list(), textures_uv)
+         return self.mesh_uv
+
+
+     # Set texture for the current mesh.
+     def set_texture_map(self, texture):
+         new_map = texture.permute(1, 2, 0)
+         new_map = new_map.to(self.device)
+         new_tex = TexturesUV(
+             [new_map],
+             self.mesh.textures.faces_uvs_padded(),
+             self.mesh.textures.verts_uvs_padded(),
+             sampling_mode=self.sampling_mode
+         )
+         self.mesh.textures = new_tex
+
+
+     # Set the initial normal noise texture
+     # No generator here for replication of the experiment result. Add one as you wish
+     def set_noise_texture(self, channels=None):
+         if not channels:
+             channels = self.channels
+         noise_texture = torch.normal(0, 1, (channels,) + self.target_size, device=self.device)
+         self.set_texture_map(noise_texture)
+         return noise_texture
+
+
+     # Set the cameras given the camera poses and centers
+     def set_cameras(self, camera_poses, centers=None, camera_distance=2.7, scale=None):
+         elev = torch.FloatTensor([pose[0] for pose in camera_poses])
+         azim = torch.FloatTensor([pose[1] for pose in camera_poses])
+         R, T = look_at_view_transform(dist=camera_distance, elev=elev, azim=azim, at=centers or ((0,0,0),))
+         # self.cameras = FoVOrthographicCameras(device=self.device, R=R, T=T, scale_xyz=scale or ((1,1,1),))
+         self.cameras = FoVOrthographicCameras(device=self.device, R=R, T=T, scale_xyz=scale or ((1,1,1),), znear=0.1, min_x=-0.55, max_x=0.55, min_y=-0.55, max_y=0.55)
+
+     # Set all necessary internal data for rendering and texture baking
+     # Can be used to refresh after changing camera positions
+     def set_cameras_and_render_settings(self, camera_poses, centers=None, camera_distance=2.7, render_size=None, scale=None):
+         self.set_cameras(camera_poses, centers, camera_distance, scale=scale)
+         if render_size is None:
+             render_size = self.render_size
+         if not hasattr(self, "renderer"):
+             self.setup_renderer(size=render_size)
+         if not hasattr(self, "mesh_d"):
+             self.disconnect_faces()
+         if not hasattr(self, "mesh_uv"):
+             self.construct_uv_mesh()
+         self.calculate_tex_gradient()
+         self.calculate_visible_triangle_mask()
+         _, _, _, cos_maps, _, _ = self.render_geometry()
+         self.calculate_cos_angle_weights(cos_maps)
+
+
+     # Setup renderers for rendering
+     # max faces per bin set to 30000 to avoid overflow in many test cases.
+     # You can use default value to let pytorch3d handle that for you.
+     def setup_renderer(self, size=64, blur=0.0, face_per_pix=1, perspective_correct=False, channels=None):
+         if not channels:
+             channels = self.channels
+
+         self.raster_settings = RasterizationSettings(
+             image_size=size,
+             blur_radius=blur,
+             faces_per_pixel=face_per_pix,
+             perspective_correct=perspective_correct,
+             cull_backfaces=True,
+             max_faces_per_bin=30000,
+         )
+
+         self.renderer = MeshRenderer(
+             rasterizer=MeshRasterizer(
+                 cameras=self.cameras,
+                 raster_settings=self.raster_settings,
+
+             ),
+             shader=HardNChannelFlatShader(
+                 device=self.device,
+                 cameras=self.cameras,
+                 lights=self.lights,
+                 channels=channels
+                 # materials=materials
+             )
+         )
+
+
+     # Bake screen-space cosine weights to UV space
+     # May be able to reimplement using the generic "bake_texture" function, but it works so leave it here for now
+     @torch.enable_grad()
+     def calculate_cos_angle_weights(self, cos_angles, fill=True, channels=None):
+         if not channels:
+             channels = self.channels
+         cos_maps = []
+         tmp_mesh = self.mesh.clone()
+         for i in range(len(self.cameras)):
+
+             zero_map = torch.zeros(self.target_size+(channels,), device=self.device, requires_grad=True)
+             optimizer = torch.optim.SGD([zero_map], lr=1, momentum=0)
+             optimizer.zero_grad()
+             zero_tex = TexturesUV([zero_map], self.mesh.textures.faces_uvs_padded(), self.mesh.textures.verts_uvs_padded(), sampling_mode=self.sampling_mode)
+             tmp_mesh.textures = zero_tex
+
+             images_predicted = self.renderer(tmp_mesh, cameras=self.cameras[i], lights=self.lights)
+
+             loss = torch.sum((cos_angles[i,:,:,0:1]**1 - images_predicted)**2)
+             loss.backward()
+             optimizer.step()
+
+             if fill:
+                 zero_map = zero_map.detach() / (self.gradient_maps[i] + 1E-8)
+                 zero_map = voronoi_solve(zero_map, self.gradient_maps[i][...,0])
+             else:
+                 zero_map = zero_map.detach() / (self.gradient_maps[i]+1E-8)
+             cos_maps.append(zero_map)
+         self.cos_maps = cos_maps
+
+
+     # Get geometric info from fragment shader
+     # Can be used for generating conditioning image and cosine weights
+     # Returns some information you may not need, remember to release them for memory saving
+     @torch.no_grad()
+     def render_geometry(self, image_size=None):
+         if image_size:
+             size = self.renderer.rasterizer.raster_settings.image_size
+             self.renderer.rasterizer.raster_settings.image_size = image_size
+         shader = self.renderer.shader
+         self.renderer.shader = HardGeometryShader(device=self.device, cameras=self.cameras[0], lights=self.lights)
+         tmp_mesh = self.mesh.clone()
+
+         verts, normals, depths, cos_angles, texels, fragments = self.renderer(tmp_mesh.extend(len(self.cameras)), cameras=self.cameras, lights=self.lights)
+         self.renderer.shader = shader
+
+         if image_size:
+             self.renderer.rasterizer.raster_settings.image_size = size
+
+         return verts, normals, depths, cos_angles, texels, fragments
+
+
+     # Project world normal to view space and normalize
+     @torch.no_grad()
+     def decode_view_normal(self, normals):
+         w2v_mat = self.cameras.get_full_projection_transform()
+         normals_view = torch.clone(normals)[:,:,:,0:3]
+         normals_view = normals_view.reshape(normals_view.shape[0], -1, 3)
+         normals_view = w2v_mat.transform_normals(normals_view)
+         normals_view = normals_view.reshape(normals.shape[0:3]+(3,))
+         normals_view[:,:,:,2] *= -1
+         normals = (normals_view[...,0:3]+1) * normals[...,3:] / 2 + torch.FloatTensor((0.5, 0.5, 1)).to(self.device) * (1 - normals[...,3:])
+         # normals = torch.cat([normal for normal in normals], dim=1)
+         normals = normals.clamp(0, 1)
+         return normals
+
+
+     # Normalize absolute depth to inverse depth
+     @torch.no_grad()
+     def decode_normalized_depth(self, depths, batched_norm=False):
+         view_z, mask = depths.unbind(-1)
+         view_z = view_z * mask + 100 * (1-mask)
+         inv_z = 1 / view_z
+         inv_z_min = inv_z * mask + 100 * (1-mask)
+         if not batched_norm:
+             max_ = torch.max(inv_z, 1, keepdim=True)
+             max_ = torch.max(max_[0], 2, keepdim=True)[0]
+
+             min_ = torch.min(inv_z_min, 1, keepdim=True)
+             min_ = torch.min(min_[0], 2, keepdim=True)[0]
+         else:
+             max_ = torch.max(inv_z)
+             min_ = torch.min(inv_z_min)
+         inv_z = (inv_z - min_) / (max_ - min_)
+         inv_z = inv_z.clamp(0,1)
+         inv_z = inv_z[...,None].repeat(1,1,1,3)
+
+         return inv_z
+
+
+     # Multiple screen pixels could pass gradient to a same texel
+     # We can precalculate this gradient strength and use it to normalize gradients when we bake textures
+     @torch.enable_grad()
+     def calculate_tex_gradient(self, channels=None):
+         if not channels:
+             channels = self.channels
+         tmp_mesh = self.mesh.clone()
+         gradient_maps = []
+         for i in range(len(self.cameras)):
+             zero_map = torch.zeros(self.target_size+(channels,), device=self.device, requires_grad=True)
+             optimizer = torch.optim.SGD([zero_map], lr=1, momentum=0)
+             optimizer.zero_grad()
+             zero_tex = TexturesUV([zero_map], self.mesh.textures.faces_uvs_padded(), self.mesh.textures.verts_uvs_padded(), sampling_mode=self.sampling_mode)
+             tmp_mesh.textures = zero_tex
+             images_predicted = self.renderer(tmp_mesh, cameras=self.cameras[i], lights=self.lights)
+             loss = torch.sum((1 - images_predicted)**2)
+             loss.backward()
+             optimizer.step()
+
+             gradient_maps.append(zero_map.detach())
+
+         self.gradient_maps = gradient_maps
+
+
+     # Get the UV space masks of triangles visible in each view
+     # First get face ids from each view, then filter pixels on UV space to generate masks
+     @torch.no_grad()
+     def calculate_visible_triangle_mask(self, channels=None, image_size=(512,512)):
+         if not channels:
+             channels = self.channels
+
+         pix2face_list = []
+         for i in range(len(self.cameras)):
+             self.renderer.rasterizer.raster_settings.image_size = image_size
+             pix2face = self.renderer.rasterizer(self.mesh_d, cameras=self.cameras[i]).pix_to_face
+             self.renderer.rasterizer.raster_settings.image_size = self.render_size
+             pix2face_list.append(pix2face)
+
+         if not hasattr(self, "mesh_uv"):
+             self.construct_uv_mesh()
+
+         raster_settings = RasterizationSettings(
+             image_size=self.target_size,
+             blur_radius=0,
+             faces_per_pixel=1,
+             perspective_correct=False,
+             cull_backfaces=False,
+             max_faces_per_bin=30000,
+         )
+
+         R, T = look_at_view_transform(dist=2, elev=0, azim=0)
+         cameras = FoVOrthographicCameras(device=self.device, R=R, T=T)
+
+         rasterizer = MeshRasterizer(
+             cameras=cameras,
+             raster_settings=raster_settings
+         )
+         uv_pix2face = rasterizer(self.mesh_uv).pix_to_face
+
+         visible_triangles = []
+         for i in range(len(pix2face_list)):
+             valid_faceid = torch.unique(pix2face_list[i])
+             valid_faceid = valid_faceid[1:] if valid_faceid[0]==-1 else valid_faceid
+             mask = torch.isin(uv_pix2face[0], valid_faceid, assume_unique=False)
+             # uv_pix2face[0][~mask] = -1
+             triangle_mask = torch.ones(self.target_size+(1,), device=self.device)
+             triangle_mask[~mask] = 0
+
+             triangle_mask[:,1:][triangle_mask[:,:-1] > 0] = 1
+             triangle_mask[:,:-1][triangle_mask[:,1:] > 0] = 1
+             triangle_mask[1:,:][triangle_mask[:-1,:] > 0] = 1
+             triangle_mask[:-1,:][triangle_mask[1:,:] > 0] = 1
+             visible_triangles.append(triangle_mask)
+
+         self.visible_triangles = visible_triangles
+
+
+
+     # Render the current mesh and texture from current cameras
+     def render_textured_views(self):
+         meshes = self.mesh.extend(len(self.cameras))
+         images_predicted = self.renderer(meshes, cameras=self.cameras, lights=self.lights)
+
+         return [image.permute(2, 0, 1) for image in images_predicted]
+
+
+     # Bake views into a texture
+     # First bake into individual textures then combine based on cosine weight
+     @torch.enable_grad()
+     def bake_texture(self, views=None, main_views=[], cos_weighted=True, channels=None, exp=None, noisy=False, generator=None):
+         if not exp:
+             exp = 1
+         if not channels:
+             channels = self.channels
+         views = [view.permute(1, 2, 0) for view in views]
+
+         tmp_mesh = self.mesh
+         bake_maps = [torch.zeros(self.target_size+(views[0].shape[2],), device=self.device, requires_grad=True) for view in views]
+         optimizer = torch.optim.SGD(bake_maps, lr=1, momentum=0)
+         optimizer.zero_grad()
+         loss = 0
+         for i in range(len(self.cameras)):
+             bake_tex = TexturesUV([bake_maps[i]], tmp_mesh.textures.faces_uvs_padded(), tmp_mesh.textures.verts_uvs_padded(), sampling_mode=self.sampling_mode)
+             tmp_mesh.textures = bake_tex
+             images_predicted = self.renderer(tmp_mesh, cameras=self.cameras[i], lights=self.lights, device=self.device)
+             predicted_rgb = images_predicted[..., :-1]
+             loss += (((predicted_rgb[...] - views[i]))**2).sum()
+         loss.backward(retain_graph=False)
+         optimizer.step()
+
+         total_weights = 0
+         baked = 0
+         for i in range(len(bake_maps)):
+             normalized_baked_map = bake_maps[i].detach() / (self.gradient_maps[i] + 1E-8)
+             bake_map = voronoi_solve(normalized_baked_map, self.gradient_maps[i][...,0])
+             weight = self.visible_triangles[i] * (self.cos_maps[i]) ** exp
+             if noisy:
+                 noise = torch.rand(weight.shape[:-1]+(1,), generator=generator).type(weight.dtype).to(weight.device)
+                 weight *= noise
+             total_weights += weight
+             baked += bake_map * weight
+         baked /= total_weights + 1E-8
+         baked = voronoi_solve(baked, total_weights[...,0])
+
+         bake_tex = TexturesUV([baked], tmp_mesh.textures.faces_uvs_padded(), tmp_mesh.textures.verts_uvs_padded(), sampling_mode=self.sampling_mode)
+         tmp_mesh.textures = bake_tex
+         extended_mesh = tmp_mesh.extend(len(self.cameras))
+         images_predicted = self.renderer(extended_mesh, cameras=self.cameras, lights=self.lights)
+         learned_views = [image.permute(2, 0, 1) for image in images_predicted]
+
+         return learned_views, baked.permute(2, 0, 1), total_weights.permute(2, 0, 1)
+
+
+     # Move the internal data to a specific device
+     def to(self, device):
+         for mesh_name in ["mesh", "mesh_d", "mesh_uv"]:
+             if hasattr(self, mesh_name):
+                 mesh = getattr(self, mesh_name)
+                 setattr(self, mesh_name, mesh.to(device))
+         for list_name in ["visible_triangles", "visibility_maps", "cos_maps"]:
+             if hasattr(self, list_name):
+                 map_list = getattr(self, list_name)
+                 for i in range(len(map_list)):
+                     map_list[i] = map_list[i].to(device)
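
To make the intended call pattern concrete, here is a hedged usage sketch of UVProjection for latent-space texture synchronization, following the comments above the class (nearest sampling, channels=4 for Stable Diffusion latents, render size equal to the latent view size). The mesh object, camera poses and sizes below are placeholder example values, not the pipeline's actual configuration.

# Hedged usage sketch; `tri_mesh` is assumed to be an existing trimesh.Trimesh.
device = torch.device("cuda")
uvp = UVProjection(texture_size=512, render_size=64, sampling_mode="nearest", channels=4, device=device)
uvp.load_mesh(tri_mesh, autouv=True)                      # unwraps UVs with xatlas if the mesh has none
uvp.set_cameras_and_render_settings(
    camera_poses=[(0, 0), (0, 90), (0, 180), (0, 270)],   # (elev, azim) pairs, example values
    camera_distance=2.7,
)
noise_tex = uvp.set_noise_texture()                       # (4, 512, 512) Gaussian noise, also set as the mesh texture
latent_views = uvp.render_textured_views()                # one (channels + 1, H, W) tensor per view; last channel is the mask
views, baked_tex, weights = uvp.bake_texture(views=[v[:-1] for v in latent_views], exp=1)
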
step1x3d_texture/texture_sync/shader.py ADDED
@@ -0,0 +1,118 @@
+ from typing import Optional
+ from collections.abc import Sequence
+ import torch
+ import pytorch3d
+
+
+ from pytorch3d.io import load_objs_as_meshes, load_obj, save_obj
+ from pytorch3d.ops import interpolate_face_attributes
+
+ from pytorch3d.structures import Meshes
+ from pytorch3d.renderer import (
+     look_at_view_transform,
+     FoVPerspectiveCameras,
+     AmbientLights,
+     PointLights,
+     DirectionalLights,
+     Materials,
+     RasterizationSettings,
+     MeshRenderer,
+     MeshRasterizer,
+     SoftPhongShader,
+     SoftSilhouetteShader,
+     HardPhongShader,
+     TexturesVertex,
+     TexturesUV,
+     Materials,
+
+ )
+ from pytorch3d.renderer.blending import BlendParams, hard_rgb_blend
+ from pytorch3d.renderer.utils import convert_to_tensors_and_broadcast, TensorProperties
+
+ from pytorch3d.renderer.lighting import AmbientLights
+ from pytorch3d.renderer.materials import Materials
+ from pytorch3d.renderer.mesh.shader import ShaderBase
+ from pytorch3d.renderer.mesh.shading import _apply_lighting, flat_shading
+ from pytorch3d.renderer.mesh.rasterizer import Fragments
+
+
+ '''
+ Customized the original pytorch3d hard flat shader to support N channel flat shading
+ '''
+ class HardNChannelFlatShader(ShaderBase):
+     """
+     Per face lighting - the lighting model is applied using the average face
+     position and the face normal. The blending function hard assigns
+     the color of the closest face for each pixel.
+
+     To use the default values, simply initialize the shader with the desired
+     device e.g.
+
+     .. code-block::
+
+         shader = HardFlatShader(device=torch.device("cuda:0"))
+     """
+
+     def __init__(
+         self,
+         device="cpu",
+         cameras: Optional[TensorProperties] = None,
+         lights: Optional[TensorProperties] = None,
+         materials: Optional[Materials] = None,
+         blend_params: Optional[BlendParams] = None,
+         channels: int = 3,
+     ):
+         self.channels = channels
+         ones = ((1.0,)*channels,)
+         zeros = ((0.0,)*channels,)
+
+         if not isinstance(lights, AmbientLights) or not lights.ambient_color.shape[-1] == channels:
+             lights = AmbientLights(
+                 ambient_color=ones,
+                 device=device,
+             )
+
+         if not materials or not materials.ambient_color.shape[-1] == channels:
+             materials = Materials(
+                 device=device,
+                 diffuse_color=zeros,
+                 ambient_color=ones,
+                 specular_color=zeros,
+                 shininess=0.0,
+             )
+
+         blend_params_new = BlendParams(background_color=(1.0,)*channels)
+         if not isinstance(blend_params, BlendParams):
+             blend_params = blend_params_new
+         else:
+             background_color_ = blend_params.background_color
+             if isinstance(background_color_, Sequence) and not len(background_color_) == channels:
+                 blend_params = blend_params_new
+             if isinstance(background_color_, torch.Tensor) and not background_color_.shape[-1] == channels:
+                 blend_params = blend_params_new
+
+         super().__init__(
+             device,
+             cameras,
+             lights,
+             materials,
+             blend_params,
+         )
+
+
+     def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
+         cameras = super()._get_cameras(**kwargs)
+         texels = meshes.sample_textures(fragments)
+         lights = kwargs.get("lights", self.lights)
+         materials = kwargs.get("materials", self.materials)
+         blend_params = kwargs.get("blend_params", self.blend_params)
+         colors = flat_shading(
+             meshes=meshes,
+             fragments=fragments,
+             texels=texels,
+             lights=lights,
+             cameras=cameras,
+             materials=materials,
+         )
+         images = hard_rgb_blend(colors, fragments, blend_params)
+         return images
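
As a quick illustration, the shader's N-channel support is what lets UVProjection render 4-channel latent textures. A hedged sketch of that instantiation (cameras and device are assumed to exist) could look like this, mirroring UVProjection.setup_renderer above:

# Hedged sketch: a 4-channel flat shader for rendering latent-space textures.
lights = AmbientLights(ambient_color=((1.0,) * 4,), device=device)
shader = HardNChannelFlatShader(device=device, cameras=cameras, lights=lights, channels=4)
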
step1x3d_texture/texture_sync/step_sync.py ADDED
@@ -0,0 +1,125 @@
+ import torch
+ from diffusers.utils.torch_utils import randn_tensor
+
+ '''
+
+ Customized Step Function
+ step on texture
+ '''
+ @torch.no_grad()
+ def step_tex_sync(
+     scheduler,
+     uvp,
+     model_output: torch.FloatTensor,
+     timestep: int,
+     sample: torch.FloatTensor,
+     texture=None,
+     generator=None,
+     return_dict: bool = True,
+     guidance_scale=1,
+     main_views=[],
+     hires_original_views=True,
+     exp=None,
+     cos_weighted=True
+ ):
+     t = timestep
+
+     prev_t = scheduler.previous_timestep(t)
+
+     if model_output.shape[1] == sample.shape[1] * 2 and scheduler.variance_type in ["learned", "learned_range"]:
+         model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
+     else:
+         predicted_variance = None
+
+     # 1. compute alphas, betas
+     alpha_prod_t = scheduler.alphas_cumprod[t]
+     alpha_prod_t_prev = scheduler.alphas_cumprod[prev_t] if prev_t >= 0 else scheduler.one
+     beta_prod_t = 1 - alpha_prod_t
+     beta_prod_t_prev = 1 - alpha_prod_t_prev
+     current_alpha_t = alpha_prod_t / alpha_prod_t_prev
+     current_beta_t = 1 - current_alpha_t
+
+     # 2. compute predicted original sample from predicted noise also called
+     # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf
+     if scheduler.config.prediction_type == "epsilon":
+         pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
+     elif scheduler.config.prediction_type == "sample":
+         pred_original_sample = model_output
+     elif scheduler.config.prediction_type == "v_prediction":
+         pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
+     else:
+         raise ValueError(
+             f"prediction_type given as {scheduler.config.prediction_type} must be one of `epsilon`, `sample` or"
+             " `v_prediction` for the DDPMScheduler."
+         )
+     # 3. Clip or threshold "predicted x_0"
+     if scheduler.config.thresholding:
+         pred_original_sample = scheduler._threshold_sample(pred_original_sample)
+     elif scheduler.config.clip_sample:
+         pred_original_sample = pred_original_sample.clamp(
+             -scheduler.config.clip_sample_range, scheduler.config.clip_sample_range
+         )
+
+     # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
+     # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
+     pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
+     current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t
+
+     '''
+     Add multidiffusion here
+     '''
+
+     if texture is None:
+         sample_views = [view for view in sample]
+         sample_views, texture, _ = uvp.bake_texture(views=sample_views, main_views=main_views, exp=exp)
+         sample_views = torch.stack(sample_views, axis=0)[:,:-1,...]
+
+
+     original_views = [view for view in pred_original_sample]
+     original_views, original_tex, visibility_weights = uvp.bake_texture(views=original_views, main_views=main_views, exp=exp)
+     uvp.set_texture_map(original_tex)
+     original_views = uvp.render_textured_views()
+     original_views = torch.stack(original_views, axis=0)[:,:-1,...]
+
+     # 5. Compute predicted previous sample µ_t
+     # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
+     # pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
+     prev_tex = pred_original_sample_coeff * original_tex + current_sample_coeff * texture
+
+     # 6. Add noise
+     variance = 0
+
+     if predicted_variance is not None:
+         variance_views = [view for view in predicted_variance]
+         variance_views, variance_tex, visibility_weights = uvp.bake_texture(views=variance_views, main_views=main_views, cos_weighted=cos_weighted, exp=exp)
+         variance_views = torch.stack(variance_views, axis=0)[:,:-1,...]
+     else:
+         variance_tex = None
+
+     if t > 0:
+         device = texture.device
+         variance_noise = randn_tensor(
+             texture.shape, generator=generator, device=device, dtype=texture.dtype
+         )
+         if scheduler.variance_type == "fixed_small_log":
+             variance = scheduler._get_variance(t, predicted_variance=variance_tex) * variance_noise
+         elif scheduler.variance_type == "learned_range":
+             variance = scheduler._get_variance(t, predicted_variance=variance_tex)
+             variance = torch.exp(0.5 * variance) * variance_noise
+         else:
+             variance = (scheduler._get_variance(t, predicted_variance=variance_tex) ** 0.5) * variance_noise
+         prev_tex = prev_tex + variance
+
+     uvp.set_texture_map(prev_tex)
+     prev_views = uvp.render_textured_views()
+     pred_prev_sample = torch.clone(sample)
+     for i, view in enumerate(prev_views):
+         pred_prev_sample[i] = view[:-1]
+     masks = [view[-1:] for view in prev_views]
+
+     return {"prev_sample": pred_prev_sample, "pred_original_sample": pred_original_sample, "prev_tex": prev_tex}
+
+     if not return_dict:
+         return pred_prev_sample, pred_original_sample
+     pass
+
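
Since step_tex_sync mirrors the signature of a DDPM scheduler step but carries the denoising state on the baked UV texture, a schematic and deliberately simplified sketch of where it sits in a denoising loop is shown below; unet_forward, latents and uvp are placeholders, not the pipeline's exact wiring.

# Schematic sketch only: step_tex_sync takes the place of the usual scheduler.step call.
texture = None
for t in scheduler.timesteps:
    noise_pred = unet_forward(latents, t)     # placeholder for the guided multi-view UNet prediction
    out = step_tex_sync(scheduler, uvp, noise_pred, t, latents, texture=texture)
    latents, texture = out["prev_sample"], out["prev_tex"]
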
step1x3d_texture/{renderer → texture_sync}/voronoi.py RENAMED
File without changes