|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import cv2 |
|
import torch |
|
import trimesh |
|
import numpy as np |
|
from PIL import Image |
|
import torch.nn.functional as F |
|
from typing import Union, Optional, Tuple, List, Any, Callable |
|
from dataclasses import dataclass |
|
from enum import Enum |
|
from .camera_utils import ( |
|
transform_pos, |
|
get_mv_matrix, |
|
get_orthographic_projection_matrix, |
|
get_perspective_projection_matrix, |
|
) |
|
|
|
try:
    from .mesh_utils import load_mesh, save_mesh
except ImportError:
    print("Warning: bpy-based mesh IO (load_mesh/save_mesh) could not be imported.")

try:
    from .mesh_inpaint_processor import meshVerticeInpaint
except ImportError:
    print("Warning: meshVerticeInpaint could not be imported; vertex-based UV inpainting is unavailable.")
|
|
|
|
|
class RenderMode(Enum): |
|
"""Rendering mode enumeration.""" |
|
NORMAL = "normal" |
|
POSITION = "position" |
|
ALPHA = "alpha" |
|
UV_POS = "uvpos" |
|
|
|
|
|
class ReturnType(Enum): |
|
"""Return type enumeration.""" |
|
TENSOR = "th" |
|
NUMPY = "np" |
|
PIL = "pl" |
|
|
|
|
|
class TextureType(Enum): |
|
"""Texture type enumeration.""" |
|
DIFFUSE = "diffuse" |
|
METALLIC_ROUGHNESS = "mr" |
|
NORMAL = "normal" |
|
|
|
|
|
@dataclass |
|
class RenderConfig: |
|
"""Unified rendering configuration.""" |
|
elev: float = 0 |
|
azim: float = 0 |
|
camera_distance: Optional[float] = None |
|
center: Optional[List[float]] = None |
|
resolution: Optional[Union[int, Tuple[int, int]]] = None |
|
    bg_color: Optional[List[float]] = None
|
return_type: str = "th" |
|
|
|
def __post_init__(self): |
|
if self.bg_color is None: |
|
self.bg_color = [1, 1, 1] |
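

# Example (illustrative): a front view rendered at 512x512 with a white background,
# returned as a numpy array.
#   config = RenderConfig(elev=0, azim=0, resolution=512, bg_color=[1, 1, 1], return_type="np")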
|
|
|
|
|
@dataclass |
|
class ViewState: |
|
"""Camera view state for rendering pipeline.""" |
|
proj_mat: torch.Tensor |
|
mv_mat: torch.Tensor |
|
pos_camera: torch.Tensor |
|
pos_clip: torch.Tensor |
|
resolution: Tuple[int, int] |
|
|
|
|
|
def stride_from_shape(shape): |
|
""" |
|
Calculate stride values from a given shape for multi-dimensional indexing. |
|
|
|
Args: |
|
shape: Tuple or list representing tensor dimensions |
|
|
|
Returns: |
|
List of stride values for each dimension |
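
    Example (illustrative):
        stride_from_shape((4, 3, 2)) -> [6, 2, 1], i.e. the flat index of
        element (i, j, k) is i * 6 + j * 2 + k.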
|
""" |
|
stride = [1] |
|
for x in reversed(shape[1:]): |
|
stride.append(stride[-1] * x) |
|
return list(reversed(stride)) |
|
|
|
|
|
def scatter_add_nd_with_count(input, count, indices, values, weights=None): |
|
""" |
|
Perform scatter-add operation on N-dimensional tensors with counting. |
|
|
|
Args: |
|
input: Input tensor [..., C] with D dimensions + C channels |
|
count: Count tensor [..., 1] with D dimensions |
|
indices: Index tensor [N, D] of type long |
|
values: Value tensor [N, C] to scatter |
|
weights: Optional weight tensor [N, C], defaults to ones if None |
|
|
|
Returns: |
|
Tuple of (updated_input, updated_count) tensors |
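
    Example (illustrative sketch):
        grid = torch.zeros(4, 4, 3)
        cnt = torch.zeros(4, 4, 1)
        idx = torch.tensor([[1, 2]])   # one target cell at (row=1, col=2)
        val = torch.ones(1, 3)
        grid, cnt = scatter_add_nd_with_count(grid, cnt, idx, val)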
|
""" |
|
|
|
|
|
|
|
|
|
|
|
D = indices.shape[-1] |
|
C = input.shape[-1] |
|
size = input.shape[:-1] |
|
stride = stride_from_shape(size) |
|
|
|
assert len(size) == D |
|
|
|
input = input.view(-1, C) |
|
count = count.view(-1, 1) |
|
|
|
flatten_indices = (indices * torch.tensor(stride, dtype=torch.long, device=indices.device)).sum(-1) |
|
|
|
if weights is None: |
|
weights = torch.ones_like(values[..., :1]) |
|
|
|
input.scatter_add_(0, flatten_indices.unsqueeze(1).repeat(1, C), values) |
|
count.scatter_add_(0, flatten_indices.unsqueeze(1), weights) |
|
|
|
return input.view(*size, C), count.view(*size, 1) |
|
|
|
|
|
def linear_grid_put_2d(H, W, coords, values, return_count=False): |
|
""" |
|
Place values on a 2D grid using bilinear interpolation. |
|
|
|
Args: |
|
H: Grid height |
|
W: Grid width |
|
coords: Coordinate tensor [N, 2] with values in range [0, 1] |
|
values: Value tensor [N, C] to place on grid |
|
return_count: Whether to return count information |
|
|
|
Returns: |
|
2D grid tensor [H, W, C] with interpolated values, optionally with count tensor |
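
    Example (illustrative sketch):
        coords = torch.tensor([[0.25, 0.75]])      # one sample, (row, col) in [0, 1]
        values = torch.tensor([[1.0, 0.0, 0.0]])   # its RGB value
        grid = linear_grid_put_2d(16, 16, coords, values)   # -> [16, 16, 3]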
|
""" |
|
|
|
|
|
|
|
C = values.shape[-1] |
|
|
|
indices = coords * torch.tensor([H - 1, W - 1], dtype=torch.float32, device=coords.device) |
|
indices_00 = indices.floor().long() |
|
indices_00[:, 0].clamp_(0, H - 2) |
|
indices_00[:, 1].clamp_(0, W - 2) |
|
indices_01 = indices_00 + torch.tensor([0, 1], dtype=torch.long, device=indices.device) |
|
indices_10 = indices_00 + torch.tensor([1, 0], dtype=torch.long, device=indices.device) |
|
indices_11 = indices_00 + torch.tensor([1, 1], dtype=torch.long, device=indices.device) |
|
|
|
h = indices[..., 0] - indices_00[..., 0].float() |
|
w = indices[..., 1] - indices_00[..., 1].float() |
|
w_00 = (1 - h) * (1 - w) |
|
w_01 = (1 - h) * w |
|
w_10 = h * (1 - w) |
|
w_11 = h * w |
|
|
|
result = torch.zeros(H, W, C, device=values.device, dtype=values.dtype) |
|
count = torch.zeros(H, W, 1, device=values.device, dtype=values.dtype) |
|
weights = torch.ones_like(values[..., :1]) |
|
|
|
result, count = scatter_add_nd_with_count( |
|
result, count, indices_00, values * w_00.unsqueeze(1), weights * w_00.unsqueeze(1) |
|
) |
|
result, count = scatter_add_nd_with_count( |
|
result, count, indices_01, values * w_01.unsqueeze(1), weights * w_01.unsqueeze(1) |
|
) |
|
result, count = scatter_add_nd_with_count( |
|
result, count, indices_10, values * w_10.unsqueeze(1), weights * w_10.unsqueeze(1) |
|
) |
|
result, count = scatter_add_nd_with_count( |
|
result, count, indices_11, values * w_11.unsqueeze(1), weights * w_11.unsqueeze(1) |
|
) |
|
|
|
if return_count: |
|
return result, count |
|
|
|
mask = count.squeeze(-1) > 0 |
|
result[mask] = result[mask] / count[mask].repeat(1, C) |
|
|
|
return result |
|
|
|
|
|
def mipmap_linear_grid_put_2d(H, W, coords, values, min_resolution=128, return_count=False): |
|
""" |
|
Place values on 2D grid using mipmap-based multiresolution interpolation to fill holes. |
|
|
|
Args: |
|
H: Grid height |
|
W: Grid width |
|
coords: Coordinate tensor [N, 2] with values in range [0, 1] |
|
values: Value tensor [N, C] to place on grid |
|
min_resolution: Minimum resolution for mipmap levels |
|
return_count: Whether to return count information |
|
|
|
Returns: |
|
2D grid tensor [H, W, C] with filled values, optionally with count tensor |
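
    Example (illustrative sketch; uv is an [N, 2] tensor in [0, 1] and colors an [N, 3] tensor):
        texture = mipmap_linear_grid_put_2d(1024, 1024, uv, colors, min_resolution=128)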
|
""" |
|
|
|
|
|
|
|
C = values.shape[-1] |
|
|
|
result = torch.zeros(H, W, C, device=values.device, dtype=values.dtype) |
|
count = torch.zeros(H, W, 1, device=values.device, dtype=values.dtype) |
|
|
|
cur_H, cur_W = H, W |
|
|
|
while min(cur_H, cur_W) > min_resolution: |
|
|
|
|
|
mask = count.squeeze(-1) == 0 |
|
if not mask.any(): |
|
break |
|
|
|
cur_result, cur_count = linear_grid_put_2d(cur_H, cur_W, coords, values, return_count=True) |
|
result[mask] = ( |
|
result[mask] |
|
+ F.interpolate( |
|
cur_result.permute(2, 0, 1).unsqueeze(0).contiguous(), (H, W), mode="bilinear", align_corners=False |
|
) |
|
.squeeze(0) |
|
.permute(1, 2, 0) |
|
.contiguous()[mask] |
|
) |
|
count[mask] = ( |
|
count[mask] |
|
+ F.interpolate(cur_count.view(1, 1, cur_H, cur_W), (H, W), mode="bilinear", align_corners=False).view( |
|
H, W, 1 |
|
)[mask] |
|
) |
|
cur_H //= 2 |
|
cur_W //= 2 |
|
|
|
if return_count: |
|
return result, count |
|
|
|
mask = count.squeeze(-1) > 0 |
|
result[mask] = result[mask] / count[mask].repeat(1, C) |
|
|
|
return result |
|
|
|
|
|
|
|
|
|
def _normalize_image_input(image: Union[np.ndarray, torch.Tensor, Image.Image]) -> Union[np.ndarray, torch.Tensor]: |
|
"""Normalize image input to consistent format.""" |
|
if isinstance(image, Image.Image): |
|
return np.array(image) / 255.0 |
|
elif isinstance(image, torch.Tensor): |
|
return image.cpu().numpy() if image.is_cuda else image |
|
return image |
|
|
|
|
|
def _convert_texture_format(tex: Union[np.ndarray, torch.Tensor, Image.Image], |
|
texture_size: Tuple[int, int], device: str, force_set: bool = False) -> torch.Tensor: |
|
"""Unified texture format conversion logic.""" |
|
if not force_set: |
|
if isinstance(tex, np.ndarray): |
|
tex = Image.fromarray((tex * 255).astype(np.uint8)) |
|
elif isinstance(tex, torch.Tensor): |
|
tex_np = tex.cpu().numpy() |
|
tex = Image.fromarray((tex_np * 255).astype(np.uint8)) |
|
|
|
tex = tex.resize(texture_size).convert("RGB") |
|
tex = np.array(tex) / 255.0 |
|
return torch.from_numpy(tex).to(device).float() |
|
else: |
|
if isinstance(tex, np.ndarray): |
|
tex = torch.from_numpy(tex) |
|
return tex.to(device).float() |
|
|
|
|
|
def _format_output(image: torch.Tensor, return_type: str) -> Union[torch.Tensor, np.ndarray, Image.Image]: |
|
"""Convert output to requested format.""" |
|
if return_type == ReturnType.NUMPY.value: |
|
return image.cpu().numpy() |
|
elif return_type == ReturnType.PIL.value: |
|
img_np = image.cpu().numpy() * 255 |
|
return Image.fromarray(img_np.astype(np.uint8)) |
|
return image |
|
|
|
|
|
def _ensure_resolution_format(resolution: Optional[Union[int, Tuple[int, int]]], |
|
default: Tuple[int, int]) -> Tuple[int, int]: |
|
"""Ensure resolution is in (height, width) format.""" |
|
if resolution is None: |
|
return default |
|
if isinstance(resolution, (int, float)): |
|
return (int(resolution), int(resolution)) |
|
return tuple(resolution) |
|
|
|
|
|
def _apply_background_mask(content: torch.Tensor, visible_mask: torch.Tensor, |
|
bg_color: List[float], device: str) -> torch.Tensor: |
|
"""Apply background color to masked regions.""" |
|
bg_tensor = torch.tensor(bg_color, dtype=torch.float32, device=device) |
|
return content * visible_mask + bg_tensor * (1 - visible_mask) |
|
|
|
|
|
class MeshRender: |
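    """Rasterization-based mesh renderer with UV texture-baking utilities.

    Wraps a rasterizer backend (currently the "cr" custom_rasterizer extension) to render
    normals, positions, alpha masks and UV-space feature maps, and to back-project and bake
    multi-view images into the mesh's UV texture.
    """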
|
def __init__( |
|
self, |
|
camera_distance=1.45, |
|
camera_type="orth", |
|
default_resolution=1024, |
|
texture_size=1024, |
|
use_antialias=True, |
|
max_mip_level=None, |
|
filter_mode="linear-mipmap-linear", |
|
bake_mode="back_sample", |
|
raster_mode="cr", |
|
shader_type="face", |
|
use_opengl=False, |
|
device="cuda", |
|
): |
|
""" |
|
Initialize mesh renderer with configurable parameters. |
|
|
|
Args: |
|
camera_distance: Distance from camera to object center |
|
camera_type: Type of camera projection ("orth" or "perspective") |
|
default_resolution: Default rendering resolution |
|
texture_size: Size of texture maps |
|
use_antialias: Whether to use antialiasing |
|
max_mip_level: Maximum mipmap level for texture filtering |
|
filter_mode: Texture filtering mode |
|
bake_mode: Texture baking method ("back_sample", "linear", "mip-map") |
|
raster_mode: Rasterization backend ("cr" for custom rasterizer) |
|
shader_type: Shading type ("face" or "vertex") |
|
use_opengl: Whether to use OpenGL backend (deprecated) |
|
device: Computing device ("cuda" or "cpu") |
|
""" |
|
|
|
self.device = device |
|
|
|
self.set_default_render_resolution(default_resolution) |
|
self.set_default_texture_resolution(texture_size) |
|
|
|
self.camera_distance = camera_distance |
|
self.use_antialias = use_antialias |
|
self.max_mip_level = max_mip_level |
|
self.filter_mode = filter_mode |
|
self.bake_angle_thres = 75 |
|
self.set_boundary_unreliable_scale(2) |
|
self.bake_mode = bake_mode |
|
self.shader_type = shader_type |
|
|
|
self.raster_mode = raster_mode |
|
if self.raster_mode == "cr": |
|
import custom_rasterizer as cr |
|
|
|
self.raster = cr |
|
else: |
|
raise f"No raster named {self.raster_mode}" |
|
|
|
if camera_type == "orth": |
|
self.set_orth_scale(1.2) |
|
elif camera_type == "perspective": |
|
self.camera_proj_mat = get_perspective_projection_matrix( |
|
49.13, self.default_resolution[1] / self.default_resolution[0], 0.01, 100.0 |
|
) |
|
else: |
|
raise f"No camera type {camera_type}" |
|
|
|
|
|
|
|
def _create_view_state(self, config: RenderConfig) -> ViewState: |
|
"""Create unified view state for rendering pipeline.""" |
|
proj = self.camera_proj_mat |
|
r_mv = get_mv_matrix( |
|
elev=config.elev, |
|
azim=config.azim, |
|
camera_distance=self.camera_distance if config.camera_distance is None else config.camera_distance, |
|
center=config.center, |
|
) |
|
|
|
pos_camera = transform_pos(r_mv, self.vtx_pos, keepdim=True) |
|
pos_clip = transform_pos(proj, pos_camera) |
|
resolution = _ensure_resolution_format(config.resolution, self.default_resolution) |
|
|
|
return ViewState(proj, r_mv, pos_camera, pos_clip, resolution) |
|
|
|
def _compute_face_normals(self, triangles: torch.Tensor) -> torch.Tensor: |
|
"""Compute face normals from triangle vertices.""" |
|
return F.normalize( |
|
torch.cross( |
|
triangles[:, 1, :] - triangles[:, 0, :], |
|
triangles[:, 2, :] - triangles[:, 0, :], |
|
dim=-1, |
|
), |
|
dim=-1, |
|
) |
|
|
|
    def _get_normals_for_shading(self, view_state: ViewState, use_abs_coor: bool = False) -> Tuple[torch.Tensor, torch.Tensor]:

        """Return per-pixel normals and the rasterizer output, based on shader type and coordinate system."""
|
if use_abs_coor: |
|
mesh_triangles = self.vtx_pos[self.pos_idx[:, :3], :] |
|
else: |
|
pos_camera = view_state.pos_camera[:, :3] / view_state.pos_camera[:, 3:4] |
|
mesh_triangles = pos_camera[self.pos_idx[:, :3], :] |
|
|
|
face_normals = self._compute_face_normals(mesh_triangles) |
|
|
|
|
|
rast_out, _ = self.raster_rasterize(view_state.pos_clip, self.pos_idx, resolution=view_state.resolution) |
|
|
|
if self.shader_type == "vertex": |
|
vertex_normals = trimesh.geometry.mean_vertex_normals( |
|
vertex_count=self.vtx_pos.shape[0], |
|
faces=self.pos_idx.cpu(), |
|
face_normals=face_normals.cpu(), |
|
) |
|
vertex_normals = torch.from_numpy(vertex_normals).float().to(self.device).contiguous() |
|
normal, _ = self.raster_interpolate(vertex_normals[None, ...], rast_out, self.pos_idx) |
|
|
|
elif self.shader_type == "face": |
|
tri_ids = rast_out[..., 3] |
|
tri_ids_mask = tri_ids > 0 |
|
tri_ids = ((tri_ids - 1) * tri_ids_mask).long() |
|
normal = torch.zeros(rast_out.shape[0], rast_out.shape[1], rast_out.shape[2], 3).to(rast_out) |
|
normal.reshape(-1, 3)[tri_ids_mask.view(-1)] = face_normals.reshape(-1, 3)[tri_ids[tri_ids_mask].view(-1)] |
|
|
|
return normal, rast_out |
|
|
|
def _unified_render_pipeline(self, config: RenderConfig, mode: RenderMode, **kwargs) -> torch.Tensor: |
|
"""Unified rendering pipeline for all render modes.""" |
|
view_state = self._create_view_state(config) |
|
|
|
if mode == RenderMode.ALPHA: |
|
rast_out, _ = self.raster_rasterize(view_state.pos_clip, self.pos_idx, resolution=view_state.resolution) |
|
return rast_out[..., -1:].long() |
|
|
|
elif mode == RenderMode.UV_POS: |
|
return self.uv_feature_map(self.vtx_pos * 0.5 + 0.5) |
|
|
|
elif mode == RenderMode.NORMAL: |
|
use_abs_coor = kwargs.get('use_abs_coor', False) |
|
normalize_rgb = kwargs.get('normalize_rgb', True) |
|
|
|
normal, rast_out = self._get_normals_for_shading(view_state, use_abs_coor) |
|
visible_mask = torch.clamp(rast_out[..., -1:], 0, 1) |
|
|
|
result = _apply_background_mask(normal, visible_mask, config.bg_color, self.device) |
|
|
|
if normalize_rgb: |
|
result = (result + 1) * 0.5 |
|
|
|
if self.use_antialias: |
|
result = self.raster_antialias(result, rast_out, view_state.pos_clip, self.pos_idx) |
|
|
|
return result[0, ...] |
|
|
|
elif mode == RenderMode.POSITION: |
|
rast_out, _ = self.raster_rasterize(view_state.pos_clip, self.pos_idx, resolution=view_state.resolution) |
|
|
|
tex_position = 0.5 - self.vtx_pos[:, :3] / self.scale_factor |
|
tex_position = tex_position.contiguous() |
|
|
|
position, _ = self.raster_interpolate(tex_position[None, ...], rast_out, self.pos_idx) |
|
visible_mask = torch.clamp(rast_out[..., -1:], 0, 1) |
|
|
|
result = _apply_background_mask(position, visible_mask, config.bg_color, self.device) |
|
|
|
if self.use_antialias: |
|
result = self.raster_antialias(result, rast_out, view_state.pos_clip, self.pos_idx) |
|
|
|
return result[0, ...] |
|
|
|
def set_orth_scale(self, ortho_scale): |
|
""" |
|
Set the orthographic projection scale and update camera projection matrix. |
|
|
|
Args: |
|
ortho_scale: Scale factor for orthographic projection |
|
""" |
|
self.ortho_scale = ortho_scale |
|
self.camera_proj_mat = get_orthographic_projection_matrix( |
|
left=-self.ortho_scale * 0.5, |
|
right=self.ortho_scale * 0.5, |
|
bottom=-self.ortho_scale * 0.5, |
|
top=self.ortho_scale * 0.5, |
|
near=0.1, |
|
far=100, |
|
) |
|
|
|
def raster_rasterize(self, pos, tri, resolution, ranges=None, grad_db=True): |
|
""" |
|
Rasterize triangular mesh using the configured rasterization backend. |
|
|
|
Args: |
|
pos: Vertex positions in clip space |
|
tri: Triangle indices |
|
resolution: Rendering resolution [height, width] |
|
ranges: Optional rendering ranges (unused in current implementation) |
|
grad_db: Whether to compute gradients (unused in current implementation) |
|
|
|
Returns: |
|
Tuple of (rasterization_output, gradient_info) |
|
""" |
|
|
|
if self.raster_mode == "cr": |
|
rast_out_db = None |
|
if pos.dim() == 2: |
|
pos = pos.unsqueeze(0) |
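            # The custom rasterizer expects batched float32 vertex positions and int32 triangle indices.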
|
|
|
|
|
if pos.dtype == torch.float64: |
|
pos = pos.to(torch.float32) |
|
|
|
|
|
if tri.dtype == torch.int64: |
|
tri = tri.to(torch.int32) |
|
|
|
findices, barycentric = self.raster.rasterize(pos, tri, resolution) |
|
rast_out = torch.cat((barycentric, findices.unsqueeze(-1)), dim=-1) |
|
rast_out = rast_out.unsqueeze(0) |
|
else: |
|
raise f"No raster named {self.raster_mode}" |
|
|
|
return rast_out, rast_out_db |
|
|
|
def raster_interpolate(self, uv, rast_out, uv_idx): |
|
""" |
|
Interpolate texture coordinates or vertex attributes across rasterized triangles. |
|
|
|
Args: |
|
uv: UV coordinates or vertex attributes to interpolate |
|
rast_out: Rasterization output containing barycentric coordinates |
|
uv_idx: UV or vertex indices for triangles |
|
|
|
Returns: |
|
Tuple of (interpolated_values, gradient_info) |
|
""" |
|
|
|
if self.raster_mode == "cr": |
|
textd = None |
|
barycentric = rast_out[0, ..., :-1] |
|
findices = rast_out[0, ..., -1] |
|
if uv.dim() == 2: |
|
uv = uv.unsqueeze(0) |
|
textc = self.raster.interpolate(uv, findices, barycentric, uv_idx) |
|
else: |
|
raise f"No raster named {self.raster_mode}" |
|
|
|
return textc, textd |
|
|
|
def raster_antialias(self, color, rast, pos, tri, topology_hash=None, pos_gradient_boost=1.0): |
|
""" |
|
Apply antialiasing to rendered colors (currently returns input unchanged). |
|
|
|
Args: |
|
color: Input color values |
|
rast: Rasterization output |
|
pos: Vertex positions |
|
tri: Triangle indices |
|
topology_hash: Optional topology hash for optimization |
|
pos_gradient_boost: Gradient boosting factor |
|
|
|
Returns: |
|
Antialiased color values |
|
""" |
|
|
|
if self.raster_mode == "cr": |
|
color = color |
|
else: |
|
raise f"No raster named {self.raster_mode}" |
|
|
|
return color |
|
|
|
def set_boundary_unreliable_scale(self, scale): |
|
""" |
|
Set the kernel size for boundary unreliable region detection during texture baking. |
|
|
|
Args: |
|
scale: Scale factor relative to 512 resolution baseline |
|
""" |
|
self.bake_unreliable_kernel_size = int( |
|
(scale / 512) * max(self.default_resolution[0], self.default_resolution[1]) |
|
) |
|
|
|
def load_mesh( |
|
self, |
|
mesh, |
|
scale_factor=1.15, |
|
auto_center=True, |
|
): |
|
""" |
|
Load mesh from file and set up rendering data structures. |
|
|
|
Args: |
|
mesh: Path to mesh file or mesh object |
|
scale_factor: Scaling factor for mesh normalization |
|
auto_center: Whether to automatically center the mesh |
|
""" |
|
vtx_pos, pos_idx, vtx_uv, uv_idx, texture_data = load_mesh(mesh) |
|
self.set_mesh( |
|
vtx_pos, pos_idx, vtx_uv=vtx_uv, uv_idx=uv_idx, scale_factor=scale_factor, auto_center=auto_center |
|
) |
|
if texture_data is not None: |
|
self.set_texture(texture_data) |
|
|
|
def save_mesh(self, mesh_path, downsample=False): |
|
""" |
|
Save current mesh with textures to file. |
|
|
|
Args: |
|
mesh_path: Output file path |
|
downsample: Whether to downsample textures by half |
|
""" |
|
|
|
vtx_pos, pos_idx, vtx_uv, uv_idx = self.get_mesh(normalize=False) |
|
texture_data = self.get_texture() |
|
texture_metallic, texture_roughness = self.get_texture_mr() |
|
texture_normal = self.get_texture_normal() |
|
if downsample: |
|
texture_data = cv2.resize(texture_data, (texture_data.shape[1] // 2, texture_data.shape[0] // 2)) |
|
if texture_metallic is not None: |
|
texture_metallic = cv2.resize( |
|
texture_metallic, (texture_metallic.shape[1] // 2, texture_metallic.shape[0] // 2) |
|
) |
|
if texture_roughness is not None: |
|
texture_roughness = cv2.resize( |
|
texture_roughness, (texture_roughness.shape[1] // 2, texture_roughness.shape[0] // 2) |
|
) |
|
if texture_normal is not None: |
|
texture_normal = cv2.resize( |
|
texture_normal, (texture_normal.shape[1] // 2, texture_normal.shape[0] // 2) |
|
) |
|
|
|
save_mesh( |
|
mesh_path, |
|
vtx_pos, |
|
pos_idx, |
|
vtx_uv, |
|
uv_idx, |
|
texture_data, |
|
metallic=texture_metallic, |
|
roughness=texture_roughness, |
|
normal=texture_normal, |
|
) |
|
|
|
def set_mesh(self, vtx_pos, pos_idx, vtx_uv=None, uv_idx=None, scale_factor=1.15, auto_center=True): |
|
""" |
|
Set mesh geometry data and perform coordinate transformations. |
|
|
|
Args: |
|
vtx_pos: Vertex positions [N, 3] |
|
pos_idx: Triangle vertex indices [F, 3] |
|
vtx_uv: UV coordinates [N, 2], optional |
|
uv_idx: Triangle UV indices [F, 3], optional |
|
scale_factor: Scaling factor for mesh normalization |
|
auto_center: Whether to automatically center and scale the mesh |
|
""" |
|
self.vtx_pos = torch.from_numpy(vtx_pos).to(self.device) |
|
self.pos_idx = torch.from_numpy(pos_idx).to(self.device) |
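        # The rasterizer requires float32 vertex data and int32 index buffers.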
|
|
|
|
|
if self.vtx_pos.dtype == torch.float64: |
|
self.vtx_pos = self.vtx_pos.to(torch.float32) |
|
|
|
|
|
if self.pos_idx.dtype == torch.int64: |
|
self.pos_idx = self.pos_idx.to(torch.int32) |
|
|
|
if (vtx_uv is not None) and (uv_idx is not None): |
|
self.vtx_uv = torch.from_numpy(vtx_uv).to(self.device) |
|
self.uv_idx = torch.from_numpy(uv_idx).to(self.device) |
|
|
|
|
|
if self.vtx_uv.dtype == torch.float64: |
|
self.vtx_uv = self.vtx_uv.to(torch.float32) |
|
|
|
|
|
if self.uv_idx.dtype == torch.int64: |
|
self.uv_idx = self.uv_idx.to(torch.int32) |
|
else: |
|
self.vtx_uv = None |
|
self.uv_idx = None |
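
        # Convert to the renderer's coordinate convention: negate X and Y, swap Y and Z,
        # and flip the V axis of the UV coordinates.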
|
|
|
self.vtx_pos[:, [0, 1]] = -self.vtx_pos[:, [0, 1]] |
|
self.vtx_pos[:, [1, 2]] = self.vtx_pos[:, [2, 1]] |
|
if (vtx_uv is not None) and (uv_idx is not None): |
|
self.vtx_uv[:, 1] = 1.0 - self.vtx_uv[:, 1] |
|
|
|
|
if auto_center: |
|
            max_bb = self.vtx_pos.max(0)[0]

            min_bb = self.vtx_pos.min(0)[0]
|
center = (max_bb + min_bb) / 2 |
|
scale = torch.norm(self.vtx_pos - center, dim=1).max() * 2.0 |
|
self.vtx_pos = (self.vtx_pos - center) * (scale_factor / float(scale)) |
|
self.scale_factor = scale_factor |
|
self.mesh_normalize_scale_factor = scale_factor / float(scale) |
|
self.mesh_normalize_scale_center = center.unsqueeze(0).cpu().numpy() |
|
else: |
|
self.scale_factor = 1.0 |
|
self.mesh_normalize_scale_factor = 1.0 |
|
self.mesh_normalize_scale_center = np.array([[0, 0, 0]]) |
|
|
|
if uv_idx is not None: |
|
self.extract_textiles() |
|
|
|
def _set_texture_unified(self, tex: Union[np.ndarray, torch.Tensor, Image.Image], |
|
texture_type: TextureType, force_set: bool = False): |
|
"""Unified texture setting method.""" |
|
converted_tex = _convert_texture_format(tex, self.texture_size, self.device, force_set) |
|
|
|
if texture_type == TextureType.DIFFUSE: |
|
self.tex = converted_tex |
|
elif texture_type == TextureType.METALLIC_ROUGHNESS: |
|
self.tex_mr = converted_tex |
|
elif texture_type == TextureType.NORMAL: |
|
self.tex_normalMap = converted_tex |
|
|
|
def set_texture(self, tex, force_set=False): |
|
"""Set the main diffuse texture for the mesh.""" |
|
self._set_texture_unified(tex, TextureType.DIFFUSE, force_set) |
|
|
|
def set_texture_mr(self, mr, force_set=False): |
|
"""Set metallic-roughness texture for PBR rendering.""" |
|
self._set_texture_unified(mr, TextureType.METALLIC_ROUGHNESS, force_set) |
|
|
|
def set_texture_normal(self, normal, force_set=False): |
|
"""Set normal map texture for surface detail.""" |
|
self._set_texture_unified(normal, TextureType.NORMAL, force_set) |
|
|
|
def set_default_render_resolution(self, default_resolution): |
|
""" |
|
Set the default resolution for rendering operations. |
|
|
|
Args: |
|
default_resolution: Resolution as int (square) or tuple (height, width) |
|
""" |
|
if isinstance(default_resolution, int): |
|
default_resolution = (default_resolution, default_resolution) |
|
self.default_resolution = default_resolution |
|
|
|
def set_default_texture_resolution(self, texture_size): |
|
""" |
|
Set the default texture resolution for UV mapping operations. |
|
|
|
Args: |
|
texture_size: Texture size as int (square) or tuple (height, width) |
|
""" |
|
if isinstance(texture_size, int): |
|
texture_size = (texture_size, texture_size) |
|
self.texture_size = texture_size |
|
|
|
def get_face_num(self): |
|
""" |
|
Get the number of triangular faces in the mesh. |
|
|
|
Returns: |
|
Number of faces as integer |
|
""" |
|
return self.pos_idx.shape[0] |
|
|
|
def get_vertex_num(self): |
|
""" |
|
Get the number of vertices in the mesh. |
|
|
|
Returns: |
|
Number of vertices as integer |
|
""" |
|
return self.vtx_pos.shape[0] |
|
|
|
def get_face_areas(self, from_one_index=False): |
|
""" |
|
Calculate the area of each triangular face in the mesh. |
|
|
|
Args: |
|
from_one_index: If True, insert zero at beginning for 1-indexed face IDs |
|
|
|
Returns: |
|
Numpy array of face areas |
|
""" |
|
v0 = self.vtx_pos[self.pos_idx[:, 0], :] |
|
v1 = self.vtx_pos[self.pos_idx[:, 1], :] |
|
v2 = self.vtx_pos[self.pos_idx[:, 2], :] |
|
|
|
|
|
edge1 = v1 - v0 |
|
edge2 = v2 - v0 |
|
|
|
|
|
areas = torch.norm(torch.cross(edge1, edge2, dim=-1), dim=-1) * 0.5 |
|
|
|
areas = areas.cpu().numpy() |
|
|
|
if from_one_index: |
|
|
|
areas = np.insert(areas, 0, 0) |
|
|
|
return areas |
|
|
|
def get_mesh(self, normalize=True): |
|
""" |
|
Get mesh geometry with optional coordinate denormalization. |
|
|
|
Args: |
|
normalize: Whether to keep normalized coordinates (True) or restore original scale (False) |
|
|
|
Returns: |
|
Tuple of (vertex_positions, face_indices, uv_coordinates, uv_indices) |
|
""" |
|
vtx_pos = self.vtx_pos.cpu().numpy() |
|
pos_idx = self.pos_idx.cpu().numpy() |
|
vtx_uv = self.vtx_uv.cpu().numpy() |
|
uv_idx = self.uv_idx.cpu().numpy() |
|
|
|
|
|
if not normalize: |
|
vtx_pos = vtx_pos / self.mesh_normalize_scale_factor |
|
vtx_pos = vtx_pos + self.mesh_normalize_scale_center |
|
vtx_pos[:, [1, 2]] = vtx_pos[:, [2, 1]] |
|
vtx_pos[:, [0, 1]] = -vtx_pos[:, [0, 1]] |
|
|
|
vtx_uv[:, 1] = 1.0 - vtx_uv[:, 1] |
|
return vtx_pos, pos_idx, vtx_uv, uv_idx |
|
|
|
def get_texture(self): |
|
""" |
|
Get the current diffuse texture as numpy array. |
|
|
|
Returns: |
|
Texture as numpy array in range [0, 1] |
|
""" |
|
return self.tex.cpu().numpy() |
|
|
|
def get_texture_mr(self): |
|
""" |
|
Get metallic and roughness textures as separate channels. |
|
|
|
Returns: |
|
Tuple of (metallic_texture, roughness_texture) as numpy arrays, or (None, None) if not set |
|
""" |
|
metallic, roughness = None, None |
|
if hasattr(self, "tex_mr"): |
|
mr = self.tex_mr.cpu().numpy() |
|
metallic = np.repeat(mr[:, :, 0:1], repeats=3, axis=2) |
|
roughness = np.repeat(mr[:, :, 1:2], repeats=3, axis=2) |
|
return metallic, roughness |
|
|
|
def get_texture_normal(self): |
|
""" |
|
Get the normal map texture as numpy array. |
|
|
|
Returns: |
|
Normal map as numpy array, or None if not set |
|
""" |
|
normal = None |
|
if hasattr(self, "tex_normalMap"): |
|
normal = self.tex_normalMap.cpu().numpy() |
|
return normal |
|
|
|
def to(self, device): |
|
""" |
|
Move all tensor attributes to the specified device. |
|
|
|
Args: |
|
device: Target device ("cuda", "cpu", etc.) |
|
""" |
|
self.device = device |
|
|
|
for attr_name in dir(self): |
|
attr_value = getattr(self, attr_name) |
|
if isinstance(attr_value, torch.Tensor): |
|
setattr(self, attr_name, attr_value.to(self.device)) |
|
|
|
def color_rgb_to_srgb(self, image): |
|
""" |
|
Convert RGB color values to sRGB color space using gamma correction. |
|
|
|
Args: |
|
image: Input image as PIL Image, numpy array, or torch tensor |
|
|
|
Returns: |
|
sRGB corrected image in same format as input |
|
""" |
|
if isinstance(image, Image.Image): |
|
            image_rgb = torch.tensor(np.array(image) / 255.0).float().to(self.device)
|
elif isinstance(image, np.ndarray): |
|
image_rgb = torch.tensor(image).float() |
|
else: |
|
image_rgb = image.to(self.device) |
|
|
|
image_srgb = torch.where( |
|
image_rgb <= 0.0031308, 12.92 * image_rgb, 1.055 * torch.pow(image_rgb, 1 / 2.4) - 0.055 |
|
) |
|
|
|
if isinstance(image, Image.Image): |
|
image_srgb = Image.fromarray((image_srgb.cpu().numpy() * 255).astype(np.uint8)) |
|
elif isinstance(image, np.ndarray): |
|
image_srgb = image_srgb.cpu().numpy() |
|
else: |
|
image_srgb = image_srgb.to(image.device) |
|
|
|
return image_srgb |
|
|
|
def extract_textiles(self): |
|
""" |
|
Extract texture-space position and normal information by rasterizing |
|
the mesh in UV coordinate space. Creates texture-space geometry mappings. |
|
""" |
|
|
|
vnum = self.vtx_uv.shape[0] |
|
vtx_uv = torch.cat( |
|
(self.vtx_uv, torch.zeros_like(self.vtx_uv[:, 0:1]), torch.ones_like(self.vtx_uv[:, 0:1])), axis=1 |
|
) |
|
vtx_uv = vtx_uv.view(1, vnum, 4) * 2 - 1 |
|
|
|
rast_out, rast_out_db = self.raster_rasterize(vtx_uv, self.uv_idx, resolution=self.texture_size) |
|
position, _ = self.raster_interpolate(self.vtx_pos, rast_out, self.pos_idx) |
|
|
|
v0 = self.vtx_pos[self.pos_idx[:, 0], :] |
|
v1 = self.vtx_pos[self.pos_idx[:, 1], :] |
|
v2 = self.vtx_pos[self.pos_idx[:, 2], :] |
|
face_normals = F.normalize(torch.cross(v1 - v0, v2 - v0, dim=-1), dim=-1) |
|
vertex_normals = trimesh.geometry.mean_vertex_normals( |
|
vertex_count=self.vtx_pos.shape[0], |
|
faces=self.pos_idx.cpu(), |
|
face_normals=face_normals.cpu(), |
|
) |
|
vertex_normals = torch.from_numpy(vertex_normals).to(self.vtx_pos).contiguous() |
|
position_normal, _ = self.raster_interpolate(vertex_normals[None, ...], rast_out, self.pos_idx) |
|
visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)[0, ..., 0] |
|
position = position[0] |
|
position_normal = position_normal[0] |
|
tri_ids = rast_out[0, ..., 3] |
|
tri_ids_mask = tri_ids > 0 |
|
tri_ids = ((tri_ids - 1) * tri_ids_mask).long() |
|
position_normal.reshape(-1, 3)[tri_ids_mask.view(-1)] = face_normals.reshape(-1, 3)[ |
|
tri_ids[tri_ids_mask].view(-1) |
|
] |
|
|
|
row = torch.arange(position.shape[0]).to(visible_mask.device) |
|
col = torch.arange(position.shape[1]).to(visible_mask.device) |
|
grid_i, grid_j = torch.meshgrid(row, col, indexing="ij") |
|
|
|
mask = visible_mask.reshape(-1) > 0 |
|
position = position.reshape(-1, 3)[mask] |
|
position_normal = position_normal.reshape(-1, 3)[mask] |
|
position = torch.cat((position, torch.ones_like(position[:, :1])), axis=-1) |
|
grid = torch.stack((grid_i, grid_j), -1).reshape(-1, 2)[mask] |
|
|
|
texture_indices = ( |
|
torch.ones(self.texture_size[0], self.texture_size[1], device=self.device, dtype=torch.long) * -1 |
|
) |
|
texture_indices.view(-1)[grid[:, 0] * self.texture_size[1] + grid[:, 1]] = torch.arange(grid.shape[0]).to( |
|
device=self.device, dtype=torch.long |
|
) |
|
|
|
self.tex_position = position |
|
self.tex_normal = position_normal |
|
self.tex_grid = grid |
|
self.texture_indices = texture_indices |
|
|
|
def render_normal(self, elev, azim, camera_distance=None, center=None, resolution=None, |
|
bg_color=[1, 1, 1], use_abs_coor=False, normalize_rgb=True, return_type="th"): |
|
"""Render surface normals of the mesh from specified viewpoint.""" |
|
config = RenderConfig(elev, azim, camera_distance, center, resolution, bg_color, return_type) |
|
image = self._unified_render_pipeline(config, RenderMode.NORMAL, |
|
use_abs_coor=use_abs_coor, normalize_rgb=normalize_rgb) |
|
return _format_output(image, return_type) |
|
|
|
def convert_normal_map(self, image): |
|
""" |
|
Convert normal map from standard format to renderer's coordinate system. |
|
Applies coordinate transformations for proper normal interpretation. |
|
|
|
Args: |
|
image: Input normal map as PIL Image or numpy array |
|
|
|
Returns: |
|
Converted normal map as PIL Image |
|
""" |
|
|
|
if isinstance(image, Image.Image): |
|
image = np.array(image) |
|
mask = (image == [255, 255, 255]).all(axis=-1) |
|
|
|
image = (image / 255.0) * 2.0 - 1.0 |
|
|
|
image[..., [1]] = -image[..., [1]] |
|
image[..., [1, 2]] = image[..., [2, 1]] |
|
image[..., [0]] = -image[..., [0]] |
|
|
|
image = (image + 1.0) * 0.5 |
|
|
|
image = (image * 255).astype(np.uint8) |
|
image[mask] = [127, 127, 255] |
|
|
|
return Image.fromarray(image) |
|
|
|
def render_position(self, elev, azim, camera_distance=None, center=None, resolution=None, |
|
bg_color=[1, 1, 1], return_type="th"): |
|
"""Render world-space positions of visible mesh surface points.""" |
|
config = RenderConfig(elev, azim, camera_distance, center, resolution, bg_color, return_type) |
|
image = self._unified_render_pipeline(config, RenderMode.POSITION) |
|
|
|
if return_type == ReturnType.PIL.value: |
|
image = image.squeeze(-1).cpu().numpy() * 255 |
|
return Image.fromarray(image.astype(np.uint8)) |
|
return _format_output(image, return_type) |
|
|
|
def render_uvpos(self, return_type="th"): |
|
"""Render vertex positions mapped to UV texture space.""" |
|
config = RenderConfig(return_type=return_type) |
|
image = self._unified_render_pipeline(config, RenderMode.UV_POS) |
|
return _format_output(image, return_type) |
|
|
|
def render_alpha(self, elev, azim, camera_distance=None, center=None, resolution=None, return_type="th"): |
|
"""Render binary alpha mask indicating visible mesh regions.""" |
|
config = RenderConfig(elev, azim, camera_distance, center, resolution, return_type=return_type) |
|
image = self._unified_render_pipeline(config, RenderMode.ALPHA) |
|
|
|
if return_type == ReturnType.PIL.value: |
|
raise Exception("PIL format not supported for alpha rendering") |
|
return _format_output(image, return_type) |
|
|
|
def uv_feature_map(self, vert_feat, bg=None): |
|
""" |
|
Map per-vertex features to UV texture space using mesh topology. |
|
|
|
Args: |
|
vert_feat: Per-vertex feature tensor [N, C] |
|
bg: Background value for unmapped regions (optional) |
|
|
|
Returns: |
|
Feature map in UV texture space [H, W, C] |
|
""" |
|
vtx_uv = self.vtx_uv * 2 - 1.0 |
|
vtx_uv = torch.cat([vtx_uv, torch.zeros_like(self.vtx_uv)], dim=1).unsqueeze(0) |
|
vtx_uv[..., -1] = 1 |
|
uv_idx = self.uv_idx |
|
rast_out, rast_out_db = self.raster_rasterize(vtx_uv, uv_idx, resolution=self.texture_size) |
|
feat_map, _ = self.raster_interpolate(vert_feat[None, ...], rast_out, uv_idx) |
|
feat_map = feat_map[0, ...] |
|
if bg is not None: |
|
visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)[0, ...] |
|
feat_map[visible_mask == 0] = bg |
|
return feat_map |
|
|
|
def render_sketch_from_geometry(self, normal_image, depth_image): |
|
""" |
|
Generate sketch-style edge image from rendered normal and depth maps. |
|
|
|
Args: |
|
normal_image: Rendered normal map tensor |
|
depth_image: Rendered depth map tensor |
|
|
|
Returns: |
|
Binary edge sketch image as tensor |
|
""" |
|
normal_image_np = normal_image.cpu().numpy() |
|
depth_image_np = depth_image.cpu().numpy() |
|
|
|
normal_image_np = (normal_image_np * 255).astype(np.uint8) |
|
depth_image_np = (depth_image_np * 255).astype(np.uint8) |
|
normal_image_np = cv2.cvtColor(normal_image_np, cv2.COLOR_RGB2GRAY) |
|
|
|
normal_edges = cv2.Canny(normal_image_np, 80, 150) |
|
depth_edges = cv2.Canny(depth_image_np, 30, 80) |
|
|
|
combined_edges = np.maximum(normal_edges, depth_edges) |
|
|
|
sketch_image = torch.from_numpy(combined_edges).to(normal_image.device).float() / 255.0 |
|
sketch_image = sketch_image.unsqueeze(-1) |
|
|
|
return sketch_image |
|
|
|
def render_sketch_from_depth(self, depth_image): |
|
""" |
|
Generate sketch-style edge image from depth map using edge detection. |
|
|
|
Args: |
|
depth_image: Input depth map tensor |
|
|
|
Returns: |
|
Binary edge sketch image as tensor |
|
""" |
|
depth_image_np = depth_image.cpu().numpy() |
|
depth_image_np = (depth_image_np * 255).astype(np.uint8) |
|
depth_edges = cv2.Canny(depth_image_np, 30, 80) |
|
combined_edges = depth_edges |
|
sketch_image = torch.from_numpy(combined_edges).to(depth_image.device).float() / 255.0 |
|
sketch_image = sketch_image.unsqueeze(-1) |
|
return sketch_image |
|
|
|
def back_project(self, image, elev, azim, camera_distance=None, center=None, method=None): |
|
""" |
|
Back-project a rendered image onto the mesh's UV texture space. |
|
Handles visibility, viewing angle, and boundary detection for texture baking. |
|
|
|
Args: |
|
image: Input image to back-project (PIL Image, numpy array, or tensor) |
|
elev: Camera elevation angle in degrees used for rendering |
|
azim: Camera azimuth angle in degrees used for rendering |
|
camera_distance: Camera distance (uses default if None) |
|
center: Camera focus center (uses origin if None) |
|
method: Back-projection method ("linear", "mip-map", "back_sample", uses default if None) |
|
|
|
Returns: |
|
Tuple of (texture, cosine_map, boundary_map) tensors in UV space |
|
""" |
|
|
|
if isinstance(image, Image.Image): |
|
image = torch.tensor(np.array(image) / 255.0) |
|
elif isinstance(image, np.ndarray): |
|
image = torch.tensor(image) |
|
if image.dim() == 2: |
|
image = image.unsqueeze(-1) |
|
image = image.float().to(self.device) |
|
resolution = image.shape[:2] |
|
channel = image.shape[-1] |
|
texture = torch.zeros(self.texture_size + (channel,)).to(self.device) |
|
cos_map = torch.zeros(self.texture_size + (1,)).to(self.device) |
|
|
|
proj = self.camera_proj_mat |
|
r_mv = get_mv_matrix( |
|
elev=elev, |
|
azim=azim, |
|
camera_distance=self.camera_distance if camera_distance is None else camera_distance, |
|
center=center, |
|
) |
|
pos_camera = transform_pos(r_mv, self.vtx_pos, keepdim=True) |
|
pos_clip = transform_pos(proj, pos_camera) |
|
pos_camera = pos_camera[:, :3] / pos_camera[:, 3:4] |
|
|
|
v0 = pos_camera[self.pos_idx[:, 0], :] |
|
v1 = pos_camera[self.pos_idx[:, 1], :] |
|
v2 = pos_camera[self.pos_idx[:, 2], :] |
|
face_normals = F.normalize(torch.cross(v1 - v0, v2 - v0, dim=-1), dim=-1) |
|
|
|
tex_depth = pos_camera[:, 2].reshape(1, -1, 1).contiguous() |
|
rast_out, rast_out_db = self.raster_rasterize(pos_clip, self.pos_idx, resolution=resolution) |
|
visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)[0, ...] |
|
|
|
if self.shader_type == "vertex": |
|
vertex_normals = trimesh.geometry.mean_vertex_normals( |
|
vertex_count=self.vtx_pos.shape[0], |
|
faces=self.pos_idx.cpu(), |
|
face_normals=face_normals.cpu(), |
|
) |
|
vertex_normals = torch.from_numpy(vertex_normals).float().to(self.device).contiguous() |
|
normal, _ = self.raster_interpolate(vertex_normals[None, ...], rast_out, self.pos_idx) |
|
elif self.shader_type == "face": |
|
tri_ids = rast_out[..., 3] |
|
tri_ids_mask = tri_ids > 0 |
|
tri_ids = ((tri_ids - 1) * tri_ids_mask).long() |
|
normal = torch.zeros(rast_out.shape[0], rast_out.shape[1], rast_out.shape[2], 3).to(rast_out) |
|
normal.reshape(-1, 3)[tri_ids_mask.view(-1)] = face_normals.reshape(-1, 3)[tri_ids[tri_ids_mask].view(-1)] |
|
|
|
normal = normal[0, ...] |
|
uv, _ = self.raster_interpolate(self.vtx_uv[None, ...], rast_out, self.uv_idx) |
|
depth, _ = self.raster_interpolate(tex_depth, rast_out, self.pos_idx) |
|
depth = depth[0, ...] |
|
|
|
depth_max, depth_min = depth[visible_mask > 0].max(), depth[visible_mask > 0].min() |
|
depth_normalized = (depth - depth_min) / (depth_max - depth_min) |
|
depth_image = depth_normalized * visible_mask |
|
|
|
sketch_image = self.render_sketch_from_depth(depth_image) |
|
|
|
lookat = torch.tensor([[0, 0, -1]], device=self.device) |
|
cos_image = torch.nn.functional.cosine_similarity(lookat, normal.view(-1, 3)) |
|
cos_image = cos_image.view(normal.shape[0], normal.shape[1], 1) |
|
|
|
cos_thres = np.cos(self.bake_angle_thres / 180 * np.pi) |
|
cos_image[cos_image < cos_thres] = 0 |
|
|
|
|
|
if self.bake_unreliable_kernel_size > 0: |
|
kernel_size = self.bake_unreliable_kernel_size * 2 + 1 |
|
kernel = torch.ones((1, 1, kernel_size, kernel_size), dtype=torch.float32).to(sketch_image.device) |
|
|
|
visible_mask = visible_mask.permute(2, 0, 1).unsqueeze(0).float() |
|
visible_mask = F.conv2d(1.0 - visible_mask, kernel, padding=kernel_size // 2) |
|
visible_mask = 1.0 - (visible_mask > 0).float() |
|
visible_mask = visible_mask.squeeze(0).permute(1, 2, 0) |
|
|
|
sketch_image = sketch_image.permute(2, 0, 1).unsqueeze(0) |
|
sketch_image = F.conv2d(sketch_image, kernel, padding=kernel_size // 2) |
|
sketch_image = (sketch_image > 0).float() |
|
sketch_image = sketch_image.squeeze(0).permute(1, 2, 0) |
|
visible_mask = visible_mask * (sketch_image < 0.5) |
|
|
|
cos_image[visible_mask == 0] = 0 |
|
|
|
method = self.bake_mode if method is None else method |
|
|
|
if method == "linear": |
|
proj_mask = (visible_mask != 0).view(-1) |
|
uv = uv.squeeze(0).contiguous().view(-1, 2)[proj_mask] |
|
image = image.squeeze(0).contiguous().view(-1, channel)[proj_mask] |
|
cos_image = cos_image.contiguous().view(-1, 1)[proj_mask] |
|
sketch_image = sketch_image.contiguous().view(-1, 1)[proj_mask] |
|
|
|
texture = linear_grid_put_2d(self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], image) |
|
cos_map = linear_grid_put_2d(self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], cos_image) |
|
boundary_map = linear_grid_put_2d(self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], sketch_image) |
|
elif method == "mip-map": |
|
proj_mask = (visible_mask != 0).view(-1) |
|
uv = uv.squeeze(0).contiguous().view(-1, 2)[proj_mask] |
|
image = image.squeeze(0).contiguous().view(-1, channel)[proj_mask] |
|
cos_image = cos_image.contiguous().view(-1, 1)[proj_mask] |
|
|
|
texture = mipmap_linear_grid_put_2d( |
|
self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], image, min_resolution=128 |
|
) |
|
            cos_map = mipmap_linear_grid_put_2d(

                self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], cos_image, min_resolution=256

            )
            # The mip-map path does not bake a boundary/sketch map; return an empty boundary map so
            # this branch has the same return signature as the other bake modes.
            boundary_map = torch.zeros_like(cos_map)
|
|
|
            # Optional refinement; only applies when a vertex map has been attached to the renderer.
            if getattr(self, "vtx_map", None) is not None:
|
vertex_normals = vertex_normals[self.vtx_map, :] |
|
normal_map = self.uv_feature_map(vertex_normals) |
|
cos_map_uv = torch.nn.functional.cosine_similarity(lookat, normal_map.view(-1, 3)) |
|
cos_map_uv = cos_map_uv.view(1, 1, normal_map.shape[0], normal_map.shape[1]) |
|
cos_map_uv = torch.nn.functional.max_pool2d(cos_map_uv, kernel_size=3, stride=1, padding=1) |
|
cos_map_uv = cos_map_uv.reshape(self.texture_size[0], self.texture_size[1], 1) |
|
cos_map_uv[cos_map_uv < cos_thres] = 0 |
|
|
|
cos_map[cos_map_uv < cos_thres] = 0 |
|
elif method == "back_sample": |
|
|
|
img_proj = torch.from_numpy( |
|
np.array(((proj[0, 0], 0, 0, 0), (0, proj[1, 1], 0, 0), (0, 0, 1, 0), (0, 0, 0, 1))) |
|
).to(self.tex_position) |
|
w2c = torch.from_numpy(r_mv).to(self.tex_position) |
|
v_proj = self.tex_position @ w2c.T @ img_proj |
|
inner_mask = (v_proj[:, 0] <= 1.0) & (v_proj[:, 0] >= -1.0) & (v_proj[:, 1] <= 1.0) & (v_proj[:, 1] >= -1.0) |
|
inner_valid_idx = torch.where(inner_mask)[0].long() |
|
img_x = torch.clamp( |
|
((v_proj[:, 0].clamp(-1, 1) * 0.5 + 0.5) * (resolution[0])).long(), 0, resolution[0] - 1 |
|
) |
|
img_y = torch.clamp( |
|
((v_proj[:, 1].clamp(-1, 1) * 0.5 + 0.5) * (resolution[1])).long(), 0, resolution[1] - 1 |
|
) |
|
|
|
indices = img_y * resolution[0] + img_x |
|
sampled_z = depth.reshape(-1)[indices] |
|
sampled_m = visible_mask.reshape(-1)[indices] |
|
v_z = v_proj[:, 2] |
|
|
|
sampled_w = cos_image.reshape(-1)[indices] |
|
depth_thres = 3e-3 |
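
            # A texel is trusted only if its projected depth matches the rendered depth buffer
            # (i.e. it is actually visible, not occluded, in this view).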
|
|
|
|
|
valid_idx = torch.where((torch.abs(v_z - sampled_z) < depth_thres) & (sampled_m * sampled_w > 0))[0] |
|
|
|
intersection_mask = torch.isin(valid_idx, inner_valid_idx) |
|
valid_idx = valid_idx[intersection_mask].to(inner_valid_idx) |
|
|
|
indices = indices[valid_idx] |
|
sampled_b = sketch_image.reshape(-1)[indices] |
|
sampled_w = sampled_w[valid_idx] |
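
            # Bilinearly interpolate the image at each texel's sub-pixel projected location.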
|
|
|
|
|
wx = ((v_proj[:, 0] * 0.5 + 0.5) * resolution[0] - img_x)[valid_idx].reshape(-1, 1) |
|
wy = ((v_proj[:, 1] * 0.5 + 0.5) * resolution[1] - img_y)[valid_idx].reshape(-1, 1) |
|
img_x = img_x[valid_idx] |
|
img_y = img_y[valid_idx] |
|
img_x_r = torch.clamp(img_x + 1, 0, resolution[0] - 1) |
|
img_y_r = torch.clamp(img_y + 1, 0, resolution[1] - 1) |
|
indices_lr = img_y * resolution[0] + img_x_r |
|
indices_rl = img_y_r * resolution[0] + img_x |
|
indices_rr = img_y_r * resolution[0] + img_x_r |
|
rgb = image.reshape(-1, channel) |
|
sampled_rgb = (rgb[indices] * (1 - wx) + rgb[indices_lr] * wx) * (1 - wy) + ( |
|
rgb[indices_rl] * (1 - wx) + rgb[indices_rr] * wx |
|
) * wy |
|
|
|
|
|
texture = torch.zeros(self.texture_size[0], self.texture_size[1], channel, device=self.device).reshape( |
|
-1, channel |
|
) |
|
cos_map = torch.zeros(self.texture_size[0], self.texture_size[1], 1, device=self.device).reshape(-1) |
|
boundary_map = torch.zeros(self.texture_size[0], self.texture_size[1], 1, device=self.device).reshape(-1) |
|
|
|
valid_tex_indices = self.tex_grid[valid_idx, 0] * self.texture_size[1] + self.tex_grid[valid_idx, 1] |
|
texture[valid_tex_indices, :] = sampled_rgb |
|
cos_map[valid_tex_indices] = sampled_w |
|
boundary_map[valid_tex_indices] = sampled_b |
|
|
|
texture = texture.view(self.texture_size[0], self.texture_size[1], channel) |
|
cos_map = cos_map.view(self.texture_size[0], self.texture_size[1], 1) |
|
|
|
|
|
else: |
|
raise f"No bake mode {method}" |
|
return texture, cos_map, boundary_map |
|
|
|
def bake_texture(self, colors, elevs, azims, camera_distance=None, center=None, exp=6, weights=None): |
|
""" |
|
Bake multiple view images into a single UV texture using weighted blending. |
|
|
|
Args: |
|
colors: List of input images (tensors, numpy arrays, or PIL Images) |
|
elevs: List of elevation angles for each view |
|
azims: List of azimuth angles for each view |
|
camera_distance: Camera distance (uses default if None) |
|
center: Camera focus center (uses origin if None) |
|
exp: Exponent for cosine weighting (higher values favor front-facing views) |
|
weights: Optional per-view weights (defaults to 1.0 for all views) |
|
|
|
Returns: |
|
Tuple of (merged_texture, trust_map) tensors in UV space |
|
""" |
|
if isinstance(colors, torch.Tensor): |
|
colors = [colors[i, ...].float().permute(1, 2, 0) for i in range(colors.shape[0])] |
|
else: |
|
for i in range(len(colors)): |
|
if isinstance(colors[i], Image.Image): |
|
colors[i] = torch.tensor(np.array(colors[i]) / 255.0, device=self.device).float() |
|
if weights is None: |
|
weights = [1.0 for _ in range(len(colors))] |
|
textures = [] |
|
cos_maps = [] |
|
for color, elev, azim, weight in zip(colors, elevs, azims, weights): |
|
texture, cos_map, _ = self.back_project(color, elev, azim, camera_distance, center) |
|
cos_map = weight * (cos_map**exp) |
|
textures.append(texture) |
|
cos_maps.append(cos_map) |
|
|
|
texture_merge, trust_map_merge = self.fast_bake_texture(textures, cos_maps) |
|
return texture_merge, trust_map_merge |
|
|
|
@torch.no_grad() |
|
def fast_bake_texture(self, textures, cos_maps): |
|
""" |
|
Efficiently merge multiple textures using cosine-weighted blending. |
|
Optimizes by skipping views that don't contribute new information. |
|
|
|
Args: |
|
textures: List of texture tensors to merge |
|
cos_maps: List of corresponding cosine weight maps |
|
|
|
Returns: |
|
Tuple of (merged_texture, valid_mask) tensors |
|
""" |
|
|
|
channel = textures[0].shape[-1] |
|
texture_merge = torch.zeros(self.texture_size + (channel,)).to(self.device) |
|
trust_map_merge = torch.zeros(self.texture_size + (1,)).to(self.device) |
|
for texture, cos_map in zip(textures, cos_maps): |
|
view_sum = (cos_map > 0).sum() |
|
painted_sum = ((cos_map > 0) * (trust_map_merge > 0)).sum() |
|
            if view_sum == 0 or painted_sum / view_sum > 0.99:
|
continue |
|
texture_merge += texture * cos_map |
|
trust_map_merge += cos_map |
|
texture_merge = texture_merge / torch.clamp(trust_map_merge, min=1e-8) |
|
|
|
return texture_merge, trust_map_merge > 1e-8 |
|
|
|
@torch.no_grad() |
|
def uv_inpaint(self, texture, mask, vertex_inpaint=True, method="NS", return_float=False): |
|
""" |
|
Inpaint missing regions in UV texture using mesh-aware and traditional methods. |
|
|
|
Args: |
|
texture: Input texture as tensor, numpy array, or PIL Image |
|
mask: Binary mask indicating regions to inpaint (1 = keep, 0 = inpaint) |
|
vertex_inpaint: Whether to use mesh vertex connectivity for inpainting |
|
method: Inpainting method ("NS" for Navier-Stokes) |
|
return_float: Whether to return float values (False returns uint8) |
|
|
|
Returns: |
|
Inpainted texture as numpy array |
|
""" |
|
|
|
if isinstance(texture, torch.Tensor): |
|
texture_np = texture.cpu().numpy() |
|
elif isinstance(texture, np.ndarray): |
|
texture_np = texture |
|
elif isinstance(texture, Image.Image): |
|
texture_np = np.array(texture) / 255.0 |
|
|
|
if isinstance(mask, torch.Tensor): |
|
mask = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8) |
|
|
|
if vertex_inpaint: |
|
vtx_pos, pos_idx, vtx_uv, uv_idx = self.get_mesh() |
|
texture_np, mask = meshVerticeInpaint(texture_np, mask, vtx_pos, vtx_uv, pos_idx, uv_idx) |
|
|
|
if method == "NS": |
|
texture_np = cv2.inpaint((texture_np * 255).astype(np.uint8), 255 - mask, 3, cv2.INPAINT_NS) |
|
            assert not return_float, "NS inpainting returns a uint8 texture; return_float is not supported"
|
|
|
return texture_np |
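

# Example usage (illustrative sketch). Assumes a CUDA device, the custom_rasterizer extension,
# and bpy-based mesh IO are available; the file names and view angles below are hypothetical.
#
#   renderer = MeshRender(camera_type="orth", default_resolution=1024, texture_size=1024)
#   renderer.load_mesh("mesh.glb")
#   normal_map = renderer.render_normal(elev=20, azim=30, return_type="pl")
#   views = [Image.open(f"view_{i}.png") for i in range(4)]   # hypothetical multi-view images
#   texture, trust = renderer.bake_texture(views, elevs=[0, 0, 0, 0], azims=[0, 90, 180, 270])
#   texture_np = renderer.uv_inpaint(texture, trust.float())
#   renderer.set_texture(texture_np / 255.0)
#   renderer.save_mesh("textured_mesh.glb")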
|
|