Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,652 Bytes
e7b9fb6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import scipy.sparse as sp
import os
def check_and_clean_skinning_weights(file_path, output_path, tolerance=0.1):
    """
    Check that every row of each item's skinning-weight matrix sums to 1.

    Items whose row sums deviate from 1.0 by more than ``tolerance`` are
    dropped; the surviving items are written to ``output_path`` as a
    compressed NPZ under the default key 'arr_0' (same layout as the input).

    Args:
        file_path: Path to the input NPZ file; items are an object array
            of dicts stored under 'arr_0'.
        output_path: Path for the cleaned NPZ file.
        tolerance: Absolute tolerance for the row-sum == 1 comparison.

    Returns:
        tuple: (valid_data_list, invalid_items) where invalid_items is a
        list of (uuid, details-string) pairs describing removed entries.
    """
    data_list = np.load(file_path, allow_pickle=True)['arr_0']
    invalid_indices = []
    valid_data_list = []
    for data in data_list:
        # Rebuild the sparse skinning-weight matrix from its COO components.
        skinning_sparse = sp.coo_matrix(
            (data['skinning_weights_value'],
             (data['skinning_weights_row'], data['skinning_weights_col'])),
            shape=data['skinning_weights_shape']
        )
        # CSR gives an efficient per-row sum; flatten the (n, 1) matrix result.
        row_sums = np.asarray(skinning_sparse.tocsr().sum(axis=1)).flatten()
        invalid_rows = np.where(np.abs(row_sums - 1.0) > tolerance)[0]
        if len(invalid_rows) > 0:
            min_sum = np.min(row_sums)
            max_sum = np.max(row_sums)
            invalid_indices.append((data['uuid'], f"{len(invalid_rows)} rows, range: [{min_sum:.6f}, {max_sum:.6f}]"))
        else:
            valid_data_list.append(data)
    # Save the cleaned data.
    # BUG FIX: np.savez_compressed has no `allow_pickle` parameter -- passing
    # it as a keyword stored a spurious array named 'allow_pickle' in the
    # archive. Object arrays are pickled by default on save, so the kwarg
    # was never needed.
    if valid_data_list:
        np.savez_compressed(output_path, np.array(valid_data_list, dtype=object))
        print(f"Saved {len(valid_data_list)} valid items to {output_path}")
    return valid_data_list, invalid_indices
def main():
    """Clean the training NPZ and log which items were removed and why."""
    # File paths (swap in the commented *_test variants to process the test split).
    file_path = "articulation_xlv2_train.npz"  # "articulation_xlv2_test.npz"
    log_file = "invalid_skinning_weights_intrain.txt"  # "invalid_skinning_weights_intest.txt"
    output_path = "articulation_xlv2_train_updated.npz"  # "articulation_xlv2_test_updated.npz"
    # Clean the data
    valid_data, invalid_indices = check_and_clean_skinning_weights(file_path, output_path)
    # Log the results. The total is valid + removed; this avoids reloading
    # and re-unpickling the (potentially large) source NPZ a second time
    # just to count its items.
    with open(log_file, "w") as f:
        f.write(f"Original file: {file_path}\n")
        f.write(f"Cleaned file: {output_path}\n")
        f.write(f"Total items: {len(valid_data) + len(invalid_indices)}\n")
        f.write(f"Valid items: {len(valid_data)}\n")
        f.write(f"Removed items: {len(invalid_indices)}\n\n")
        if invalid_indices:
            f.write("Details of removed items:\n")
            for idx, details in invalid_indices:
                f.write(f"  Index {idx}: {details}\n")
    print(f"Cleaning complete. Results written to {log_file}")
# Script entry point. FIX: removed the stray trailing "|" (a copy/paste
# artifact) that made `main() |` a syntax error.
if __name__ == "__main__":
    main()