# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np
def filter_npz_by_filenames(npz_path, txt_path, output_path):
    """Drop blacklisted entries from an NPZ dataset and save the remainder.

    Reads the ``arr_0`` array from ``npz_path`` and removes every item whose
    ``'uuid'`` value is listed in ``txt_path``. The identifier of each removed
    item and the summary counts are printed, and the kept items are written
    to ``output_path`` as a compressed NPZ archive under the key ``arr_0``.

    Args:
        npz_path: Path of the source ``.npz`` archive; must contain ``arr_0``,
            whose items are indexable by ``'uuid'``.
        txt_path: Path of a text file with one identifier per line.
            Surrounding whitespace is stripped and blank lines are ignored.
        output_path: Destination passed to ``np.savez_compressed``.
    """
    # SECURITY: allow_pickle=True unpickles arbitrary Python objects, so this
    # must only be used on archives from a trusted source.
    # The context manager closes the underlying zip file handle once the
    # array has been read into memory.
    with np.load(npz_path, allow_pickle=True) as archive:
        data_list = archive['arr_0']

    with open(txt_path, 'r', encoding='utf-8') as f:
        exclude_filenames = {line.strip() for line in f if line.strip()}

    # Filter the data list
    filtered_data = []
    excluded_count = 0
    for item in data_list:
        filename = item['uuid']
        if filename in exclude_filenames:
            excluded_count += 1
            print(filename)
        else:
            filtered_data.append(item)

    # Save the filtered data
    kept_count = len(filtered_data)
    total_count = len(data_list)
    print(f"Original items: {total_count}")
    print(f"Kept items: {kept_count}")
    print(f"Removed items: {excluded_count}")
    print("Saving filtered data")
    # savez_compressed treats unknown keyword arguments as named arrays, so
    # passing allow_pickle=True could store a stray 'allow_pickle' array in
    # the archive. Object arrays are pickled by default, so no flag is needed.
    np.savez_compressed(output_path, filtered_data)
def main():
    """Filter the train and test archives against the shared issue list."""
    issue_list = "data_utils/issue_data_list.txt"  # Change this to your text file path

    # (source NPZ, destination NPZ) pairs, processed in order.
    # Change these to your NPZ file paths.
    jobs = (
        ("articulation_xlv2_train.npz", "articulation_xlv2_train_update.npz"),
        ("articulation_xlv2_test.npz", "articulation_xlv2_test_update.npz"),
    )
    for source_npz, destination_npz in jobs:
        filter_npz_by_filenames(source_npz, issue_list, destination_npz)
# Run the filtering only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()