Magic-plus-1 / data_utils /update_npz_rm_issue_data.py
HF User
🚀 Fresh deploy of Magic Articulate Enhanced MVP
e7b9fb6
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
def filter_npz_by_filenames(npz_path, txt_path, output_path):
    """Write a copy of an NPZ dataset without the items listed in a text file.

    The dataset is read from the ``arr_0`` entry of the input archive and
    walked item by item; every item must support ``item['uuid']``. Items
    whose uuid appears in the text file are printed and dropped. The
    remaining items are saved, in their original order, as the single
    ``arr_0`` entry of a compressed NPZ archive.

    Args:
        npz_path: Path of the source ``.npz`` archive.
        txt_path: Path of a text file with one uuid to remove per line.
            Surrounding whitespace and blank lines are ignored.
        output_path: Destination handed to ``np.savez_compressed``.
    """
    # NOTE(security): allow_pickle=True unpickles arbitrary objects, so this
    # must only be run on archives from a trusted source.
    # The context manager closes the underlying zip file once the array has
    # been read into memory.
    with np.load(npz_path, allow_pickle=True) as archive:
        data_list = archive['arr_0']

    with open(txt_path, 'r', encoding='utf-8') as f:
        exclude_filenames = {line.strip() for line in f if line.strip()}

    # Filter the data list. A boolean mask (rather than a rebuilt Python
    # list) keeps the input dtype, so an all-excluded result is still an
    # empty array of the same kind instead of an empty float64 array.
    keep_mask = np.ones(len(data_list), dtype=bool)
    for index, item in enumerate(data_list):
        filename = item['uuid']
        if filename in exclude_filenames:
            keep_mask[index] = False
            print(filename)
    filtered_data = data_list[keep_mask]

    total_count = len(data_list)
    kept_count = len(filtered_data)
    excluded_count = total_count - kept_count
    print(f"Original items: {total_count}")
    print(f"Kept items: {kept_count}")
    print(f"Removed items: {excluded_count}")

    # Save the filtered data. No allow_pickle keyword here: on NumPy
    # releases where savez_compressed does not define it, the keyword would
    # be stored as an extra array named "allow_pickle".
    print("Saving filtered data")
    np.savez_compressed(output_path, filtered_data)
def main():
    """Filter the train and test NPZ archives against the issue list."""
    # Text file listing the uuids to drop; change this to your text file path.
    issue_list = "data_utils/issue_data_list.txt"

    # (input NPZ, output NPZ) pairs; change these to your NPZ file paths.
    jobs = (
        ("articulation_xlv2_train.npz", "articulation_xlv2_train_update.npz"),
        ("articulation_xlv2_test.npz", "articulation_xlv2_test_update.npz"),
    )

    # Train is processed before test, one archive at a time.
    for source_npz, cleaned_npz in jobs:
        filter_npz_by_filenames(source_npz, issue_list, cleaned_npz)


if __name__ == "__main__":
    main()