Spaces:

Nomad2082
/

Magic-plus-1

Running on Zero

Magic-plus-1 / data_utils /update_npz_rm_issue_data.py

HF User

🚀 Fresh deploy of Magic Articulate Enhanced MVP

e7b9fb6 about 1 month ago

2.2 kB

	# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	import numpy as np
	import os

	def filter_npz_by_filenames(npz_path, txt_path, output_path):
	    """Copy an NPZ dataset, dropping the records whose uuid is listed in a text file.

	    The records are read from the ``arr_0`` entry of the archive at
	    ``npz_path``. Each record is indexed with ``'uuid'`` and is dropped when
	    that value appears in ``txt_path``. The survivors are written, in their
	    original order, to ``output_path`` as a compressed NPZ whose only entry
	    is ``arr_0``. Every dropped uuid and a short summary are printed.

	    Args:
	        npz_path: Path of the input ``.npz`` archive.
	        txt_path: Path of a text file with one identifier per line.
	            Surrounding whitespace is stripped and blank lines are ignored.
	        output_path: Destination passed to ``np.savez_compressed``.

	    Returns:
	        None.
	    """
	    # NOTE(security): allow_pickle=True unpickles arbitrary Python objects.
	    # Only run this on archives from a trusted source.
	    # The context manager closes the underlying zip file handle, which the
	    # bare ``np.load(...)['arr_0']`` form leaves open.
	    with np.load(npz_path, allow_pickle=True) as archive:
	        data_list = archive['arr_0']

	    with open(txt_path, 'r') as f:
	        exclude_filenames = {line.strip() for line in f if line.strip()}

	    # Mark the records to keep. Masking the loaded array (instead of
	    # rebuilding it from a Python list) preserves its dtype, so the output
	    # has the same layout as the input even when nothing is left.
	    total_count = len(data_list)
	    keep_mask = np.ones(total_count, dtype=bool)
	    for index, item in enumerate(data_list):
	        filename = item['uuid']
	        if filename in exclude_filenames:
	            keep_mask[index] = False
	            print(filename)

	    filtered_data = data_list[keep_mask]
	    kept_count = len(filtered_data)
	    excluded_count = total_count - kept_count

	    print(f"Original items: {total_count}")
	    print(f"Kept items: {kept_count}")
	    print(f"Removed items: {excluded_count}")

	    print("Saving filtered data")
	    # Object arrays are pickled by default when saving. Extra keyword
	    # arguments are stored as named arrays on NumPy versions without an
	    # ``allow_pickle`` parameter, so passing ``allow_pickle=True`` there
	    # adds a stray 'allow_pickle' entry to the archive.
	    np.savez_compressed(output_path, filtered_data)

	def main(
	    issue_list="data_utils/issue_data_list.txt",
	    npz_paths=("articulation_xlv2_train.npz", "articulation_xlv2_test.npz"),
	):
	    """Filter each dataset archive against the issue list.

	    For every path in ``npz_paths`` the filtered copy is written next to the
	    input with ``_update`` inserted before the file extension, e.g.
	    ``articulation_xlv2_train.npz`` -> ``articulation_xlv2_train_update.npz``.

	    Args:
	        issue_list: Path of the text file listing the uuids to remove.
	        npz_paths: Iterable of input NPZ paths, processed in order.
	    """
	    for npz_path in npz_paths:
	        root, ext = os.path.splitext(npz_path)
	        output_path = f"{root}_update{ext}"
	        filter_npz_by_filenames(npz_path, issue_list, output_path)

	# Run the filtering job only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()