Spaces:

insanecoder69
/

TalkSHOWLIVE

Sleeping

App Files Files Community

TalkSHOWLIVE / data_utils /dataset_preprocess.py

vscode69

second half

99afdfe 10 months ago

raw

history blame

5.28 kB

	import os
	import pickle
	from tqdm import tqdm
	import shutil
	import torch
	import numpy as np
	import librosa
	import random

	# Speaker sub-directories of data_root that the loops below iterate over.
	speakers = ['seth', 'conan', 'oliver', 'chemistry']
	# Dataset root; each speaker name is joined onto this path.
	data_root = "../ExpressiveWholeBodyDatasetv1.0/"
	# Only referenced by a commented-out path expression further down.
	split = 'train'



	def split_list(full_list, shuffle=False, ratio=0.2):
	    """Split ``full_list`` into three consecutive parts.

	    The first two parts each take roughly ``ratio`` of the items (the
	    boundaries are truncated with ``int``); the third part is the rest.

	    Args:
	        full_list: List to split. It is shuffled in place when ``shuffle``
	            is true and the list is large enough to be split.
	        shuffle: Shuffle ``full_list`` with ``random.shuffle`` before slicing.
	        ratio: Fraction of the items assigned to each of the first two parts.

	    Returns:
	        A 3-tuple ``(sublist_0, sublist_1, sublist_2)``. When the list is
	        empty, or so short that ``2 * ratio`` of it is less than one item,
	        the first two parts are empty and the third is ``full_list`` itself.
	    """
	    n_total = len(full_list)
	    offset_0 = int(n_total * ratio)
	    offset_1 = int(n_total * ratio * 2)
	    if n_total == 0 or offset_1 < 1:
	        # Always hand back three parts so callers can unpack the result
	        # unconditionally; nothing is held out for the first two parts.
	        return [], [], full_list
	    if shuffle:
	        random.shuffle(full_list)
	    sublist_0 = full_list[:offset_0]
	    sublist_1 = full_list[offset_0:offset_1]
	    sublist_2 = full_list[offset_1:]
	    return sublist_0, sublist_1, sublist_2


	def moveto(list, file):
	    """Relocate each directory in ``list`` into a sibling folder named ``file``.

	    A path ``parent/name`` ends up at ``parent/<file>/name``. The directory
	    is copied to the new location first and the original is removed only
	    after the copy has finished.

	    Args:
	        list: Iterable of directory paths to relocate. (The name shadows the
	            builtin; it is kept so existing callers keep working.)
	        file: Name of the folder created next to each directory, e.g.
	            ``'train'``.

	    Returns:
	        None.

	    Raises:
	        OSError: Propagated from ``shutil.copytree`` / ``shutil.rmtree``, for
	            instance when the destination already exists.
	    """
	    for f in list:
	        # os.path.split honours the platform separator and keeps the root for
	        # paths such as "/name", unlike splitting on a literal "/".
	        before, after = os.path.split(f)
	        new_path = os.path.join(before, file, after)

	        # Copy into the new directory ...
	        shutil.copytree(f, new_path)
	        # ... then delete the original from its old location.
	        shutil.rmtree(f)
	    return None


	def read_pkl(data):
	    """Check whether a loaded motion record should be discarded.

	    The pose arrays and the expression array are joined along axis 1 into a
	    single matrix, which is then checked for length and NaN values.

	    Args:
	        data: Mapping with the keys ``betas``, ``jaw_pose``, ``leye_pose``,
	            ``reye_pose``, ``global_orient``, ``body_pose_axis``,
	            ``left_hand_pose``, ``right_hand_pose`` and ``expression``.

	    Returns:
	        ``1`` when the joined matrix has fewer than 90 rows (presumably
	        frames) or contains a NaN, otherwise ``0``.
	    """
	    betas = np.array(data['betas'])  # read but not used by the check

	    pieces = [np.array(data[name]) for name in ('jaw_pose', 'leye_pose', 'reye_pose')]
	    # global_orient is squeezed so it can be joined with the other arrays.
	    pieces.append(np.array(data['global_orient']).squeeze())
	    pieces.extend(
	        np.array(data[name])
	        for name in ('body_pose_axis', 'left_hand_pose', 'right_hand_pose')
	    )
	    pieces.append(np.array(data['expression']))
	    full_body = np.concatenate(pieces, axis=1)

	    # Too short: reject without running the NaN scan.
	    if full_body.shape[0] < 90:
	        return 1
	    nan_found = torch.isnan(torch.from_numpy(full_body)).sum() > 0
	    return 1 if nan_found else 0


	# Pass 1: for every speaker, delete sequences whose audio or motion data is
	# missing, unreadable, or rejected by read_pkl, and report the counts.
	for speaker_name in speakers:
	    speaker_root = os.path.join(data_root, speaker_name)

	    videos = os.listdir(speaker_root)
	    print(videos)

	    # haode / huaide count the kept ("good") and deleted ("bad") sequences.
	    haode = huaide = 0
	    total_seqs = []

	    for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
	        vid_pth = os.path.join(speaker_root, vid)
	        try:
	            seqs = os.listdir(vid_pth)
	        except OSError:
	            # Entry cannot be listed (e.g. it is a plain file): skip it.
	            continue

	        # NOTE(review): every entry of the video directory is treated as a
	        # sequence. 'train' / 'test' / 'val' folders left by moveto() from an
	        # earlier run would fail the audio check below and be deleted, so
	        # run this only on a dataset that has not been split yet.
	        for s in seqs:
	            seq_root = os.path.join(vid_pth, s)  # corresponds to clip******
	            total_seqs.append(seq_root)

	            # Delete sequences without audio, or whose audio cannot be read.
	            audio_fname = os.path.join(seq_root, '%s.wav' % (s))
	            if not os.path.isfile(audio_fname):
	                huaide = huaide + 1
	                shutil.rmtree(seq_root)
	                continue
	            try:
	                librosa.load(audio_fname)
	            except Exception:
	                # Exception (not a bare except) so that Ctrl-C aborts the
	                # run instead of deleting the sequence being checked.
	                shutil.rmtree(seq_root)
	                huaide = huaide + 1
	                continue

	            # Check the motion file.
	            motion_fname = os.path.join(seq_root, '%s.pkl' % (s))
	            try:
	                # Read-only mode: the file is only loaded, never written.
	                f = open(motion_fname, 'rb')
	            except OSError:
	                shutil.rmtree(seq_root)
	                huaide = huaide + 1
	                continue

	            with f:
	                # NOTE: pickle.load can execute arbitrary code embedded in
	                # the file; only run this on a trusted dataset.
	                data = pickle.load(f)
	            w = read_pkl(data)

	            if w == 1:
	                shutil.rmtree(seq_root)
	                huaide = huaide + 1
	                continue

	            haode = haode + 1

	    print("huaide:{}, haode:{}, total_seqs:{}".format(huaide, haode, len(total_seqs)))

	# Pass 2: for every speaker, collect all sequence directories and move them
	# into 'train' / 'test' / 'val' folders next to their original location.
	for speaker_name in speakers:
	    speaker_root = os.path.join(data_root, speaker_name)

	    videos = os.listdir(speaker_root)
	    print(videos)

	    total_seqs = []

	    for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
	        vid_pth = os.path.join(speaker_root, vid)
	        try:
	            seqs = os.listdir(vid_pth)
	        except OSError:
	            # Entry cannot be listed (e.g. it is a plain file): skip it.
	            continue
	        total_seqs.extend(os.path.join(vid_pth, s) for s in seqs)
	    print("total_seqs:{}".format(len(total_seqs)))

	    # Split the dataset: 10% test, 10% val, the remainder train (shuffled).
	    parts = split_list(total_seqs, True, 0.1)
	    if len(parts) == 2:
	        # Tolerate a two-element result (nothing held out, everything in
	        # the last part) as well as the usual three-way split.
	        parts = ([],) + tuple(parts)
	    test_list, val_list, train_list = parts
	    print(len(test_list), len(val_list), len(train_list))
	    moveto(train_list, 'train')
	    moveto(test_list, 'test')
	    moveto(val_list, 'val')