vits-api

Sleeping

App Files Files Community

vits-api / vits /text /vits_pinyin.py

Artrajz

update

14e19a5 about 2 years ago

raw

history blame

3.47 kB

	""" from https://github.com/PlayVoice/vits_chinese """
	import pypinyin
	from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin
	from pypinyin.converter import DefaultConverter
	from pypinyin.core import Pinyin

	import numpy as np

	from vits.bert.prosody_tool import pinyin_dict
	from vits.bert import TTSProsody


	class MyConverter(NeutralToneWith5Mixin, DefaultConverter):
	    """pypinyin converter that combines ``DefaultConverter`` with ``NeutralToneWith5Mixin``.

	    The class adds no behavior of its own; it only composes the two
	    pypinyin bases so that an instance can be handed to ``Pinyin(...)``.
	    Per the pypinyin documentation the mixin marks neutral-tone syllables
	    with the digit 5 in the numbered tone styles.
	    """


	def is_chinese(uchar: str) -> bool:
	    """Return True when ``uchar`` lies in the range U+4E00..U+9FA5 (inclusive).

	    Args:
	        uchar: The string to classify, normally a single character.
	            Strings are compared lexicographically, so a longer string is
	            classified by its leading character(s); the empty string is
	            never considered Chinese.

	    Returns:
	        True if ``u'\\u4e00' <= uchar <= u'\\u9fa5'``, otherwise False.
	    """
	    return u'\u4e00' <= uchar <= u'\u9fa5'


	def clean_chinese(text: str) -> str:
	    """Keep only the Chinese characters of ``text``, separating runs with commas.

	    Every character accepted by ``is_chinese`` is kept. A run of one or more
	    other characters that follows a kept Chinese character is collapsed into
	    a single ``','``. Leading and trailing commas are stripped from the
	    result.

	    Args:
	        text: Input text; surrounding whitespace is stripped first.

	    Returns:
	        The cleaned string, e.g. ``"你,好"`` for the input ``"你 - 好!"``.
	    """
	    kept = []
	    for char in text.strip():
	        if is_chinese(char):
	            kept.append(char)
	        # Emit one separator per non-Chinese run. Checking that the last kept
	        # item is Chinese prevents consecutive commas. The list only has to
	        # be non-empty: requiring more than one item would silently drop the
	        # separator after a single leading Chinese character.
	        elif kept and is_chinese(kept[-1]):
	            kept.append(',')
	    return ''.join(kept).strip(',')


	class VITS_PinYin:
	    """Turn Chinese text into a phoneme string plus per-phone BERT embeddings.

	    ``__init__`` stores two collaborators on the instance:
	    ``pinyin_parser`` (a ``Pinyin`` built on ``MyConverter``) and
	    ``prosody`` (a ``TTSProsody`` used for tokenizing and embeddings).
	    """

	    def __init__(self, bert_path, device):
	        """Create the pinyin parser and the prosody model.

	        Args:
	            bert_path: Passed through to ``TTSProsody``.
	            device: Passed through to ``TTSProsody``.
	        """
	        # NOTE(review): chinese_to_phonemes calls pypinyin.pinyin directly and
	        # does not use this parser; it is kept so the attribute stays available.
	        self.pinyin_parser = Pinyin(MyConverter())
	        self.prosody = TTSProsody(bert_path, device)

	    def chinese_to_phonemes(self, text):
	        """Convert ``text`` to a phoneme string and expanded character embeddings.

	        The text is normalized, tokenized with the prosody model's tokenizer,
	        and converted to TONE3 pinyin. Each token accepted by ``is_chinese``
	        consumes one pinyin entry and contributes two phones looked up in
	        ``pinyin_dict``; every other token contributes one ``'sp'``. The
	        sequence is wrapped in ``'sil'`` at both ends.

	        Args:
	            text: The input sentence.

	        Returns:
	            A tuple ``(phone_items_str, char_embeds)``: the space-joined
	            phones, and the result of ``prosody.expand_for_phone`` applied to
	            the embeddings of ``'[PAD]' + text + '[PAD]'`` with the per-token
	            phone counts.

	        Raises:
	            AssertionError: If the tokenizer produced an ``"[UNK]"`` token.
	            ValueError: If the tokens cannot be aligned with the pinyin list
	                (the pinyin list is exhausted before the tokens are).
	        """
	        # Consider replacing pypinyin with g2pw's Chinese BERT; in testing so
	        # far it runs too slowly.
	        # Replace standard Chinese punctuation with the single symbols found in
	        # the BERT vocabulary so the BERT output stays meaningful. Order
	        # matters: double forms are replaced before their single forms.
	        text = text.replace("——", "...") \
	            .replace("—", "...") \
	            .replace("……", "...") \
	            .replace("…", "...") \
	            .replace('“', '"') \
	            .replace('”', '"') \
	            .replace("\n", "")
	        tokens = self.prosody.char_model.tokenizer.tokenize(text)
	        text = ''.join(tokens)
	        # Raised explicitly (rather than via ``assert``) so the check survives
	        # ``python -O``; the exception type is unchanged for existing callers.
	        if "[UNK]" in tokens:
	            raise AssertionError(f"tokenizer produced [UNK] for text: {text}")

	        # pypinyin returns one single-item list per unit; flatten to 1-D.
	        pinyins = np.reshape(pypinyin.pinyin(text, style=pypinyin.TONE3), (-1))
	        total_pinyins = len(pinyins)

	        phone_items = ['sil']
	        count_phone = [1]  # phones per entry of '[PAD]' + tokens + '[PAD]'
	        phone_index = 0    # next unread position in ``pinyins``
	        temp = ""          # accumulates non-Chinese tokens until they match a pinyin entry

	        try:
	            for word in tokens:
	                if is_chinese(word):
	                    count_phone.append(2)
	                    # Bound against the pinyin list itself: this is the
	                    # sequence indexed on the next line.
	                    if phone_index >= total_pinyins:
	                        raise ValueError(
	                            f"!!!![{text}]plz check ur text whether includes MULTIBYTE symbol."
	                            "(请检查你的文本中是否包含多字节符号)")
	                    pinyin = pinyins[phone_index]
	                    phone_index += 1
	                    # TONE3 omits the digit for neutral tone; mark it as 5.
	                    if not pinyin[-1].isdigit():
	                        pinyin += "5"
	                    syllable = pinyin[:-1]
	                    # Syllables missing from pinyin_dict add no phones even
	                    # though two were counted above (unchanged behavior).
	                    if syllable in pinyin_dict:
	                        tone = pinyin[-1]
	                        # pinyin_dict maps a toneless syllable to a pair of
	                        # phones (presumably initial and final - TODO confirm).
	                        a1, a2 = pinyin_dict[syllable]
	                        phone_items += [a1, a2 + tone]
	                else:
	                    # pypinyin keeps a non-Chinese run as one entry while the
	                    # tokenizer may split it; advance only once the joined
	                    # tokens equal that entry.
	                    temp += word
	                    if temp == pinyins[phone_index]:
	                        temp = ""
	                        phone_index += 1
	                    count_phone.append(1)
	                    phone_items.append('sp')
	        except IndexError as e:
	            # Swallowing this left ``phone_items_str`` unbound and surfaced
	            # later as an UnboundLocalError; fail here with the real cause.
	            raise ValueError(
	                f"could not align tokens with pinyin for text [{text}]: {e}"
	            ) from e

	        count_phone.append(1)
	        phone_items.append('sil')
	        phone_items_str = ' '.join(phone_items)

	        text = f'[PAD]{text}[PAD]'
	        char_embeds = self.prosody.get_char_embeds(text)
	        char_embeds = self.prosody.expand_for_phone(char_embeds, count_phone)
	        return phone_items_str, char_embeds