VOICEVOX-Engine

Running

VOICEVOX-Engine / voicevox_engine /test /unit /tts_pipeline /test_text_analyzer.py

Jirat Jaturanpinyo

Upload voicevox_engine

edc06cb verified 12 months ago

14.7 kB

	from voicevox_engine.tts_pipeline.model import AccentPhrase, Mora
	from voicevox_engine.tts_pipeline.text_analyzer import (
	AccentPhraseLabel,
	BreathGroupLabel,
	Label,
	MoraLabel,
	UtteranceLabel,
	mora_to_text,
	text_to_accent_phrases,
	)


	def contexts_to_feature(contexts: dict[str, str]) -> str:
	    """Convert a label's ``contexts`` mapping into a full-context feature string.

	    Args:
	        contexts: Mapping from context name (``p1``..``p5``, ``a1``..``a3``,
	            ``b1``..``b3``, ``c1``..``c3``, ``d1``..``d3``, ``e1``..``e5``,
	            ``f1``..``f8``, ``g1``..``g5``, ``h1``..``h2``, ``i1``..``i8``,
	            ``j1``..``j2``, ``k1``..``k3``) to its string value.

	    Returns:
	        The values laid out as one feature line.

	    Raises:
	        KeyError: If any of the context names above is missing.
	    """
	    # The F and I fields use a bare `|` as delimiter; it must not be
	    # backslash-escaped, otherwise a literal backslash ends up in the output.
	    return (
	        "{p1}^{p2}-{p3}+{p4}={p5}"
	        "/A:{a1}+{a2}+{a3}"
	        "/B:{b1}-{b2}_{b3}"
	        "/C:{c1}_{c2}+{c3}"
	        "/D:{d1}+{d2}_{d3}"
	        "/E:{e1}_{e2}!{e3}_{e4}-{e5}"
	        "/F:{f1}_{f2}#{f3}_{f4}@{f5}_{f6}|{f7}_{f8}"
	        "/G:{g1}_{g2}%{g3}_{g4}_{g5}"
	        "/H:{h1}_{h2}"
	        "/I:{i1}-{i2}@{i3}+{i4}&{i5}-{i6}|{i7}+{i8}"
	        "/J:{j1}_{j2}"
	        "/K:{k1}+{k2}-{k3}"
	    ).format(**contexts)


	# Union of the OpenJTalk label container classes accepted by the helpers below
	OjtContainer = MoraLabel | AccentPhraseLabel | BreathGroupLabel | UtteranceLabel


	def features(ojt_container: OjtContainer) -> list[str]:
	    """Return the features of all labels the container holds, directly or indirectly."""
	    feature_strings: list[str] = []
	    for label in ojt_container.labels:
	        feature_strings.append(contexts_to_feature(label.contexts))
	    return feature_strings


	# Result of pyopenjtalk.extract_fullcontext("こんにちは、ヒホです。").
	# The expected features are written out literally so that the tests depend
	# on other libraries as little as possible and stay transparent.
	# Note: the F and I fields are delimited by a bare `|` (no backslash).
	test_case_hello_hiho = [
	    # sil (silence)
	    "xx^xx-sil+k=o/A:xx+xx+xx/B:xx-xx_xx/C:xx_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:xx_xx#xx_xx@xx_xx|xx_xx/G:5_5%0_xx_xx/H:xx_xx/I:xx-xx"
	    + "@xx+xx&xx-xx|xx+xx/J:1_5/K:2+2-9",
	    # k
	    "xx^sil-k+o=N/A:-4+1+5/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # o
	    "sil^k-o+N=n/A:-4+1+5/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # N (moraic nasal)
	    "k^o-N+n=i/A:-3+2+4/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # n
	    "o^N-n+i=ch/A:-2+3+3/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # i
	    "N^n-i+ch=i/A:-2+3+3/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # ch
	    "n^i-ch+i=w/A:-1+4+2/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # i
	    "i^ch-i+w=a/A:-1+4+2/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # w
	    "ch^i-w+a=pau/A:0+5+1/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # a
	    "i^w-a+pau=h/A:0+5+1/B:xx-xx_xx/C:09_xx+xx/D:09+xx_xx/E:xx_xx!xx_xx-xx"
	    + "/F:5_5#0_xx@1_1|1_5/G:4_1%0_xx_0/H:xx_xx/I:1-5"
	    + "@1+2&1-2|1+9/J:1_4/K:2+2-9",
	    # pau (pause at the comma)
	    "w^a-pau+h=i/A:xx+xx+xx/B:09-xx_xx/C:xx_xx+xx/D:09+xx_xx/E:5_5!0_xx-xx"
	    + "/F:xx_xx#xx_xx@xx_xx|xx_xx/G:4_1%0_xx_xx/H:1_5/I:xx-xx"
	    + "@xx+xx&xx-xx|xx+xx/J:1_4/K:2+2-9",
	    # h
	    "a^pau-h+i=h/A:0+1+4/B:09-xx_xx/C:09_xx+xx/D:22+xx_xx/E:5_5!0_xx-0"
	    + "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
	    + "@2+1&2-1|6+4/J:xx_xx/K:2+2-9",
	    # i
	    "pau^h-i+h=o/A:0+1+4/B:09-xx_xx/C:09_xx+xx/D:22+xx_xx/E:5_5!0_xx-0"
	    + "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
	    + "@2+1&2-1|6+4/J:xx_xx/K:2+2-9",
	    # h
	    "h^i-h+o=d/A:1+2+3/B:09-xx_xx/C:22_xx+xx/D:10+7_2/E:5_5!0_xx-0"
	    + "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
	    + "@2+1&2-1|6+4/J:xx_xx/K:2+2-9",
	    # o
	    "i^h-o+d=e/A:1+2+3/B:09-xx_xx/C:22_xx+xx/D:10+7_2/E:5_5!0_xx-0"
	    + "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
	    + "@2+1&2-1|6+4/J:xx_xx/K:2+2-9",
	    # d
	    "h^o-d+e=s/A:2+3+2/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0"
	    + "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
	    + "@2+1&2-1|6+4/J:xx_xx/K:2+2-9",
	    # e
	    "o^d-e+s=U/A:2+3+2/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0"
	    + "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
	    + "@2+1&2-1|6+4/J:xx_xx/K:2+2-9",
	    # s
	    "d^e-s+U=sil/A:3+4+1/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0"
	    + "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
	    + "@2+1&2-1|6+4/J:xx_xx/K:2+2-9",
	    # U (devoiced vowel)
	    "e^s-U+sil=xx/A:3+4+1/B:22-xx_xx/C:10_7+2/D:xx+xx_xx/E:5_5!0_xx-0"
	    + "/F:4_1#0_xx@1_1|1_4/G:xx_xx%xx_xx_xx/H:1_5/I:1-4"
	    + "@2+1&2-1|6+4/J:xx_xx/K:2+2-9",
	    # sil (silence)
	    "s^U-sil+xx=xx/A:xx+xx+xx/B:10-7_2/C:xx_xx+xx/D:xx+xx_xx/E:4_1!0_xx-xx"
	    + "/F:xx_xx#xx_xx@xx_xx|xx_xx/G:xx_xx%xx_xx_xx/H:1_4/I:xx-xx"
	    + "@xx+xx&xx-xx|xx+xx/J:xx_xx/K:2+2-9",
	]
	# One Label per feature line, in the same order as the test case above
	labels_hello_hiho = list(map(Label.from_feature, test_case_hello_hiho))


	def jointed_phonemes(ojt_container: OjtContainer) -> str:
	    """Concatenate, without a separator, the phonemes of all labels in the container."""
	    phonemes = (label.phoneme for label in ojt_container.labels)
	    return "".join(phonemes)


	def space_jointed_phonemes(ojt_container: OjtContainer) -> str:
	    """Join the phonemes of all labels in the container, separated by single spaces."""
	    phonemes = (label.phoneme for label in ojt_container.labels)
	    return " ".join(phonemes)


	def test_label_phoneme() -> None:
	    """Check the phoneme exposed by each Label."""
	    phonemes = [label.phoneme for label in labels_hello_hiho]
	    expected = "sil k o N n i ch i w a pau h i h o d e s U sil"
	    assert " ".join(phonemes) == expected


	def test_label_is_pause() -> None:
	    """Check the pause detection of each Label."""
	    expected = (
	        [True]  # sil
	        + [False] * 9  # k o N n i ch i w a
	        + [True]  # pau
	        + [False] * 8  # h i h o d e s U
	        + [True]  # sil
	    )
	    actual = [label.is_pause() for label in labels_hello_hiho]
	    assert actual == expected


	def test_label_feature() -> None:
	    """Check that each Label's contexts reproduce the feature it was built from."""
	    regenerated = []
	    for label in labels_hello_hiho:
	        regenerated.append(contexts_to_feature(label.contexts))
	    assert regenerated == test_case_hello_hiho


	# One MoraLabel per mora; each trailing comment gives the mora's
	# contexts["a2"] value (as written in the feature lines) and its phonemes.
	mora_hello_1 = MoraLabel(  # a2 == "1": ko
	    consonant=labels_hello_hiho[1],
	    vowel=labels_hello_hiho[2],
	)
	mora_hello_2 = MoraLabel(  # a2 == "2": N (no consonant)
	    consonant=None,
	    vowel=labels_hello_hiho[3],
	)
	mora_hello_3 = MoraLabel(  # a2 == "3": ni
	    consonant=labels_hello_hiho[4],
	    vowel=labels_hello_hiho[5],
	)
	mora_hello_4 = MoraLabel(  # a2 == "4": chi
	    consonant=labels_hello_hiho[6],
	    vowel=labels_hello_hiho[7],
	)
	mora_hello_5 = MoraLabel(  # a2 == "5": wa
	    consonant=labels_hello_hiho[8],
	    vowel=labels_hello_hiho[9],
	)
	mora_hiho_1 = MoraLabel(  # a2 == "1": hi
	    consonant=labels_hello_hiho[11],
	    vowel=labels_hello_hiho[12],
	)
	mora_hiho_2 = MoraLabel(  # a2 == "2": ho
	    consonant=labels_hello_hiho[13],
	    vowel=labels_hello_hiho[14],
	)
	mora_hiho_3 = MoraLabel(  # a2 == "3": de
	    consonant=labels_hello_hiho[15],
	    vowel=labels_hello_hiho[16],
	)
	mora_hiho_4 = MoraLabel(  # a2 == "4": sU
	    consonant=labels_hello_hiho[17],
	    vowel=labels_hello_hiho[18],
	)


	def test_mora_label_phonemes() -> None:
	    """Check the phoneme sequence held by each MoraLabel."""
	    cases = [
	        (mora_hello_1, "ko"),
	        (mora_hello_2, "N"),
	        (mora_hello_3, "ni"),
	        (mora_hello_4, "chi"),
	        (mora_hello_5, "wa"),
	        (mora_hiho_1, "hi"),
	        (mora_hiho_2, "ho"),
	        (mora_hiho_3, "de"),
	        (mora_hiho_4, "sU"),
	    ]
	    for mora_label, expected in cases:
	        assert jointed_phonemes(mora_label) == expected


	def test_mora_label_features() -> None:
	    """Check the features held by each MoraLabel."""
	    # Each mora is paired with the slice of the test case it was built from
	    cases = [
	        (mora_hello_1, slice(1, 3)),
	        (mora_hello_2, slice(3, 4)),
	        (mora_hello_3, slice(4, 6)),
	        (mora_hello_4, slice(6, 8)),
	        (mora_hello_5, slice(8, 10)),
	        (mora_hiho_1, slice(11, 13)),
	        (mora_hiho_2, slice(13, 15)),
	        (mora_hiho_3, slice(15, 17)),
	        (mora_hiho_4, slice(17, 19)),
	    ]
	    for mora_label, span in cases:
	        assert features(mora_label) == test_case_hello_hiho[span]


	# TODO: look for a natural, non-contrived example that raises ValueError.
	# If no such example exists, this can be left as it is.
	accent_phrase_hello = AccentPhraseLabel.from_labels(
	    labels_hello_hiho[1:10]  # k o N n i ch i w a
	)
	accent_phrase_hiho = AccentPhraseLabel.from_labels(
	    labels_hello_hiho[11:19]  # h i h o d e s U
	)


	def test_accent_phrase_accent() -> None:
	    """Check the accent position held by each AccentPhraseLabel."""
	    cases = ((accent_phrase_hello, 5), (accent_phrase_hiho, 1))
	    for accent_phrase, expected_accent in cases:
	        assert accent_phrase.accent == expected_accent


	def test_accent_phrase_phonemes() -> None:
	    """Check the phoneme sequence held by each AccentPhraseLabel."""
	    phrases = (accent_phrase_hello, accent_phrase_hiho)
	    hello_phonemes, hiho_phonemes = [space_jointed_phonemes(p) for p in phrases]
	    assert hello_phonemes == "k o N n i ch i w a"
	    assert hiho_phonemes == "h i h o d e s U"


	def test_accent_phrase_features() -> None:
	    """Check the features held by each AccentPhraseLabel."""
	    hello_expected = test_case_hello_hiho[1:10]
	    hiho_expected = test_case_hello_hiho[11:19]
	    assert features(accent_phrase_hello) == hello_expected
	    assert features(accent_phrase_hiho) == hiho_expected


	# Breath groups built from the same label ranges as the accent phrases
	breath_group_hello = BreathGroupLabel.from_labels(
	    labels_hello_hiho[1:10]  # k o N n i ch i w a
	)
	breath_group_hiho = BreathGroupLabel.from_labels(
	    labels_hello_hiho[11:19]  # h i h o d e s U
	)


	def test_breath_group_phonemes() -> None:
	    """Check the phoneme sequence held by each BreathGroupLabel."""
	    groups = (breath_group_hello, breath_group_hiho)
	    hello_phonemes, hiho_phonemes = [space_jointed_phonemes(g) for g in groups]
	    assert hello_phonemes == "k o N n i ch i w a"
	    assert hiho_phonemes == "h i h o d e s U"


	def test_breath_group_features() -> None:
	    """Check the features held by each BreathGroupLabel."""
	    hello_expected = test_case_hello_hiho[1:10]
	    hiho_expected = test_case_hello_hiho[11:19]
	    assert features(breath_group_hello) == hello_expected
	    assert features(breath_group_hiho) == hiho_expected


	# Utterance-level container built from the complete label sequence
	# (the leading/trailing `sil` and the `pau` labels are passed in as well).
	utterance_hello_hiho = UtteranceLabel.from_labels(labels_hello_hiho)


	def test_utterance_phonemes() -> None:
	    """Check the phoneme sequence held by the UtteranceLabel."""
	    expected = "sil k o N n i ch i w a pau h i h o d e s U sil"
	    assert space_jointed_phonemes(utterance_hello_hiho) == expected


	def test_utterance_features() -> None:
	    """Check the features held by the UtteranceLabel."""
	    actual = features(utterance_hello_hiho)
	    assert actual == test_case_hello_hiho


	def test_voice() -> None:
	    """`mora_to_text` maps moras written with lower-case vowels to katakana."""
	    cases = [
	        ("a", "ア"),
	        ("i", "イ"),
	        ("ka", "カ"),
	        ("N", "ン"),
	        ("cl", "ッ"),
	        ("gye", "ギェ"),
	        ("ye", "イェ"),
	        ("wo", "ウォ"),
	    ]
	    for mora, katakana in cases:
	        assert mora_to_text(mora) == katakana


	def test_unvoice() -> None:
	    """`mora_to_text` maps moras written with upper-case vowels to katakana."""
	    cases = [
	        ("A", "ア"),
	        ("I", "イ"),
	        ("kA", "カ"),
	        ("gyE", "ギェ"),
	        ("yE", "イェ"),
	        ("wO", "ウォ"),
	    ]
	    for mora, katakana in cases:
	        assert mora_to_text(mora) == katakana


	def test_invalid_mora() -> None:
	    """An odd mora does not raise; it is returned unchanged."""
	    for odd_mora in ("x", ""):
	        assert mora_to_text(odd_mora) == odd_mora


	def _gen_mora(text: str, consonant: str | None, vowel: str) -> Mora:
	    """Build a ``Mora`` for expected values, with lengths and pitch set to zero.

	    Args:
	        text: Text of the mora.
	        consonant: Consonant phoneme, or ``None`` when the mora has none.
	        vowel: Vowel phoneme.

	    Returns:
	        A ``Mora`` whose ``vowel_length`` and ``pitch`` are 0 and whose
	        ``consonant_length`` is 0 when ``consonant`` is truthy, else ``None``.
	    """
	    return Mora(
	        text=text,
	        consonant=consonant,
	        consonant_length=0 if consonant else None,
	        vowel=vowel,
	        vowel_length=0,
	        pitch=0,
	    )


	def test_text_to_accent_phrases_normal() -> None:
	    """`text_to_accent_phrases` parses a well-formed Japanese sentence."""
	    # Expected: two accent phrases, the first one followed by a pause mora
	    hello_phrase = AccentPhrase(
	        moras=[
	            _gen_mora(*spec)
	            for spec in (
	                ("コ", "k", "o"),
	                ("ン", None, "N"),
	                ("ニ", "n", "i"),
	                ("チ", "ch", "i"),
	                ("ワ", "w", "a"),
	            )
	        ],
	        accent=5,
	        pause_mora=_gen_mora("、", None, "pau"),
	    )
	    hiho_phrase = AccentPhrase(
	        moras=[
	            _gen_mora(*spec)
	            for spec in (
	                ("ヒ", "h", "i"),
	                ("ホ", "h", "o"),
	                ("デ", "d", "e"),
	                ("ス", "s", "U"),
	            )
	        ],
	        accent=1,
	        pause_mora=None,
	    )
	    # Actual output for the input sentence
	    parsed = text_to_accent_phrases("こんにちは、ヒホです。")
	    assert parsed == [hello_phrase, hiho_phrase]


	def stub_unknown_features_koxx(_: str) -> list[str]:
	    """Stub for `text_to_features()` that ignores its input.

	    Returns:
	        Always the same five features, corresponding to `sil-k-o-xx-sil`.
	    """
	    # The F and I fields are delimited by a bare `|` (no backslash).
	    return [
	        ".^.-sil+.=./A:.+xx+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:xx_xx#xx_.@xx_.|._./G:._.%._._./H:._./I:.-.@xx+.&.-.|.+./J:._./K:.+.-.",
	        ".^.-k+.=./A:.+1+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-.",
	        ".^.-o+.=./A:.+1+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-.",
	        ".^.-xx+.=./A:.+2+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:2_1#0_.@1_.|._./G:._.%._._./H:._./I:.-.@1+.&.-.|.+./J:._./K:.+.-.",
	        ".^.-sil+.=./A:.+xx+./B:.-._./C:._.+./D:.+._./E:._.!._.-./F:xx_xx#xx_.@xx_.|._./G:._.%._._./H:._./I:.-.@xx+.&.-.|.+./J:._./K:.+.-.",
	    ]


	def test_text_to_accent_phrases_unknown() -> None:
	    """`text_to_accent_phrases` parses features that contain an unknown phoneme."""
	    # Expected: one accent phrase whose second mora is the unknown `xx`
	    expected = [
	        AccentPhrase(
	            moras=[_gen_mora("コ", "k", "o"), _gen_mora("xx", None, "xx")],
	            accent=1,
	            pause_mora=None,
	        )
	    ]
	    # The stub replaces the text analysis, so the input text is irrelevant
	    parsed = text_to_accent_phrases(
	        "dummy",
	        text_to_features=stub_unknown_features_koxx,
	    )
	    assert parsed == expected