Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +6 -0
- GPT_SoVITS/download.py +5 -0
- GPT_SoVITS/download_hubert.py +52 -0
- GPT_SoVITS/export_torch_script.py +832 -0
- GPT_SoVITS/extract_hubert.py +150 -0
- GPT_SoVITS/inference_cli.py +55 -0
- GPT_SoVITS/inference_gui.py +310 -0
- GPT_SoVITS/inference_webui.py +772 -0
- GPT_SoVITS/inference_webui_fast.py +336 -0
- GPT_SoVITS/onnx_export.py +344 -0
- GPT_SoVITS/prepare_data.py +66 -0
- GPT_SoVITS/pretrained_models/.gitignore +2 -0
- GPT_SoVITS/pretrained_models/README.md +5 -0
- GPT_SoVITS/process_ckpt.py +31 -0
- GPT_SoVITS/s1_train.py +113 -0
- GPT_SoVITS/s2_train.py +610 -0
- GPT_SoVITS/text/.gitignore +3 -0
- GPT_SoVITS/text/__init__.py +27 -0
- GPT_SoVITS/text/cantonese.py +209 -0
- GPT_SoVITS/text/chinese.py +211 -0
- GPT_SoVITS/text/chinese2.py +308 -0
- GPT_SoVITS/text/cleaner.py +91 -0
- GPT_SoVITS/text/cmudict-fast.rep +0 -0
- GPT_SoVITS/text/cmudict.rep +0 -0
- GPT_SoVITS/text/engdict-hot.rep +3 -0
- GPT_SoVITS/text/english.py +374 -0
- GPT_SoVITS/text/g2pw/__init__.py +1 -0
- GPT_SoVITS/text/g2pw/dataset.py +166 -0
- GPT_SoVITS/text/g2pw/g2pw.py +154 -0
- GPT_SoVITS/text/g2pw/onnx_api.py +241 -0
- GPT_SoVITS/text/g2pw/polyphonic-fix.rep +0 -0
- GPT_SoVITS/text/g2pw/polyphonic.rep +53 -0
- GPT_SoVITS/text/g2pw/utils.py +145 -0
- GPT_SoVITS/text/hindi.py +222 -0
- GPT_SoVITS/text/japanese.py +226 -0
- GPT_SoVITS/text/korean.py +265 -0
- GPT_SoVITS/text/opencpop-strict.txt +429 -0
- GPT_SoVITS/text/symbols.py +427 -0
- GPT_SoVITS/text/symbols2.py +444 -0
- GPT_SoVITS/text/text_processing.py +37 -0
- GPT_SoVITS/text/tone_sandhi.py +807 -0
- GPT_SoVITS/text/zh_normalization/README.md +16 -0
- GPT_SoVITS/text/zh_normalization/__init__.py +14 -0
- GPT_SoVITS/text/zh_normalization/char_convert.py +46 -0
- GPT_SoVITS/text/zh_normalization/chronology.py +134 -0
- GPT_SoVITS/text/zh_normalization/constants.py +62 -0
- GPT_SoVITS/text/zh_normalization/num.py +318 -0
- GPT_SoVITS/text/zh_normalization/phonecode.py +63 -0
- GPT_SoVITS/text/zh_normalization/quantifier.py +63 -0
- GPT_SoVITS/text/zh_normalization/text_normlization.py +175 -0
.gitattributes
CHANGED
@@ -702,3 +702,9 @@ data8/wavs/119.wav filter=lfs diff=lfs merge=lfs -text
|
|
702 |
data8/wavs/122.wav filter=lfs diff=lfs merge=lfs -text
|
703 |
data8/wavs/135.wav filter=lfs diff=lfs merge=lfs -text
|
704 |
data8/wavs/159.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
702 |
data8/wavs/122.wav filter=lfs diff=lfs merge=lfs -text
|
703 |
data8/wavs/135.wav filter=lfs diff=lfs merge=lfs -text
|
704 |
data8/wavs/159.wav filter=lfs diff=lfs merge=lfs -text
|
705 |
+
data8/wavs/190.wav filter=lfs diff=lfs merge=lfs -text
|
706 |
+
data8/wavs/118.wav filter=lfs diff=lfs merge=lfs -text
|
707 |
+
data8/wavs/129.wav filter=lfs diff=lfs merge=lfs -text
|
708 |
+
data8/wavs/174.wav filter=lfs diff=lfs merge=lfs -text
|
709 |
+
data8/wavs/152.wav filter=lfs diff=lfs merge=lfs -text
|
710 |
+
data8/wavs/13.wav filter=lfs diff=lfs merge=lfs -text
|
GPT_SoVITS/download.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import sys

# Put the current working directory first on the import path so the
# project-local ``text`` package below can be resolved.
now_dir = os.getcwd()
sys.path.insert(0, now_dir)

from text.g2pw import G2PWPinyin

# Instantiate the G2PW converter at import time.
# NOTE(review): presumably construction fetches the model into ``model_dir``
# when it is missing (the script is named "download") -- confirm in G2PWPinyin.
g2pw = G2PWPinyin(
    model_dir="GPT_SoVITS/text/G2PWModel",
    model_source="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
    v_to_u=False,
    neutral_tone_with_five=True,
)
|
GPT_SoVITS/download_hubert.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
import torch
|
4 |
+
from pathlib import Path
|
5 |
+
from tqdm import tqdm
|
6 |
+
|
7 |
+
def download_file(url, dest_path, timeout=30):
    """Download ``url`` to ``dest_path`` while showing a tqdm progress bar.

    The response body is streamed into ``dest_path + ".part"`` and renamed to
    ``dest_path`` only after the transfer completed, so a failed or
    interrupted download never leaves a truncated file at ``dest_path``.

    Args:
        url: Address of the file to fetch.
        dest_path: Final location of the downloaded file.
        timeout: Timeout in seconds handed to ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    tmp_path = f"{dest_path}.part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail loudly instead of saving an HTML error page as a model file.
            response.raise_for_status()
            # A missing Content-Length header yields a total of 0.
            total_size = int(response.headers.get('content-length', 0))

            with open(tmp_path, 'wb') as f, tqdm(
                desc=os.path.basename(dest_path),
                total=total_size,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as pbar:
                for data in response.iter_content(chunk_size=1024):
                    size = f.write(data)
                    pbar.update(size)

        # Publish the file only once every byte has been written.
        os.replace(tmp_path, dest_path)
    finally:
        # After a successful rename nothing is left; otherwise drop the
        # partial file.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
22 |
+
|
23 |
+
def download_hubert():
    """Fetch the chinese-hubert-base files that are not yet on disk."""
    # Two levels up from this file, i.e. the parent of the directory that
    # contains this script.
    script_path = os.path.abspath(__file__)
    root_dir = os.path.dirname(os.path.dirname(script_path))

    # NOTE(review): other code in this commit reads the model from
    # "GPT_SoVITS/pretrained_models/chinese-hubert-base"; this directory has
    # no "GPT_SoVITS" component -- confirm the intended location.
    model_dir = os.path.join(root_dir, "pretrained_models", "chinese-hubert-base")
    os.makedirs(model_dir, exist_ok=True)

    # (local file name, remote URL) pairs, fetched in this order.
    files = (
        ("config.json", "https://huggingface.co/TencentGameMate/chinese-hubert-base/raw/main/config.json"),
        ("preprocessor_config.json", "https://huggingface.co/TencentGameMate/chinese-hubert-base/raw/main/preprocessor_config.json"),
        ("pytorch_model.bin", "https://huggingface.co/TencentGameMate/chinese-hubert-base/resolve/main/pytorch_model.bin"),
    )

    print("Downloading Chinese Hubert model...")
    for filename, url in files:
        dest_path = os.path.join(model_dir, filename)
        if os.path.exists(dest_path):
            print(f"{filename} already exists, skipping...")
            continue
        print(f"Downloading {filename}...")
        download_file(url, dest_path)

    print("Download complete.")
|
50 |
+
|
51 |
+
if __name__ == "__main__":
|
52 |
+
download_hubert()
|
GPT_SoVITS/export_torch_script.py
ADDED
@@ -0,0 +1,832 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_model.py
|
2 |
+
# reference: https://github.com/lifeiteng/vall-e
|
3 |
+
import argparse
|
4 |
+
from typing import Optional
|
5 |
+
from my_utils import load_audio
|
6 |
+
from text import cleaned_text_to_sequence
|
7 |
+
import torch
|
8 |
+
import torchaudio
|
9 |
+
|
10 |
+
from torch import IntTensor, LongTensor, Tensor, nn
|
11 |
+
from torch.nn import functional as F
|
12 |
+
|
13 |
+
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
14 |
+
from feature_extractor import cnhubert
|
15 |
+
|
16 |
+
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
17 |
+
from module.models_onnx import SynthesizerTrn
|
18 |
+
|
19 |
+
from inference_webui import get_phones_and_bert
|
20 |
+
|
21 |
+
import os
|
22 |
+
import soundfile
|
23 |
+
|
24 |
+
# Default model hyper-parameters. ``vocab_size`` is written as 1024 + 1 and
# ``EOS`` is 1024, i.e. the last vocabulary index.
default_config = dict(
    embedding_dim=512,
    hidden_dim=512,
    num_head=8,
    num_layers=12,
    num_codebook=8,
    p_dropout=0.0,
    vocab_size=1024 + 1,
    phoneme_vocab_size=512,
    EOS=1024,
)
|
35 |
+
|
36 |
+
def get_raw_t2s_model(dict_s1) -> Text2SemanticLightningModule:
    """Rebuild the text-to-semantic Lightning module from a checkpoint dict.

    ``dict_s1`` must provide ``"config"`` and ``"weight"``. The value of
    ``config["model"]["dropout"]`` is converted to ``float`` in place before
    the module is constructed. The module is returned in eval mode.
    """
    config = dict_s1["config"]
    model_cfg = config["model"]
    model_cfg["dropout"] = float(model_cfg["dropout"])

    module = Text2SemanticLightningModule(config, "****", is_train=False)
    module.load_state_dict(dict_s1["weight"])
    return module.eval()
|
43 |
+
|
44 |
+
@torch.jit.script
def logits_to_probs(
    logits,
    previous_tokens: Optional[torch.Tensor] = None,
    temperature: float = 1.0,
    top_k: Optional[int] = None,
    top_p: Optional[int] = None,
    repetition_penalty: float = 1.0,
):
    """Turn raw logits into a probability distribution for sampling.

    Steps, in order: repetition penalty on ``previous_tokens``, top-p
    filtering, temperature scaling, top-k filtering, softmax.

    Note: when the repetition penalty is applied, ``logits`` is modified in
    place (``scatter_``).
    NOTE(review): ``top_p`` is annotated as int although it is compared with
    1.0 -- confirm whether a float was intended.
    """
    # Repetition penalty: shrink positive scores, amplify negative ones.
    if previous_tokens is not None and repetition_penalty != 1.0:
        token_ids = previous_tokens.long()
        seen = torch.gather(logits, dim=1, index=token_ids)
        penalised = torch.where(
            seen < 0, seen * repetition_penalty, seen / repetition_penalty
        )
        logits.scatter_(dim=1, index=token_ids, src=penalised)

    # Top-p: mask entries whose cumulative sorted probability exceeds top_p.
    if top_p is not None and top_p < 1.0:
        ranked_logits, ranked_idx = torch.sort(logits, descending=True)
        cumulative = torch.cumsum(
            torch.nn.functional.softmax(ranked_logits, dim=-1), dim=-1
        )
        drop_ranked = cumulative > top_p
        drop_ranked[:, 0] = False  # keep at least one option
        drop = drop_ranked.scatter(dim=1, index=ranked_idx, src=drop_ranked)
        logits = logits.masked_fill(drop, -float("Inf"))

    # Temperature is floored at 1e-5 to avoid dividing by zero.
    logits = logits / max(temperature, 1e-5)

    # Top-k: everything below the k-th largest logit is masked out.
    if top_k is not None:
        top_vals, _ = torch.topk(logits, min(top_k, logits.size(-1)))
        threshold = top_vals[:, -1].unsqueeze(-1)
        logits = torch.where(logits < threshold, -float("Inf"), logits)

    return torch.nn.functional.softmax(logits, dim=-1)
|
86 |
+
|
87 |
+
@torch.jit.script
def multinomial_sample_one_no_sync(probs_sort):
    """Draw one index per row from the categorical distribution ``probs_sort``.

    Uses the exponential-race trick: with ``q ~ Exponential(1)`` drawn
    independently per entry, ``argmax(p / q)`` is distributed according to
    ``p``. The noise must be strictly positive for this to hold; Gaussian
    noise can be negative, which lets zero-probability entries win the argmax.
    This form avoids ``torch.multinomial`` (per the original comment, to
    sample without a CUDA synchronization).

    Returns:
        An int32 tensor with the sampled index, last dimension kept as size 1.
    """
    q = torch.empty_like(probs_sort).exponential_(1.0)
    return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
|
92 |
+
|
93 |
+
@torch.jit.script
def sample(
    logits,
    previous_tokens,
    temperature: float = 1.0,
    top_k: Optional[int] = None,
    top_p: Optional[int] = None,
    repetition_penalty: float = 1.0,
):
    """Sample the next token id from ``logits``.

    Returns:
        ``(idx_next, probs)``: the sampled index and the filtered
        probabilities it was drawn from.
    """
    probs = logits_to_probs(
        logits, previous_tokens, temperature, top_k, top_p, repetition_penalty
    )
    return multinomial_sample_one_no_sync(probs), probs
|
107 |
+
|
108 |
+
|
109 |
+
@torch.jit.script
def spectrogram_torch(y:Tensor, n_fft:int, sampling_rate:int, hop_size:int, win_size:int, center:bool=False):
    """Compute the magnitude spectrogram of ``y`` with a Hann window.

    The signal is reflect-padded by ``(n_fft - hop_size) / 2`` samples on
    both sides before the STFT. ``sampling_rate`` is accepted but not used.

    Returns:
        ``sqrt(re**2 + im**2 + 1e-6)`` of the one-sided STFT.
    """
    pad = int((n_fft - hop_size) / 2)
    window = torch.hann_window(win_size, device=y.device, dtype=y.dtype)

    # F.pad needs a channel dimension for reflect mode; add it, then drop it.
    padded = torch.nn.functional.pad(
        y.unsqueeze(1), (pad, pad), mode="reflect"
    ).squeeze(1)

    stft_ri = torch.stft(
        padded,
        n_fft,
        hop_length=hop_size,
        win_length=win_size,
        window=window,
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=False,
    )
    # Last dimension holds (real, imag); the epsilon keeps sqrt away from 0.
    return torch.sqrt(stft_ri.pow(2).sum(-1) + 1e-6)
|
132 |
+
|
133 |
+
|
134 |
+
class DictToAttrRecursive(dict):
    """A dict whose entries can also be read and written as attributes.

    Nested plain dicts are wrapped in ``DictToAttrRecursive`` as well, so
    ``cfg.a.b`` works for ``{"a": {"b": ...}}``.
    """

    def __init__(self, input_dict):
        super().__init__(input_dict)
        for name, entry in input_dict.items():
            wrapped = DictToAttrRecursive(entry) if isinstance(entry, dict) else entry
            self[name] = wrapped
            # Goes through __setattr__, which mirrors the value into both the
            # mapping and the instance attributes.
            setattr(self, name, wrapped)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails.
        try:
            found = self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")
        return found

    def __setattr__(self, key, value):
        stored = DictToAttrRecursive(value) if isinstance(value, dict) else value
        super().__setitem__(key, stored)
        super().__setattr__(key, stored)

    def __delattr__(self, item):
        # Removes the mapping entry only.
        try:
            del self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")
|
160 |
+
|
161 |
+
@torch.jit.script
class T2SMLP:
    # Two-layer feed-forward network (linear -> ReLU -> linear) that works
    # directly on the weight and bias tensors it is given.
    def __init__(self, w1, b1, w2, b2):
        self.w1 = w1
        self.b1 = b1
        self.w2 = w2
        self.b2 = b2

    def forward(self, x):
        """Apply ``linear(w2, b2)`` to ``relu(linear(w1, b1)(x))``."""
        hidden = F.relu(F.linear(x, self.w1, self.b1))
        return F.linear(hidden, self.w2, self.b2)
|
173 |
+
|
174 |
+
@torch.jit.script
class T2SBlock:
    # One transformer layer expressed on raw weight tensors: fused QKV
    # projection, multi-head attention, output projection, then
    # residual + layer norm, MLP, residual + layer norm.
    # ``process_prompt`` handles the whole prompt and creates the K/V cache;
    # ``decode_next_token`` extends that cache by the newly fed positions.
    def __init__(
        self,
        num_heads: int,
        hidden_dim: int,
        mlp: T2SMLP,
        qkv_w,
        qkv_b,
        out_w,
        out_b,
        norm_w1,
        norm_b1,
        norm_eps1: float,
        norm_w2,
        norm_b2,
        norm_eps2: float,
    ):
        self.num_heads = num_heads
        self.mlp = mlp
        self.hidden_dim: int = hidden_dim
        self.qkv_w = qkv_w
        self.qkv_b = qkv_b
        self.out_w = out_w
        self.out_b = out_b
        self.norm_w1 = norm_w1
        self.norm_b1 = norm_b1
        self.norm_eps1 = norm_eps1
        self.norm_w2 = norm_w2
        self.norm_b2 = norm_b2
        self.norm_eps2 = norm_eps2

        # Constant ``False`` tensor used to find un-padded positions; it is
        # moved to the padding mask's device on first use.
        self.false = torch.tensor(False, dtype=torch.bool)

    @torch.jit.ignore
    def to_mask(self, x:torch.Tensor, padding_mask:Optional[torch.Tensor]):
        """Zero the padded positions of ``x``.

        Returns ``x`` unchanged when there is no mask. A boolean mask fills
        its ``True`` positions with 0; any other dtype is multiplied in.
        """
        if padding_mask is None:
            return x

        if padding_mask.dtype == torch.bool:
            return x.masked_fill(padding_mask, 0)
        else:
            return x * padding_mask

    def process_prompt(self, x:torch.Tensor, attn_mask : torch.Tensor, padding_mask:Optional[torch.Tensor]=None):
        """Run the layer over the full prompt and create the K/V cache.

        Returns ``(x, k_cache, v_cache)`` where the caches are the masked key
        and value projections before they are split into heads.
        """
        q, k, v = F.linear(self.to_mask(x, padding_mask), self.qkv_w, self.qkv_b).chunk(3, dim=-1)

        batch_size = q.shape[0]
        q_len = q.shape[1]
        kv_len = k.shape[1]

        q = self.to_mask(q, padding_mask)
        k_cache = self.to_mask(k, padding_mask)
        v_cache = self.to_mask(v, padding_mask)

        # Split the last dimension into heads and move the head axis forward.
        q = q.view(batch_size, q_len, self.num_heads, -1).transpose(1, 2)
        k = k_cache.view(batch_size, kv_len, self.num_heads, -1).transpose(1, 2)
        v = v_cache.view(batch_size, kv_len, self.num_heads, -1).transpose(1, 2)

        # The mask is inverted before being handed to the attention kernel.
        attn = F.scaled_dot_product_attention(q, k, v, ~attn_mask)

        # Merge the heads back into a hidden_dim-wide last dimension.
        attn = attn.permute(2, 0, 1, 3).reshape(batch_size*q_len, self.hidden_dim)
        attn = attn.view(q_len, batch_size, self.hidden_dim).transpose(1, 0)
        attn = F.linear(self.to_mask(attn, padding_mask), self.out_w, self.out_b)

        if padding_mask is not None:
            # Per batch item, apply residual/norm/MLP only to the positions
            # where the mask is False, writing the result back into ``x``
            # in place.
            for i in range(batch_size):
                if self.false.device!= padding_mask.device:
                    self.false = self.false.to(padding_mask.device)
                idx = torch.where(padding_mask[i,:,0]==self.false)[0]
                x_item = x[i,idx,:].unsqueeze(0)
                attn_item = attn[i,idx,:].unsqueeze(0)
                x_item = x_item + attn_item
                x_item = F.layer_norm(
                    x_item, [self.hidden_dim], self.norm_w1, self.norm_b1, self.norm_eps1
                )
                x_item = x_item + self.mlp.forward(x_item)
                x_item = F.layer_norm(
                    x_item,
                    [self.hidden_dim],
                    self.norm_w2,
                    self.norm_b2,
                    self.norm_eps2,
                )
                x[i,idx,:] = x_item.squeeze(0)
            x = self.to_mask(x, padding_mask)
        else:
            # No padding: residual + norm, MLP, residual + norm on everything.
            x = x + attn
            x = F.layer_norm(
                x, [self.hidden_dim], self.norm_w1, self.norm_b1, self.norm_eps1
            )
            x = x + self.mlp.forward(x)
            x = F.layer_norm(
                x,
                [self.hidden_dim],
                self.norm_w2,
                self.norm_b2,
                self.norm_eps2,
            )
        return x, k_cache, v_cache

    def decode_next_token(self, x:torch.Tensor, k_cache:torch.Tensor, v_cache:torch.Tensor):
        """Run the layer on the new position(s) ``x`` using the K/V cache.

        The new keys and values are appended to the caches along dim 1 and
        attention is computed without a mask. Returns
        ``(x, k_cache, v_cache)`` with the extended caches.
        """
        q, k, v = F.linear(x, self.qkv_w, self.qkv_b).chunk(3, dim=-1)

        k_cache = torch.cat([k_cache, k], dim=1)
        v_cache = torch.cat([v_cache, v], dim=1)

        batch_size = q.shape[0]
        q_len = q.shape[1]
        kv_len = k_cache.shape[1]

        q = q.view(batch_size, q_len, self.num_heads, -1).transpose(1, 2)
        k = k_cache.view(batch_size, kv_len, self.num_heads, -1).transpose(1, 2)
        v = v_cache.view(batch_size, kv_len, self.num_heads, -1).transpose(1, 2)

        attn = F.scaled_dot_product_attention(q, k, v)

        # Merge the heads back into a hidden_dim-wide last dimension.
        attn = attn.permute(2, 0, 1, 3).reshape(batch_size*q_len, self.hidden_dim)
        attn = attn.view(q_len, batch_size, self.hidden_dim).transpose(1, 0)
        attn = F.linear(attn, self.out_w, self.out_b)

        x = x + attn
        x = F.layer_norm(
            x, [self.hidden_dim], self.norm_w1, self.norm_b1, self.norm_eps1
        )
        x = x + self.mlp.forward(x)
        x = F.layer_norm(
            x,
            [self.hidden_dim],
            self.norm_w2,
            self.norm_b2,
            self.norm_eps2,
        )
        return x, k_cache, v_cache
|
309 |
+
|
310 |
+
@torch.jit.script
class T2STransformer:
    # Stack of T2SBlock layers applied one after another, keeping one
    # key cache and one value cache per layer.
    def __init__(self, num_blocks : int, blocks: list[T2SBlock]):
        self.num_blocks : int = num_blocks
        self.blocks = blocks

    def process_prompt(
        self, x:torch.Tensor, attn_mask : torch.Tensor,padding_mask : Optional[torch.Tensor]=None):
        """Feed the prompt through every layer and collect the K/V caches.

        Returns ``(x, k_cache, v_cache)`` where both caches are lists with
        one tensor per layer.
        """
        keys : list[torch.Tensor] = []
        values : list[torch.Tensor] = []
        for layer in range(self.num_blocks):
            x, layer_k, layer_v = self.blocks[layer].process_prompt(x, attn_mask, padding_mask)
            keys.append(layer_k)
            values.append(layer_v)
        return x, keys, values

    def decode_next_token(
        self, x:torch.Tensor,
        k_cache: list[torch.Tensor],
        v_cache: list[torch.Tensor]):
        """Run one decoding step through every layer.

        The entries of ``k_cache`` and ``v_cache`` are replaced in place by
        the extended caches each layer returns.
        """
        for layer in range(self.num_blocks):
            x, new_k, new_v = self.blocks[layer].decode_next_token(x, k_cache[layer], v_cache[layer])
            k_cache[layer] = new_k
            v_cache[layer] = new_v
        return x, k_cache, v_cache
|
333 |
+
|
334 |
+
class VitsModel(nn.Module):
    """Wrap the SoVITS synthesizer so it can be called on raw reference audio.

    The checkpoint at ``vits_path`` must contain ``"config"`` and
    ``"weight"`` entries.
    """

    def __init__(self, vits_path):
        super().__init__()
        # Load onto the CPU so a checkpoint saved from a GPU also opens on a
        # CPU-only host. The weights are copied into the freshly built
        # synthesizer below, so the storage device of the loaded tensors does
        # not affect where the model ends up.
        dict_s2 = torch.load(vits_path, map_location="cpu")

        hps = dict_s2["config"]
        # The model version is inferred from the text-embedding table size.
        if dict_s2['weight']['enc_p.text_embedding.weight'].shape[0] == 322:
            hps["model"]["version"] = "v1"
        else:
            hps["model"]["version"] = "v2"

        hps = DictToAttrRecursive(hps)
        hps.model.semantic_frame_rate = "25hz"
        self.hps = hps

        self.vq_model = SynthesizerTrn(
            self.hps.data.filter_length // 2 + 1,
            self.hps.train.segment_size // self.hps.data.hop_length,
            n_speakers=self.hps.data.n_speakers,
            **self.hps.model
        )
        self.vq_model.eval()
        # strict=False: keys that do not match the synthesizer are ignored.
        self.vq_model.load_state_dict(dict_s2["weight"], strict=False)

    def forward(self, text_seq, pred_semantic, ref_audio, speed=1.0):
        """Synthesize audio from semantic tokens, text and reference audio.

        The reference spectrogram is computed from ``ref_audio`` with the STFT
        settings stored in the checkpoint config. Returns element ``[0, 0]``
        of the synthesizer output.
        """
        refer = spectrogram_torch(
            ref_audio,
            self.hps.data.filter_length,
            self.hps.data.sampling_rate,
            self.hps.data.hop_length,
            self.hps.data.win_length,
            center=False
        )
        return self.vq_model(pred_semantic, text_seq, refer, speed)[0, 0]
|
366 |
+
|
367 |
+
class T2SModel(nn.Module):
    """Scriptable text-to-semantic decoder built from a trained Lightning module.

    The embedding, position and prediction layers are reused from
    ``raw_t2s.model``; the transformer layers are re-packed into
    ``T2SBlock`` objects so decoding can use an explicit K/V cache.
    """

    def __init__(self,raw_t2s:Text2SemanticLightningModule):
        super(T2SModel, self).__init__()
        self.model_dim = raw_t2s.model.model_dim
        self.embedding_dim = raw_t2s.model.embedding_dim
        self.num_head = raw_t2s.model.num_head
        self.num_layers = raw_t2s.model.num_layers
        self.vocab_size = raw_t2s.model.vocab_size
        self.phoneme_vocab_size = raw_t2s.model.phoneme_vocab_size
        self.EOS:int = int(raw_t2s.model.EOS)
        self.norm_first = raw_t2s.model.norm_first
        # EOS must be the last vocabulary index; forward() relies on this
        # when it drops the last logit to forbid stopping.
        assert self.EOS == self.vocab_size - 1
        # Multiplied with max_sec below to obtain the early-stop token count.
        self.hz = 50

        self.bert_proj = raw_t2s.model.bert_proj
        self.ar_text_embedding = raw_t2s.model.ar_text_embedding
        self.ar_text_position = raw_t2s.model.ar_text_position
        self.ar_audio_embedding = raw_t2s.model.ar_audio_embedding
        self.ar_audio_position = raw_t2s.model.ar_audio_position

        blocks = []
        h = raw_t2s.model.h

        # Re-pack every trained layer's tensors into a T2SBlock.
        for i in range(self.num_layers):
            layer = h.layers[i]
            t2smlp = T2SMLP(
                layer.linear1.weight,
                layer.linear1.bias,
                layer.linear2.weight,
                layer.linear2.bias
            )

            block = T2SBlock(
                self.num_head,
                self.model_dim,
                t2smlp,
                layer.self_attn.in_proj_weight,
                layer.self_attn.in_proj_bias,
                layer.self_attn.out_proj.weight,
                layer.self_attn.out_proj.bias,
                layer.norm1.weight,
                layer.norm1.bias,
                layer.norm1.eps,
                layer.norm2.weight,
                layer.norm2.bias,
                layer.norm2.eps
            )

            blocks.append(block)

        self.t2s_transformer = T2STransformer(self.num_layers, blocks)

        self.ar_predict_layer = raw_t2s.model.ar_predict_layer
        self.max_sec = raw_t2s.config["data"]["max_sec"]
        self.top_k = int(raw_t2s.config["inference"]["top_k"])
        # Generation stops once more than hz * max_sec tokens were produced.
        self.early_stop_num = torch.LongTensor([self.hz * self.max_sec])

    def forward(self,prompts:LongTensor, ref_seq:LongTensor, text_seq:LongTensor, ref_bert:torch.Tensor, text_bert:torch.Tensor):
        """Autoregressively generate semantic tokens.

        The reference and target phoneme ids (and their BERT features) are
        concatenated into the text context, ``prompts`` seeds the audio token
        sequence, and tokens are sampled one at a time until EOS or the
        early-stop limit. Returns the last ``idx`` entries of the token
        sequence (the final one overwritten with 0) with a leading dimension
        added.
        """
        bert = torch.cat([ref_bert.T, text_bert.T], 1)
        all_phoneme_ids = torch.cat([ref_seq, text_seq], 1)
        bert = bert.unsqueeze(0)

        # Text side: phoneme embedding + projected BERT features + position.
        x = self.ar_text_embedding(all_phoneme_ids)
        x = x + self.bert_proj(bert.transpose(1, 2))
        x:torch.Tensor = self.ar_text_position(x)

        early_stop_num = self.early_stop_num


        y = prompts

        x_len = x.shape[1]
        x_attn_mask = torch.zeros((x_len, x_len), dtype=torch.bool)

        y_emb = self.ar_audio_embedding(y)
        y_len = y_emb.shape[1]
        prefix_len = y.shape[1]
        y_pos = self.ar_audio_position(y_emb)
        xy_pos = torch.concat([x, y_pos], dim=1)

        bsz = x.shape[0]
        src_len = x_len + y_len
        # In these masks True marks a blocked pair; T2SBlock inverts the
        # mask before calling the attention kernel.
        x_attn_mask_pad = F.pad(
            x_attn_mask,
            (0, y_len),  # extend the all-False x-x block with an all-True x-y block -> (x, x+y)
            value=True,
        )
        y_attn_mask = F.pad(  # upper-triangular True y-y block, extended on the left with a False y-x block -> (y, x+y)
            torch.triu(torch.ones(y_len, y_len, dtype=torch.bool), diagonal=1),
            (x_len, 0),
            value=False,
        )
        xy_attn_mask = torch.concat([x_attn_mask_pad, y_attn_mask], dim=0)\
            .unsqueeze(0)\
            .expand(bsz*self.num_head, -1, -1)\
            .view(bsz, self.num_head, src_len, src_len)\
            .to(device=x.device, dtype=torch.bool)

        idx = 0

        # First step: run the whole prompt and create the K/V caches.
        xy_dec, k_cache, v_cache = self.t2s_transformer.process_prompt(xy_pos, xy_attn_mask, None)

        logits = self.ar_predict_layer(xy_dec[:, -1])
        # Drop the last (EOS) logit so the first sampled token cannot be EOS.
        logits = logits[:, :-1]
        samples = sample(logits, y, top_k=self.top_k, top_p=1, repetition_penalty=1.35, temperature=1.0)[0]
        y = torch.concat([y, samples], dim=1)
        y_emb = self.ar_audio_embedding(y[:, -1:])
        # Positional encoding for the single new token, applied by hand.
        xy_pos = y_emb * self.ar_audio_position.x_scale + self.ar_audio_position.alpha * self.ar_audio_position.pe[:, y_len + idx].to(dtype=y_emb.dtype,device=y_emb.device)

        stop = False
        for idx in range(1, 1500):
            xy_dec, k_cache, v_cache = self.t2s_transformer.decode_next_token(xy_pos, k_cache, v_cache)
            logits = self.ar_predict_layer(xy_dec[:, -1])

            if(idx<11):  # at least 10 tokens must be predicted before stopping is allowed (0.4 s)
                logits = logits[:, :-1]

            samples = sample(logits, y, top_k=self.top_k, top_p=1, repetition_penalty=1.35, temperature=1.0)[0]

            y = torch.concat([y, samples], dim=1)

            # Stop on the length limit, or when EOS is either the most likely
            # token or the sampled one.
            if early_stop_num != -1 and (y.shape[1] - prefix_len) > early_stop_num:
                stop = True
            if torch.argmax(logits, dim=-1)[0] == self.EOS or samples[0, 0] == self.EOS:
                stop = True
            if stop:
                if y.shape[1] == 0:
                    y = torch.concat([y, torch.zeros_like(samples)], dim=1)
                break

            y_emb = self.ar_audio_embedding(y[:, -1:])
            xy_pos = y_emb * self.ar_audio_position.x_scale + self.ar_audio_position.alpha * self.ar_audio_position.pe[:, y_len + idx].to(dtype=y_emb.dtype,device=y_emb.device)

        # Overwrite the final token (the one sampled at the stopping step).
        y[0,-1] = 0

        return y[:, -idx:].unsqueeze(0)
|
515 |
+
|
516 |
+
# BERT checkpoint location; can be overridden with the ``bert_path``
# environment variable.
_DEFAULT_BERT_PATH = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
bert_path = os.environ.get("bert_path", _DEFAULT_BERT_PATH)

# Tell the cnhubert feature extractor where its checkpoint lives.
cnhubert_base_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
cnhubert.cnhubert_base_path = cnhubert_base_path
|
521 |
+
|
522 |
+
@torch.jit.script
def build_phone_level_feature(res:Tensor, word2ph:IntTensor):
    """Repeat row ``i`` of ``res`` ``word2ph[i]`` times and stack the result.

    Returns a tensor with ``sum(word2ph)`` rows and the same row width as
    ``res``.
    """
    pieces = []
    for idx in range(word2ph.shape[0]):
        pieces.append(res[idx].repeat(word2ph[idx].item(), 1))
    # [sum(word2ph), feature_dim]
    return torch.cat(pieces, dim=0)
|
531 |
+
|
532 |
+
class MyBertModel(torch.nn.Module):
    """Wrap a BERT model so it returns phone-level features directly."""

    def __init__(self, bert_model):
        super().__init__()
        self.bert = bert_model

    def forward(self, input_ids:torch.Tensor, attention_mask:torch.Tensor, token_type_ids:torch.Tensor, word2ph:IntTensor):
        """Return third-from-last hidden-state features expanded to phone level.

        Element 1 of the wrapped model's output is used as the sequence of
        hidden states. Only the first batch item is kept, and the first and
        last token positions are dropped (presumably the special tokens --
        confirm against the tokenizer).
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        hidden_states = outputs[1]
        res = torch.cat(hidden_states[-3:-2], -1)[0][1:-1]
        return build_phone_level_feature(res, word2ph)
|
542 |
+
|
543 |
+
class SSLModel(torch.nn.Module):
    """Expose the cnhubert model's last hidden state."""

    def __init__(self):
        super().__init__()
        self.ssl = cnhubert.get_model().model

    def forward(self, ref_audio_16k)-> torch.Tensor:
        """Return ``last_hidden_state`` with dimensions 1 and 2 swapped."""
        hidden = self.ssl(ref_audio_16k)["last_hidden_state"]
        return hidden.transpose(1, 2)
|
551 |
+
|
552 |
+
class ExportSSLModel(torch.nn.Module):
    """Export wrapper around the SSL model that also offers resampling."""

    def __init__(self,ssl:SSLModel):
        super().__init__()
        self.ssl = ssl

    def forward(self, ref_audio:torch.Tensor):
        """Delegate to the wrapped SSL model."""
        return self.ssl(ref_audio)

    @torch.jit.export
    def resample(self,ref_audio:torch.Tensor,src_sr:int,dst_sr:int)->torch.Tensor:
        """Resample ``ref_audio`` from ``src_sr`` to ``dst_sr`` as float32."""
        return resamplex(ref_audio,src_sr,dst_sr).float()
|
564 |
+
|
565 |
+
def export_bert(output_path):
    """Trace the BERT wrapper on a sample sentence and save it.

    The traced module is written to ``bert_model.pt`` inside ``output_path``.
    """
    tokenizer = AutoTokenizer.from_pretrained(bert_path)

    text = "叹息声一声接着一声传出,木兰对着房门织布.听不见织布机织布的声音,只听见木兰在叹息.问木兰在想什么?问木兰在惦记什么?木兰答道,我也没有在想什么,也没有在惦记什么."
    encoded = tokenizer(text, return_tensors="pt")

    # Example word-to-phone counts: 1 for the listed punctuation marks,
    # 2 for every other character.
    word2ph = [1 if c in [',','。',':','?',",",".","?"] else 2 for c in text]

    bert_model = AutoModelForMaskedLM.from_pretrained(bert_path,output_hidden_states=True,torchscript=True)
    my_bert_model = MyBertModel(bert_model)

    ref_bert_inputs = {
        'input_ids': encoded['input_ids'],
        'attention_mask': encoded['attention_mask'],
        'token_type_ids': encoded['token_type_ids'],
        'word2ph': torch.Tensor(word2ph).int()
    }

    # Mark the sequence-length dimensions as dynamic.
    torch._dynamo.mark_dynamic(ref_bert_inputs['input_ids'], 1)
    torch._dynamo.mark_dynamic(ref_bert_inputs['attention_mask'], 1)
    torch._dynamo.mark_dynamic(ref_bert_inputs['token_type_ids'], 1)
    torch._dynamo.mark_dynamic(ref_bert_inputs['word2ph'], 0)

    traced = torch.jit.trace(my_bert_model,example_kwarg_inputs=ref_bert_inputs)
    traced.save(os.path.join(output_path, "bert_model.pt"))
    print('#### exported bert ####')
|
597 |
+
|
598 |
+
def export(gpt_path, vits_path, ref_audio_path, ref_text, output_path, export_bert_and_ssl=False, device='cpu'):
    """Trace the combined GPT + SoVITS pipeline to TorchScript and save it.

    Args:
        gpt_path: GPT checkpoint file, read with ``torch.load``.
        vits_path: SoVITS checkpoint file, handed to ``VitsModel``.
        ref_audio_path: Reference audio; loaded at 16 kHz and used as the tracing example.
        ref_text: Reference text; passed directly to ``get_phones_and_bert``.
        output_path: Directory that receives ``gpt_sovits_model.pt`` (plus
            ``ssl_model.pt`` and ``bert_model.pt`` when ``export_bert_and_ssl`` is set).
        export_bert_and_ssl: Also export the SSL and BERT models.
        device: Device the models and example inputs are moved to. ``None`` is
            treated as ``'cpu'`` so a caller forwarding an unset CLI option
            does not override the default.
    """
    if device is None:
        device = 'cpu'

    if not os.path.exists(output_path):
        # exist_ok guards against another process creating the directory in between.
        os.makedirs(output_path, exist_ok=True)
        print(f"目录已创建: {output_path}")
    else:
        print(f"目录已存在: {output_path}")

    ref_audio = torch.tensor([load_audio(ref_audio_path, 16000)]).float()
    ssl = SSLModel()
    if export_bert_and_ssl:
        s = ExportSSLModel(torch.jit.trace(ssl, example_inputs=(ref_audio)))
        ssl_path = os.path.join(output_path, "ssl_model.pt")
        torch.jit.script(s).save(ssl_path)
        print('#### exported ssl ####')
        export_bert(output_path)
    else:
        s = ExportSSLModel(ssl)

    print(f"device: {device}")

    # Example inputs for tracing: the reference text and a fixed target sentence.
    ref_seq_id, ref_bert_T, ref_norm_text = get_phones_and_bert(ref_text, "all_zh", 'v2')
    ref_seq = torch.LongTensor([ref_seq_id]).to(device)
    ref_bert = ref_bert_T.T.to(ref_seq.device)
    text_seq_id, text_bert_T, norm_text = get_phones_and_bert("这是一条测试语音,说什么无所谓,只是给它一个例子", "all_zh", 'v2')
    text_seq = torch.LongTensor([text_seq_id]).to(device)
    text_bert = text_bert_T.T.to(text_seq.device)

    ssl_content = ssl(ref_audio).to(device)

    vits = VitsModel(vits_path).to(device)
    vits.eval()

    # Load onto CPU first so a checkpoint saved on a GPU can be opened on a
    # CPU-only host; the model is moved to ``device`` right after.
    dict_s1 = torch.load(gpt_path, map_location="cpu")
    raw_t2s = get_raw_t2s_model(dict_s1).to(device)
    print('#### get_raw_t2s_model ####')
    print(raw_t2s.config)
    t2s_m = T2SModel(raw_t2s)
    t2s_m.eval()
    t2s = torch.jit.script(t2s_m).to(device)
    print('#### script t2s_m ####')

    print("vits.hps.data.sampling_rate:", vits.hps.data.sampling_rate)
    gpt_sovits = GPT_SoVITS(t2s, vits).to(device)
    gpt_sovits.eval()

    ref_audio_sr = s.resample(ref_audio, 16000, 32000).to(device)

    # Mark the dimension of each example input that should be treated as dynamic.
    torch._dynamo.mark_dynamic(ssl_content, 2)
    torch._dynamo.mark_dynamic(ref_audio_sr, 1)
    torch._dynamo.mark_dynamic(ref_seq, 1)
    torch._dynamo.mark_dynamic(text_seq, 1)
    torch._dynamo.mark_dynamic(ref_bert, 0)
    torch._dynamo.mark_dynamic(text_bert, 0)

    with torch.no_grad():
        gpt_sovits_export = torch.jit.trace(
            gpt_sovits,
            example_inputs=(
                ssl_content,
                ref_audio_sr,
                ref_seq,
                text_seq,
                ref_bert,
                text_bert))

        gpt_sovits_path = os.path.join(output_path, "gpt_sovits_model.pt")
        gpt_sovits_export.save(gpt_sovits_path)
        print('#### exported gpt_sovits ####')
|
670 |
+
|
671 |
+
@torch.jit.script
def parse_audio(ref_audio):
    # Resample the input, taken to be at 48 kHz, to 16 kHz and to 32 kHz;
    # both results are cast to float and returned as a pair.
    return (
        torchaudio.functional.resample(ref_audio, 48000, 16000).float(),
        torchaudio.functional.resample(ref_audio, 48000, 32000).float(),
    )
|
676 |
+
|
677 |
+
@torch.jit.script
def resamplex(ref_audio:torch.Tensor,src_sr:int,dst_sr:int)->torch.Tensor:
    # Thin scripted wrapper: resample from src_sr to dst_sr, then cast to float.
    resampled = torchaudio.functional.resample(ref_audio, src_sr, dst_sr)
    return resampled.float()
|
680 |
+
|
681 |
+
class GPT_SoVITS(nn.Module):
    """Chains the text-to-semantic model and the SoVITS model into one module."""

    def __init__(self, t2s:T2SModel,vits:VitsModel):
        super().__init__()
        self.t2s = t2s
        self.vits = vits

    def forward(self, ssl_content:torch.Tensor, ref_audio_sr:torch.Tensor, ref_seq:Tensor, text_seq:Tensor, ref_bert:Tensor, text_bert:Tensor, speed=1.0):
        """Run latent extraction, semantic prediction and synthesis in sequence."""
        # Take entry [0, 0] of the extracted codes and give it a leading axis
        # to form the prompt for the text-to-semantic model.
        latent_codes = self.vits.vq_model.extract_latent(ssl_content)
        prompts = latent_codes[0, 0].unsqueeze(0)

        pred_semantic = self.t2s(prompts, ref_seq, text_seq, ref_bert, text_bert)
        return self.vits(text_seq, pred_semantic, ref_audio_sr, speed)
|
695 |
+
|
696 |
+
def test():
    """Manual smoke test: run previously exported TorchScript models on CUDA.

    Loads the exported BERT, SSL and GPT-SoVITS modules from hard-coded
    ``onnx/`` paths, compares the exported BERT output with the one returned
    by ``get_phones_and_bert``, synthesizes a fixed sentence and writes
    ``out.wav``.
    """
    cli = argparse.ArgumentParser(description="GPT-SoVITS Command Line Tool")
    for flag, description in (
        ('--gpt_model', "Path to the GPT model file"),
        ('--sovits_model', "Path to the SoVITS model file"),
        ('--ref_audio', "Path to the reference audio file"),
        ('--ref_text', "Path to the reference text file"),
        ('--output_path', "Path to the output directory"),
    ):
        cli.add_argument(flag, required=True, help=description)

    options = cli.parse_args()
    ref_audio_path = options.ref_audio
    ref_text = options.ref_text

    tokenizer = AutoTokenizer.from_pretrained(bert_path)
    my_bert = torch.jit.load("onnx/bert_model.pt", map_location='cuda')
    ssl = torch.jit.load("onnx/by/ssl_model.pt", map_location='cuda')
    gpt_sovits = torch.jit.load("onnx/by/gpt_sovits_model.pt", map_location='cuda')

    ref_seq_id, ref_bert_T, ref_norm_text = get_phones_and_bert(ref_text, "all_zh", 'v2')
    ref_seq = torch.LongTensor([ref_seq_id])
    ref_bert = ref_bert_T.T.to(ref_seq.device)

    sentence = "昨天晚上看见征兵文书,知道君主在大规模征兵,那么多卷征兵文册,每一卷上都有父亲的名字."
    text_seq_id, text_bert_T, norm_text = get_phones_and_bert(sentence, "all_zh", 'v2')

    # Build the inputs of the exported BERT module by hand:
    # one word2ph entry per character, 1 for listed punctuation, 2 otherwise.
    encoded = tokenizer(sentence, return_tensors="pt")
    punctuation = (',', '。', ':', '?', "?", ",", ".")
    encoded['word2ph'] = torch.Tensor([1 if ch in punctuation else 2 for ch in sentence]).int()

    test_bert = my_bert(
        encoded['input_ids'].to('cuda'),
        encoded['attention_mask'].to('cuda'),
        encoded['token_type_ids'].to('cuda'),
        encoded['word2ph'].to('cuda'),
    )

    text_seq = torch.LongTensor([text_seq_id])
    text_bert = text_bert_T.T.to(text_seq.device)

    print('text_bert:', text_bert.shape, text_bert)
    print('test_bert:', test_bert.shape, test_bert)
    print(torch.allclose(text_bert.to('cuda'), test_bert))

    print('text_seq:', text_seq.shape)
    print('text_bert:', text_bert.shape, text_bert.type())

    # [1, N]
    ref_audio = torch.tensor([load_audio(ref_audio_path, 16000)]).float().to('cuda')
    print('ref_audio:', ref_audio.shape)

    ref_audio_sr = ssl.resample(ref_audio, 16000, 32000)
    print('start ssl')
    ssl_content = ssl(ref_audio)

    print('start gpt_sovits:')
    print('ssl_content:', ssl_content.shape)
    print('ref_audio_sr:', ref_audio_sr.shape)
    print('ref_seq:', ref_seq.shape)
    ref_seq = ref_seq.to('cuda')
    print('text_seq:', text_seq.shape)
    text_seq = text_seq.to('cuda')
    print('ref_bert:', ref_bert.shape)
    ref_bert = ref_bert.to('cuda')
    print('text_bert:', text_bert.shape)
    text_bert = text_bert.to('cuda')

    with torch.no_grad():
        audio = gpt_sovits(ssl_content, ref_audio_sr, ref_seq, text_seq, ref_bert, test_bert)
    print('start write wav')
    soundfile.write("out.wav", audio.detach().cpu().numpy(), 32000)
|
791 |
+
|
792 |
+
|
793 |
+
import text
|
794 |
+
import json
|
795 |
+
|
796 |
+
def export_symbel(version='v2'):
    """Dump the symbol-to-id table of the given version to ``onnx/symbols_<version>.json``.

    ``'v1'`` selects the v1 table; any other value selects the v2 table.
    """
    if version == 'v1':
        table = text._symbol_to_id_v1
        target = "onnx/symbols_v1.json"
    else:
        table = text._symbol_to_id_v2
        target = "onnx/symbols_v2.json"

    with open(target, "w") as handle:
        json.dump(table, handle, indent=4)
|
805 |
+
|
806 |
+
def main():
    """Parse the command line and run ``export`` with the given options."""
    parser = argparse.ArgumentParser(description="GPT-SoVITS Command Line Tool")
    parser.add_argument('--gpt_model', required=True, help="Path to the GPT model file")
    parser.add_argument('--sovits_model', required=True, help="Path to the SoVITS model file")
    parser.add_argument('--ref_audio', required=True, help="Path to the reference audio file")
    # export() feeds this value straight into get_phones_and_bert, so it is
    # the text itself, not a file path.
    parser.add_argument('--ref_text', required=True, help="Reference text (transcript of the reference audio)")
    parser.add_argument('--output_path', required=True, help="Path to the output directory")
    parser.add_argument('--export_common_model', action='store_true', help="Export Bert and SSL model")
    # Without a default, an omitted --device would pass None and override
    # export()'s own 'cpu' default.
    parser.add_argument('--device', default='cpu', help="Device to use")

    args = parser.parse_args()
    export(
        gpt_path=args.gpt_model,
        vits_path=args.sovits_model,
        ref_audio_path=args.ref_audio,
        ref_text=args.ref_text,
        output_path=args.output_path,
        device=args.device,
        export_bert_and_ssl=args.export_common_model,
    )
|
826 |
+
|
827 |
+
import inference_webui
|
828 |
+
if __name__ == "__main__":
    # Switch the imported inference_webui module to full precision before exporting.
    inference_webui.is_half=False
    inference_webui.dtype=torch.float32
    main()
    # test()  # manual smoke test of the exported models; disabled by default
|
GPT_SoVITS/extract_hubert.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import torch
|
4 |
+
import numpy as np
|
5 |
+
import traceback
|
6 |
+
from scipy.io import wavfile
|
7 |
+
import librosa
|
8 |
+
from pathlib import Path
|
9 |
+
from time import time as ttime
|
10 |
+
import shutil
|
11 |
+
from tools.my_utils import load_audio, clean_path
|
12 |
+
from feature_extractor import cnhubert
|
13 |
+
|
14 |
+
def my_save(fea, path, i_part):
    """Save ``fea`` to ``path`` via a temporary file in the working directory.

    Works around ``torch.save`` not supporting Chinese paths: the object is
    first written to a temporary file in the current directory and then
    moved to its destination.

    Args:
        fea: Object to serialize with ``torch.save``.
        path: Final destination file.
        i_part: Suffix mixed into the temporary file name.
    """
    target_dir, file_name = os.path.split(path)
    tmp_path = f"{ttime()}{i_part}.pth"
    try:
        torch.save(fea, tmp_path)
        # os.path.join keeps a bare file name relative; the previous
        # f"{dir}/{name}" turned it into "/name" at the filesystem root.
        shutil.move(tmp_path, os.path.join(target_dir, file_name))
    finally:
        # Only present here if saving or moving failed; do not leave it behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
21 |
+
|
22 |
+
def extract_hubert_features(data_dir="data8", exp_dir="logs/s2"):
    """Extract Hubert features for stage 2 training.

    Reads ``<exp_dir>/2-name2text.txt`` (tab-separated, four fields per line,
    the first being the wav name), loads each wav from ``<exp_dir>/5-wav32k``,
    and writes the Hubert features to ``<exp_dir>/4-cnhubert/<wav_name>.pt``
    and a rescaled int16 wav to ``<exp_dir>/5-wav32k/<wav_name>``.

    NOTE(review): the input wav directory and the output wav directory are
    the same path, so the source wav is overwritten with the rescaled audio;
    confirm this is intended.

    Args:
        data_dir: Accepted for backward compatibility; not used by this function.
        exp_dir: Experiment directory, relative to the project root.
    """
    # Get project root directory (parent of GPT_SoVITS)
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    # Convert relative path to absolute
    exp_dir = os.path.join(root_dir, exp_dir)

    inp_text = os.path.join(exp_dir, "2-name2text.txt")
    inp_wav_dir = os.path.join(exp_dir, "5-wav32k")
    # Single-shard run: process every line (shard 0 of 1).
    i_part = "0"
    all_parts = "1"
    opt_dir = exp_dir
    cnhubert.cnhubert_base_path = os.path.join(root_dir, "pretrained_models", "chinese-hubert-base")
    # Half precision is used whenever CUDA is available.
    is_half = torch.cuda.is_available()

    print("Starting Hubert feature extraction...")
    print(f"Input text file: {inp_text}")
    print(f"Input wav directory: {inp_wav_dir}")
    print(f"Output directory: {opt_dir}")

    hubert_dir = f"{opt_dir}/4-cnhubert"
    wav32dir = f"{opt_dir}/5-wav32k"
    os.makedirs(opt_dir, exist_ok=True)
    os.makedirs(hubert_dir, exist_ok=True)
    os.makedirs(wav32dir, exist_ok=True)

    maxx = 0.95
    alpha = 0.5
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    print(f"Loading Hubert model from: {cnhubert.cnhubert_base_path}")
    model = cnhubert.get_model()
    if is_half:
        model = model.half().to(device)
    else:
        model = model.to(device)

    # (wav_name, wav_path) pairs whose features contained NaN.
    nan_fails = []

    def name2go(wav_name, wav_path):
        """Process one wav; reads ``model`` and ``is_half`` from the enclosing scope."""
        print(f"Processing: {wav_name} from {wav_path}")
        hubert_path = f"{hubert_dir}/{wav_name}.pt"
        if os.path.exists(hubert_path):
            print(f"Skipping {wav_name} - already processed")
            return

        if not os.path.exists(wav_path):
            print(f"Error: WAV file not found: {wav_path}")
            return

        tmp_audio = load_audio(wav_path, 32000)
        if tmp_audio is None:
            print(f"Error: Failed to load audio: {wav_path}")
            return

        tmp_max = np.abs(tmp_audio).max()
        if tmp_max > 2.2:
            print(f"{wav_name}-filtered,{tmp_max}")
            return

        # Blend peak-normalized audio with the raw audio (weight ``alpha``):
        # once at int16 scale for the saved wav, once at a smaller scale for
        # the model input.
        tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha * 32768)) + ((1 - alpha) * 32768) * tmp_audio
        tmp_audio32b = (tmp_audio / tmp_max * (maxx * alpha * 1145.14)) + ((1 - alpha) * 1145.14) * tmp_audio
        tmp_audio = librosa.resample(tmp_audio32b, orig_sr=32000, target_sr=16000)

        tensor_wav16 = torch.from_numpy(tmp_audio)
        if is_half:
            tensor_wav16 = tensor_wav16.half().to(device)
        else:
            tensor_wav16 = tensor_wav16.to(device)

        ssl = model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1, 2).cpu()

        if np.isnan(ssl.detach().numpy()).sum() != 0:
            nan_fails.append((wav_name, wav_path))
            print(f"nan filtered:{wav_name}")
            return

        wavfile.write(
            f"{wav32dir}/{wav_name}",
            32000,
            tmp_audio32.astype("int16"),
        )
        my_save(ssl, hubert_path, i_part)
        print(f"Successfully processed {wav_name}")

    print(f"Reading text file: {inp_text}")
    with open(inp_text, "r", encoding="utf8") as f:
        lines = f.read().strip("\n").split("\n")
    print(f"Found {len(lines)} lines in text file")

    for line in lines[int(i_part)::int(all_parts)]:
        try:
            print(f"Processing line: {line}")
            wav_name, _, _, _ = line.split("\t")
            wav_name = clean_path(wav_name)
            if inp_wav_dir:
                wav_name = os.path.basename(wav_name)
                wav_path = f"{inp_wav_dir}/{wav_name}"
            else:
                wav_path = wav_name
                wav_name = os.path.basename(wav_name)
            name2go(wav_name, wav_path)
        except Exception:
            # Best effort: report the failing line and continue with the next one.
            print(f"Error processing line: {line}")
            print(traceback.format_exc())

    if len(nan_fails) > 0 and is_half:
        print("Retrying failed files in float32 mode...")
        is_half = False
        model = model.float()
        # Iterate over a snapshot: name2go appends to nan_fails again when a
        # file still produces NaN, which would otherwise extend this loop
        # indefinitely.
        for retry_name, retry_path in list(nan_fails):
            try:
                name2go(retry_name, retry_path)
            except Exception:
                print(f"Error retrying {retry_name}")
                print(traceback.format_exc())

    print("Hubert feature extraction complete.")
|
148 |
+
|
149 |
+
if __name__ == "__main__":
    # Run the extraction with the default experiment directory when executed as a script.
    extract_hubert_features()
|
GPT_SoVITS/inference_cli.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import soundfile as sf
|
4 |
+
|
5 |
+
from tools.i18n.i18n import I18nAuto
|
6 |
+
from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav
|
7 |
+
|
8 |
+
i18n = I18nAuto()
|
9 |
+
|
10 |
+
def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path):
    """Synthesize speech for a target text file and save it as ``output.wav``.

    Args:
        GPT_model_path: GPT weights to load.
        SoVITS_model_path: SoVITS weights to load.
        ref_audio_path: Reference audio file.
        ref_text_path: UTF-8 text file with the reference text.
        ref_language: Language label of the reference audio (translated via ``i18n``).
        target_text_path: UTF-8 text file with the text to synthesize.
        target_language: Language label of the target text (translated via ``i18n``).
        output_path: Directory that receives ``output.wav``; created if missing.
    """
    # Read reference text
    with open(ref_text_path, 'r', encoding='utf-8') as file:
        ref_text = file.read()

    # Read target text
    with open(target_text_path, 'r', encoding='utf-8') as file:
        target_text = file.read()

    # Change model weights
    change_gpt_weights(gpt_path=GPT_model_path)
    change_sovits_weights(sovits_path=SoVITS_model_path)

    # Synthesize audio
    synthesis_result = get_tts_wav(ref_wav_path=ref_audio_path,
                                   prompt_text=ref_text,
                                   prompt_language=i18n(ref_language),
                                   text=target_text,
                                   text_language=i18n(target_language), top_p=1, temperature=1)

    # Exhaust the result and keep only its last (sampling_rate, audio) item.
    result_list = list(synthesis_result)

    if result_list:
        last_sampling_rate, last_audio_data = result_list[-1]
        # Make sure the destination exists; an empty path means the current
        # directory and needs no creation.
        if output_path:
            os.makedirs(output_path, exist_ok=True)
        output_wav_path = os.path.join(output_path, "output.wav")
        sf.write(output_wav_path, last_audio_data, last_sampling_rate)
        print(f"Audio saved to {output_wav_path}")
|
37 |
+
|
38 |
+
def main():
    """Parse the command line and hand the options to ``synthesize``."""
    cli = argparse.ArgumentParser(description="GPT-SoVITS Command Line Tool")

    # (flag, extra keyword arguments) in the original declaration order.
    option_specs = (
        ('--gpt_model', {"help": "Path to the GPT model file"}),
        ('--sovits_model', {"help": "Path to the SoVITS model file"}),
        ('--ref_audio', {"help": "Path to the reference audio file"}),
        ('--ref_text', {"help": "Path to the reference text file"}),
        ('--ref_language', {"choices": ["中文", "英文", "日文"],
                            "help": "Language of the reference audio"}),
        ('--target_text', {"help": "Path to the target text file"}),
        ('--target_language', {"choices": ["中文", "英文", "日文", "中英混合", "日英混合", "多语种混合"],
                               "help": "Language of the target text"}),
        ('--output_path', {"help": "Path to the output directory"}),
    )
    for flag, extra in option_specs:
        cli.add_argument(flag, required=True, **extra)

    opts = cli.parse_args()

    synthesize(
        opts.gpt_model,
        opts.sovits_model,
        opts.ref_audio,
        opts.ref_text,
        opts.ref_language,
        opts.target_text,
        opts.target_language,
        opts.output_path,
    )
|
52 |
+
|
53 |
+
if __name__ == '__main__':
    # Command-line entry point.
    main()
|
55 |
+
|
GPT_SoVITS/inference_gui.py
ADDED
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
from PyQt5.QtCore import QEvent
|
4 |
+
from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QLineEdit, QPushButton, QTextEdit
|
5 |
+
from PyQt5.QtWidgets import QGridLayout, QVBoxLayout, QWidget, QFileDialog, QStatusBar, QComboBox
|
6 |
+
import soundfile as sf
|
7 |
+
|
8 |
+
from tools.i18n.i18n import I18nAuto
|
9 |
+
i18n = I18nAuto()
|
10 |
+
|
11 |
+
from inference_webui import gpt_path, sovits_path, change_gpt_weights, change_sovits_weights, get_tts_wav
|
12 |
+
|
13 |
+
|
14 |
+
class GPTSoVITSGUI(QMainWindow):
|
15 |
+
GPT_Path = gpt_path
|
16 |
+
SoVITS_Path = sovits_path
|
17 |
+
|
18 |
+
def __init__(self):
|
19 |
+
super().__init__()
|
20 |
+
|
21 |
+
self.setWindowTitle('GPT-SoVITS GUI')
|
22 |
+
self.setGeometry(800, 450, 950, 850)
|
23 |
+
|
24 |
+
self.setStyleSheet("""
|
25 |
+
QWidget {
|
26 |
+
background-color: #a3d3b1;
|
27 |
+
}
|
28 |
+
|
29 |
+
QTabWidget::pane {
|
30 |
+
background-color: #a3d3b1;
|
31 |
+
}
|
32 |
+
|
33 |
+
QTabWidget::tab-bar {
|
34 |
+
alignment: left;
|
35 |
+
}
|
36 |
+
|
37 |
+
QTabBar::tab {
|
38 |
+
background: #8da4bf;
|
39 |
+
color: #ffffff;
|
40 |
+
padding: 8px;
|
41 |
+
}
|
42 |
+
|
43 |
+
QTabBar::tab:selected {
|
44 |
+
background: #2a3f54;
|
45 |
+
}
|
46 |
+
|
47 |
+
QLabel {
|
48 |
+
color: #000000;
|
49 |
+
}
|
50 |
+
|
51 |
+
QPushButton {
|
52 |
+
background-color: #4CAF50;
|
53 |
+
color: white;
|
54 |
+
padding: 8px;
|
55 |
+
border: 1px solid #4CAF50;
|
56 |
+
border-radius: 4px;
|
57 |
+
}
|
58 |
+
|
59 |
+
QPushButton:hover {
|
60 |
+
background-color: #45a049;
|
61 |
+
border: 1px solid #45a049;
|
62 |
+
box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.1);
|
63 |
+
}
|
64 |
+
""")
|
65 |
+
|
66 |
+
license_text = (
|
67 |
+
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. "
|
68 |
+
"如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
|
69 |
+
license_label = QLabel(license_text)
|
70 |
+
license_label.setWordWrap(True)
|
71 |
+
|
72 |
+
self.GPT_model_label = QLabel("选择GPT模型:")
|
73 |
+
self.GPT_model_input = QLineEdit()
|
74 |
+
self.GPT_model_input.setPlaceholderText("拖拽或选择文件")
|
75 |
+
self.GPT_model_input.setText(self.GPT_Path)
|
76 |
+
self.GPT_model_input.setReadOnly(True)
|
77 |
+
self.GPT_model_button = QPushButton("选择GPT模型文件")
|
78 |
+
self.GPT_model_button.clicked.connect(self.select_GPT_model)
|
79 |
+
|
80 |
+
self.SoVITS_model_label = QLabel("选择SoVITS模型:")
|
81 |
+
self.SoVITS_model_input = QLineEdit()
|
82 |
+
self.SoVITS_model_input.setPlaceholderText("拖拽或选择文件")
|
83 |
+
self.SoVITS_model_input.setText(self.SoVITS_Path)
|
84 |
+
self.SoVITS_model_input.setReadOnly(True)
|
85 |
+
self.SoVITS_model_button = QPushButton("选择SoVITS模型文件")
|
86 |
+
self.SoVITS_model_button.clicked.connect(self.select_SoVITS_model)
|
87 |
+
|
88 |
+
self.ref_audio_label = QLabel("上传参考音频:")
|
89 |
+
self.ref_audio_input = QLineEdit()
|
90 |
+
self.ref_audio_input.setPlaceholderText("拖拽或选择文件")
|
91 |
+
self.ref_audio_input.setReadOnly(True)
|
92 |
+
self.ref_audio_button = QPushButton("选择音频文件")
|
93 |
+
self.ref_audio_button.clicked.connect(self.select_ref_audio)
|
94 |
+
|
95 |
+
self.ref_text_label = QLabel("参考音频文本:")
|
96 |
+
self.ref_text_input = QLineEdit()
|
97 |
+
self.ref_text_input.setPlaceholderText("直接输入文字或上传文本")
|
98 |
+
self.ref_text_button = QPushButton("上传文本")
|
99 |
+
self.ref_text_button.clicked.connect(self.upload_ref_text)
|
100 |
+
|
101 |
+
self.ref_language_label = QLabel("参考音频语言:")
|
102 |
+
self.ref_language_combobox = QComboBox()
|
103 |
+
self.ref_language_combobox.addItems(["中文", "英文", "日文", "中英混合", "日英混合", "多语种混合"])
|
104 |
+
self.ref_language_combobox.setCurrentText("多语种混合")
|
105 |
+
|
106 |
+
self.target_text_label = QLabel("合成目标文本:")
|
107 |
+
self.target_text_input = QLineEdit()
|
108 |
+
self.target_text_input.setPlaceholderText("直接输入文字或上传文本")
|
109 |
+
self.target_text_button = QPushButton("上传文本")
|
110 |
+
self.target_text_button.clicked.connect(self.upload_target_text)
|
111 |
+
|
112 |
+
self.target_language_label = QLabel("合成音频语言:")
|
113 |
+
self.target_language_combobox = QComboBox()
|
114 |
+
self.target_language_combobox.addItems(["中文", "英文", "日文", "中英混合", "日英混合", "多语种混合"])
|
115 |
+
self.target_language_combobox.setCurrentText("多语种混合")
|
116 |
+
|
117 |
+
self.output_label = QLabel("输出音频路径:")
|
118 |
+
self.output_input = QLineEdit()
|
119 |
+
self.output_input.setPlaceholderText("拖拽或选择文件")
|
120 |
+
self.output_input.setReadOnly(True)
|
121 |
+
self.output_button = QPushButton("选择文件夹")
|
122 |
+
self.output_button.clicked.connect(self.select_output_path)
|
123 |
+
|
124 |
+
self.output_text = QTextEdit()
|
125 |
+
self.output_text.setReadOnly(True)
|
126 |
+
|
127 |
+
self.add_drag_drop_events([
|
128 |
+
self.GPT_model_input,
|
129 |
+
self.SoVITS_model_input,
|
130 |
+
self.ref_audio_input,
|
131 |
+
self.ref_text_input,
|
132 |
+
self.target_text_input,
|
133 |
+
self.output_input,
|
134 |
+
])
|
135 |
+
|
136 |
+
self.synthesize_button = QPushButton("合成")
|
137 |
+
self.synthesize_button.clicked.connect(self.synthesize)
|
138 |
+
|
139 |
+
self.clear_output_button = QPushButton("清空输出")
|
140 |
+
self.clear_output_button.clicked.connect(self.clear_output)
|
141 |
+
|
142 |
+
self.status_bar = QStatusBar()
|
143 |
+
|
144 |
+
main_layout = QVBoxLayout()
|
145 |
+
|
146 |
+
input_layout = QGridLayout(self)
|
147 |
+
input_layout.setSpacing(10)
|
148 |
+
|
149 |
+
input_layout.addWidget(license_label, 0, 0, 1, 3)
|
150 |
+
|
151 |
+
input_layout.addWidget(self.GPT_model_label, 1, 0)
|
152 |
+
input_layout.addWidget(self.GPT_model_input, 2, 0, 1, 2)
|
153 |
+
input_layout.addWidget(self.GPT_model_button, 2, 2)
|
154 |
+
|
155 |
+
input_layout.addWidget(self.SoVITS_model_label, 3, 0)
|
156 |
+
input_layout.addWidget(self.SoVITS_model_input, 4, 0, 1, 2)
|
157 |
+
input_layout.addWidget(self.SoVITS_model_button, 4, 2)
|
158 |
+
|
159 |
+
input_layout.addWidget(self.ref_audio_label, 5, 0)
|
160 |
+
input_layout.addWidget(self.ref_audio_input, 6, 0, 1, 2)
|
161 |
+
input_layout.addWidget(self.ref_audio_button, 6, 2)
|
162 |
+
|
163 |
+
input_layout.addWidget(self.ref_language_label, 7, 0)
|
164 |
+
input_layout.addWidget(self.ref_language_combobox, 8, 0, 1, 1)
|
165 |
+
input_layout.addWidget(self.ref_text_label, 9, 0)
|
166 |
+
input_layout.addWidget(self.ref_text_input, 10, 0, 1, 2)
|
167 |
+
input_layout.addWidget(self.ref_text_button, 10, 2)
|
168 |
+
|
169 |
+
input_layout.addWidget(self.target_language_label, 11, 0)
|
170 |
+
input_layout.addWidget(self.target_language_combobox, 12, 0, 1, 1)
|
171 |
+
input_layout.addWidget(self.target_text_label, 13, 0)
|
172 |
+
input_layout.addWidget(self.target_text_input, 14, 0, 1, 2)
|
173 |
+
input_layout.addWidget(self.target_text_button, 14, 2)
|
174 |
+
|
175 |
+
input_layout.addWidget(self.output_label, 15, 0)
|
176 |
+
input_layout.addWidget(self.output_input, 16, 0, 1, 2)
|
177 |
+
input_layout.addWidget(self.output_button, 16, 2)
|
178 |
+
|
179 |
+
main_layout.addLayout(input_layout)
|
180 |
+
|
181 |
+
output_layout = QVBoxLayout()
|
182 |
+
output_layout.addWidget(self.output_text)
|
183 |
+
main_layout.addLayout(output_layout)
|
184 |
+
|
185 |
+
main_layout.addWidget(self.synthesize_button)
|
186 |
+
|
187 |
+
main_layout.addWidget(self.clear_output_button)
|
188 |
+
|
189 |
+
main_layout.addWidget(self.status_bar)
|
190 |
+
|
191 |
+
self.central_widget = QWidget()
|
192 |
+
self.central_widget.setLayout(main_layout)
|
193 |
+
self.setCentralWidget(self.central_widget)
|
194 |
+
|
195 |
+
def dragEnterEvent(self, event):
|
196 |
+
if event.mimeData().hasUrls():
|
197 |
+
event.acceptProposedAction()
|
198 |
+
|
199 |
+
def dropEvent(self, event):
|
200 |
+
if event.mimeData().hasUrls():
|
201 |
+
file_paths = [url.toLocalFile() for url in event.mimeData().urls()]
|
202 |
+
if len(file_paths) == 1:
|
203 |
+
self.update_ref_audio(file_paths[0])
|
204 |
+
else:
|
205 |
+
self.update_ref_audio(", ".join(file_paths))
|
206 |
+
|
207 |
+
def add_drag_drop_events(self, widgets):
|
208 |
+
for widget in widgets:
|
209 |
+
widget.setAcceptDrops(True)
|
210 |
+
widget.installEventFilter(self)
|
211 |
+
|
212 |
+
def eventFilter(self, obj, event):
|
213 |
+
if event.type() in (QEvent.DragEnter, QEvent.Drop):
|
214 |
+
mime_data = event.mimeData()
|
215 |
+
if mime_data.hasUrls():
|
216 |
+
event.acceptProposedAction()
|
217 |
+
|
218 |
+
return super().eventFilter(obj, event)
|
219 |
+
|
220 |
+
def select_GPT_model(self):
|
221 |
+
file_path, _ = QFileDialog.getOpenFileName(self, "选择GPT模型文件", "", "GPT Files (*.ckpt)")
|
222 |
+
if file_path:
|
223 |
+
self.GPT_model_input.setText(file_path)
|
224 |
+
|
225 |
+
def select_SoVITS_model(self):
|
226 |
+
file_path, _ = QFileDialog.getOpenFileName(self, "选择SoVITS模型文件", "", "SoVITS Files (*.pth)")
|
227 |
+
if file_path:
|
228 |
+
self.SoVITS_model_input.setText(file_path)
|
229 |
+
|
230 |
+
def select_ref_audio(self):
|
231 |
+
file_path, _ = QFileDialog.getOpenFileName(self, "选择参考音频文件", "", "Audio Files (*.wav *.mp3)")
|
232 |
+
if file_path:
|
233 |
+
self.update_ref_audio(file_path)
|
234 |
+
|
235 |
+
def upload_ref_text(self):
|
236 |
+
file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
|
237 |
+
if file_path:
|
238 |
+
with open(file_path, 'r', encoding='utf-8') as file:
|
239 |
+
content = file.read()
|
240 |
+
self.ref_text_input.setText(content)
|
241 |
+
|
242 |
+
def upload_target_text(self):
|
243 |
+
file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
|
244 |
+
if file_path:
|
245 |
+
with open(file_path, 'r', encoding='utf-8') as file:
|
246 |
+
content = file.read()
|
247 |
+
self.target_text_input.setText(content)
|
248 |
+
|
249 |
+
def select_output_path(self):
|
250 |
+
options = QFileDialog.Options()
|
251 |
+
options |= QFileDialog.DontUseNativeDialog
|
252 |
+
options |= QFileDialog.ShowDirsOnly
|
253 |
+
|
254 |
+
folder_dialog = QFileDialog()
|
255 |
+
folder_dialog.setOptions(options)
|
256 |
+
folder_dialog.setFileMode(QFileDialog.Directory)
|
257 |
+
|
258 |
+
if folder_dialog.exec_():
|
259 |
+
folder_path = folder_dialog.selectedFiles()[0]
|
260 |
+
self.output_input.setText(folder_path)
|
261 |
+
|
262 |
+
def update_ref_audio(self, file_path):
|
263 |
+
self.ref_audio_input.setText(file_path)
|
264 |
+
|
265 |
+
def clear_output(self):
|
266 |
+
self.output_text.clear()
|
267 |
+
|
268 |
+
def synthesize(self):
    """Run one synthesis from the current form values and save the result as output.wav.

    Reads the model paths, reference audio/text, languages, target text and
    output folder from the widgets, reloads GPT / SoVITS weights when the
    selected path differs from the one currently recorded on the instance,
    drains the ``get_tts_wav`` generator and writes the last yielded audio to
    ``<output folder>/output.wav``. Nothing is written or reported when the
    generator yields nothing.
    """
    gpt_ckpt = self.GPT_model_input.text()
    sovits_ckpt = self.SoVITS_model_input.text()
    reference_audio = self.ref_audio_input.text()
    reference_language = i18n(self.ref_language_combobox.currentText())
    reference_text = self.ref_text_input.text()
    target_language = i18n(self.target_language_combobox.currentText())
    target_text = self.target_text_input.text()
    output_dir = self.output_input.text()

    # Only reload weights when the user picked a different checkpoint.
    if gpt_ckpt != self.GPT_Path:
        change_gpt_weights(gpt_path=gpt_ckpt)
        self.GPT_Path = gpt_ckpt
    if sovits_ckpt != self.SoVITS_Path:
        change_sovits_weights(sovits_path=sovits_ckpt)
        self.SoVITS_Path = sovits_ckpt

    generated = list(
        get_tts_wav(
            ref_wav_path=reference_audio,
            prompt_text=reference_text,
            prompt_language=reference_language,
            text=target_text,
            text_language=target_language,
        )
    )
    if not generated:
        return

    # Only the last item yielded by the generator is saved.
    sample_rate, samples = generated[-1]
    wav_path = os.path.join(output_dir, "output.wav")
    sf.write(wav_path, samples, sample_rate)

    summary = "Audio saved to " + wav_path

    self.status_bar.showMessage("合成完成!输出路径:" + wav_path, 5000)
    self.output_text.append("处理结果:\n" + summary)
|
304 |
+
|
305 |
+
|
306 |
+
if __name__ == '__main__':
    # Script entry point: create the Qt application, show the main window
    # and run the event loop until it exits.
    app = QApplication(sys.argv)
    mainWin = GPTSoVITSGUI()
    mainWin.show()
    # Use the event loop's return value as the process exit status.
    sys.exit(app.exec_())
|
GPT_SoVITS/inference_webui.py
ADDED
@@ -0,0 +1,772 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
按中英混合识别
|
3 |
+
按日英混合识别
|
4 |
+
多语种启动切分识别语种
|
5 |
+
全部按中文识别
|
6 |
+
全部按英文识别
|
7 |
+
全部按日文识别
|
8 |
+
'''
|
9 |
+
import logging
|
10 |
+
import traceback
|
11 |
+
|
12 |
+
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
13 |
+
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
14 |
+
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
15 |
+
logging.getLogger("httpx").setLevel(logging.ERROR)
|
16 |
+
logging.getLogger("asyncio").setLevel(logging.ERROR)
|
17 |
+
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
|
18 |
+
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
19 |
+
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
|
20 |
+
import LangSegment, os, re, sys, json
|
21 |
+
import pdb
|
22 |
+
import torch
|
23 |
+
|
24 |
+
try:
|
25 |
+
import gradio.analytics as analytics
|
26 |
+
analytics.version_check = lambda:None
|
27 |
+
except:...
|
28 |
+
|
29 |
+
version=os.environ.get("version","v2")
|
30 |
+
pretrained_sovits_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2G488k.pth"]
|
31 |
+
pretrained_gpt_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"]
|
32 |
+
|
33 |
+
_ =[[],[]]
|
34 |
+
for i in range(2):
|
35 |
+
if os.path.exists(pretrained_gpt_name[i]):
|
36 |
+
_[0].append(pretrained_gpt_name[i])
|
37 |
+
if os.path.exists(pretrained_sovits_name[i]):
|
38 |
+
_[-1].append(pretrained_sovits_name[i])
|
39 |
+
pretrained_gpt_name,pretrained_sovits_name = _
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
if os.path.exists(f"./weight.json"):
|
44 |
+
pass
|
45 |
+
else:
|
46 |
+
with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)
|
47 |
+
|
48 |
+
with open(f"./weight.json", 'r', encoding="utf-8") as file:
|
49 |
+
weight_data = file.read()
|
50 |
+
weight_data=json.loads(weight_data)
|
51 |
+
gpt_path = os.environ.get(
|
52 |
+
"gpt_path", weight_data.get('GPT',{}).get(version,pretrained_gpt_name))
|
53 |
+
sovits_path = os.environ.get(
|
54 |
+
"sovits_path", weight_data.get('SoVITS',{}).get(version,pretrained_sovits_name))
|
55 |
+
if isinstance(gpt_path,list):
|
56 |
+
gpt_path = gpt_path[0]
|
57 |
+
if isinstance(sovits_path,list):
|
58 |
+
sovits_path = sovits_path[0]
|
59 |
+
|
60 |
+
# gpt_path = os.environ.get(
|
61 |
+
# "gpt_path", pretrained_gpt_name
|
62 |
+
# )
|
63 |
+
# sovits_path = os.environ.get("sovits_path", pretrained_sovits_name)
|
64 |
+
cnhubert_base_path = os.environ.get(
|
65 |
+
"cnhubert_base_path", "GPT_SoVITS/pretrained_models/chinese-hubert-base"
|
66 |
+
)
|
67 |
+
bert_path = os.environ.get(
|
68 |
+
"bert_path", "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
|
69 |
+
)
|
70 |
+
def _env_flag(name, default):
    """Read a boolean switch from the environment without evaluating it as code.

    Returns ``default`` when the variable is unset. Otherwise the value is
    true only for "1", "true", "yes" or "on" (case-insensitive, surrounding
    whitespace ignored); every other value, including "False", "0" and the
    empty string, is false.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")


# Port for the inference web UI; defaults to 9872 when the variable is unset.
infer_ttswebui = int(os.environ.get("infer_ttswebui", 9872))
# Previously parsed with eval(), which executes whatever the variable contains.
is_share = _env_flag("is_share", False)
if "_CUDA_VISIBLE_DEVICES" in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
# Half precision is only used when it is requested and CUDA is available.
is_half = _env_flag("is_half", True) and torch.cuda.is_available()
|
77 |
+
punctuation = set(['!', '?', '…', ',', '.', '-'," "])
|
78 |
+
import gradio as gr
|
79 |
+
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
80 |
+
import numpy as np
|
81 |
+
import librosa
|
82 |
+
from feature_extractor import cnhubert
|
83 |
+
|
84 |
+
cnhubert.cnhubert_base_path = cnhubert_base_path
|
85 |
+
|
86 |
+
from module.models import SynthesizerTrn
|
87 |
+
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
88 |
+
from text import cleaned_text_to_sequence
|
89 |
+
from text.cleaner import clean_text
|
90 |
+
from time import time as ttime
|
91 |
+
from module.mel_processing import spectrogram_torch
|
92 |
+
from tools.my_utils import load_audio
|
93 |
+
from tools.i18n.i18n import I18nAuto, scan_language_list
|
94 |
+
|
95 |
+
language=os.environ.get("language","Auto")
|
96 |
+
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
97 |
+
i18n = I18nAuto(language=language)
|
98 |
+
|
99 |
+
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
|
100 |
+
|
101 |
+
if torch.cuda.is_available():
|
102 |
+
device = "cuda"
|
103 |
+
else:
|
104 |
+
device = "cpu"
|
105 |
+
|
106 |
+
# Maps the (translated) UI language label to the internal language code for v1 models.
dict_language_v1 = {
    i18n("中文"): "all_zh",  # treat all text as Chinese
    i18n("英文"): "en",  # treat all text as English (unchanged)
    i18n("日文"): "all_ja",  # treat all text as Japanese
    i18n("中英混合"): "zh",  # Chinese/English mixed (unchanged)
    i18n("日英混合"): "ja",  # Japanese/English mixed (unchanged)
    i18n("多语种混合"): "auto",  # multilingual: segment the text and detect each language
}
# Same mapping for v2 models, which add the Cantonese and Korean entries.
dict_language_v2 = {
    i18n("中文"): "all_zh",  # treat all text as Chinese
    i18n("英文"): "en",  # treat all text as English (unchanged)
    i18n("日文"): "all_ja",  # treat all text as Japanese
    i18n("粤语"): "all_yue",  # treat all text as Cantonese
    i18n("韩文"): "all_ko",  # treat all text as Korean
    i18n("中英混合"): "zh",  # Chinese/English mixed (unchanged)
    i18n("日英混合"): "ja",  # Japanese/English mixed (unchanged)
    i18n("粤英混合"): "yue",  # Cantonese/English mixed (unchanged)
    i18n("韩英混合"): "ko",  # Korean/English mixed (unchanged)
    i18n("多语种混合"): "auto",  # multilingual: segment the text and detect each language
    i18n("多语种混合(粤语)"): "auto_yue",  # multilingual, with segments detected as "zh" treated as Cantonese
}
# Table in effect for the currently selected model version.
dict_language = dict_language_v1 if version =='v1' else dict_language_v2
|
128 |
+
|
129 |
+
tokenizer = AutoTokenizer.from_pretrained(bert_path)
|
130 |
+
bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
|
131 |
+
if is_half == True:
|
132 |
+
bert_model = bert_model.half().to(device)
|
133 |
+
else:
|
134 |
+
bert_model = bert_model.to(device)
|
135 |
+
|
136 |
+
|
137 |
+
def get_bert_feature(text, word2ph):
    """Return phone-level BERT features for ``text``.

    Runs ``text`` through the module-level tokenizer and BERT model, takes the
    third-from-last hidden state, drops its first and last rows, then repeats
    row ``i`` ``word2ph[i]`` times. The stacked rows are returned transposed.
    ``word2ph`` must have one entry per character of ``text``.
    """
    with torch.no_grad():
        encoded = tokenizer(text, return_tensors="pt")
        for field in encoded:
            encoded[field] = encoded[field].to(device)
        outputs = bert_model(**encoded, output_hidden_states=True)
        hidden = torch.cat(outputs["hidden_states"][-3:-2], -1)[0].cpu()[1:-1]
    assert len(word2ph) == len(text)
    # Expand each row to as many rows as that position's word2ph count.
    expanded = [hidden[pos].repeat(count, 1) for pos, count in enumerate(word2ph)]
    return torch.cat(expanded, dim=0).T
|
151 |
+
|
152 |
+
|
153 |
+
class DictToAttrRecursive(dict):
    """A dict whose keys can also be read, written and deleted as attributes.

    Nested plain dicts are wrapped in ``DictToAttrRecursive`` so chained
    attribute access (``cfg.model.version``) works. Values assigned through
    attribute syntax are stored both as mapping items and as instance
    attributes.
    """

    def __init__(self, input_dict):
        super().__init__(input_dict)
        for key, value in input_dict.items():
            # __setattr__ wraps nested dicts and writes both the mapping item
            # and the instance attribute, so one call per key is enough.
            setattr(self, key, value)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails; fall back to the mapping.
        try:
            return self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")

    def __setattr__(self, key, value):
        if isinstance(value, dict):
            value = DictToAttrRecursive(value)
        super(DictToAttrRecursive, self).__setitem__(key, value)
        super().__setattr__(key, value)

    def __delattr__(self, item):
        try:
            del self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")
        # Also drop the mirrored instance attribute; otherwise ``obj.item``
        # would keep resolving after the key has been deleted.
        self.__dict__.pop(item, None)
|
179 |
+
|
180 |
+
|
181 |
+
ssl_model = cnhubert.get_model()
|
182 |
+
if is_half == True:
|
183 |
+
ssl_model = ssl_model.half().to(device)
|
184 |
+
else:
|
185 |
+
ssl_model = ssl_model.to(device)
|
186 |
+
|
187 |
+
|
188 |
+
def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
    """Load a SoVITS checkpoint and make it the active synthesizer.

    Rebinds the module globals ``vq_model``, ``hps``, ``version`` and
    ``dict_language`` and records ``sovits_path`` under the detected version
    in ``./weight.json``.

    Args:
        sovits_path: path of the checkpoint passed to ``torch.load``.
        prompt_language: currently selected reference-language label, or None.
        text_language: currently selected target-language label, or None.

    Returns:
        None when either language argument is None. Otherwise a 6-tuple of
        ``{'__type__': 'update', ...}`` dicts: choices for the two language
        selectors, then updates for the prompt text, prompt language, target
        text and target language (presumably consumed by the UI event that
        calls this function - confirm against the caller).
    """
    global vq_model, hps, version, dict_language
    dict_s2 = torch.load(sovits_path, map_location="cpu")
    hps = dict_s2["config"]
    hps = DictToAttrRecursive(hps)
    hps.model.semantic_frame_rate = "25hz"
    # The model version is inferred from the text-embedding table size: 322 rows means v1.
    if dict_s2['weight']['enc_p.text_embedding.weight'].shape[0] == 322:
        hps.model.version = "v1"
    else:
        hps.model.version = "v2"
    version = hps.model.version
    # print("sovits version:",hps.model.version)
    vq_model = SynthesizerTrn(
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model
    )
    # enc_q is dropped unless the checkpoint path contains "pretrained".
    if ("pretrained" not in sovits_path):
        del vq_model.enc_q
    if is_half == True:
        vq_model = vq_model.half().to(device)
    else:
        vq_model = vq_model.to(device)
    vq_model.eval()
    # strict=False: mismatched keys are reported by the printed result instead of raising.
    print(vq_model.load_state_dict(dict_s2["weight"], strict=False))
    dict_language = dict_language_v1 if version =='v1' else dict_language_v2
    # Remember the chosen checkpoint for this version in weight.json.
    with open("./weight.json")as f:
        data=f.read()
        data=json.loads(data)
        data["SoVITS"][version]=sovits_path
    with open("./weight.json","w")as f:f.write(json.dumps(data))
    if prompt_language is not None and text_language is not None:
        # Keep the current selection if the new version's table still has it;
        # otherwise clear the text and fall back to the "中文" label.
        if prompt_language in list(dict_language.keys()):
            prompt_text_update, prompt_language_update = {'__type__':'update'}, {'__type__':'update', 'value':prompt_language}
        else:
            prompt_text_update = {'__type__':'update', 'value':''}
            prompt_language_update = {'__type__':'update', 'value':i18n("中文")}
        if text_language in list(dict_language.keys()):
            text_update, text_language_update = {'__type__':'update'}, {'__type__':'update', 'value':text_language}
        else:
            text_update = {'__type__':'update', 'value':''}
            text_language_update = {'__type__':'update', 'value':i18n("中文")}
        return {'__type__':'update', 'choices':list(dict_language.keys())}, {'__type__':'update', 'choices':list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update
|
232 |
+
|
233 |
+
|
234 |
+
|
235 |
+
change_sovits_weights(sovits_path)
|
236 |
+
|
237 |
+
|
238 |
+
def change_gpt_weights(gpt_path):
    """Load a GPT (text-to-semantic) checkpoint and make it the active model.

    Rebinds the module globals ``hz``, ``max_sec``, ``t2s_model`` and
    ``config``, prints the parameter count, and records ``gpt_path`` under the
    current ``version`` in ``./weight.json``.
    """
    global hz, max_sec, t2s_model, config
    hz = 50
    checkpoint = torch.load(gpt_path, map_location="cpu")
    config = checkpoint["config"]
    max_sec = config["data"]["max_sec"]
    t2s_model = Text2SemanticLightningModule(config, "****", is_train=False)
    t2s_model.load_state_dict(checkpoint["weight"])
    if is_half == True:
        t2s_model = t2s_model.half()
    t2s_model = t2s_model.to(device)
    t2s_model.eval()
    param_count = sum(weights.nelement() for weights in t2s_model.parameters())
    print("Number of parameter: %.2fM" % (param_count / 1e6))
    # Remember the chosen checkpoint for this version in weight.json.
    with open("./weight.json") as reader:
        saved = json.loads(reader.read())
        saved["GPT"][version] = gpt_path
    with open("./weight.json", "w") as writer:
        writer.write(json.dumps(saved))
|
257 |
+
|
258 |
+
|
259 |
+
change_gpt_weights(gpt_path)
|
260 |
+
|
261 |
+
|
262 |
+
def get_spepc(hps, filename):
    """Load ``filename`` at ``hps.data.sampling_rate`` and return its spectrogram.

    The waveform is scaled down by ``min(2, peak)`` when its absolute peak
    exceeds 1, then passed to ``spectrogram_torch`` with a leading axis added.
    """
    data_cfg = hps.data
    waveform = torch.FloatTensor(load_audio(filename, int(data_cfg.sampling_rate)))
    peak = waveform.abs().max()
    if peak > 1:
        # Scale over-range audio, dividing by at most 2.
        waveform /= min(2, peak)
    return spectrogram_torch(
        waveform.unsqueeze(0),
        data_cfg.filter_length,
        data_cfg.sampling_rate,
        data_cfg.hop_length,
        data_cfg.win_length,
        center=False,
    )
|
278 |
+
|
279 |
+
def clean_text_inf(text, language, version):
    """Clean ``text`` and return ``(phone id sequence, word2ph, normalised text)``."""
    symbols, word2ph, norm_text = clean_text(text, language, version)
    return cleaned_text_to_sequence(symbols, version), word2ph, norm_text
|
283 |
+
|
284 |
+
dtype=torch.float16 if is_half == True else torch.float32
|
285 |
+
def get_bert_inf(phones, word2ph, norm_text, language):
    """Return BERT features for one segment.

    Chinese ("zh" / "all_zh") segments get real features from
    ``get_bert_feature``; every other language gets a zero tensor of shape
    ``(1024, len(phones))`` in the active precision.
    """
    if language.replace("all_", "") == "zh":
        return get_bert_feature(norm_text, word2ph).to(device)  # .to(dtype)
    zeros_dtype = torch.float16 if is_half == True else torch.float32
    return torch.zeros((1024, len(phones)), dtype=zeros_dtype).to(device)
|
296 |
+
|
297 |
+
|
298 |
+
splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…", }
|
299 |
+
|
300 |
+
|
301 |
+
def get_first(text):
    """Return the part of ``text`` before the first character found in ``splits``, stripped."""
    separator_class = "[" + "".join(map(re.escape, splits)) + "]"
    leading = re.split(separator_class, text)[0]
    return leading.strip()
|
305 |
+
|
306 |
+
from text import chinese
|
307 |
+
def get_phones_and_bert(text,language,version,final=False):
    """Convert ``text`` into phone ids, BERT features and normalised text.

    Args:
        text: the text to process.
        language: internal language code. "en" and the "all_*" codes treat the
            whole text as one language; "zh", "ja", "ko", "yue", "auto" and
            "auto_yue" segment the text with LangSegment first.
        version: model version forwarded to the text cleaner.
        final: set on the internal retry so it happens at most once.

    Returns:
        ``(phones, bert, norm_text)``, with ``bert`` cast to the module-level
        ``dtype``.

    NOTE(review): a ``language`` outside both sets leaves ``phones`` unbound
    and fails with a NameError at the length check below.
    """
    if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
        language = language.replace("all_","")
        if language == "en":
            LangSegment.setfilters(["en"])
            formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text))
        else:
            # CJK ideographs cannot be told apart by language, so trust the user's choice.
            formattext = text
        # Collapse runs of spaces into one. The loop must test for a double
        # space: testing for a single space never terminates once the text
        # contains any space.
        while "  " in formattext:
            formattext = formattext.replace("  ", " ")
        if language == "zh":
            if re.search(r'[A-Za-z]', formattext):
                # Latin letters inside "all Chinese" text: upper-case them,
                # normalise, then handle the text in mixed "zh" mode.
                formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
                formattext = chinese.mix_text_normalize(formattext)
                return get_phones_and_bert(formattext,"zh",version)
            else:
                phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
                bert = get_bert_feature(norm_text, word2ph).to(device)
        elif language == "yue" and re.search(r'[A-Za-z]', formattext):
            # Same treatment for Cantonese text that contains Latin letters.
            formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
            formattext = chinese.mix_text_normalize(formattext)
            return get_phones_and_bert(formattext,"yue",version)
        else:
            phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
            # No BERT features here; use zeros.
            bert = torch.zeros(
                (1024, len(phones)),
                dtype=torch.float16 if is_half == True else torch.float32,
            ).to(device)
    elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
        textlist=[]
        langlist=[]
        LangSegment.setfilters(["zh","ja","en","ko"])
        if language == "auto":
            for tmp in LangSegment.getTexts(text):
                langlist.append(tmp["lang"])
                textlist.append(tmp["text"])
        elif language == "auto_yue":
            for tmp in LangSegment.getTexts(text):
                # Segments detected as "zh" are treated as Cantonese in this mode.
                if tmp["lang"] == "zh":
                    tmp["lang"] = "yue"
                langlist.append(tmp["lang"])
                textlist.append(tmp["text"])
        else:
            for tmp in LangSegment.getTexts(text):
                if tmp["lang"] == "en":
                    langlist.append(tmp["lang"])
                else:
                    # CJK ideographs cannot be told apart by language, so trust the user's choice.
                    langlist.append(language)
                textlist.append(tmp["text"])
        print(textlist)
        print(langlist)
        phones_list = []
        bert_list = []
        norm_text_list = []
        for segment_text, lang in zip(textlist, langlist):
            phones, word2ph, norm_text = clean_text_inf(segment_text, lang, version)
            bert = get_bert_inf(phones, word2ph, norm_text, lang)
            phones_list.append(phones)
            norm_text_list.append(norm_text)
            bert_list.append(bert)
        bert = torch.cat(bert_list, dim=1)
        phones = sum(phones_list, [])
        norm_text = ''.join(norm_text_list)

    # Fewer than 6 phones: retry once with a leading "." prepended to the text.
    if not final and len(phones) < 6:
        return get_phones_and_bert("." + text,language,version,final=True)

    return phones,bert.to(dtype),norm_text
|
378 |
+
|
379 |
+
|
380 |
+
def merge_short_text_in_array(texts, threshold):
    """Concatenate consecutive strings until each piece has at least ``threshold`` characters.

    A list with fewer than two items is returned unchanged. A short leftover
    at the end is appended to the last piece, or becomes the only piece when
    nothing reached the threshold.
    """
    if len(texts) < 2:
        return texts
    merged = []
    pending = ""
    for piece in texts:
        pending = pending + piece
        if len(pending) < threshold:
            continue
        merged.append(pending)
        pending = ""
    if pending:
        if merged:
            merged[-1] += pending
        else:
            merged.append(pending)
    return merged
|
396 |
+
|
397 |
+
##ref_wav_path+prompt_text+prompt_language+text(单个)+text_language+top_k+top_p+temperature
|
398 |
+
# cache_tokens={}#暂未实现清理机制
|
399 |
+
cache= {}
|
400 |
+
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free
=False,speed=1,if_freeze=False,inp_refs=None):
    """Synthesize ``text`` in the voice of a reference recording.

    Generator that yields once: ``(sampling_rate, int16 numpy audio)``.

    Args:
        ref_wav_path: path of the reference audio.
        prompt_text: transcript of the reference audio; empty or None switches
            to reference-text-free mode.
        prompt_language: UI label of the reference language (key of ``dict_language``).
        text: target text; it is split on newlines after the optional cutting step.
        text_language: UI label of the target language (key of ``dict_language``).
        how_to_cut: UI label selecting one of ``cut1`` .. ``cut5``; any other
            value leaves the text uncut.
        top_k, top_p, temperature: sampling settings passed to ``infer_panel``.
        ref_free: skip the reference transcript and prompt tokens.
        speed: passed to ``vq_model.decode``.
        if_freeze: reuse semantic tokens cached under the same sentence index.
        inp_refs: optional extra reference files (objects with a ``.name``
            path) used in place of ``ref_wav_path`` for the decoder reference
            spectrograms.

    Raises:
        OSError: when the reference audio, loaded at 16 kHz, is outside 3-10 seconds.
    """
    global cache
    # Missing inputs only trigger a UI warning here; execution continues.
    if ref_wav_path:pass
    else:gr.Warning(i18n('请上传参考音频'))
    if text:pass
    else:gr.Warning(i18n('请填入推理文本'))
    t = []
    if prompt_text is None or len(prompt_text) == 0:
        ref_free = True
    t0 = ttime()
    # Translate UI labels into internal language codes.
    prompt_language = dict_language[prompt_language]
    text_language = dict_language[text_language]


    if not ref_free:
        prompt_text = prompt_text.strip("\n")
        # Make sure the reference transcript ends with a sentence separator.
        if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
        print(i18n("实际输入的参考文本:"), prompt_text)
    text = text.strip("\n")
    # if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text

    print(i18n("实际输入的目标文本:"), text)
    # 0.3 s of silence at the model sampling rate; appended after every sentence.
    zero_wav = np.zeros(
        int(hps.data.sampling_rate * 0.3),
        dtype=np.float16 if is_half == True else np.float32,
    )
    if not ref_free:
        with torch.no_grad():
            wav16k, sr = librosa.load(ref_wav_path, sr=16000)
            # 48000 and 160000 samples at 16 kHz are 3 s and 10 s.
            if (wav16k.shape[0] > 160000 or wav16k.shape[0] < 48000):
                gr.Warning(i18n("参考音频在3~10秒范围外,请更换!"))
                raise OSError(i18n("参考音频在3~10秒范围外,请更换!"))
            wav16k = torch.from_numpy(wav16k)
            zero_wav_torch = torch.from_numpy(zero_wav)
            if is_half == True:
                wav16k = wav16k.half().to(device)
                zero_wav_torch = zero_wav_torch.half().to(device)
            else:
                wav16k = wav16k.to(device)
                zero_wav_torch = zero_wav_torch.to(device)
            # Append the silence to the reference before feature extraction.
            wav16k = torch.cat([wav16k, zero_wav_torch])
            ssl_content = ssl_model.model(wav16k.unsqueeze(0))[
                "last_hidden_state"
            ].transpose(
                1, 2
            )  # .float()
            codes = vq_model.extract_latent(ssl_content)
            prompt_semantic = codes[0, 0]
            prompt = prompt_semantic.unsqueeze(0).to(device)

    t1 = ttime()
    t.append(t1-t0)

    # Optional sentence cutting, selected by the UI label.
    if (how_to_cut == i18n("凑四句一切")):
        text = cut1(text)
    elif (how_to_cut == i18n("凑50字一切")):
        text = cut2(text)
    elif (how_to_cut == i18n("按中文句号。切")):
        text = cut3(text)
    elif (how_to_cut == i18n("按英文句号.切")):
        text = cut4(text)
    elif (how_to_cut == i18n("按标点符号切")):
        text = cut5(text)
    while "\n\n" in text:
        text = text.replace("\n\n", "\n")
    print(i18n("实际输入的目标文本(切句后):"), text)
    texts = text.split("\n")
    texts = process_text(texts)
    texts = merge_short_text_in_array(texts, 5)
    audio_opt = []
    if not ref_free:
        phones1,bert1,norm_text1=get_phones_and_bert(prompt_text, prompt_language, version)

    for i_text,text in enumerate(texts):
        # Skip blank lines in the target text so they do not cause errors.
        if (len(text.strip()) == 0):
            continue
        if (text[-1] not in splits): text += "。" if text_language != "en" else "."
        print(i18n("实际输入的目标文本(每句):"), text)
        phones2,bert2,norm_text2=get_phones_and_bert(text, text_language, version)
        print(i18n("前端处理后的文本(每句):"), norm_text2)
        if not ref_free:
            # Reference phones/features are prepended to the sentence's own.
            bert = torch.cat([bert1, bert2], 1)
            all_phoneme_ids = torch.LongTensor(phones1+phones2).to(device).unsqueeze(0)
        else:
            bert = bert2
            all_phoneme_ids = torch.LongTensor(phones2).to(device).unsqueeze(0)

        bert = bert.to(device).unsqueeze(0)
        all_phoneme_len = torch.tensor([all_phoneme_ids.shape[-1]]).to(device)

        t2 = ttime()
        # cache_key="%s-%s-%s-%s-%s-%s-%s-%s"%(ref_wav_path,prompt_text,prompt_language,text,text_language,top_k,top_p,temperature)
        # print(cache.keys(),if_freeze)
        # The cache is keyed by sentence index only, not by text or settings.
        if(i_text in cache and if_freeze==True):pred_semantic=cache[i_text]
        else:
            with torch.no_grad():
                pred_semantic, idx = t2s_model.model.infer_panel(
                    all_phoneme_ids,
                    all_phoneme_len,
                    None if ref_free else prompt,
                    bert,
                    # prompt_phone_len=ph_offset,
                    top_k=top_k,
                    top_p=top_p,
                    temperature=temperature,
                    early_stop_num=hz * max_sec,
                )
                # Keep only the last ``idx`` tokens of the returned sequence.
                pred_semantic = pred_semantic[:, -idx:].unsqueeze(0)
                cache[i_text]=pred_semantic
        t3 = ttime()
        refers=[]
        if(inp_refs):
            for path in inp_refs:
                try:
                    refer = get_spepc(hps, path.name).to(dtype).to(device)
                    refers.append(refer)
                except:
                    # A reference file that fails to load is logged and skipped.
                    traceback.print_exc()
        # Fall back to the main reference audio when no extra reference loaded.
        if(len(refers)==0):refers = [get_spepc(hps, ref_wav_path).to(dtype).to(device)]
        audio = (vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers,speed=speed).detach().cpu().numpy()[0, 0])
        max_audio=np.abs(audio).max()  # simple guard against clipping in 16-bit output
        if max_audio>1:audio/=max_audio
        audio_opt.append(audio)
        audio_opt.append(zero_wav)
        t4 = ttime()
        t.extend([t2 - t1,t3 - t2, t4 - t3])
        t1 = ttime()
    # Timing summary: reference preparation, then the three per-sentence stages summed over sentences.
    print("%.3f\t%.3f\t%.3f\t%.3f" %
          (t[0], sum(t[1::3]), sum(t[2::3]), sum(t[3::3]))
          )
    # Scale by 32768 and cast to int16.
    yield hps.data.sampling_rate, (np.concatenate(audio_opt, 0) * 32768).astype(
        np.int16
    )
|
535 |
+
|
536 |
+
|
537 |
+
def split(todo_text):
    """Split text into pieces that each end with a character from ``splits``.

    "……" is replaced by "。" and "——" by "," first, and "。" is appended when
    the text does not already end with a separator, so every piece ends with
    one. Empty input returns an empty list (it used to raise IndexError on
    ``todo_text[-1]``).
    """
    if not todo_text:
        return []
    todo_text = todo_text.replace("……", "。").replace("——", ",")
    if todo_text[-1] not in splits:
        todo_text += "。"
    todo_texts = []
    piece_start = 0
    for position, char in enumerate(todo_text):
        if char in splits:
            # Close the current piece right after the separator.
            todo_texts.append(todo_text[piece_start:position + 1])
            piece_start = position + 1
    return todo_texts
|
554 |
+
|
555 |
+
|
556 |
+
def cut1(inp):
    """Group the sentences of ``inp`` four at a time, one group per output line.

    The last group takes everything that remains, so with more than four
    sentences it holds five to eight of them. With four sentences or fewer the
    stripped input is returned as a single line. Lines made only of
    ``punctuation`` characters are dropped. Empty input returns "" (it used to
    raise IndexError).
    """
    inp = inp.strip("\n")
    if not inp:
        return ""
    inps = split(inp)
    split_idx = list(range(0, len(inps), 4))
    if split_idx:
        # An open-ended final slice lets the last group absorb the remainder.
        split_idx[-1] = None
    if len(split_idx) > 1:
        opts = [
            "".join(inps[start:stop])
            for start, stop in zip(split_idx, split_idx[1:])
        ]
    else:
        opts = [inp]
    opts = [item for item in opts if not set(item).issubset(punctuation)]
    return "\n".join(opts)
|
569 |
+
|
570 |
+
|
571 |
+
def cut2(inp):
    """Group sentences into lines of more than 50 characters each.

    Sentences are accumulated until the running length exceeds 50, then
    flushed as one line. A final line shorter than 50 characters is merged
    into the previous one. Lines made only of ``punctuation`` characters are
    dropped. Empty input returns "" (it used to raise IndexError inside
    ``split``).
    """
    inp = inp.strip("\n")
    if not inp:
        return ""
    inps = split(inp)
    if len(inps) < 2:
        return inp
    opts = []
    running_length = 0
    pending = []
    for sentence in inps:
        running_length += len(sentence)
        pending.append(sentence)
        if running_length > 50:
            opts.append("".join(pending))
            running_length = 0
            pending = []
    if pending:
        opts.append("".join(pending))
    # If the last line is too short, merge it into the one before it.
    if len(opts) > 1 and len(opts[-1]) < 50:
        opts[-2] = opts[-2] + opts[-1]
        opts = opts[:-1]
    opts = [item for item in opts if not set(item).issubset(punctuation)]
    return "\n".join(opts)
|
594 |
+
|
595 |
+
|
596 |
+
def cut3(inp):
    """Put each "。"-separated sentence of ``inp`` on its own line.

    The separators are removed, and pieces made only of ``punctuation``
    characters (including empty pieces) are dropped.
    """
    body = inp.strip("\n").strip("。")
    kept = []
    for sentence in body.split("。"):
        if set(sentence).issubset(punctuation):
            continue
        kept.append(sentence)
    return "\n".join(kept)
|
601 |
+
|
602 |
+
def cut4(inp):
    """Put each "."-separated sentence of ``inp`` on its own line.

    A "." with a digit on both sides is treated as a decimal point and kept,
    so "3.14" is no longer cut in half; ``cut5`` applies the same rule. The
    separators are removed, and pieces made only of ``punctuation`` characters
    (including empty pieces) are dropped.
    """
    inp = inp.strip("\n")
    # Split on "." unless it is both preceded and followed by a digit.
    opts = re.split(r'(?<!\d)\.|\.(?!\d)', inp.strip("."))
    opts = [item for item in opts if not set(item).issubset(punctuation)]
    return "\n".join(opts)
|
607 |
+
|
608 |
+
|
609 |
+
# contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py
|
610 |
+
def cut5(inp):
    """Start a new line after every punctuation mark in ``inp``.

    Each piece keeps its trailing punctuation mark. A "." with a digit on both
    sides is treated as a decimal point and does not end a piece. Pieces made
    only of punctuation are dropped.
    """
    inp = inp.strip("\n")
    punds = {',', '.', ';', '?', '!', '、', ',', '。', '?', '!', ';', ':', '…'}
    pieces = []
    buffer = []
    last_index = len(inp) - 1

    for position, char in enumerate(inp):
        buffer.append(char)
        if char not in punds:
            continue
        is_decimal_point = (
            char == '.'
            and 0 < position < last_index
            and inp[position - 1].isdigit()
            and inp[position + 1].isdigit()
        )
        if is_decimal_point:
            continue
        pieces.append("".join(buffer))
        buffer = []

    if buffer:
        pieces.append("".join(buffer))

    return "\n".join(piece for piece in pieces if not set(piece).issubset(punds))
|
632 |
+
|
633 |
+
|
634 |
+
def custom_sort_key(s):
    """Return a natural-sort key for ``s``: digit runs become ints, other parts stay strings.

    The pattern is now a raw string; the non-raw ``'(\\d+)'`` form is an
    invalid escape sequence that newer Python versions warn about.
    """
    # Split into alternating non-digit / digit parts; the capture group keeps the digits.
    parts = re.split(r'(\d+)', s)
    # Convert digit runs to int so "x2" sorts before "x10".
    return [int(part) if part.isdigit() else part for part in parts]
|
640 |
+
|
641 |
+
def process_text(texts):
    """Return ``texts`` without blank entries.

    An entry is blank when it is None or contains only whitespace. The
    original compared against a few exact strings, and the validity check and
    the filter used different lists, so entries such as "  " slipped through.

    Raises:
        ValueError: when no non-blank entry remains.
    """
    kept = [text for text in texts if text is not None and text.strip()]
    if not kept:
        raise ValueError(i18n("请输入有效文本"))
    return kept
|
651 |
+
|
652 |
+
|
653 |
+
def change_choices():
    """Rescan the weight folders and return update dicts for the SoVITS and GPT dropdowns."""
    sovits_found, gpt_found = get_weights_names(GPT_weight_root, SoVITS_weight_root)
    sovits_update = {"choices": sorted(sovits_found, key=custom_sort_key), "__type__": "update"}
    gpt_update = {"choices": sorted(gpt_found, key=custom_sort_key), "__type__": "update"}
    return sovits_update, gpt_update
|
656 |
+
|
657 |
+
|
658 |
+
SoVITS_weight_root=["SoVITS_weights_v2","SoVITS_weights"]
|
659 |
+
GPT_weight_root=["GPT_weights_v2","GPT_weights"]
|
660 |
+
for path in SoVITS_weight_root+GPT_weight_root:
|
661 |
+
os.makedirs(path,exist_ok=True)
|
662 |
+
|
663 |
+
|
664 |
+
def get_weights_names(GPT_weight_root, SoVITS_weight_root):
    """List the selectable checkpoints.

    Starts from the pretrained model paths, then adds every ``*.pth`` file in
    the SoVITS folders and every ``*.ckpt`` file in the GPT folders as
    ``"<folder>/<file>"``.

    Returns:
        ``(SoVITS_names, GPT_names)``.
    """
    SoVITS_names = list(pretrained_sovits_name)
    for folder in SoVITS_weight_root:
        SoVITS_names.extend(
            "%s/%s" % (folder, entry) for entry in os.listdir(folder) if entry.endswith(".pth")
        )
    GPT_names = list(pretrained_gpt_name)
    for folder in GPT_weight_root:
        GPT_names.extend(
            "%s/%s" % (folder, entry) for entry in os.listdir(folder) if entry.endswith(".ckpt")
        )
    return SoVITS_names, GPT_names
|
674 |
+
|
675 |
+
|
676 |
+
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
677 |
+
|
678 |
+
def html_center(text, label='p'):
    """Return an HTML snippet that wraps *text* in a centered ``<div>``.

    Args:
        text: Content placed inside the inner element (inserted as-is).
        label: Tag name of the inner element, ``'p'`` by default.
    """
    wrapper_open = '<div style="text-align: center; margin: 100; padding: 50;">'
    inner = f'<{label} style="margin: 0; padding: 0;">{text}</{label}>'
    return "\n".join((wrapper_open, inner, "</div>"))
|
682 |
+
|
683 |
+
def html_left(text, label='p'):
    """Return an HTML snippet that wraps *text* in a left-aligned ``<div>``.

    Args:
        text: Content placed inside the inner element (inserted as-is).
        label: Tag name of the inner element, ``'p'`` by default.
    """
    pieces = [
        '<div style="text-align: left; margin: 0; padding: 0;">',
        f'<{label} style="margin: 0; padding: 0;">{text}</{label}>',
        '</div>',
    ]
    return "\n".join(pieces)
|
687 |
+
|
688 |
+
|
689 |
+
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
690 |
+
gr.Markdown(
|
691 |
+
value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
|
692 |
+
)
|
693 |
+
with gr.Group():
|
694 |
+
gr.Markdown(html_center(i18n("模型切换"),'h3'))
|
695 |
+
with gr.Row():
|
696 |
+
GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(GPT_names, key=custom_sort_key), value=gpt_path, interactive=True, scale=14)
|
697 |
+
SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path, interactive=True, scale=14)
|
698 |
+
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary", scale=14)
|
699 |
+
refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
|
700 |
+
gr.Markdown(html_center(i18n("*请上传并填写参考信息"),'h3'))
|
701 |
+
with gr.Row():
|
702 |
+
inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频,超过会报错!"), type="filepath", scale=13)
|
703 |
+
with gr.Column(scale=13):
|
704 |
+
ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True,scale=1)
|
705 |
+
gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开。<br>开启后无视填写的参考文本。")))
|
706 |
+
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=5, max_lines=5,scale=1)
|
707 |
+
with gr.Column(scale=14):
|
708 |
+
prompt_language = gr.Dropdown(
|
709 |
+
label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"),
|
710 |
+
)
|
711 |
+
inp_refs = gr.File(label=i18n("可选项:通过拖拽多个文件上传多个参考音频(建议同性),平均融合他们的音色。如不填写此项,音色由左侧单个参考音频控制。如是微调模型,建议参考音频全部在微调训练集音色内,底模不用管。"),file_count="multiple")
|
712 |
+
gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"),'h3'))
|
713 |
+
with gr.Row():
|
714 |
+
with gr.Column(scale=13):
|
715 |
+
text = gr.Textbox(label=i18n("需要合成的文本"), value="", lines=26, max_lines=26)
|
716 |
+
with gr.Column(scale=7):
|
717 |
+
text_language = gr.Dropdown(
|
718 |
+
label=i18n("需要合成的语种")+i18n(".限制范围越小判别效果越好。"), choices=list(dict_language.keys()), value=i18n("中文"), scale=1
|
719 |
+
)
|
720 |
+
how_to_cut = gr.Dropdown(
|
721 |
+
label=i18n("怎么切"),
|
722 |
+
choices=[i18n("不切"), i18n("凑四句一切"), i18n("凑50字一切"), i18n("按中文句号。切"), i18n("按英文句号.切"), i18n("按标点符号切"), ],
|
723 |
+
value=i18n("凑四句一切"),
|
724 |
+
interactive=True, scale=1
|
725 |
+
)
|
726 |
+
gr.Markdown(value=html_center(i18n("语速调整,高为更快")))
|
727 |
+
if_freeze=gr.Checkbox(label=i18n("是否直接对上次合成结果调整语速和音色。防止随机性。"), value=False, interactive=True,show_label=True, scale=1)
|
728 |
+
speed = gr.Slider(minimum=0.6,maximum=1.65,step=0.05,label=i18n("语速"),value=1,interactive=True, scale=1)
|
729 |
+
gr.Markdown(html_center(i18n("GPT采样参数(无参考文本时不要太低。不懂就用默认):")))
|
730 |
+
top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=15,interactive=True, scale=1)
|
731 |
+
top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True, scale=1)
|
732 |
+
temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True, scale=1)
|
733 |
+
# with gr.Column():
|
734 |
+
# gr.Markdown(value=i18n("手工调整音素。当音素框不为空时使用手工音素输入推理,无视目标文本框。"))
|
735 |
+
# phoneme=gr.Textbox(label=i18n("音素框"), value="")
|
736 |
+
# get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary")
|
737 |
+
with gr.Row():
|
738 |
+
inference_button = gr.Button(i18n("合成语音"), variant="primary", size='lg', scale=25)
|
739 |
+
output = gr.Audio(label=i18n("输出的语音"), scale=14)
|
740 |
+
|
741 |
+
inference_button.click(
|
742 |
+
get_tts_wav,
|
743 |
+
[inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free,speed,if_freeze,inp_refs],
|
744 |
+
[output],
|
745 |
+
)
|
746 |
+
SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language])
|
747 |
+
GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
|
748 |
+
|
749 |
+
# gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
|
750 |
+
# with gr.Row():
|
751 |
+
# text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="")
|
752 |
+
# button1 = gr.Button(i18n("凑四句一切"), variant="primary")
|
753 |
+
# button2 = gr.Button(i18n("凑50字一切"), variant="primary")
|
754 |
+
# button3 = gr.Button(i18n("按中文句号。切"), variant="primary")
|
755 |
+
# button4 = gr.Button(i18n("按英文句号.切"), variant="primary")
|
756 |
+
# button5 = gr.Button(i18n("按标点符号切"), variant="primary")
|
757 |
+
# text_opt = gr.Textbox(label=i18n("切分后文本"), value="")
|
758 |
+
# button1.click(cut1, [text_inp], [text_opt])
|
759 |
+
# button2.click(cut2, [text_inp], [text_opt])
|
760 |
+
# button3.click(cut3, [text_inp], [text_opt])
|
761 |
+
# button4.click(cut4, [text_inp], [text_opt])
|
762 |
+
# button5.click(cut5, [text_inp], [text_opt])
|
763 |
+
# gr.Markdown(html_center(i18n("后续将支持转音素、手工修改音素、语音合成分步执行。")))
|
764 |
+
|
765 |
+
if __name__ == '__main__':
|
766 |
+
app.queue().launch(#concurrency_count=511, max_size=1022
|
767 |
+
server_name="0.0.0.0",
|
768 |
+
inbrowser=True,
|
769 |
+
share=is_share,
|
770 |
+
server_port=infer_ttswebui,
|
771 |
+
quiet=True,
|
772 |
+
)
|
GPT_SoVITS/inference_webui_fast.py
ADDED
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
按中英混合识别
|
3 |
+
按日英混合识别
|
4 |
+
多语种启动切分识别语种
|
5 |
+
全部按中文识别
|
6 |
+
全部按英文识别
|
7 |
+
全部按日文识别
|
8 |
+
'''
|
9 |
+
import random
|
10 |
+
import os, re, logging
|
11 |
+
import sys
|
12 |
+
now_dir = os.getcwd()
|
13 |
+
sys.path.append(now_dir)
|
14 |
+
sys.path.append("%s/GPT_SoVITS" % (now_dir))
|
15 |
+
|
16 |
+
logging.getLogger("markdown_it").setLevel(logging.ERROR)
|
17 |
+
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
18 |
+
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
19 |
+
logging.getLogger("httpx").setLevel(logging.ERROR)
|
20 |
+
logging.getLogger("asyncio").setLevel(logging.ERROR)
|
21 |
+
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
|
22 |
+
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
|
23 |
+
import pdb
|
24 |
+
import torch
|
25 |
+
|
26 |
+
try:
|
27 |
+
import gradio.analytics as analytics
|
28 |
+
analytics.version_check = lambda:None
|
29 |
+
except:...
|
30 |
+
|
31 |
+
|
32 |
+
def _env_flag(name, default):
    """Read a boolean switch from the environment without evaluating code.

    Only "true", "1", "yes" and "on" (case-insensitive, surrounding blanks
    ignored) count as enabled; every other value is treated as disabled.
    """
    return os.environ.get(name, default).strip().lower() in ("true", "1", "yes", "on")


# Port the web UI listens on (9872 unless overridden through the environment).
infer_ttswebui = int(os.environ.get("infer_ttswebui", 9872))
# NOTE(security): "is_share" and "is_half" used to be parsed with eval(),
# which executes whatever expression the environment variable contains.
# They are now parsed as plain booleans; "True" / "False" keep their meaning.
is_share = _env_flag("is_share", "False")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]

# Half precision is used only when requested AND CUDA is available.
is_half = _env_flag("is_half", "True") and torch.cuda.is_available()
# Optional path overrides; None means "keep the value from the config file".
gpt_path = os.environ.get("gpt_path", None)
sovits_path = os.environ.get("sovits_path", None)
cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
bert_path = os.environ.get("bert_path", None)
version = os.environ.get("version", "v2")
|
45 |
+
|
46 |
+
import gradio as gr
|
47 |
+
from TTS_infer_pack.TTS import TTS, TTS_Config
|
48 |
+
from TTS_infer_pack.text_segmentation_method import get_method
|
49 |
+
from tools.i18n.i18n import I18nAuto, scan_language_list
|
50 |
+
|
51 |
+
language=os.environ.get("language","Auto")
|
52 |
+
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
|
53 |
+
i18n = I18nAuto(language=language)
|
54 |
+
|
55 |
+
|
56 |
+
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
|
57 |
+
|
58 |
+
if torch.cuda.is_available():
|
59 |
+
device = "cuda"
|
60 |
+
# elif torch.backends.mps.is_available():
|
61 |
+
# device = "mps"
|
62 |
+
else:
|
63 |
+
device = "cpu"
|
64 |
+
|
65 |
+
dict_language_v1 = {
|
66 |
+
i18n("中文"): "all_zh",#全部按中文识别
|
67 |
+
i18n("英文"): "en",#全部按英文识别#######不变
|
68 |
+
i18n("日文"): "all_ja",#全部按日文识别
|
69 |
+
i18n("中英混合"): "zh",#按中英混合识别####不变
|
70 |
+
i18n("日英混合"): "ja",#按日英混合识别####不变
|
71 |
+
i18n("多语种混合"): "auto",#多语种启动切分识别语种
|
72 |
+
}
|
73 |
+
dict_language_v2 = {
|
74 |
+
i18n("中文"): "all_zh",#全部按中文识别
|
75 |
+
i18n("英文"): "en",#全部按英文识别#######不变
|
76 |
+
i18n("日文"): "all_ja",#全部按日文识别
|
77 |
+
i18n("粤语"): "all_yue",#全部按中文识别
|
78 |
+
i18n("韩文"): "all_ko",#全部按韩文识别
|
79 |
+
i18n("中英混合"): "zh",#按中英混合识别####不变
|
80 |
+
i18n("日英混合"): "ja",#按日英混合识别####不变
|
81 |
+
i18n("粤英混合"): "yue",#按粤英混合识别####不变
|
82 |
+
i18n("韩英混合"): "ko",#按韩英混合识别####不变
|
83 |
+
i18n("多语种混合"): "auto",#多语种启动切分识别语种
|
84 |
+
i18n("多语种混合(粤语)"): "auto_yue",#多语种启动切分识别语种
|
85 |
+
}
|
86 |
+
dict_language = dict_language_v1 if version =='v1' else dict_language_v2
|
87 |
+
|
88 |
+
cut_method = {
|
89 |
+
i18n("不切"):"cut0",
|
90 |
+
i18n("凑四句一切"): "cut1",
|
91 |
+
i18n("凑50字一切"): "cut2",
|
92 |
+
i18n("按中文句号。切"): "cut3",
|
93 |
+
i18n("按英文句号.切"): "cut4",
|
94 |
+
i18n("按标点符号切"): "cut5",
|
95 |
+
}
|
96 |
+
|
97 |
+
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
|
98 |
+
tts_config.device = device
|
99 |
+
tts_config.is_half = is_half
|
100 |
+
tts_config.version = version
|
101 |
+
if gpt_path is not None:
|
102 |
+
tts_config.t2s_weights_path = gpt_path
|
103 |
+
if sovits_path is not None:
|
104 |
+
tts_config.vits_weights_path = sovits_path
|
105 |
+
if cnhubert_base_path is not None:
|
106 |
+
tts_config.cnhuhbert_base_path = cnhubert_base_path
|
107 |
+
if bert_path is not None:
|
108 |
+
tts_config.bert_base_path = bert_path
|
109 |
+
|
110 |
+
print(tts_config)
|
111 |
+
tts_pipeline = TTS(tts_config)
|
112 |
+
gpt_path = tts_config.t2s_weights_path
|
113 |
+
sovits_path = tts_config.vits_weights_path
|
114 |
+
version = tts_config.version
|
115 |
+
|
116 |
+
def inference(text, text_lang,
              ref_audio_path,
              aux_ref_audio_paths,
              prompt_text,
              prompt_lang, top_k,
              top_p, temperature,
              text_split_method, batch_size,
              speed_factor, ref_text_free,
              split_bucket, fragment_interval,
              seed, keep_random, parallel_infer,
              repetition_penalty
              ):
    """Run the TTS pipeline for one request and stream its results.

    UI labels for the languages and the split method are translated to
    internal codes through ``dict_language`` and ``cut_method``. When
    ``keep_random`` is set, or ``seed`` is -1, "" or None, a fresh random
    32-bit seed is drawn; otherwise ``seed`` is used unchanged.

    Yields:
        ``(item, actual_seed)`` for every item produced by
        ``tts_pipeline.run``.
    """
    if keep_random:
        seed = -1
    if seed in [-1, "", None]:
        actual_seed = random.randrange(1 << 32)
    else:
        actual_seed = seed

    text_lang_code = dict_language[text_lang]
    if aux_ref_audio_paths is None:
        aux_paths = []
    else:
        aux_paths = [item.name for item in aux_ref_audio_paths]
    # The reference transcript is ignored in reference-text-free mode.
    effective_prompt = "" if ref_text_free else prompt_text
    prompt_lang_code = dict_language[prompt_lang]
    split_code = cut_method[text_split_method]

    request = {
        "text": text,
        "text_lang": text_lang_code,
        "ref_audio_path": ref_audio_path,
        "aux_ref_audio_paths": aux_paths,
        "prompt_text": effective_prompt,
        "prompt_lang": prompt_lang_code,
        "top_k": top_k,
        "top_p": top_p,
        "temperature": temperature,
        "text_split_method": split_code,
        "batch_size": int(batch_size),
        "speed_factor": float(speed_factor),
        "split_bucket": split_bucket,
        "return_fragment": False,
        "fragment_interval": fragment_interval,
        "seed": actual_seed,
        "parallel_infer": parallel_infer,
        "repetition_penalty": repetition_penalty,
    }
    for chunk in tts_pipeline.run(request):
        yield chunk, actual_seed
|
153 |
+
|
154 |
+
def custom_sort_key(s):
    """Return a natural-sort key: digit runs compare as numbers, not text.

    Args:
        s: The string (for example a checkpoint path) to build a key for.

    Returns:
        A list of alternating ``str`` and ``int`` parts, starting with a
        (possibly empty) ``str``.
    """
    # Split into digit and non-digit runs; the capture group keeps the digit
    # runs in the result. A raw string avoids the invalid "\d" escape.
    pieces = re.split(r"(\d+)", s)
    # Convert the digit runs to int and leave everything else untouched.
    # isdecimal() (unlike isdigit()) rejects characters such as "²" that
    # int() cannot convert.
    return [int(piece) if piece.isdecimal() else piece for piece in pieces]
|
160 |
+
|
161 |
+
|
162 |
+
def change_choices():
    """Rescan the weight folders and return refresh payloads for both dropdowns.

    Returns:
        ``(sovits_update, gpt_update)``: update dicts whose ``"choices"`` are
        the naturally sorted SoVITS and GPT weight names respectively.
    """
    def _as_update(names):
        return {"choices": sorted(names, key=custom_sort_key), "__type__": "update"}

    sovits_found, gpt_found = get_weights_names(GPT_weight_root, SoVITS_weight_root)
    return _as_update(sovits_found), _as_update(gpt_found)
|
165 |
+
|
166 |
+
|
167 |
+
# Bundled base checkpoints (v2 first, then v1).
pretrained_sovits_name = ["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2G488k.pth"]
pretrained_gpt_name = ["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"]
# Keep only the checkpoints that are actually present on disk. Filtering each
# list on its own works for any list length and leaves no throwaway
# module-level names behind.
pretrained_gpt_name = [ckpt for ckpt in pretrained_gpt_name if os.path.exists(ckpt)]
pretrained_sovits_name = [ckpt for ckpt in pretrained_sovits_name if os.path.exists(ckpt)]
|
176 |
+
|
177 |
+
# Folders scanned for user-trained weights. They are created here so the
# directory listings done later always have something to read.
SoVITS_weight_root = ["SoVITS_weights_v2", "SoVITS_weights"]
GPT_weight_root = ["GPT_weights_v2", "GPT_weights"]
for path in [*SoVITS_weight_root, *GPT_weight_root]:
    os.makedirs(path, exist_ok=True)
|
181 |
+
|
182 |
+
def get_weights_names(GPT_weight_root, SoVITS_weight_root):
    """List every SoVITS and GPT weight file that can be selected.

    The pretrained entries (module-level ``pretrained_sovits_name`` and
    ``pretrained_gpt_name``) come first, followed by ``"<folder>/<file>"`` for
    each ``.pth`` file in the SoVITS folders and each ``.ckpt`` file in the
    GPT folders.

    Args:
        GPT_weight_root: Iterable of folders holding GPT checkpoints.
        SoVITS_weight_root: Iterable of folders holding SoVITS checkpoints.

    Returns:
        A ``(SoVITS_names, GPT_names)`` tuple of lists.
    """
    sovits_found = list(pretrained_sovits_name)
    for folder in SoVITS_weight_root:
        sovits_found.extend(
            "%s/%s" % (folder, entry)
            for entry in os.listdir(folder)
            if entry.endswith(".pth")
        )

    gpt_found = list(pretrained_gpt_name)
    for folder in GPT_weight_root:
        gpt_found.extend(
            "%s/%s" % (folder, entry)
            for entry in os.listdir(folder)
            if entry.endswith(".ckpt")
        )
    return sovits_found, gpt_found
|
192 |
+
|
193 |
+
|
194 |
+
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
195 |
+
|
196 |
+
|
197 |
+
|
198 |
+
def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
    """Load new SoVITS weights and refresh the language dropdowns.

    After loading, the module-level ``dict_language`` is switched to the v1 or
    v2 table according to ``tts_pipeline.configs.version``.

    Args:
        sovits_path: Path of the SoVITS weights to load.
        prompt_language: Currently selected reference-audio language label.
        text_language: Currently selected target-text language label.

    Returns:
        ``None`` when either language is ``None``. Otherwise a 6-tuple of
        update dicts: new choices for both language dropdowns, then
        (prompt text, prompt language) and (target text, target language)
        updates. A selection still available in the new table is kept;
        otherwise its text is cleared and its language reset to the
        translated "中文" label.
    """
    global version, dict_language
    tts_pipeline.init_vits_weights(sovits_path)
    if tts_pipeline.configs.version == 'v1':
        dict_language = dict_language_v1
    else:
        dict_language = dict_language_v2

    if prompt_language is None or text_language is None:
        return None

    def _keep_or_reset(selected):
        # Returns (text box update, language dropdown update) for one pair.
        if selected in list(dict_language.keys()):
            return {'__type__':'update'}, {'__type__':'update', 'value':selected}
        return {'__type__':'update', 'value':''}, {'__type__':'update', 'value':i18n("中文")}

    prompt_text_update, prompt_language_update = _keep_or_reset(prompt_language)
    text_update, text_language_update = _keep_or_reset(text_language)
    prompt_choices = {'__type__':'update', 'choices':list(dict_language.keys())}
    text_choices = {'__type__':'update', 'choices':list(dict_language.keys())}
    return (
        prompt_choices,
        text_choices,
        prompt_text_update,
        prompt_language_update,
        text_update,
        text_language_update,
    )
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
|
218 |
+
gr.Markdown(
|
219 |
+
value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
|
220 |
+
)
|
221 |
+
|
222 |
+
with gr.Column():
|
223 |
+
# with gr.Group():
|
224 |
+
gr.Markdown(value=i18n("模型切换"))
|
225 |
+
with gr.Row():
|
226 |
+
GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(GPT_names, key=custom_sort_key), value=gpt_path, interactive=True)
|
227 |
+
SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path, interactive=True)
|
228 |
+
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
|
229 |
+
refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
|
230 |
+
|
231 |
+
|
232 |
+
with gr.Row():
|
233 |
+
with gr.Column():
|
234 |
+
gr.Markdown(value=i18n("*请上传并填写参考信息"))
|
235 |
+
with gr.Row():
|
236 |
+
inp_ref = gr.Audio(label=i18n("主参考音频(请上传3~10秒内参考音频,超过会报错!)"), type="filepath")
|
237 |
+
inp_refs = gr.File(label=i18n("辅参考音频(可选多个,或不选)"),file_count="multiple")
|
238 |
+
prompt_text = gr.Textbox(label=i18n("主参考音频的文本"), value="", lines=2)
|
239 |
+
with gr.Row():
|
240 |
+
prompt_language = gr.Dropdown(
|
241 |
+
label=i18n("主参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文")
|
242 |
+
)
|
243 |
+
with gr.Column():
|
244 |
+
ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
|
245 |
+
gr.Markdown(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开,开启后无视填写的参考文本。"))
|
246 |
+
|
247 |
+
with gr.Column():
|
248 |
+
gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
|
249 |
+
text = gr.Textbox(label=i18n("需要合成的文本"), value="", lines=20, max_lines=20)
|
250 |
+
text_language = gr.Dropdown(
|
251 |
+
label=i18n("需要合成的文本的语种"), choices=list(dict_language.keys()), value=i18n("中文")
|
252 |
+
)
|
253 |
+
|
254 |
+
|
255 |
+
with gr.Group():
|
256 |
+
gr.Markdown(value=i18n("推理设置"))
|
257 |
+
with gr.Row():
|
258 |
+
|
259 |
+
with gr.Column():
|
260 |
+
batch_size = gr.Slider(minimum=1,maximum=200,step=1,label=i18n("batch_size"),value=20,interactive=True)
|
261 |
+
fragment_interval = gr.Slider(minimum=0.01,maximum=1,step=0.01,label=i18n("分段间隔(秒)"),value=0.3,interactive=True)
|
262 |
+
speed_factor = gr.Slider(minimum=0.6,maximum=1.65,step=0.05,label="speed_factor",value=1.0,interactive=True)
|
263 |
+
top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
|
264 |
+
top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
|
265 |
+
temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
|
266 |
+
repetition_penalty = gr.Slider(minimum=0,maximum=2,step=0.05,label=i18n("重复惩罚"),value=1.35,interactive=True)
|
267 |
+
with gr.Column():
|
268 |
+
with gr.Row():
|
269 |
+
how_to_cut = gr.Dropdown(
|
270 |
+
label=i18n("怎么切"),
|
271 |
+
choices=[i18n("不切"), i18n("凑四句一切"), i18n("凑50字一切"), i18n("按中文句号。切"), i18n("按英文句号.切"), i18n("按标点符号切"), ],
|
272 |
+
value=i18n("凑四句一切"),
|
273 |
+
interactive=True, scale=1
|
274 |
+
)
|
275 |
+
parallel_infer = gr.Checkbox(label=i18n("并行推理"), value=True, interactive=True, show_label=True)
|
276 |
+
split_bucket = gr.Checkbox(label=i18n("数据分桶(并行推理时会降低一点计算量)"), value=True, interactive=True, show_label=True)
|
277 |
+
|
278 |
+
with gr.Row():
|
279 |
+
seed = gr.Number(label=i18n("随机种子"),value=-1)
|
280 |
+
keep_random = gr.Checkbox(label=i18n("保持随机"), value=True, interactive=True, show_label=True)
|
281 |
+
|
282 |
+
output = gr.Audio(label=i18n("输出的语音"))
|
283 |
+
with gr.Row():
|
284 |
+
inference_button = gr.Button(i18n("合成语音"), variant="primary")
|
285 |
+
stop_infer = gr.Button(i18n("终止合成"), variant="primary")
|
286 |
+
|
287 |
+
|
288 |
+
inference_button.click(
|
289 |
+
inference,
|
290 |
+
[
|
291 |
+
text,text_language, inp_ref, inp_refs,
|
292 |
+
prompt_text, prompt_language,
|
293 |
+
top_k, top_p, temperature,
|
294 |
+
how_to_cut, batch_size,
|
295 |
+
speed_factor, ref_text_free,
|
296 |
+
split_bucket,fragment_interval,
|
297 |
+
seed, keep_random, parallel_infer,
|
298 |
+
repetition_penalty
|
299 |
+
],
|
300 |
+
[output, seed],
|
301 |
+
)
|
302 |
+
stop_infer.click(tts_pipeline.stop, [], [])
|
303 |
+
SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language])
|
304 |
+
GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], [])
|
305 |
+
|
306 |
+
with gr.Group():
|
307 |
+
gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
|
308 |
+
with gr.Row():
|
309 |
+
text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="", lines=4)
|
310 |
+
with gr.Column():
|
311 |
+
_how_to_cut = gr.Radio(
|
312 |
+
label=i18n("怎么切"),
|
313 |
+
choices=[i18n("不切"), i18n("凑四句一切"), i18n("凑50字一切"), i18n("按中文句号。切"), i18n("按英文句号.切"), i18n("按标点符号切"), ],
|
314 |
+
value=i18n("凑四句一切"),
|
315 |
+
interactive=True,
|
316 |
+
)
|
317 |
+
cut_text= gr.Button(i18n("切分"), variant="primary")
|
318 |
+
|
319 |
+
def to_cut(text_inp, how_to_cut):
|
320 |
+
if len(text_inp.strip()) == 0 or text_inp==[]:
|
321 |
+
return ""
|
322 |
+
method = get_method(cut_method[how_to_cut])
|
323 |
+
return method(text_inp)
|
324 |
+
|
325 |
+
text_opt = gr.Textbox(label=i18n("切分后文本"), value="", lines=4)
|
326 |
+
cut_text.click(to_cut, [text_inp, _how_to_cut], [text_opt])
|
327 |
+
gr.Markdown(value=i18n("后续将支持转音素、手工修改音素、语音合成分步执行。"))
|
328 |
+
|
329 |
+
if __name__ == '__main__':
|
330 |
+
app.queue().launch(#concurrency_count=511, max_size=1022
|
331 |
+
server_name="0.0.0.0",
|
332 |
+
inbrowser=True,
|
333 |
+
share=is_share,
|
334 |
+
server_port=infer_ttswebui,
|
335 |
+
quiet=True,
|
336 |
+
)
|
GPT_SoVITS/onnx_export.py
ADDED
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from module.models_onnx import SynthesizerTrn, symbols_v1, symbols_v2
|
2 |
+
from AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule
|
3 |
+
import torch
|
4 |
+
import torchaudio
|
5 |
+
from torch import nn
|
6 |
+
from feature_extractor import cnhubert
|
7 |
+
|
8 |
+
cnhubert_base_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
|
9 |
+
cnhubert.cnhubert_base_path = cnhubert_base_path
|
10 |
+
ssl_model = cnhubert.get_model()
|
11 |
+
from text import cleaned_text_to_sequence
|
12 |
+
import soundfile
|
13 |
+
from tools.my_utils import load_audio
|
14 |
+
import os
|
15 |
+
import json
|
16 |
+
|
17 |
+
def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
    """Compute a magnitude spectrogram of *y* with a Hann window.

    The signal is reflect-padded by ``int((n_fft - hop_size) / 2)`` samples on
    each side, transformed with ``torch.stft`` (one-sided, not normalized),
    and the real/imaginary parts are combined as ``sqrt(re^2 + im^2 + 1e-6)``.

    Args:
        y: Input tensor; it is unsqueezed at dim 1 for padding, so a 2-D
            tensor is expected (presumably ``(batch, samples)`` - confirm
            against callers).
        n_fft: FFT size.
        sampling_rate: Accepted but not used by this function.
        hop_size: Hop length between frames.
        win_size: Window length.
        center: Forwarded to ``torch.stft``.

    Returns:
        The magnitude spectrogram tensor.
    """
    window = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)

    pad_each_side = int((n_fft - hop_size) / 2)
    padded = torch.nn.functional.pad(
        y.unsqueeze(1), (pad_each_side, pad_each_side), mode="reflect"
    ).squeeze(1)

    # return_complex=False keeps the result as a real tensor whose last
    # dimension holds the (real, imaginary) pair.
    stft_parts = torch.stft(
        padded,
        n_fft,
        hop_length=hop_size,
        win_length=win_size,
        window=window,
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=False,
    )
    # The small epsilon keeps the square root away from exactly zero.
    return torch.sqrt(stft_parts.pow(2).sum(-1) + 1e-6)
|
41 |
+
|
42 |
+
|
43 |
+
class DictToAttrRecursive(dict):
    """A ``dict`` whose items can also be read and written as attributes.

    Nested plain dicts are converted recursively, so ``cfg.model.version``
    and ``cfg["model"]["version"]`` refer to the same value. Values set
    through attribute assignment are stored both as a dict item and as a
    regular instance attribute.
    """

    def __init__(self, input_dict):
        super().__init__(input_dict)
        for key, value in input_dict.items():
            if isinstance(value, dict):
                value = DictToAttrRecursive(value)
            self[key] = value
            setattr(self, key, value)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails; fall back to items.
        try:
            return self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")

    def __setattr__(self, key, value):
        if isinstance(value, dict):
            value = DictToAttrRecursive(value)
        super(DictToAttrRecursive, self).__setitem__(key, value)
        super().__setattr__(key, value)

    def __delattr__(self, item):
        try:
            del self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")
        # __setattr__ also stored the value as a real instance attribute.
        # Remove that copy too, otherwise the "deleted" attribute would still
        # be readable through normal attribute lookup.
        self.__dict__.pop(item, None)
|
69 |
+
|
70 |
+
|
71 |
+
class T2SEncoder(nn.Module):
    """Front half of the text-to-semantic model, packaged for export.

    Joins the reference and target inputs, runs the wrapped
    ``t2s.onnx_encoder`` on them, and extracts the prompt codes from the SSL
    content through ``vits.extract_latent``.
    """

    def __init__(self, t2s, vits):
        super().__init__()
        self.encoder = t2s.onnx_encoder
        self.vits = vits

    def forward(self, ref_seq, text_seq, ref_bert, text_bert, ssl_content):
        """Return ``(encoder_output, prompt)``.

        ``prompt`` is element ``[0, 0]`` of the extracted codes with a leading
        dimension added. The phoneme ids are concatenated along dim 1; the
        BERT features are transposed, concatenated along dim 1 and given a
        leading dimension before being passed to the encoder.
        """
        latent_codes = self.vits.extract_latent(ssl_content)
        prompt = latent_codes[0, 0].unsqueeze(0)

        joined_bert = torch.cat(
            [ref_bert.transpose(0, 1), text_bert.transpose(0, 1)], 1
        ).unsqueeze(0)
        phoneme_ids = torch.cat([ref_seq, text_seq], 1)
        return self.encoder(phoneme_ids, joined_bert), prompt
|
85 |
+
|
86 |
+
|
87 |
+
class T2SModel(nn.Module):
|
88 |
+
def __init__(self, t2s_path, vits_model):
|
89 |
+
super().__init__()
|
90 |
+
dict_s1 = torch.load(t2s_path, map_location="cpu")
|
91 |
+
self.config = dict_s1["config"]
|
92 |
+
self.t2s_model = Text2SemanticLightningModule(self.config, "ojbk", is_train=False)
|
93 |
+
self.t2s_model.load_state_dict(dict_s1["weight"])
|
94 |
+
self.t2s_model.eval()
|
95 |
+
self.vits_model = vits_model.vq_model
|
96 |
+
self.hz = 50
|
97 |
+
self.max_sec = self.config["data"]["max_sec"]
|
98 |
+
self.t2s_model.model.top_k = torch.LongTensor([self.config["inference"]["top_k"]])
|
99 |
+
self.t2s_model.model.early_stop_num = torch.LongTensor([self.hz * self.max_sec])
|
100 |
+
self.t2s_model = self.t2s_model.model
|
101 |
+
self.t2s_model.init_onnx()
|
102 |
+
self.onnx_encoder = T2SEncoder(self.t2s_model, self.vits_model)
|
103 |
+
self.first_stage_decoder = self.t2s_model.first_stage_decoder
|
104 |
+
self.stage_decoder = self.t2s_model.stage_decoder
|
105 |
+
#self.t2s_model = torch.jit.script(self.t2s_model)
|
106 |
+
|
107 |
+
def forward(self, ref_seq, text_seq, ref_bert, text_bert, ssl_content):
|
108 |
+
early_stop_num = self.t2s_model.early_stop_num
|
109 |
+
|
110 |
+
#[1,N] [1,N] [N, 1024] [N, 1024] [1, 768, N]
|
111 |
+
x, prompts = self.onnx_encoder(ref_seq, text_seq, ref_bert, text_bert, ssl_content)
|
112 |
+
|
113 |
+
prefix_len = prompts.shape[1]
|
114 |
+
|
115 |
+
#[1,N,512] [1,N]
|
116 |
+
y, k, v, y_emb, x_example = self.first_stage_decoder(x, prompts)
|
117 |
+
|
118 |
+
stop = False
|
119 |
+
for idx in range(1, 1500):
|
120 |
+
#[1, N] [N_layer, N, 1, 512] [N_layer, N, 1, 512] [1, N, 512] [1] [1, N, 512] [1, N]
|
121 |
+
enco = self.stage_decoder(y, k, v, y_emb, x_example)
|
122 |
+
y, k, v, y_emb, logits, samples = enco
|
123 |
+
if early_stop_num != -1 and (y.shape[1] - prefix_len) > early_stop_num:
|
124 |
+
stop = True
|
125 |
+
if torch.argmax(logits, dim=-1)[0] == self.t2s_model.EOS or samples[0, 0] == self.t2s_model.EOS:
|
126 |
+
stop = True
|
127 |
+
if stop:
|
128 |
+
break
|
129 |
+
y[0, -1] = 0
|
130 |
+
|
131 |
+
return y[:, -idx:].unsqueeze(0)
|
132 |
+
|
133 |
+
def export(self, ref_seq, text_seq, ref_bert, text_bert, ssl_content, project_name, dynamo=False):
|
134 |
+
#self.onnx_encoder = torch.jit.script(self.onnx_encoder)
|
135 |
+
if dynamo:
|
136 |
+
export_options = torch.onnx.ExportOptions(dynamic_shapes=True)
|
137 |
+
onnx_encoder_export_output = torch.onnx.dynamo_export(
|
138 |
+
self.onnx_encoder,
|
139 |
+
(ref_seq, text_seq, ref_bert, text_bert, ssl_content),
|
140 |
+
export_options=export_options
|
141 |
+
)
|
142 |
+
onnx_encoder_export_output.save(f"onnx/{project_name}/{project_name}_t2s_encoder.onnx")
|
143 |
+
return
|
144 |
+
|
145 |
+
torch.onnx.export(
|
146 |
+
self.onnx_encoder,
|
147 |
+
(ref_seq, text_seq, ref_bert, text_bert, ssl_content),
|
148 |
+
f"onnx/{project_name}/{project_name}_t2s_encoder.onnx",
|
149 |
+
input_names=["ref_seq", "text_seq", "ref_bert", "text_bert", "ssl_content"],
|
150 |
+
output_names=["x", "prompts"],
|
151 |
+
dynamic_axes={
|
152 |
+
"ref_seq": {1 : "ref_length"},
|
153 |
+
"text_seq": {1 : "text_length"},
|
154 |
+
"ref_bert": {0 : "ref_length"},
|
155 |
+
"text_bert": {0 : "text_length"},
|
156 |
+
"ssl_content": {2 : "ssl_length"},
|
157 |
+
},
|
158 |
+
opset_version=16
|
159 |
+
)
|
160 |
+
x, prompts = self.onnx_encoder(ref_seq, text_seq, ref_bert, text_bert, ssl_content)
|
161 |
+
|
162 |
+
torch.onnx.export(
|
163 |
+
self.first_stage_decoder,
|
164 |
+
(x, prompts),
|
165 |
+
f"onnx/{project_name}/{project_name}_t2s_fsdec.onnx",
|
166 |
+
input_names=["x", "prompts"],
|
167 |
+
output_names=["y", "k", "v", "y_emb", "x_example"],
|
168 |
+
dynamic_axes={
|
169 |
+
"x": {1 : "x_length"},
|
170 |
+
"prompts": {1 : "prompts_length"},
|
171 |
+
},
|
172 |
+
verbose=False,
|
173 |
+
opset_version=16
|
174 |
+
)
|
175 |
+
y, k, v, y_emb, x_example = self.first_stage_decoder(x, prompts)
|
176 |
+
|
177 |
+
torch.onnx.export(
|
178 |
+
self.stage_decoder,
|
179 |
+
(y, k, v, y_emb, x_example),
|
180 |
+
f"onnx/{project_name}/{project_name}_t2s_sdec.onnx",
|
181 |
+
input_names=["iy", "ik", "iv", "iy_emb", "ix_example"],
|
182 |
+
output_names=["y", "k", "v", "y_emb", "logits", "samples"],
|
183 |
+
dynamic_axes={
|
184 |
+
"iy": {1 : "iy_length"},
|
185 |
+
"ik": {1 : "ik_length"},
|
186 |
+
"iv": {1 : "iv_length"},
|
187 |
+
"iy_emb": {1 : "iy_emb_length"},
|
188 |
+
"ix_example": {1 : "ix_example_length"},
|
189 |
+
},
|
190 |
+
verbose=False,
|
191 |
+
opset_version=16
|
192 |
+
)
|
193 |
+
|
194 |
+
|
195 |
+
class VitsModel(nn.Module):
    """SoVITS synthesizer wrapper used by the ONNX export script.

    Loads a checkpoint on CPU, tags its config with a model version derived
    from the text-embedding table size, and builds a ``SynthesizerTrn`` from
    that config.
    """

    def __init__(self, vits_path):
        super().__init__()
        checkpoint = torch.load(vits_path, map_location="cpu")
        config = checkpoint["config"]
        # A text-embedding table with 322 rows is treated as a v1 checkpoint;
        # any other size is treated as v2.
        symbol_rows = checkpoint['weight']['enc_p.text_embedding.weight'].shape[0]
        config["model"]["version"] = "v1" if symbol_rows == 322 else "v2"

        self.hps = DictToAttrRecursive(config)
        self.hps.model.semantic_frame_rate = "25hz"

        spec_channels = self.hps.data.filter_length // 2 + 1
        segment_frames = self.hps.train.segment_size // self.hps.data.hop_length
        self.vq_model = SynthesizerTrn(
            spec_channels,
            segment_frames,
            n_speakers=self.hps.data.n_speakers,
            **self.hps.model
        )
        self.vq_model.eval()
        # strict=False: checkpoint keys without a counterpart in the model
        # (and the reverse) are tolerated.
        self.vq_model.load_state_dict(checkpoint["weight"], strict=False)

    def forward(self, text_seq, pred_semantic, ref_audio):
        """Synthesize audio; returns element ``[0, 0]`` of the model output.

        The reference audio is first converted to a magnitude spectrogram
        using the STFT settings stored in the checkpoint config.
        """
        data_cfg = self.hps.data
        reference_spec = spectrogram_torch(
            ref_audio,
            data_cfg.filter_length,
            data_cfg.sampling_rate,
            data_cfg.hop_length,
            data_cfg.win_length,
            center=False
        )
        return self.vq_model(pred_semantic, text_seq, reference_spec)[0, 0]
|
226 |
+
|
227 |
+
|
228 |
+
class GptSoVits(nn.Module):
    """Chain the text-to-semantic model and the SoVITS synthesizer.

    ``t2s`` turns the phoneme/BERT/SSL inputs into semantic tokens and ``vits``
    turns those tokens plus a reference waveform into audio.
    """

    def __init__(self, vits, t2s):
        super().__init__()
        self.vits = vits
        self.t2s = t2s

    def forward(
        self,
        ref_seq,
        text_seq,
        ref_bert,
        text_bert,
        ref_audio,
        ssl_content,
        debug=False,
        debug_onnx_path="onnx/koharu/koharu_vits.onnx",
    ):
        """Run the full pipeline and return the synthesized audio.

        Args:
            ref_seq, text_seq, ref_bert, text_bert, ssl_content: forwarded to
                the text-to-semantic model.
            ref_audio: reference waveform forwarded to the synthesizer.
            debug: when true, additionally run an exported VITS ONNX model on
                the same inputs and return ``(audio, onnx_outputs)``.
            debug_onnx_path: ONNX file used by the debug comparison; defaults
                to the path that was previously hard-coded.

        Returns:
            The audio tensor, or ``(audio, onnx_outputs)`` in debug mode where
            ``onnx_outputs`` is the list returned by ``InferenceSession.run``.
        """
        pred_semantic = self.t2s(ref_seq, text_seq, ref_bert, text_bert, ssl_content)
        audio = self.vits(text_seq, pred_semantic, ref_audio)
        if not debug:
            return audio

        # Imported lazily so onnxruntime is only required for the debug comparison.
        import onnxruntime

        # "CPU" is not a valid execution-provider name; ONNX Runtime expects
        # "CPUExecutionProvider".
        sess = onnxruntime.InferenceSession(
            debug_onnx_path, providers=["CPUExecutionProvider"]
        )
        audio1 = sess.run(None, {
            "text_seq": text_seq.detach().cpu().numpy(),
            "pred_semantic": pred_semantic.detach().cpu().numpy(),
            "ref_audio": ref_audio.detach().cpu().numpy(),
        })
        return audio, audio1

    def export(self, ref_seq, text_seq, ref_bert, text_bert, ref_audio, ssl_content, project_name):
        """Export the text-to-semantic model and the synthesizer to ONNX.

        The text-to-semantic model exports itself via ``self.t2s.export``; its
        prediction on the sample inputs is then used as the tracing input for
        the synthesizer, written to ``onnx/{project_name}/{project_name}_vits.onnx``.
        """
        self.t2s.export(ref_seq, text_seq, ref_bert, text_bert, ssl_content, project_name)
        pred_semantic = self.t2s(ref_seq, text_seq, ref_bert, text_bert, ssl_content)
        torch.onnx.export(
            self.vits,
            (text_seq, pred_semantic, ref_audio),
            f"onnx/{project_name}/{project_name}_vits.onnx",
            input_names=["text_seq", "pred_semantic", "ref_audio"],
            output_names=["audio"],
            # Axes declared dynamic so the exported graph accepts variable lengths.
            dynamic_axes={
                "text_seq": {1: "text_length"},
                "pred_semantic": {2: "pred_length"},
                "ref_audio": {1: "audio_length"},
            },
            opset_version=17,
            verbose=False
        )
|
265 |
+
|
266 |
+
|
267 |
+
class SSLModel(nn.Module):
    """Thin module around the module-level ``ssl_model`` feature extractor."""

    def __init__(self):
        super().__init__()
        self.ssl = ssl_model

    def forward(self, ref_audio_16k):
        """Return the extractor's ``last_hidden_state`` with axes 1 and 2 swapped."""
        outputs = self.ssl.model(ref_audio_16k)
        hidden = outputs["last_hidden_state"]
        return hidden.transpose(1, 2)
|
274 |
+
|
275 |
+
|
276 |
+
def export(vits_path, gpt_path, project_name, vits_model="v2"):
    """Build the GPT-SoVITS pipeline, run it on dummy inputs and write a config.

    Args:
        vits_path: path to the SoVITS checkpoint.
        gpt_path: path to the GPT (text-to-semantic) checkpoint.
        project_name: name used for ``onnx/{project_name}/`` and the JSON config.
        vits_model: symbol-set version passed to ``cleaned_text_to_sequence``.

    Side effects:
        Creates ``onnx/{project_name}``, writes ``out1.wav``/``out2.wav`` (debug
        mode) or ``out.wav``, and writes ``onnx/{project_name}.json``.
    """
    vits = VitsModel(vits_path)
    gpt = T2SModel(gpt_path, vits)
    gpt_sovits = GptSoVits(vits, gpt)
    ssl = SSLModel()

    # Fixed phoneme sequences used only as sample inputs.
    ref_seq = torch.LongTensor([cleaned_text_to_sequence(["n", "i2", "h", "ao3", ",", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4"],version=vits_model)])
    text_seq = torch.LongTensor([cleaned_text_to_sequence(["w", "o3", "sh", "i4", "b", "ai2", "y", "e4", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4"],version=vits_model)])
    ref_bert = torch.randn((ref_seq.shape[1], 1024)).float()
    text_bert = torch.randn((text_seq.shape[1], 1024)).float()

    # Five seconds of random noise at 48 kHz stands in for a reference recording.
    ref_audio = torch.randn((1, 48000 * 5)).float()
    # ref_audio = torch.tensor([load_audio("rec.wav", 48000)]).float()
    ref_audio_16k = torchaudio.functional.resample(ref_audio,48000,16000).float()
    ref_audio_sr = torchaudio.functional.resample(ref_audio,48000,vits.hps.data.sampling_rate).float()

    # makedirs also creates the "onnx" parent and only tolerates "already
    # exists"; the previous bare ``except: pass`` hid every other failure.
    os.makedirs(f"onnx/{project_name}", exist_ok=True)

    ssl_content = ssl(ref_audio_16k).float()

    # NOTE(review): debug is hard-wired on and the actual ONNX export call is
    # commented out, so this function currently only runs the comparison path.
    # Confirm whether that is intentional before relying on it for exporting.
    # debug = False
    debug = True

    # gpt_sovits.export(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content, project_name)

    sampling_rate = vits.hps.data.sampling_rate
    if debug:
        a, b = gpt_sovits(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content, debug=debug)
        soundfile.write("out1.wav", a.cpu().detach().numpy(), sampling_rate)
        soundfile.write("out2.wav", b[0], sampling_rate)
    else:
        a = gpt_sovits(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content).detach().cpu().numpy()
        soundfile.write("out.wav", a, sampling_rate)

    MoeVSConf = {
        "Folder": f"{project_name}",
        "Name": f"{project_name}",
        "Type": "GPT-SoVits",
        "Rate": sampling_rate,
        "NumLayers": gpt.t2s_model.num_layers,
        "EmbeddingDim": gpt.t2s_model.embedding_dim,
        "Dict": "BasicDict",
        "BertPath": "chinese-roberta-wwm-ext-large",
        # "Symbol": symbols_v1 if vits_model == "v1" else symbols_v2,
        "AddBlank": False,
    }

    with open(f"onnx/{project_name}.json", 'w') as MoeVsConfFile:
        json.dump(MoeVSConf, MoeVsConfFile, indent = 4)
|
331 |
+
|
332 |
+
|
333 |
+
if __name__ == "__main__":
    # Ensure the output root exists.  exist_ok replaces the former bare
    # ``except: pass`` so genuine failures (e.g. permissions) are reported.
    os.makedirs("onnx", exist_ok=True)

    # Sample checkpoints; edit these paths to export a different voice.
    gpt_path = "GPT_weights/nahida-e25.ckpt"
    vits_path = "SoVITS_weights/nahida_e30_s3930.pth"
    exp_path = "nahida"
    export(vits_path, gpt_path, exp_path)
|
343 |
+
|
344 |
+
# soundfile.write("out.wav", a, vits.hps.data.sampling_rate)
|
GPT_SoVITS/prepare_data.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
import torch
|
4 |
+
import torchaudio
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
def prepare_data_stage2(data_dir="data8", exp_dir="logs/s2"):
    """Lay out the stage-2 training directory from a raw dataset folder.

    Both arguments are resolved against the project root (the parent of the
    directory containing this file).  The function:

    * creates ``exp_dir`` with ``4-cnhubert`` and ``5-wav32k`` subfolders,
    * converts ``<data_dir>/phoneme.txt`` (``path|text`` per line) into
      ``<exp_dir>/2-name2text.txt`` (tab-separated name, text, ``0``, ``Hindi``),
    * writes every ``.wav`` in ``<data_dir>/wavs`` to ``5-wav32k`` at 32 kHz.
    """
    target_sr = 32000

    # Project root = parent of the GPT_SoVITS directory this file lives in.
    this_file = os.path.abspath(__file__)
    root_dir = os.path.dirname(os.path.dirname(this_file))

    data_dir = os.path.join(root_dir, data_dir)
    exp_dir = os.path.join(root_dir, exp_dir)

    print(f"Data directory: {data_dir}")
    print(f"Experiment directory: {exp_dir}")

    wav32k_dir = os.path.join(exp_dir, "5-wav32k")
    for folder in (exp_dir, os.path.join(exp_dir, "4-cnhubert"), wav32k_dir):
        os.makedirs(folder, exist_ok=True)

    phoneme_path = os.path.join(data_dir, "phoneme.txt")
    name2text_path = os.path.join(exp_dir, "2-name2text.txt")

    print(f"Reading phoneme data from: {phoneme_path}")
    print(f"Writing text data to: {name2text_path}")

    with open(phoneme_path, "r", encoding="utf8") as f_in, \
         open(name2text_path, "w", encoding="utf8") as f_out:
        for line in f_in:
            fields = line.strip().split("|")
            if len(fields) < 2:
                # Lines without a "|" separator carry no text and are dropped.
                continue
            wav_name = os.path.basename(fields[0])
            # Output columns: wav_name, text, speaker_id, language_id
            f_out.write(f"{wav_name}\t{fields[1]}\t0\tHindi\n")

    wav_dir = os.path.join(data_dir, "wavs")

    print(f"Processing wav files from: {wav_dir}")
    print(f"Saving to: {wav32k_dir}")

    for wav_file in os.listdir(wav_dir):
        if not wav_file.endswith(".wav"):
            continue

        waveform, sr = torchaudio.load(os.path.join(wav_dir, wav_file))
        if sr != target_sr:
            # Only resample when the source rate differs from the target.
            waveform = torchaudio.transforms.Resample(sr, target_sr)(waveform)

        torchaudio.save(os.path.join(wav32k_dir, wav_file), waveform, target_sr)

    print("Data preparation complete. Please run the Hubert feature extraction before training.")
|
64 |
+
|
65 |
+
if __name__ == "__main__":
    # Script entry point: prepare data using the default directories.
    prepare_data_stage2()
|
GPT_SoVITS/pretrained_models/.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
*
|
2 |
+
!.gitignore
|
GPT_SoVITS/pretrained_models/README.md
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
pipeline_tag: text-to-speech
|
4 |
+
---
|
5 |
+
pretrained models used in https://github.com/RVC-Boss/GPT-SoVITS
|
GPT_SoVITS/process_ckpt.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import traceback
|
2 |
+
from collections import OrderedDict
|
3 |
+
from time import time as ttime
|
4 |
+
import shutil,os
|
5 |
+
import torch
|
6 |
+
from tools.i18n.i18n import I18nAuto
|
7 |
+
|
8 |
+
i18n = I18nAuto()
|
9 |
+
|
10 |
+
def my_save(fea, path):
    """Save ``fea`` with ``torch.save`` via a temporary file, then move it to ``path``.

    Works around ``torch.save`` not supporting Chinese (non-ASCII) paths: the
    object is first written to a timestamp-named file in the current working
    directory and then moved to its final location.

    Args:
        fea: object to serialize.
        path: destination file path; may be a bare file name.
    """
    tmp_path = "%s.pth" % (ttime())
    torch.save(fea, tmp_path)
    # Move to ``path`` directly.  Rebuilding it as "%s/%s" % (dirname, basename)
    # produced "/name" (filesystem root) whenever ``path`` had no directory part.
    shutil.move(tmp_path, path)
|
16 |
+
|
17 |
+
def savee(ckpt, name, epoch, steps, hps):
    """Write a half-precision inference checkpoint and report the outcome.

    Every entry of ``ckpt`` whose key does not contain ``"enc_q"`` is converted
    with ``.half()`` and stored under ``"weight"``; ``hps`` is stored under
    ``"config"`` and an ``"<epoch>epoch_<steps>iteration"`` string under
    ``"info"``.  The file is written to ``<hps.save_weight_dir>/<name>.pth``.

    Returns:
        ``"Success."`` on success, otherwise the formatted traceback as a
        string (errors are reported to the caller rather than raised).
    """
    try:
        opt = OrderedDict()
        opt["weight"] = {}
        for key in ckpt.keys():
            if "enc_q" in key:
                continue
            opt["weight"][key] = ckpt[key].half()
        opt["config"] = hps
        opt["info"] = "%sepoch_%siteration" % (epoch, steps)
        # torch.save(opt, "%s/%s.pth" % (hps.save_weight_dir, name))
        my_save(opt, "%s/%s.pth" % (hps.save_weight_dir, name))
        return "Success."
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt / SystemExit
        # still propagate instead of being turned into a log string.
        return traceback.format_exc()
|
GPT_SoVITS/s1_train.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/train_t2s.py
|
2 |
+
import os
|
3 |
+
import pdb
|
4 |
+
import logging
|
5 |
+
import argparse
|
6 |
+
from pathlib import Path
|
7 |
+
import torch, platform
|
8 |
+
from pytorch_lightning import seed_everything
|
9 |
+
from pytorch_lightning import Trainer
|
10 |
+
from pytorch_lightning.callbacks import ModelCheckpoint
|
11 |
+
from pytorch_lightning.strategies import DDPStrategy
|
12 |
+
from AR.data.data_module import Text2SemanticDataModule
|
13 |
+
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
|
14 |
+
from AR.utils.io import load_yaml_config
|
15 |
+
from GPT_SoVITS.utils.wandb_logger import WandbLoggerWithConfig
|
16 |
+
|
17 |
+
logging.getLogger("numba").setLevel(logging.WARNING)
|
18 |
+
logging.getLogger("matplotlib").setLevel(logging.WARNING)
|
19 |
+
torch.set_float32_matmul_precision("high")
|
20 |
+
|
21 |
+
def my_model_ckpt(
    config,
    if_save_latest,
    if_save_every_weights,
    half_weights_save_dir,
    exp_name,
    **kwargs,
):
    """Create the ``ModelCheckpoint`` callback used for stage-1 training.

    ``config``, ``if_save_every_weights`` and ``half_weights_save_dir`` are
    accepted but not used by this implementation.  When ``if_save_latest`` is
    truthy, ``save_last=True`` is forced.  Remaining keyword arguments are
    forwarded to ``ModelCheckpoint`` and the file name pattern is
    ``<exp_name>_{epoch}-{step}``.
    """
    checkpoint_options = dict(kwargs)
    if if_save_latest:
        checkpoint_options["save_last"] = True

    name_pattern = exp_name + "_{epoch}-{step}"
    return ModelCheckpoint(**checkpoint_options, filename=name_pattern)
|
39 |
+
|
40 |
+
def main(args):
    """Train the stage-1 (text-to-semantic) model described by a YAML config.

    Args:
        args: parsed command-line namespace; only ``args.config_file`` is read.

    Side effects:
        Creates ``<output_dir>`` and ``<output_dir>/ckpt``, seeds the RNGs,
        logs through the wandb logger, and runs ``Trainer.fit``.
    """
    # ``wandb.finish()`` is called at the end, but this file never imports
    # wandb at module level, so the call raised NameError after training.
    # Importing here also fails fast if the package is missing.
    import wandb

    config = load_yaml_config(args.config_file)
    train_cfg = config["train"]

    output_dir = Path(config["output_dir"])
    output_dir.mkdir(parents=True, exist_ok=True)

    ckpt_dir = output_dir / "ckpt"
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    seed_everything(train_cfg["seed"], workers=True)

    # Initialize wandb logger
    wandb_logger = WandbLoggerWithConfig(config=config)

    ckpt_callback = my_model_ckpt(
        config=config,
        if_save_latest=train_cfg["if_save_latest"],
        if_save_every_weights=train_cfg["if_save_every_weights"],
        half_weights_save_dir=train_cfg["half_weights_save_dir"],
        exp_name=train_cfg["exp_name"],
        save_top_k=-1,
        monitor="loss",
        mode="min",
        save_on_train_epoch_end=True,
        every_n_epochs=train_cfg["save_every_n_epoch"],
        dirpath=ckpt_dir,
    )

    # Create data module
    data_module = Text2SemanticDataModule(
        config=config,
        train_semantic_path=config.get("train_semantic_path", ""),
        train_phoneme_path=config.get("train_phoneme_path", ""),
    )

    # Initialize model with correct parameters
    model = Text2SemanticLightningModule(
        config=config,
        output_dir=output_dir,
        is_train=True,
    )

    # Watch the model in wandb
    wandb_logger.watch_model(model)

    use_cuda = torch.cuda.is_available()
    trainer = Trainer(
        max_epochs=train_cfg["epochs"],
        accelerator="gpu" if use_cuda else "cpu",
        devices=-1 if use_cuda else 1,
        benchmark=False,
        fast_dev_run=False,
        # DDP is only configured on GPU; NCCL is unavailable on Windows, so gloo is used there.
        strategy=DDPStrategy(
            process_group_backend="nccl" if platform.system() != "Windows" else "gloo"
        ) if use_cuda else "auto",
        precision=train_cfg["precision"],
        logger=wandb_logger,
        callbacks=[ckpt_callback],
        use_distributed_sampler=False,
    )

    trainer.fit(model, data_module)
    wandb.finish()
|
102 |
+
|
103 |
+
if __name__ == "__main__":
    # Command-line entry point: the only option is the YAML config path.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-c",
        "--config_file",
        type=str,
        default="configs/s1.yaml",
        help="path of config file",
    )
    main(cli.parse_args())
|
GPT_SoVITS/s2_train.py
ADDED
@@ -0,0 +1,610 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import warnings
|
2 |
+
warnings.filterwarnings("ignore")
|
3 |
+
import utils
|
4 |
+
from utils import get_hparams
|
5 |
+
import os
|
6 |
+
os.chdir(os.path.dirname(os.path.abspath(__file__)))
|
7 |
+
hps = get_hparams(stage=2)
|
8 |
+
|
9 |
+
# Set GPU device - use default if not specified in config
|
10 |
+
if hasattr(hps.train, 'gpu_numbers'):
|
11 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
|
12 |
+
else:
|
13 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Default to first GPU
|
14 |
+
|
15 |
+
import torch
|
16 |
+
from torch.nn import functional as F
|
17 |
+
from torch.utils.data import DataLoader
|
18 |
+
from torch.utils.tensorboard import SummaryWriter
|
19 |
+
import torch.multiprocessing as mp
|
20 |
+
import torch.distributed as dist, traceback
|
21 |
+
from torch.nn.parallel import DistributedDataParallel as DDP
|
22 |
+
from torch.cuda.amp import autocast, GradScaler
|
23 |
+
from tqdm import tqdm
|
24 |
+
import logging, traceback
|
25 |
+
|
26 |
+
logging.getLogger("matplotlib").setLevel(logging.INFO)
|
27 |
+
logging.getLogger("h5py").setLevel(logging.INFO)
|
28 |
+
logging.getLogger("numba").setLevel(logging.INFO)
|
29 |
+
from random import randint
|
30 |
+
from module import commons
|
31 |
+
|
32 |
+
from module.data_utils import (
|
33 |
+
TextAudioSpeakerLoader,
|
34 |
+
TextAudioSpeakerCollate,
|
35 |
+
DistributedBucketSampler,
|
36 |
+
)
|
37 |
+
from module.models import (
|
38 |
+
SynthesizerTrn,
|
39 |
+
MultiPeriodDiscriminator,
|
40 |
+
)
|
41 |
+
from module.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
|
42 |
+
from module.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
|
43 |
+
from process_ckpt import savee
|
44 |
+
|
45 |
+
torch.backends.cudnn.benchmark = False
|
46 |
+
torch.backends.cudnn.deterministic = False
|
47 |
+
###反正A100fp32更快,那试试tf32吧
|
48 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
49 |
+
torch.backends.cudnn.allow_tf32 = True
|
50 |
+
torch.set_float32_matmul_precision("medium") # 最低精度但最快(也就快一丁点),对于结果造成不了影响
|
51 |
+
# from config import pretrained_s2G,pretrained_s2D
|
52 |
+
global_step = 0
|
53 |
+
|
54 |
+
device = "cpu" # cuda以外的设备,等mps优化后加入
|
55 |
+
|
56 |
+
|
57 |
+
def main():
    """Spawn one training process per visible GPU (or a single CPU process)."""
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 1

    # Rendezvous settings read by the ``env://`` init method in ``run``.
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = str(randint(20000, 55555))

    worker_args = (n_gpus, hps)
    mp.spawn(run, nprocs=n_gpus, args=worker_args)
|
74 |
+
|
75 |
+
|
76 |
+
def run(rank, n_gpus, hps):
    """Per-process training worker for stage 2 (SoVITS generator/discriminator).

    Args:
        rank: index of this process; rank 0 owns logging and TensorBoard writers.
        n_gpus: world size passed to the process group and the bucket sampler.
        hps: hyper-parameter object (``hps.data``, ``hps.train``, ``hps.model`` ...).

    The worker builds the data loader, both networks and their optimizers,
    tries to resume from the newest ``D_*.pth`` / ``G_*.pth`` under
    ``<exp_dir>/logs_s2`` and, failing that, loads the configured pretrained
    weights, then runs ``train_and_evaluate`` once per epoch.
    """
    global global_step
    use_cuda = torch.cuda.is_available()

    if rank == 0:
        logger = utils.get_logger(hps.data.exp_dir)
        logger.info(hps)
        # utils.check_git_hash(hps.s2_ckpt_dir)
        writer = SummaryWriter(log_dir=hps.s2_ckpt_dir)
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))

    dist.init_process_group(
        # gloo on Windows or when no GPU is available, nccl otherwise.
        backend="gloo" if os.name == "nt" or not use_cuda else "nccl",
        init_method="env://",
        world_size=n_gpus,
        rank=rank,
    )
    torch.manual_seed(hps.train.seed)
    if use_cuda:
        torch.cuda.set_device(rank)

    train_dataset = TextAudioSpeakerLoader(hps.data)
    # Bucket boundaries: 32, then 300..1900 in steps of 100.
    bucket_boundaries = [32] + list(range(300, 2000, 100))
    train_sampler = DistributedBucketSampler(
        train_dataset,
        hps.train.batch_size,
        bucket_boundaries,
        num_replicas=n_gpus,
        rank=rank,
        shuffle=True,
    )
    collate_fn = TextAudioSpeakerCollate()
    train_loader = DataLoader(
        train_dataset,
        num_workers=6,
        shuffle=False,
        pin_memory=True,
        collate_fn=collate_fn,
        batch_sampler=train_sampler,
        persistent_workers=True,
        prefetch_factor=4,
    )
    # if rank == 0:
    #     eval_dataset = TextAudioSpeakerLoader(hps.data.validation_files, hps.data, val=True)
    #     eval_loader = DataLoader(eval_dataset, num_workers=0, shuffle=False,
    #                              batch_size=1, pin_memory=True,
    #                              drop_last=False, collate_fn=collate_fn)

    net_g = SynthesizerTrn(
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model,
    )
    net_g = net_g.cuda(rank) if use_cuda else net_g.to(device)

    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
    net_d = net_d.cuda(rank) if use_cuda else net_d.to(device)

    for name, param in net_g.named_parameters():
        if not param.requires_grad:
            print(name, "not requires_grad")

    # Parameters of the text-side sub-modules train with a reduced learning
    # rate; everything else that requires grad goes into the base group.
    text_encoder = net_g.enc_p
    low_lr_ids = set(map(id, text_encoder.text_embedding.parameters()))
    low_lr_ids.update(map(id, text_encoder.encoder_text.parameters()))
    low_lr_ids.update(map(id, text_encoder.mrte.parameters()))
    base_params = filter(
        lambda p: id(p) not in low_lr_ids and p.requires_grad,
        net_g.parameters(),
    )
    low_lr = hps.train.learning_rate * hps.train.text_low_lr_rate

    optim_g = torch.optim.AdamW(
        # filter(lambda p: p.requires_grad, net_g.parameters()),  # default: same lr for all layers
        [
            {"params": base_params, "lr": hps.train.learning_rate},
            {"params": text_encoder.text_embedding.parameters(), "lr": low_lr},
            {"params": text_encoder.encoder_text.parameters(), "lr": low_lr},
            {"params": text_encoder.mrte.parameters(), "lr": low_lr},
        ],
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps,
    )
    optim_d = torch.optim.AdamW(
        net_d.parameters(),
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps,
    )
    if use_cuda:
        net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
        net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
    else:
        net_g = net_g.to(device)
        net_d = net_d.to(device)

    resume_dir = "%s/logs_s2" % hps.data.exp_dir
    try:  # resume automatically if a checkpoint can be loaded
        _, _, _, epoch_str = utils.load_checkpoint(
            utils.latest_checkpoint_path(resume_dir, "D_*.pth"),
            net_d,
            optim_d,
        )  # loading D rarely fails
        if rank == 0:
            logger.info("loaded D")
        # _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, optim_g,load_opt=0)
        _, _, _, epoch_str = utils.load_checkpoint(
            utils.latest_checkpoint_path(resume_dir, "G_*.pth"),
            net_g,
            optim_g,
        )
        global_step = (epoch_str - 1) * len(train_loader)
        # epoch_str = 1
        # global_step = 0
    except Exception:
        # First run (nothing to resume): fall back to the pretrained weights.
        # Narrowed from a bare ``except:`` so Ctrl-C during loading is not
        # silently converted into a fresh start.
        # traceback.print_exc()
        epoch_str = 1
        global_step = 0

        # Under DDP the real model lives in ``.module``.
        raw_g = net_g.module if use_cuda else net_g
        raw_d = net_d.module if use_cuda else net_d

        pretrained_g = hps.train.pretrained_s2G
        if pretrained_g != "" and pretrained_g is not None and os.path.exists(pretrained_g):
            if rank == 0:
                logger.info("loaded pretrained %s" % pretrained_g)
            print(
                raw_g.load_state_dict(
                    torch.load(pretrained_g, map_location="cpu")["weight"],
                    strict=False,
                )
            )  # experiment: optimizer state is intentionally not loaded

        pretrained_d = hps.train.pretrained_s2D
        if pretrained_d != "" and pretrained_d is not None and os.path.exists(pretrained_d):
            if rank == 0:
                logger.info("loaded pretrained %s" % pretrained_d)
            print(
                raw_d.load_state_dict(
                    torch.load(pretrained_d, map_location="cpu")["weight"]
                )
            )

    # scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2)
    # scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2)

    scheduler_g = torch.optim.lr_scheduler.ExponentialLR(
        optim_g, gamma=hps.train.lr_decay, last_epoch=-1
    )
    scheduler_d = torch.optim.lr_scheduler.ExponentialLR(
        optim_d, gamma=hps.train.lr_decay, last_epoch=-1
    )
    # Fast-forward the schedulers to the epoch training (re)starts from.
    for _ in range(epoch_str):
        scheduler_g.step()
        scheduler_d.step()

    scaler = GradScaler(enabled=hps.train.fp16_run)

    # Only rank 0 has a logger and TensorBoard writers.
    if rank == 0:
        epoch_logger, epoch_writers = logger, [writer, writer_eval]
    else:
        epoch_logger, epoch_writers = None, None

    for epoch in range(epoch_str, hps.train.epochs + 1):
        train_and_evaluate(
            rank,
            epoch,
            hps,
            [net_g, net_d],
            [optim_g, optim_d],
            [scheduler_g, scheduler_d],
            scaler,
            # [train_loader, eval_loader], logger, [writer, writer_eval])
            [train_loader, None],
            epoch_logger,
            epoch_writers,
        )
        scheduler_g.step()
        scheduler_d.step()
|
292 |
+
|
293 |
+
|
294 |
+
def train_and_evaluate(
    rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers
):
    """Run one training epoch and, on rank 0, log and save checkpoints.

    Args:
        rank: process index; only rank 0 logs and saves.
        epoch: current epoch number (also passed to the batch sampler).
        hps: hyper-parameter object.
        nets: ``[net_g, net_d]``.
        optims: ``[optim_g, optim_d]``.
        schedulers: unused here; the caller steps them after each epoch.
        scaler: ``GradScaler`` shared by both optimizers.
        loaders: ``[train_loader, eval_loader]``; the eval loader is unused.
        logger: logger for rank 0, ``None`` elsewhere.
        writers: ``[writer, writer_eval]`` for rank 0, ``None`` elsewhere.
    """
    global global_step

    net_g, net_d = nets
    optim_g, optim_d = optims
    # scheduler_g, scheduler_d = schedulers
    train_loader, eval_loader = loaders
    if writers is not None:
        writer, writer_eval = writers

    train_loader.batch_sampler.set_epoch(epoch)

    net_g.train()
    net_d.train()

    use_cuda = torch.cuda.is_available()

    def _place(tensor):
        # Same transfer the two original branches performed per tensor.
        if use_cuda:
            return tensor.cuda(rank, non_blocking=True)
        return tensor.to(device)

    for batch_idx, batch in enumerate(tqdm(train_loader)):
        (
            ssl,
            ssl_lengths,
            spec,
            spec_lengths,
            y,
            y_lengths,
            text,
            text_lengths,
        ) = batch

        spec, spec_lengths = _place(spec), _place(spec_lengths)
        y, y_lengths = _place(y), _place(y_lengths)
        ssl = _place(ssl)
        ssl.requires_grad = False
        # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True)
        text, text_lengths = _place(text), _place(text_lengths)

        with autocast(enabled=hps.train.fp16_run):
            (
                y_hat,
                kl_ssl,
                ids_slice,
                x_mask,
                z_mask,
                (z, z_p, m_p, logs_p, m_q, logs_q),
                stats_ssl,
            ) = net_g(ssl, spec, spec_lengths, text, text_lengths)

            mel = spec_to_mel_torch(
                spec,
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.mel_fmin,
                hps.data.mel_fmax,
            )
            y_mel = commons.slice_segments(
                mel, ids_slice, hps.train.segment_size // hps.data.hop_length
            )
            y_hat_mel = mel_spectrogram_torch(
                y_hat.squeeze(1),
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.hop_length,
                hps.data.win_length,
                hps.data.mel_fmin,
                hps.data.mel_fmax,
            )

            # Cut the waveform to the same segment the generator produced.
            y = commons.slice_segments(
                y, ids_slice * hps.data.hop_length, hps.train.segment_size
            )

            # Discriminator step: the generator output is detached.
            y_d_hat_r, y_d_hat_g, _, _ = net_d(y, y_hat.detach())
            with autocast(enabled=False):
                loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
                    y_d_hat_r, y_d_hat_g
                )
                loss_disc_all = loss_disc

        optim_d.zero_grad()
        scaler.scale(loss_disc_all).backward()
        scaler.unscale_(optim_d)
        grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None)
        scaler.step(optim_d)

        with autocast(enabled=hps.train.fp16_run):
            # Generator step
            y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(y, y_hat)
            with autocast(enabled=False):
                loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
                loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl

                loss_fm = feature_loss(fmap_r, fmap_g)
                loss_gen, losses_gen = generator_loss(y_d_hat_g)
                loss_gen_all = loss_gen + loss_fm + loss_mel + kl_ssl * 1 + loss_kl

        optim_g.zero_grad()
        scaler.scale(loss_gen_all).backward()
        scaler.unscale_(optim_g)
        grad_norm_g = commons.clip_grad_value_(net_g.parameters(), None)
        scaler.step(optim_g)
        # The scaler is updated once per iteration, after both optimizer steps.
        scaler.update()

        if rank == 0 and global_step % hps.train.log_interval == 0:
            lr = optim_g.param_groups[0]["lr"]
            losses = [loss_disc, loss_gen, loss_fm, loss_mel, kl_ssl, loss_kl]
            logger.info(
                "Train Epoch: {} [{:.0f}%]".format(
                    epoch, 100.0 * batch_idx / len(train_loader)
                )
            )
            logger.info([x.item() for x in losses] + [global_step, lr])

            scalar_dict = {
                "loss/g/total": loss_gen_all,
                "loss/d/total": loss_disc_all,
                "learning_rate": lr,
                "grad_norm_d": grad_norm_d,
                "grad_norm_g": grad_norm_g,
                "loss/g/fm": loss_fm,
                "loss/g/mel": loss_mel,
                "loss/g/kl_ssl": kl_ssl,
                "loss/g/kl": loss_kl,
            }

            # scalar_dict.update({"loss/g/{}".format(i): v for i, v in enumerate(losses_gen)})
            # scalar_dict.update({"loss/d_r/{}".format(i): v for i, v in enumerate(losses_disc_r)})
            # scalar_dict.update({"loss/d_g/{}".format(i): v for i, v in enumerate(losses_disc_g)})
            image_dict = {
                "slice/mel_org": utils.plot_spectrogram_to_numpy(
                    y_mel[0].data.cpu().numpy()
                ),
                "slice/mel_gen": utils.plot_spectrogram_to_numpy(
                    y_hat_mel[0].data.cpu().numpy()
                ),
                "all/mel": utils.plot_spectrogram_to_numpy(
                    mel[0].data.cpu().numpy()
                ),
                "all/stats_ssl": utils.plot_spectrogram_to_numpy(
                    stats_ssl[0].data.cpu().numpy()
                ),
            }
            utils.summarize(
                writer=writer,
                global_step=global_step,
                images=image_dict,
                scalars=scalar_dict,
            )
        global_step += 1

    if epoch % hps.train.save_every_epoch == 0 and rank == 0:
        # With if_save_latest == 0 every save gets its own step-numbered file;
        # otherwise a single fixed-name file is overwritten each time.
        if hps.train.if_save_latest == 0:
            ckpt_tag = global_step
        else:
            ckpt_tag = 233333333333

        for network, optimizer, pattern in (
            (net_g, optim_g, "G_{}.pth"),
            (net_d, optim_d, "D_{}.pth"),
        ):
            utils.save_checkpoint(
                network,
                optimizer,
                hps.train.learning_rate,
                epoch,
                os.path.join(
                    "%s/logs_s2" % hps.data.exp_dir, pattern.format(ckpt_tag)
                ),
            )

        if rank == 0 and hps.train.if_save_every_weights == True:
            # Unwrap DDP (if present) before exporting the generator weights.
            ckpt = (net_g.module if hasattr(net_g, "module") else net_g).state_dict()
            save_status = savee(
                ckpt,
                hps.name + "_e%s_s%s" % (epoch, global_step),
                epoch,
                global_step,
                hps,
            )
            logger.info(
                "saving ckpt %s_e%s:%s"
                % (
                    hps.name,
                    epoch,
                    save_status,
                )
            )

    if rank == 0:
        logger.info("====> Epoch: {}".format(epoch))
|
520 |
+
|
521 |
+
|
522 |
+
def evaluate(hps, generator, eval_loader, writer_eval):
    """Run the generator over ``eval_loader`` and log spectrograms and audio.

    For every batch, inference is run twice (``test=0`` and ``test=1``).  The
    first item of each generated batch, and of the ground truth, is plotted /
    collected and handed to ``utils.summarize``.  The generator is switched to
    eval mode for the duration and back to train mode before returning.

    Args:
        hps: hyper-parameter object; ``hps.data`` supplies the STFT/mel settings.
        generator: synthesis network, either bare or wrapped so that the real
            model is reachable through ``.module``.
        eval_loader: iterable yielding ``(ssl, ssl_lengths, spec, spec_lengths,
            y, y_lengths, text, text_lengths)`` batches.
        writer_eval: summary writer forwarded to ``utils.summarize``.

    Note:
        ``global_step`` and ``device`` are read from module scope.
    """
    generator.eval()
    image_dict = {}
    audio_dict = {}
    # Detect the wrapper directly (as the weight-export code in the training
    # loop does) instead of inferring it from CUDA availability.
    infer_model = generator.module if hasattr(generator, "module") else generator
    print("Evaluating ...")
    with torch.no_grad():
        for batch_idx, (
            ssl,
            ssl_lengths,
            spec,
            spec_lengths,
            y,
            y_lengths,
            text,
            text_lengths,
        ) in enumerate(eval_loader):
            if torch.cuda.is_available():
                spec, spec_lengths = spec.cuda(), spec_lengths.cuda()
                y, y_lengths = y.cuda(), y_lengths.cuda()
                ssl = ssl.cuda()
                text, text_lengths = text.cuda(), text_lengths.cuda()
            else:
                spec, spec_lengths = spec.to(device), spec_lengths.to(device)
                y, y_lengths = y.to(device), y_lengths.to(device)
                ssl = ssl.to(device)
                text, text_lengths = text.to(device), text_lengths.to(device)

            for test in [0, 1]:
                y_hat, mask, *_ = infer_model.infer(
                    ssl, spec, spec_lengths, text, text_lengths, test=test
                )
                y_hat_lengths = mask.sum([1, 2]).long() * hps.data.hop_length

                y_hat_mel = mel_spectrogram_torch(
                    y_hat.squeeze(1).float(),
                    hps.data.filter_length,
                    hps.data.n_mel_channels,
                    hps.data.sampling_rate,
                    hps.data.hop_length,
                    hps.data.win_length,
                    hps.data.mel_fmin,
                    hps.data.mel_fmax,
                )
                image_dict[f"gen/mel_{batch_idx}_{test}"] = (
                    utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy())
                )
                audio_dict[f"gen/audio_{batch_idx}_{test}"] = y_hat[
                    0, :, : y_hat_lengths[0]
                ]

            # The ground-truth entries do not depend on `test`, so they are
            # produced once per batch rather than once per inference pass.
            mel = spec_to_mel_torch(
                spec,
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.mel_fmin,
                hps.data.mel_fmax,
            )
            image_dict[f"gt/mel_{batch_idx}"] = utils.plot_spectrogram_to_numpy(
                mel[0].cpu().numpy()
            )
            audio_dict[f"gt/audio_{batch_idx}"] = y[0, :, : y_lengths[0]]

    utils.summarize(
        writer=writer_eval,
        global_step=global_step,
        images=image_dict,
        audios=audio_dict,
        audio_sampling_rate=hps.data.sampling_rate,
    )
    generator.train()
|
607 |
+
|
608 |
+
|
609 |
+
if __name__ == "__main__":
|
610 |
+
main()
|
GPT_SoVITS/text/.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
G2PWModel
|
2 |
+
__pycache__
|
3 |
+
*.zip
|
GPT_SoVITS/text/__init__.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import importlib
|
3 |
+
|
4 |
+
# Force reload the symbol modules to get updated symbols
|
5 |
+
from text import symbols as symbols_v1
|
6 |
+
from text import symbols2 as symbols_v2
|
7 |
+
importlib.reload(symbols_v1)
|
8 |
+
importlib.reload(symbols_v2)
|
9 |
+
|
10 |
+
_symbol_to_id_v1 = {s: i for i, s in enumerate(symbols_v1.symbols)}
|
11 |
+
_symbol_to_id_v2 = {s: i for i, s in enumerate(symbols_v2.symbols)}
|
12 |
+
|
13 |
+
def cleaned_text_to_sequence(cleaned_text, version=None):
    '''Map every symbol of an already-cleaned sequence to its integer ID.

    Args:
        cleaned_text: iterable of symbols to look up.
        version: "v1" selects the v1 symbol table; any other value selects
            the v2 table.  When None, the ``version`` environment variable is
            consulted (default "v2").
    Returns:
        List of integers, one per symbol of ``cleaned_text``.
    Raises:
        KeyError: if a symbol is missing from the selected table.
    '''
    if version is None:
        version = os.environ.get('version', 'v2')
    symbol_table = _symbol_to_id_v1 if version == "v1" else _symbol_to_id_v2
    return [symbol_table[symbol] for symbol in cleaned_text]
|
27 |
+
|
GPT_SoVITS/text/cantonese.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# reference: https://huggingface.co/spaces/Naozumi0512/Bert-VITS2-Cantonese-Yue/blob/main/text/chinese.py
|
2 |
+
|
3 |
+
import sys
|
4 |
+
import re
|
5 |
+
import cn2an
|
6 |
+
|
7 |
+
from pyjyutping import jyutping
|
8 |
+
from text.symbols import punctuation
|
9 |
+
from text.zh_normalization.text_normlization import TextNormalizer
|
10 |
+
|
11 |
+
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
12 |
+
|
13 |
+
INITIALS = [
|
14 |
+
"aa",
|
15 |
+
"aai",
|
16 |
+
"aak",
|
17 |
+
"aap",
|
18 |
+
"aat",
|
19 |
+
"aau",
|
20 |
+
"ai",
|
21 |
+
"au",
|
22 |
+
"ap",
|
23 |
+
"at",
|
24 |
+
"ak",
|
25 |
+
"a",
|
26 |
+
"p",
|
27 |
+
"b",
|
28 |
+
"e",
|
29 |
+
"ts",
|
30 |
+
"t",
|
31 |
+
"dz",
|
32 |
+
"d",
|
33 |
+
"kw",
|
34 |
+
"k",
|
35 |
+
"gw",
|
36 |
+
"g",
|
37 |
+
"f",
|
38 |
+
"h",
|
39 |
+
"l",
|
40 |
+
"m",
|
41 |
+
"ng",
|
42 |
+
"n",
|
43 |
+
"s",
|
44 |
+
"y",
|
45 |
+
"w",
|
46 |
+
"c",
|
47 |
+
"z",
|
48 |
+
"j",
|
49 |
+
"ong",
|
50 |
+
"on",
|
51 |
+
"ou",
|
52 |
+
"oi",
|
53 |
+
"ok",
|
54 |
+
"o",
|
55 |
+
"uk",
|
56 |
+
"ung",
|
57 |
+
]
|
58 |
+
INITIALS += ["sp", "spl", "spn", "sil"]
|
59 |
+
|
60 |
+
|
61 |
+
rep_map = {
|
62 |
+
":": ",",
|
63 |
+
";": ",",
|
64 |
+
",": ",",
|
65 |
+
"。": ".",
|
66 |
+
"!": "!",
|
67 |
+
"?": "?",
|
68 |
+
"\n": ".",
|
69 |
+
"·": ",",
|
70 |
+
"、": ",",
|
71 |
+
"...": "…",
|
72 |
+
"$": ".",
|
73 |
+
"“": "'",
|
74 |
+
"”": "'",
|
75 |
+
'"': "'",
|
76 |
+
"‘": "'",
|
77 |
+
"’": "'",
|
78 |
+
"(": "'",
|
79 |
+
")": "'",
|
80 |
+
"(": "'",
|
81 |
+
")": "'",
|
82 |
+
"《": "'",
|
83 |
+
"》": "'",
|
84 |
+
"【": "'",
|
85 |
+
"】": "'",
|
86 |
+
"[": "'",
|
87 |
+
"]": "'",
|
88 |
+
"—": "-",
|
89 |
+
"~": "-",
|
90 |
+
"~": "-",
|
91 |
+
"「": "'",
|
92 |
+
"」": "'",
|
93 |
+
}
|
94 |
+
|
95 |
+
|
96 |
+
def replace_punctuation(text):
    """Normalise punctuation through ``rep_map`` and strip everything else.

    After the substitution, any run of characters that is neither in the
    ``\\u4e00-\\u9fa5`` range nor listed in ``punctuation`` is removed.
    """
    rep_pattern = re.compile("|".join(re.escape(key) for key in rep_map))
    mapped = rep_pattern.sub(lambda match: rep_map[match.group()], text)

    disallowed = r"[^\u4e00-\u9fa5" + "".join(punctuation) + r"]+"
    return re.sub(disallowed, "", mapped)
|
107 |
+
|
108 |
+
|
109 |
+
def text_normalize(text):
    """Normalise ``text`` with ``TextNormalizer`` and clean its punctuation.

    Each sentence returned by the normaliser is passed through
    ``replace_punctuation``; the results are concatenated without separators.
    """
    sentences = TextNormalizer().normalize(text)
    return "".join(replace_punctuation(sentence) for sentence in sentences)
|
116 |
+
|
117 |
+
|
118 |
+
punctuation_set=set(punctuation)
|
119 |
+
def jyuping_to_initials_finals_tones(jyuping_syllables):
    """Split jyutping syllables into "Y"-prefixed phone labels.

    Punctuation and "_" pass through as single units.  Any other syllable is
    split at the first entry of ``INITIALS`` that prefixes it, giving two
    units; a syllable matching no entry contributes nothing.

    Args:
        jyuping_syllables: iterable of syllable / punctuation strings.

    Returns:
        ``(phones, word2ph)``: the phone labels, and for each consumed
        syllable the number of phones it produced (1 or 2).
    """
    units = []
    unit_tones = []
    word2ph = []

    for syl in jyuping_syllables:
        # Punctuation and the "_" placeholder are single, toneless units.
        if syl in punctuation or syl == "_":
            units.append(syl)
            unit_tones.append(0)
            word2ph.append(1)
            continue

        # A trailing digit is the tone; without one the tone defaults to 0.
        try:
            tone = int(syl[-1])
            stem = syl[:-1]
        except ValueError:
            tone = 0
            stem = syl

        head = next((ini for ini in INITIALS if stem.startswith(ini)), None)
        if head is None:
            continue

        if stem.startswith("nga"):
            first, second = stem[:2], stem[2:] or stem[-1]
        else:
            first, second = head, stem[len(head):] or head[-1]
        units.extend([first, second])
        # Only the second unit carries the tone; -1 marks the first one.
        unit_tones.extend([-1, tone])
        word2ph.append(2)

    assert len(units) == len(unit_tones)

    # Reworked output form: consonant + tone-carrying vowel.  Everything that
    # is not punctuation is prefixed with "Y" so Cantonese labels do not
    # collide with the Mandarin ones.
    phones = []
    for unit, tone in zip(units, unit_tones):
        label = unit if tone in (-1, 0) else "%s%s" % (unit, tone)
        if label not in punctuation_set:
            label = "Y%s" % label
        phones.append(label)

    return phones, word2ph
|
173 |
+
|
174 |
+
|
175 |
+
def get_jyutping(text):
    """Convert ``text`` with ``jyutping.convert`` and split it into tokens.

    Every punctuation symbol is padded with spaces first so that it becomes
    a token of its own when the string is split on whitespace.
    """
    romanised = jyutping.convert(text)
    for mark in punctuation:
        romanised = romanised.replace(mark, " " + mark + " ")
    return romanised.split()
|
182 |
+
|
183 |
+
|
184 |
+
def get_bert_feature(text, word2ph):
    """Delegate to ``text.chinese_bert.get_bert_feature``.

    The backend module is imported on first use rather than at module load.
    """
    from text import chinese_bert as bert_backend

    features = bert_backend.get_bert_feature(text, word2ph)
    return features
|
188 |
+
|
189 |
+
|
190 |
+
def g2p(text):
    """Convert normalised Cantonese text to ``(phones, word2ph)``."""
    syllables = get_jyutping(text)
    phones, word2ph = jyuping_to_initials_finals_tones(syllables)
    return phones, word2ph
|
200 |
+
|
201 |
+
|
202 |
+
if __name__ == "__main__":
|
203 |
+
# text = "啊!但是《原神》是由,米哈\游自主, [研发]的一款全.新开放世界.冒险游戏"
|
204 |
+
text = "佢個鋤頭太短啦。"
|
205 |
+
text = text_normalize(text)
|
206 |
+
# phones, tones, word2ph = g2p(text)
|
207 |
+
phones, word2ph = g2p(text)
|
208 |
+
# print(phones, tones, word2ph)
|
209 |
+
print(phones, word2ph)
|
GPT_SoVITS/text/chinese.py
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pdb
|
3 |
+
import re
|
4 |
+
|
5 |
+
import cn2an
|
6 |
+
from pypinyin import lazy_pinyin, Style
|
7 |
+
|
8 |
+
from text.symbols import punctuation
|
9 |
+
from text.tone_sandhi import ToneSandhi
|
10 |
+
from text.zh_normalization.text_normlization import TextNormalizer
|
11 |
+
|
12 |
+
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
13 |
+
|
14 |
+
current_file_path = os.path.dirname(__file__)

# Pinyin -> symbol-string table, read from the tab-separated file that sits
# next to this module.  The file is opened in a context manager so the handle
# is closed deterministically, with an explicit encoding so the result does
# not depend on the platform locale.
with open(
    os.path.join(current_file_path, "opencpop-strict.txt"), encoding="utf-8"
) as _symbol_table_file:
    pinyin_to_symbol_map = {
        line.split("\t")[0]: line.strip().split("\t")[1]
        for line in _symbol_table_file
    }
|
19 |
+
|
20 |
+
import jieba_fast.posseg as psg
|
21 |
+
|
22 |
+
|
23 |
+
rep_map = {
|
24 |
+
":": ",",
|
25 |
+
";": ",",
|
26 |
+
",": ",",
|
27 |
+
"。": ".",
|
28 |
+
"!": "!",
|
29 |
+
"?": "?",
|
30 |
+
"\n": ".",
|
31 |
+
"·": ",",
|
32 |
+
"、": ",",
|
33 |
+
"...": "…",
|
34 |
+
"$": ".",
|
35 |
+
"/": ",",
|
36 |
+
"—": "-",
|
37 |
+
"~": "…",
|
38 |
+
"~":"…",
|
39 |
+
}
|
40 |
+
|
41 |
+
tone_modifier = ToneSandhi()
|
42 |
+
|
43 |
+
|
44 |
+
def replace_punctuation(text):
    """Normalise punctuation through ``rep_map`` and strip everything else.

    Two interjection characters are swapped for substitutes first.  After the
    ``rep_map`` substitution, any run of characters that is neither in the
    ``\\u4e00-\\u9fa5`` range nor listed in ``punctuation`` is removed.
    """
    for src_char, dst_char in (("嗯", "恩"), ("呣", "母")):
        text = text.replace(src_char, dst_char)

    rep_pattern = re.compile("|".join(re.escape(key) for key in rep_map))
    mapped = rep_pattern.sub(lambda match: rep_map[match.group()], text)

    disallowed = r"[^\u4e00-\u9fa5" + "".join(punctuation) + r"]+"
    return re.sub(disallowed, "", mapped)
|
55 |
+
|
56 |
+
|
57 |
+
def replace_punctuation_with_en(text):
    """Like ``replace_punctuation`` but ASCII letters are kept as well.

    Two interjection characters are swapped for substitutes, punctuation is
    mapped through ``rep_map``, and any run of characters outside
    ``\\u4e00-\\u9fa5``, ``A-Za-z`` and ``punctuation`` is removed.
    """
    for src_char, dst_char in (("嗯", "恩"), ("呣", "母")):
        text = text.replace(src_char, dst_char)

    rep_pattern = re.compile("|".join(re.escape(key) for key in rep_map))
    mapped = rep_pattern.sub(lambda match: rep_map[match.group()], text)

    disallowed = r"[^\u4e00-\u9fa5A-Za-z" + "".join(punctuation) + r"]+"
    return re.sub(disallowed, "", mapped)
|
68 |
+
|
69 |
+
|
70 |
+
def replace_consecutive_punctuation(text):
    """Collapse each run of punctuation marks to the first mark of the run."""
    escaped = ''.join(map(re.escape, punctuation))
    run_pattern = f'([{escaped}])([{escaped}])+'
    return re.sub(run_pattern, r'\1', text)
|
75 |
+
|
76 |
+
|
77 |
+
def g2p(text):
    """Split ``text`` after each punctuation mark and convert it to phones.

    Returns:
        ``(phones, word2ph)`` as produced by ``_g2p`` for the non-blank pieces.
    """
    splitter = r"(?<=[{0}])\s*".format("".join(punctuation))
    pieces = re.split(splitter, text)
    sentences = [piece for piece in pieces if piece.strip()]
    phones, word2ph = _g2p(sentences)
    return phones, word2ph
|
82 |
+
|
83 |
+
|
84 |
+
def _get_initials_finals(word):
    """Return parallel lists of pinyin initials and tone-numbered finals.

    Both lists come from ``lazy_pinyin`` with ``neutral_tone_with_five=True``
    and are paired positionally, so they always have equal length.
    """
    shared_opts = {"neutral_tone_with_five": True}
    raw_initials = lazy_pinyin(word, style=Style.INITIALS, **shared_opts)
    raw_finals = lazy_pinyin(word, style=Style.FINALS_TONE3, **shared_opts)

    pairs = list(zip(raw_initials, raw_finals))
    return [c for c, _ in pairs], [v for _, v in pairs]
|
95 |
+
|
96 |
+
|
97 |
+
def _g2p(segments):
    """Convert text segments into phone symbols.

    For every segment, ASCII letters are removed, the segment is cut into
    (word, pos) pairs, tone sandhi is applied per word, and every
    initial/final pair is mapped through ``pinyin_to_symbol_map``.

    Args:
        segments: iterable of text segments.

    Returns:
        ``(phones_list, word2ph)``: the flat phone list, and for every
        initial/final pair the number of phones it produced (1 for
        punctuation, otherwise 2).

    Raises:
        AssertionError: if a tone is not one of 1-5, an initial that equals
            its final is not punctuation, or a rewritten pinyin is missing
            from ``pinyin_to_symbol_map``.
    """
    # Rewrite tables are constant, so they are built once rather than for
    # every syllable.
    # Finals rewritten when an initial is present.
    v_rep_map = {
        "uei": "ui",
        "iou": "iu",
        "uen": "un",
    }
    # Whole-syllable rewrites when there is no initial.
    pinyin_rep_map = {
        "ing": "ying",
        "i": "yi",
        "in": "yin",
        "u": "wu",
    }
    # Leading-letter rewrites for the remaining initial-less syllables.
    single_rep_map = {
        "v": "yu",
        "e": "e",
        "i": "y",
        "u": "w",
    }

    phones_list = []
    word2ph = []
    for seg in segments:
        # Remove all English words from the sentence.
        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = psg.lcut(seg)
        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)

        initials = []
        finals = []
        for word, pos in seg_cut:
            if pos == "eng":
                continue
            sub_initials, sub_finals = _get_initials_finals(word)
            sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
            # Extend in place: linear, unlike repeated list concatenation.
            initials.extend(sub_initials)
            finals.extend(sub_finals)

        for c, v in zip(initials, finals):
            raw_pinyin = c + v
            if c == v:
                # Initial and final are only identical for punctuation.
                assert c in punctuation
                phone = [c]
                word2ph.append(1)
            else:
                v_without_tone = v[:-1]
                tone = v[-1]

                pinyin = c + v_without_tone
                assert tone in "12345"

                if c:
                    if v_without_tone in v_rep_map:
                        pinyin = c + v_rep_map[v_without_tone]
                elif pinyin in pinyin_rep_map:
                    pinyin = pinyin_rep_map[pinyin]
                elif pinyin[0] in single_rep_map:
                    pinyin = single_rep_map[pinyin[0]] + pinyin[1:]

                assert pinyin in pinyin_to_symbol_map, (pinyin, seg, raw_pinyin)
                new_c, new_v = pinyin_to_symbol_map[pinyin].split(" ")
                new_v = new_v + tone
                phone = [new_c, new_v]
                word2ph.append(len(phone))

            phones_list += phone
    return phones_list, word2ph
|
172 |
+
|
173 |
+
|
174 |
+
def text_normalize(text):
    """Normalise Chinese text and clean up its punctuation.

    Sentences from ``TextNormalizer.normalize`` are passed through
    ``replace_punctuation`` and concatenated; runs of punctuation are then
    collapsed.  Normaliser origin:
    https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    """
    sentences = TextNormalizer().normalize(text)
    joined = "".join(replace_punctuation(sentence) for sentence in sentences)

    # Avoid reference leakage caused by repeated punctuation.
    return replace_consecutive_punctuation(joined)
|
185 |
+
|
186 |
+
|
187 |
+
# 不排除英文的文本格式化
|
188 |
+
def mix_text_normalize(text):
    """Normalise text while keeping English letters.

    Same pipeline as ``text_normalize`` but each sentence goes through
    ``replace_punctuation_with_en``.  Normaliser origin:
    https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    """
    sentences = TextNormalizer().normalize(text)
    joined = "".join(
        replace_punctuation_with_en(sentence) for sentence in sentences
    )

    # Avoid reference leakage caused by repeated punctuation.
    return replace_consecutive_punctuation(joined)
|
199 |
+
|
200 |
+
|
201 |
+
if __name__ == "__main__":
    # Manual smoke test.  Alternative inputs are kept as comments: previously
    # they were live assignments that were immediately overwritten, and the
    # first one contained an invalid "\游" escape in a non-raw string literal.
    # text = "啊——但是《原神》是由,米哈\\游自主,研发的一款全.新开放世界.冒险游戏"
    # text = "呣呣呣~就是…大人的鼹鼠党吧?"
    text = "你好"
    text = text_normalize(text)
    print(g2p(text))
|
207 |
+
|
208 |
+
|
209 |
+
# # 示例用法
|
210 |
+
# text = "这是一个示例文本:,你好!这是一个测试..."
|
211 |
+
# print(g2p_paddle(text)) # 输出: 这是一个示例文本你好这是一个测试
|
GPT_SoVITS/text/chinese2.py
ADDED
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pdb
|
3 |
+
import re
|
4 |
+
|
5 |
+
import cn2an
|
6 |
+
from pypinyin import lazy_pinyin, Style
|
7 |
+
from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
|
8 |
+
|
9 |
+
from text.symbols import punctuation
|
10 |
+
from text.tone_sandhi import ToneSandhi
|
11 |
+
from text.zh_normalization.text_normlization import TextNormalizer
|
12 |
+
|
13 |
+
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
14 |
+
|
15 |
+
current_file_path = os.path.dirname(__file__)

# Pinyin -> symbol-string table, read from the tab-separated file that sits
# next to this module.  The file is opened in a context manager so the handle
# is closed deterministically, with an explicit encoding so the result does
# not depend on the platform locale.
with open(
    os.path.join(current_file_path, "opencpop-strict.txt"), encoding="utf-8"
) as _symbol_table_file:
    pinyin_to_symbol_map = {
        line.split("\t")[0]: line.strip().split("\t")[1]
        for line in _symbol_table_file
    }
|
20 |
+
|
21 |
+
import jieba_fast.posseg as psg
|
22 |
+
|
23 |
+
# is_g2pw_str = os.environ.get("is_g2pw", "True")##默认开启
|
24 |
+
# is_g2pw = False#True if is_g2pw_str.lower() == 'true' else False
|
25 |
+
is_g2pw = True#True if is_g2pw_str.lower() == 'true' else False
|
26 |
+
if is_g2pw:
|
27 |
+
print("当前使用g2pw进行拼音推理")
|
28 |
+
from text.g2pw import G2PWPinyin, correct_pronunciation
|
29 |
+
parent_directory = os.path.dirname(current_file_path)
|
30 |
+
g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source=os.environ.get("bert_path","GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"),v_to_u=False, neutral_tone_with_five=True)
|
31 |
+
|
32 |
+
rep_map = {
|
33 |
+
":": ",",
|
34 |
+
";": ",",
|
35 |
+
",": ",",
|
36 |
+
"。": ".",
|
37 |
+
"!": "!",
|
38 |
+
"?": "?",
|
39 |
+
"\n": ".",
|
40 |
+
"·": ",",
|
41 |
+
"、": ",",
|
42 |
+
"...": "…",
|
43 |
+
"$": ".",
|
44 |
+
"/": ",",
|
45 |
+
"—": "-",
|
46 |
+
"~": "…",
|
47 |
+
"~":"…",
|
48 |
+
}
|
49 |
+
|
50 |
+
tone_modifier = ToneSandhi()
|
51 |
+
|
52 |
+
|
53 |
+
def replace_punctuation(text):
    """Normalise punctuation through ``rep_map`` and strip everything else.

    Two interjection characters are swapped for substitutes first.  After the
    ``rep_map`` substitution, any run of characters that is neither in the
    ``\\u4e00-\\u9fa5`` range nor listed in ``punctuation`` is removed.
    """
    swapped = text.replace("嗯", "恩")
    swapped = swapped.replace("呣", "母")

    rep_pattern = re.compile("|".join(re.escape(key) for key in rep_map))
    mapped = rep_pattern.sub(lambda match: rep_map[match.group()], swapped)

    disallowed = r"[^\u4e00-\u9fa5" + "".join(punctuation) + r"]+"
    return re.sub(disallowed, "", mapped)
|
64 |
+
|
65 |
+
|
66 |
+
def g2p(text):
    """Split ``text`` after each punctuation mark and convert it to phones.

    Returns:
        ``(phones, word2ph)`` as produced by ``_g2p`` for the non-blank pieces.
    """
    splitter = r"(?<=[{0}])\s*".format("".join(punctuation))
    pieces = re.split(splitter, text)
    sentences = [piece for piece in pieces if piece.strip()]
    phones, word2ph = _g2p(sentences)
    return phones, word2ph
|
71 |
+
|
72 |
+
|
73 |
+
def _get_initials_finals(word):
    """Return parallel lists of pinyin initials and tone-numbered finals.

    Both lists come from ``lazy_pinyin`` with ``neutral_tone_with_five=True``
    and are paired positionally, so they always have equal length.
    """
    shared_opts = {"neutral_tone_with_five": True}
    raw_initials = lazy_pinyin(word, style=Style.INITIALS, **shared_opts)
    raw_finals = lazy_pinyin(word, style=Style.FINALS_TONE3, **shared_opts)

    pairs = list(zip(raw_initials, raw_finals))
    return [c for c, _ in pairs], [v for _, v in pairs]
|
86 |
+
|
87 |
+
|
88 |
+
must_erhua = {
|
89 |
+
"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
|
90 |
+
}
|
91 |
+
not_erhua = {
|
92 |
+
"虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
|
93 |
+
"拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
|
94 |
+
"流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
|
95 |
+
"孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
|
96 |
+
"狗儿", "少儿"
|
97 |
+
}
|
98 |
+
def _merge_erhua(initials: list[str],
|
99 |
+
finals: list[str],
|
100 |
+
word: str,
|
101 |
+
pos: str) -> list[list[str]]:
|
102 |
+
"""
|
103 |
+
Do erhub.
|
104 |
+
"""
|
105 |
+
# fix er1
|
106 |
+
for i, phn in enumerate(finals):
|
107 |
+
if i == len(finals) - 1 and word[i] == "儿" and phn == 'er1':
|
108 |
+
finals[i] = 'er2'
|
109 |
+
|
110 |
+
# 发音
|
111 |
+
if word not in must_erhua and (word in not_erhua or
|
112 |
+
pos in {"a", "j", "nr"}):
|
113 |
+
return initials, finals
|
114 |
+
|
115 |
+
# "……" 等情况直接返回
|
116 |
+
if len(finals) != len(word):
|
117 |
+
return initials, finals
|
118 |
+
|
119 |
+
assert len(finals) == len(word)
|
120 |
+
|
121 |
+
# 与前一个字发同音
|
122 |
+
new_initials = []
|
123 |
+
new_finals = []
|
124 |
+
for i, phn in enumerate(finals):
|
125 |
+
if i == len(finals) - 1 and word[i] == "儿" and phn in {
|
126 |
+
"er2", "er5"
|
127 |
+
} and word[-2:] not in not_erhua and new_finals:
|
128 |
+
phn = "er" + new_finals[-1][-1]
|
129 |
+
|
130 |
+
new_initials.append(initials[i])
|
131 |
+
new_finals.append(phn)
|
132 |
+
|
133 |
+
return new_initials, new_finals
|
134 |
+
|
135 |
+
|
136 |
+
def _g2p(segments):
    """Convert text segments into phone symbols.

    For every segment, ASCII letters are removed and the segment is cut into
    (word, pos) pairs.  Pinyin comes either from pypinyin per word or, when
    ``is_g2pw`` is set, from one whole-sentence g2pw call that is then sliced
    per word.  Tone sandhi and erhua are applied per word, and every
    initial/final pair is mapped through ``pinyin_to_symbol_map``.

    Args:
        segments: iterable of text segments.

    Returns:
        ``(phones_list, word2ph)``: the flat phone list, and for every
        initial/final pair the number of phones it produced (1 for
        punctuation, otherwise 2).

    Raises:
        AssertionError: if a tone is not one of 1-5, an initial that equals
            its final is not punctuation, or a rewritten pinyin is missing
            from ``pinyin_to_symbol_map``.
    """
    # Rewrite tables are constant, so they are built once rather than for
    # every syllable.
    # Finals rewritten when an initial is present.
    v_rep_map = {
        "uei": "ui",
        "iou": "iu",
        "uen": "un",
    }
    # Whole-syllable rewrites when there is no initial.
    pinyin_rep_map = {
        "ing": "ying",
        "i": "yi",
        "in": "yin",
        "u": "wu",
    }
    # Leading-letter rewrites for the remaining initial-less syllables.
    single_rep_map = {
        "v": "yu",
        "e": "e",
        "i": "y",
        "u": "w",
    }

    phones_list = []
    word2ph = []
    for seg in segments:
        # Remove all English words from the sentence.
        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = psg.lcut(seg)
        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
        initials = []
        finals = []

        if not is_g2pw:
            for word, pos in seg_cut:
                if pos == "eng":
                    continue
                sub_initials, sub_finals = _get_initials_finals(word)
                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
                # Erhua.
                sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
                # Extend in place: linear, unlike repeated list concatenation.
                initials.extend(sub_initials)
                finals.extend(sub_finals)
            print("pypinyin结果",initials,finals)
        else:
            # g2pw runs inference on the whole sentence at once.
            pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)

            pre_word_length = 0
            for word, pos in seg_cut:
                now_word_length = pre_word_length + len(word)

                if pos == 'eng':
                    # Skipped words still advance the offset into `pinyins`.
                    pre_word_length = now_word_length
                    continue

                word_pinyins = pinyins[pre_word_length:now_word_length]

                # Polyphone disambiguation.
                word_pinyins = correct_pronunciation(word, word_pinyins)

                sub_initials = []
                sub_finals = []
                for pinyin in word_pinyins:
                    if pinyin[0].isalpha():
                        sub_initials.append(to_initials(pinyin))
                        sub_finals.append(to_finals_tone3(pinyin, neutral_tone_with_five=True))
                    else:
                        # Non-alphabetic entries are used as both initial and final.
                        sub_initials.append(pinyin)
                        sub_finals.append(pinyin)

                pre_word_length = now_word_length
                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
                # Erhua.
                sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
                initials.extend(sub_initials)
                finals.extend(sub_finals)

        for c, v in zip(initials, finals):
            raw_pinyin = c + v
            if c == v:
                # Initial and final are only identical for punctuation.
                assert c in punctuation
                phone = [c]
                word2ph.append(1)
            else:
                v_without_tone = v[:-1]
                tone = v[-1]

                pinyin = c + v_without_tone
                assert tone in "12345"

                if c:
                    if v_without_tone in v_rep_map:
                        pinyin = c + v_rep_map[v_without_tone]
                elif pinyin in pinyin_rep_map:
                    pinyin = pinyin_rep_map[pinyin]
                elif pinyin[0] in single_rep_map:
                    pinyin = single_rep_map[pinyin[0]] + pinyin[1:]

                assert pinyin in pinyin_to_symbol_map, (pinyin, seg, raw_pinyin)
                new_c, new_v = pinyin_to_symbol_map[pinyin].split(" ")
                new_v = new_v + tone
                phone = [new_c, new_v]
                word2ph.append(len(phone))

            phones_list += phone
    return phones_list, word2ph
|
252 |
+
|
253 |
+
|
254 |
+
def replace_punctuation_with_en(text):
    """Like ``replace_punctuation`` but ASCII letters are kept as well.

    Two interjection characters are swapped for substitutes, punctuation is
    mapped through ``rep_map``, and any run of characters outside
    ``\\u4e00-\\u9fa5``, ``A-Za-z`` and ``punctuation`` is removed.
    """
    swapped = text.replace("嗯", "恩")
    swapped = swapped.replace("呣", "母")

    rep_pattern = re.compile("|".join(re.escape(key) for key in rep_map))
    mapped = rep_pattern.sub(lambda match: rep_map[match.group()], swapped)

    disallowed = r"[^\u4e00-\u9fa5A-Za-z" + "".join(punctuation) + r"]+"
    return re.sub(disallowed, "", mapped)
|
265 |
+
|
266 |
+
def replace_consecutive_punctuation(text):
    """Collapse each run of punctuation marks to the first mark of the run."""
    escaped = ''.join(map(re.escape, punctuation))
    run_pattern = f'([{escaped}])([{escaped}])+'
    return re.sub(run_pattern, r'\1', text)
|
271 |
+
|
272 |
+
def text_normalize(text):
    """Normalise Chinese text and clean up its punctuation.

    Sentences from ``TextNormalizer.normalize`` are passed through
    ``replace_punctuation`` and concatenated; runs of punctuation are then
    collapsed.  Normaliser origin:
    https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    """
    sentences = TextNormalizer().normalize(text)
    joined = "".join(replace_punctuation(sentence) for sentence in sentences)

    # Avoid reference leakage caused by repeated punctuation.
    return replace_consecutive_punctuation(joined)
|
283 |
+
|
284 |
+
# 不排除英文的文本格式化
|
285 |
+
def mix_text_normalize(text):
    """Normalise text while keeping English letters.

    Same pipeline as ``text_normalize`` but each sentence goes through
    ``replace_punctuation_with_en``.  Normaliser origin:
    https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    """
    sentences = TextNormalizer().normalize(text)
    joined = "".join(
        replace_punctuation_with_en(sentence) for sentence in sentences
    )

    # Avoid reference leakage caused by repeated punctuation.
    return replace_consecutive_punctuation(joined)
|
296 |
+
|
297 |
+
|
298 |
+
if __name__ == "__main__":
    # Manual smoke test.  Alternative inputs are kept as comments: previously
    # they were live assignments that were immediately overwritten, and the
    # first one contained an invalid "\游" escape in a non-raw string literal.
    # text = "啊——但是《原神》是由,米哈\\游自主,研发的一款全.新开放世界.冒险游戏"
    # text = "呣呣呣~就是…大人的鼹鼠党吧?"
    text = "你好"
    text = text_normalize(text)
    print(g2p(text))
|
304 |
+
|
305 |
+
|
306 |
+
# # 示例用法
|
307 |
+
# text = "这是一个示例文本:,你好!这是一个测试..."
|
308 |
+
# print(g2p_paddle(text)) # 输出: 这是一个示例文本你好这是一个测试
|
GPT_SoVITS/text/cleaner.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from text import cleaned_text_to_sequence
|
2 |
+
import os
|
3 |
+
# if os.environ.get("version","v1")=="v1":
|
4 |
+
# from text import chinese
|
5 |
+
# from text.symbols import symbols
|
6 |
+
# else:
|
7 |
+
# from text import chinese2 as chinese
|
8 |
+
# from text.symbols2 import symbols
|
9 |
+
|
10 |
+
from text import symbols as symbols_v1
|
11 |
+
from text import symbols2 as symbols_v2
|
12 |
+
|
13 |
+
special = [
|
14 |
+
# ("%", "zh", "SP"),
|
15 |
+
("¥", "zh", "SP2"),
|
16 |
+
("^", "zh", "SP3"),
|
17 |
+
# ('@', 'zh', "SP4")#不搞鬼畜了,和第二版保持一致吧
|
18 |
+
]
|
19 |
+
|
20 |
+
|
21 |
+
def clean_text(text, language, version=None):
    """Normalize ``text`` and convert it to a phoneme list.

    Args:
        text: Raw input text.
        language: Language code. A code that is not in the selected
            version's module map falls back to ``"en"`` and the text is
            replaced by a single space.
        version: ``"v1"`` selects the v1 symbol table and language modules;
            any other value selects the v2 ones. When None, the ``version``
            environment variable is used, defaulting to ``"v2"``.

    Returns:
        A tuple ``(phones, word2ph, norm_text)``. ``word2ph`` is None for
        every language except ``"zh"`` and ``"yue"``. Phones missing from
        the symbol table are replaced by ``'UNK'``.
    """
    if version is None:
        version = os.environ.get('version', 'v2')

    if version == "v1":
        symbols = symbols_v1.symbols
        language_module_map = {"zh": "chinese", "ja": "japanese", "en": "english"}
    else:
        symbols = symbols_v2.symbols
        language_module_map = {"zh": "chinese2", "ja": "japanese", "en": "english", "ko": "korean","yue":"cantonese"}

    # Unsupported language: degrade to an (effectively empty) English input.
    if language not in language_module_map:
        language = "en"
        text = " "

    # Text containing a special silence marker takes a dedicated path.
    for special_s, special_l, target_symbol in special:
        if language == special_l and special_s in text:
            return clean_special(text, language, special_s, target_symbol, version)

    module_name = language_module_map[language]
    language_module = __import__("text." + module_name, fromlist=[module_name])

    # Not every language module provides a normalizer.
    if hasattr(language_module, "text_normalize"):
        norm_text = language_module.text_normalize(text)
    else:
        norm_text = text

    word2ph = None
    if language in ("zh", "yue"):
        # These g2p functions also return the per-character phone counts.
        phones, word2ph = language_module.g2p(norm_text)
        assert len(phones) == sum(word2ph)
        assert len(norm_text) == len(word2ph)
    else:
        phones = language_module.g2p(norm_text)
        if language == "en" and len(phones) < 4:
            # Very short English phone lists get a leading comma symbol.
            phones = [','] + phones

    phones = [ph if ph in symbols else 'UNK' for ph in phones]
    return phones, word2ph, norm_text
|
55 |
+
|
56 |
+
|
57 |
+
def clean_special(text, language, special_s, target_symbol, version=None):
    """Process text that contains a special silence (SP) marker.

    The marker ``special_s`` is replaced by ``","`` before normalization and
    g2p; afterwards every ``","`` phone is replaced by ``target_symbol``.

    Args:
        text: Raw input text containing the marker.
        language: Language code; must be a key of the selected module map.
        special_s: Marker character to replace.
        target_symbol: Phoneme symbol emitted in place of each ``","`` phone.
        version: Same meaning as in ``clean_text``.

    Returns:
        A tuple ``(phones, word2ph, norm_text)``.

    Raises:
        AssertionError: If a produced phone is not in the symbol table.
    """
    if version is None:
        version = os.environ.get('version', 'v2')

    if version == "v1":
        symbols = symbols_v1.symbols
        language_module_map = {"zh": "chinese", "ja": "japanese", "en": "english"}
    else:
        symbols = symbols_v2.symbols
        language_module_map = {"zh": "chinese2", "ja": "japanese", "en": "english", "ko": "korean","yue":"cantonese"}

    text = text.replace(special_s, ",")
    module_name = language_module_map[language]
    language_module = __import__("text." + module_name, fromlist=[module_name])
    norm_text = language_module.text_normalize(text)
    g2p_result = language_module.g2p(norm_text)

    new_ph = []
    for ph in g2p_result[0]:
        assert ph in symbols
        new_ph.append(target_symbol if ph == "," else ph)
    return new_ph, g2p_result[1], norm_text
|
81 |
+
|
82 |
+
|
83 |
+
def text_to_sequence(text, language, version=None):
    """Convert raw text directly to a symbol-id sequence.

    Args:
        text: Raw input text.
        language: Language code forwarded to ``clean_text``.
        version: Model version. The ``version`` environment variable, when
            set, takes precedence over this argument; if neither is given,
            ``'v2'`` is used.

    Returns:
        Whatever ``cleaned_text_to_sequence`` returns for the cleaned phones.
    """
    version = os.environ.get('version', version)
    if version is None:
        version = 'v2'
    # clean_text() requires the language and returns
    # (phones, word2ph, norm_text); only the phones are converted to ids.
    phones, _word2ph, _norm_text = clean_text(text, language, version)
    return cleaned_text_to_sequence(phones, version)
|
88 |
+
|
89 |
+
|
90 |
+
if __name__ == "__main__":
    # Manual smoke test with a Chinese sentence.
    sample = "你好%啊啊啊额、还是到付红四方。"
    print(clean_text(sample, "zh"))
|
GPT_SoVITS/text/cmudict-fast.rep
ADDED
The diff for this file is too large to render.
See raw diff
|
|
GPT_SoVITS/text/cmudict.rep
ADDED
The diff for this file is too large to render.
See raw diff
|
|
GPT_SoVITS/text/engdict-hot.rep
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
CHATGPT CH AE1 T JH IY1 P IY1 T IY1
|
2 |
+
JSON JH EY1 S AH0 N
|
3 |
+
CONDA K AA1 N D AH0
|
GPT_SoVITS/text/english.py
ADDED
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import wordsegment
|
5 |
+
from g2p_en import G2p
|
6 |
+
|
7 |
+
from text.symbols import punctuation
|
8 |
+
|
9 |
+
from text.symbols2 import symbols
|
10 |
+
|
11 |
+
import unicodedata
|
12 |
+
from builtins import str as unicode
|
13 |
+
from g2p_en.expand import normalize_numbers
|
14 |
+
from nltk.tokenize import TweetTokenizer
|
15 |
+
word_tokenize = TweetTokenizer().tokenize
|
16 |
+
from nltk import pos_tag
|
17 |
+
|
18 |
+
# All dictionary and cache files live next to this module.
current_file_path = os.path.dirname(__file__)

# Pronunciation dictionaries (read by read_dict / read_dict_new / hot_reload_hot).
CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
CMU_DICT_FAST_PATH = os.path.join(current_file_path, "cmudict-fast.rep")
CMU_DICT_HOT_PATH = os.path.join(current_file_path, "engdict-hot.rep")

# Pickle caches (read by get_dict / get_namedict).
CACHE_PATH = os.path.join(current_file_path, "engdict_cache.pickle")
NAMECACHE_PATH = os.path.join(current_file_path, "namedict_cache.pickle")
|
24 |
+
|
25 |
+
# Set of ARPAbet phone symbols, grouped here by vowel family (stress digits
# 0/1/2, plus the bare "ER" and "IH") followed by the consonants.
arpa = {
    # Vowels
    "AA0", "AA1", "AA2",
    "AE0", "AE1", "AE2",
    "AH0", "AH1", "AH2",
    "AO0", "AO1", "AO2",
    "AW0", "AW1", "AW2",
    "AY0", "AY1", "AY2",
    "EH0", "EH1", "EH2",
    "ER", "ER0", "ER1", "ER2",
    "EY0", "EY1", "EY2",
    "IH", "IH0", "IH1", "IH2",
    "IY0", "IY1", "IY2",
    "OW0", "OW1", "OW2",
    "OY0", "OY1", "OY2",
    "UH0", "UH1", "UH2",
    "UW0", "UW1", "UW2",
    # Consonants
    "B", "CH", "D", "DH", "F", "G", "HH", "JH",
    "K", "L", "M", "N", "NG", "P", "R", "S",
    "SH", "T", "TH", "V", "W", "Y", "Z", "ZH",
}
|
98 |
+
|
99 |
+
|
100 |
+
def replace_phs(phs):
    """Filter a phone list down to entries the symbol table knows.

    Phones present in ``symbols`` are kept, an apostrophe is mapped to
    ``"-"``, and anything else is reported on stdout and dropped.

    Args:
        phs: Iterable of phone strings.

    Returns:
        A new list with the kept/mapped phones in their original order.
    """
    rep_map = {"'": "-"}
    phs_new = []
    for ph in phs:
        if ph in symbols:
            phs_new.append(ph)
            continue
        if ph in rep_map:
            phs_new.append(rep_map[ph])
            continue
        print("ph not in symbols: ", ph)
    return phs_new
|
111 |
+
|
112 |
+
|
113 |
+
def replace_consecutive_punctuation(text):
    """Collapse every run of punctuation marks to the run's first mark.

    The set of punctuation characters comes from the module-level
    ``punctuation`` sequence.
    """
    char_class = ''.join(map(re.escape, punctuation))
    run_pattern = f'([{char_class}])([{char_class}])+'
    return re.sub(run_pattern, r'\1', text)
|
118 |
+
|
119 |
+
|
120 |
+
def read_dict():
    """Parse ``CMU_DICT_PATH`` into a syllable-structured dictionary.

    Lines before line 49 (1-based) are skipped. Each remaining line is
    split into a word and its pronunciation; the pronunciation is split
    into syllables on ``" - "`` and each syllable into phones.

    Returns:
        Dict mapping the lower-cased word to a list of phone lists, one
        per syllable.
    """
    # NOTE(review): the separator literals below are reproduced exactly as
    # they appear in this view; confirm against the raw file that no
    # repeated spaces were collapsed by the rendering.
    g2p_dict = {}
    start_line = 49
    with open(CMU_DICT_PATH) as f:
        for line_index, raw_line in enumerate(f, start=1):
            if line_index < start_line:
                continue
            word_split = raw_line.strip().split(" ")
            word = word_split[0].lower()
            syllable_split = word_split[1].split(" - ")
            g2p_dict[word] = [syllable.split(" ") for syllable in syllable_split]

    return g2p_dict
|
142 |
+
|
143 |
+
|
144 |
+
def read_dict_new():
    """Build the pronunciation dictionary from the two bundled dict files.

    ``CMU_DICT_PATH`` is read first (lines before line 57, 1-based, are
    skipped). ``CMU_DICT_FAST_PATH`` is then read in full and only
    contributes words that are not already present.

    Returns:
        Dict mapping the lower-cased word to a one-element list holding
        its phone list.
    """
    # NOTE(review): the separator literals below are reproduced exactly as
    # they appear in this view; confirm against the raw file that no
    # repeated spaces were collapsed by the rendering.
    g2p_dict = {}

    with open(CMU_DICT_PATH) as f:
        for line_index, raw_line in enumerate(f, start=1):
            if line_index < 57:
                continue
            word_split = raw_line.strip().split(" ")
            word = word_split[0].lower()
            g2p_dict[word] = [word_split[1].split(" ")]

    with open(CMU_DICT_FAST_PATH) as f:
        for raw_line in f:
            word_split = raw_line.strip().split(" ")
            word = word_split[0].lower()
            if word not in g2p_dict:
                g2p_dict[word] = [word_split[1:]]

    return g2p_dict
|
174 |
+
|
175 |
+
def hot_reload_hot(g2p_dict):
    """Overlay the hot-fix dictionary onto ``g2p_dict``.

    Every line of ``CMU_DICT_HOT_PATH`` is read as a word followed by its
    phones; the entry overwrites any existing pronunciation for that word.

    Args:
        g2p_dict: Dictionary to update in place.

    Returns:
        The same ``g2p_dict`` object.
    """
    with open(CMU_DICT_HOT_PATH) as f:
        for raw_line in f:
            word_split = raw_line.strip().split(" ")
            word = word_split[0].lower()
            # Custom pronunciations replace the dictionary entry outright.
            g2p_dict[word] = [word_split[1:]]

    return g2p_dict
|
191 |
+
|
192 |
+
|
193 |
+
def cache_dict(g2p_dict, file_path):
    """Pickle ``g2p_dict`` to ``file_path``, overwriting any existing file."""
    payload = pickle.dumps(g2p_dict)
    with open(file_path, "wb") as cache_file:
        cache_file.write(payload)
|
196 |
+
|
197 |
+
|
198 |
+
def get_dict():
    """Return the pronunciation dictionary, using the pickle cache if present.

    When ``CACHE_PATH`` does not exist the dictionary is built with
    ``read_dict_new`` and written to the cache. The hot-fix dictionary is
    applied afterwards in both cases, so it is never stored in the cache.
    """
    if not os.path.exists(CACHE_PATH):
        g2p_dict = read_dict_new()
        cache_dict(g2p_dict, CACHE_PATH)
    else:
        with open(CACHE_PATH, "rb") as cache_file:
            g2p_dict = pickle.load(cache_file)

    return hot_reload_hot(g2p_dict)
|
209 |
+
|
210 |
+
|
211 |
+
def get_namedict():
    """Load the name dictionary from ``NAMECACHE_PATH``.

    Returns:
        The unpickled dictionary, or an empty dict when the cache file
        does not exist.
    """
    if not os.path.exists(NAMECACHE_PATH):
        return {}

    with open(NAMECACHE_PATH, "rb") as cache_file:
        return pickle.load(cache_file)
|
219 |
+
|
220 |
+
|
221 |
+
def text_normalize(text):
    """Normalize English text before grapheme-to-phoneme conversion.

    Steps, in order: map Chinese / full-width punctuation to the ASCII marks
    g2p_en understands, expand numbers, strip accents, delete every
    character outside ``[ A-Za-z'.,?!-]``, expand "i.e." / "e.g.", and
    collapse runs of punctuation.

    Args:
        text: Raw input string.

    Returns:
        The normalized string.
    """
    # TODO: proper English text normalization.
    # Map Chinese punctuation onto the marks g2p_en handles. The first
    # character class must contain the full-width comma: anything left
    # unmapped here is deleted by the whitelist substitution further down.
    rep_map = {
        "[;::,;]": ",",
        '["’]': "'",
        "。": ".",
        "!": "!",
        "?": "?",
    }
    for p, r in rep_map.items():
        text = re.sub(p, r, text)

    # Text formatting taken from g2p_en, extended to keep upper-case letters.
    text = unicode(text)
    text = normalize_numbers(text)
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')  # Strip accents
    # Raw string: "\-" in a plain literal is an invalid escape sequence.
    text = re.sub(r"[^ A-Za-z'.,?!\-]", "", text)
    text = re.sub(r"(?i)i\.e\.", "that is", text)
    text = re.sub(r"(?i)e\.g\.", "for example", text)

    # Collapse repeated punctuation to avoid leaking the reference prompt.
    text = replace_consecutive_punctuation(text)

    return text
|
248 |
+
|
249 |
+
|
250 |
+
class en_G2p(G2p):
    """``G2p`` subclass with a replacement dictionary and extra lookup rules.

    On top of the base class this uses the dictionary from ``get_dict``,
    an optional name dictionary, letter-by-letter reading of short
    out-of-vocabulary words, possessive handling and compound-word
    segmentation before falling back to the model's ``predict``.
    """

    def __init__(self):
        super().__init__()
        # Initialise the word segmenter.
        wordsegment.load()

        # Replace the outdated dictionary and add the name dictionary.
        self.cmu = get_dict()
        self.namedict = get_namedict()

        # Drop a few abbreviations whose dictionary pronunciation is wrong.
        # pop() instead of del: a cached or edited dictionary may already
        # lack one of them, and that must not break module import.
        for word in ["AE", "AI", "AR", "IOS", "HUD", "OS"]:
            self.cmu.pop(word.lower(), None)

        # Correct homograph entries.
        self.homograph2features["read"] = (['R', 'IY1', 'D'], ['R', 'EH1', 'D'], 'VBP')
        self.homograph2features["complex"] = (['K', 'AH0', 'M', 'P', 'L', 'EH1', 'K', 'S'], ['K', 'AA1', 'M', 'P', 'L', 'EH0', 'K', 'S'], 'JJ')

    def __call__(self, text):
        """Convert ``text`` to a flat phone list with ``" "`` between words."""
        # tokenization
        words = word_tokenize(text)
        tokens = pos_tag(words)  # tuples of (word, tag)

        # steps
        prons = []
        for o_word, pos in tokens:
            # Reproduce g2p_en's lower-casing.
            word = o_word.lower()

            if re.search("[a-z]", word) is None:
                # No letters at all (punctuation etc.): pass through as is.
                pron = [word]
            # Handle single letters first.
            elif len(word) == 1:
                # Pronunciation fix for a standalone capital "A"; this needs
                # the original-case o_word to detect the capital.
                if o_word == "A":
                    pron = ['EY1']
                else:
                    pron = self.cmu[word][0]
            # Original g2p_en homograph handling.
            elif word in self.homograph2features:  # Check homograph
                pron1, pron2, pos1 = self.homograph2features[word]
                if pos.startswith(pos1):
                    pron = pron1
                # pos1 being longer than pos only happens for "read".
                elif len(pos) < len(pos1) and pos == pos1[:len(pos)]:
                    pron = pron1
                else:
                    pron = pron2
            else:
                # Recursive lookup / prediction.
                pron = self.qryword(o_word)

            prons.extend(pron)
            prons.extend([" "])

        # Drop the trailing word separator.
        return prons[:-1]

    def qryword(self, o_word):
        """Return the phone list for a single word.

        Lookup order: dictionary, name dictionary (title-case words only),
        letter-by-letter spelling for words of at most three characters,
        possessive splitting, compound segmentation, model prediction.
        """
        word = o_word.lower()

        # Dictionary lookup, except for single letters.
        if len(word) > 1 and word in self.cmu:  # lookup CMU dict
            return self.cmu[word][0]

        # Title-case words are looked up in the name dictionary.
        if o_word.istitle() and word in self.namedict:
            return self.namedict[word][0]

        # OOV words of length <= 3 are spelled out letter by letter.
        if len(word) <= 3:
            phones = []
            for w in word:
                # Standalone "a" pronunciation fix; no upper case occurs here.
                if w == "a":
                    phones.extend(['EY1'])
                elif not w.isalpha():
                    phones.extend([w])
                else:
                    phones.extend(self.cmu[w][0])
            return phones

        # Try to split off a possessive suffix.
        if re.match(r"^([a-z]+)('s)$", word):
            # Copy, so the dictionary's own list is never mutated below.
            phones = self.qryword(word[:-2])[:]
            # After the voiceless consonants P T K F TH HH, 's is ['S'].
            if phones[-1] in ['P', 'T', 'K', 'F', 'TH', 'HH']:
                phones.extend(['S'])
            # After the sibilants S Z SH ZH CH JH, 's is ['IH1', 'Z'] or
            # ['AH0', 'Z'].
            elif phones[-1] in ['S', 'Z', 'SH', 'ZH', 'CH', 'JH']:
                phones.extend(['AH0', 'Z'])
            # After voiced consonants (B D G DH V M N NG L R W Y) and after
            # any vowel, 's is ['Z'].
            else:
                phones.extend(['Z'])
            return phones

        # Try word segmentation to handle compound words.
        comps = wordsegment.segment(word.lower())

        # Words that cannot be segmented go to the model for prediction.
        if len(comps)==1:
            return self.predict(word)

        # Segmentable words are resolved recursively, part by part.
        return [phone for comp in comps for phone in self.qryword(comp)]
|
358 |
+
|
359 |
+
|
360 |
+
_g2p = en_G2p()
|
361 |
+
|
362 |
+
|
363 |
+
def g2p(text):
    """Convert English text to a list of phone symbols.

    The whole text is run through the module-level ``_g2p`` converter.
    Word separators, padding/sentence markers and the bare ``"UW"`` are
    dropped, ``"<unk>"`` becomes ``"UNK"``, and the result is filtered by
    ``replace_phs``.
    """
    skipped = (" ", "<pad>", "UW", "</s>", "<s>")
    phones = []
    for ph in _g2p(text):
        if ph in skipped:
            continue
        phones.append("UNK" if ph == "<unk>" else ph)

    return replace_phs(phones)
|
369 |
+
|
370 |
+
|
371 |
+
if __name__ == "__main__":
    # Manual smoke tests: one raw word, then two normalized sentences.
    print(g2p("hello"))
    for sample in (
        "e.g. I used openai's AI tool to draw a picture.",
        "In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder.",
    ):
        print(g2p(text_normalize(sample)))
|
GPT_SoVITS/text/g2pw/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from text.g2pw.g2pw import *
|
GPT_SoVITS/text/g2pw/dataset.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""
|
15 |
+
Credits
|
16 |
+
This code is modified from https://github.com/GitYCC/g2pW
|
17 |
+
"""
|
18 |
+
from typing import Dict
|
19 |
+
from typing import List
|
20 |
+
from typing import Tuple
|
21 |
+
|
22 |
+
import numpy as np
|
23 |
+
|
24 |
+
from .utils import tokenize_and_map
|
25 |
+
|
26 |
+
ANCHOR_CHAR = '▁'
|
27 |
+
|
28 |
+
|
29 |
+
def prepare_onnx_input(tokenizer,
                       labels: List[str],
                       char2phonemes: Dict[str, List[int]],
                       chars: List[str],
                       texts: List[str],
                       query_ids: List[int],
                       use_mask: bool=False,
                       window_size: int=None,
                       max_len: int=512) -> Dict[str, np.array]:
    """Build the batched numpy inputs for the g2pW ONNX model.

    Args:
        tokenizer: Tokenizer passed to ``tokenize_and_map``; it must also
            provide ``convert_tokens_to_ids``.
        labels: All phoneme labels; its length is the phoneme-mask width.
        char2phonemes: Maps a character to the label indices allowed for it
            (only consulted when ``use_mask`` is true).
        chars: Character vocabulary; the query character's position in it
            becomes ``char_ids``.
        texts: One sentence per sample.
        query_ids: For each sample, the index in its text of the character
            to disambiguate.
        use_mask: Restrict the phoneme mask to the query character's labels.
        window_size: If truthy, each text is first cut to a character
            window around the query character.
        max_len: Maximum token count including ``[CLS]`` and ``[SEP]``.

    Returns:
        Dict of arrays keyed ``input_ids``, ``token_type_ids``,
        ``attention_masks``, ``phoneme_masks`` (float32), ``char_ids`` and
        ``position_ids`` (all others int64). An empty dict is returned for
        the whole batch if tokenizing any text fails.
    """
    if window_size is not None:
        truncated_texts, truncated_query_ids = _truncate_texts(
            window_size=window_size, texts=texts, query_ids=query_ids)

    # Per-sample features, collected in output-key order.
    batch = {
        'input_ids': [],
        'token_type_ids': [],
        'attention_masks': [],
        'phoneme_masks': [],
        'char_ids': [],
        'position_ids': [],
    }

    for idx in range(len(texts)):
        if window_size:
            text = truncated_texts[idx].lower()
            query_id = truncated_query_ids[idx]
        else:
            text = texts[idx].lower()
            query_id = query_ids[idx]

        try:
            tokens, text2token, token2text = tokenize_and_map(
                tokenizer=tokenizer, text=text)
        except Exception:
            print(f'warning: text "{text}" is invalid')
            return {}

        text, query_id, tokens, text2token, token2text = _truncate(
            max_len=max_len,
            text=text,
            query_id=query_id,
            tokens=tokens,
            text2token=text2token,
            token2text=token2text)

        processed_tokens = ['[CLS]'] + tokens + ['[SEP]']
        n_tokens = len(processed_tokens)

        query_char = text[query_id]
        if use_mask:
            phoneme_mask = [1 if i in char2phonemes[query_char] else 0
                            for i in range(len(labels))]
        else:
            phoneme_mask = [1] * len(labels)

        batch['input_ids'].append(
            list(np.array(tokenizer.convert_tokens_to_ids(processed_tokens))))
        batch['token_type_ids'].append(list(np.zeros((n_tokens, ), dtype=int)))
        batch['attention_masks'].append(list(np.ones((n_tokens, ), dtype=int)))
        batch['phoneme_masks'].append(phoneme_mask)
        batch['char_ids'].append(chars.index(query_char))
        # +1 because the [CLS] token sits in the first position.
        batch['position_ids'].append(text2token[query_id] + 1)

    outputs = {}
    for key, rows in batch.items():
        dtype = np.float32 if key == 'phoneme_masks' else np.int64
        outputs[key] = np.array(rows).astype(dtype)
    return outputs
|
97 |
+
|
98 |
+
|
99 |
+
def _truncate_texts(window_size: int, texts: List[str],
|
100 |
+
query_ids: List[int]) -> Tuple[List[str], List[int]]:
|
101 |
+
truncated_texts = []
|
102 |
+
truncated_query_ids = []
|
103 |
+
for text, query_id in zip(texts, query_ids):
|
104 |
+
start = max(0, query_id - window_size // 2)
|
105 |
+
end = min(len(text), query_id + window_size // 2)
|
106 |
+
truncated_text = text[start:end]
|
107 |
+
truncated_texts.append(truncated_text)
|
108 |
+
|
109 |
+
truncated_query_id = query_id - start
|
110 |
+
truncated_query_ids.append(truncated_query_id)
|
111 |
+
return truncated_texts, truncated_query_ids
|
112 |
+
|
113 |
+
|
114 |
+
def _truncate(max_len: int,
|
115 |
+
text: str,
|
116 |
+
query_id: int,
|
117 |
+
tokens: List[str],
|
118 |
+
text2token: List[int],
|
119 |
+
token2text: List[Tuple[int]]):
|
120 |
+
truncate_len = max_len - 2
|
121 |
+
if len(tokens) <= truncate_len:
|
122 |
+
return (text, query_id, tokens, text2token, token2text)
|
123 |
+
|
124 |
+
token_position = text2token[query_id]
|
125 |
+
|
126 |
+
token_start = token_position - truncate_len // 2
|
127 |
+
token_end = token_start + truncate_len
|
128 |
+
font_exceed_dist = -token_start
|
129 |
+
back_exceed_dist = token_end - len(tokens)
|
130 |
+
if font_exceed_dist > 0:
|
131 |
+
token_start += font_exceed_dist
|
132 |
+
token_end += font_exceed_dist
|
133 |
+
elif back_exceed_dist > 0:
|
134 |
+
token_start -= back_exceed_dist
|
135 |
+
token_end -= back_exceed_dist
|
136 |
+
|
137 |
+
start = token2text[token_start][0]
|
138 |
+
end = token2text[token_end - 1][1]
|
139 |
+
|
140 |
+
return (text[start:end], query_id - start, tokens[token_start:token_end], [
|
141 |
+
i - token_start if i is not None else None
|
142 |
+
for i in text2token[start:end]
|
143 |
+
], [(s - start, e - start) for s, e in token2text[token_start:token_end]])
|
144 |
+
|
145 |
+
|
146 |
+
def get_phoneme_labels(polyphonic_chars: List[List[str]]
                       ) -> Tuple[List[str], Dict[str, List[int]]]:
    """Build the phoneme label list and the per-character label indices.

    Args:
        polyphonic_chars: ``(char, phoneme)`` pairs.

    Returns:
        ``(labels, char2phonemes)``: ``labels`` is the sorted list of
        distinct phonemes; ``char2phonemes`` maps each character to the
        label indices of its phonemes, in input order (duplicates kept).
    """
    labels = sorted({phoneme for _char, phoneme in polyphonic_chars})
    # One dict lookup per pair instead of a linear labels.index() scan.
    label_index = {label: idx for idx, label in enumerate(labels)}
    char2phonemes = {}
    for char, phoneme in polyphonic_chars:
        char2phonemes.setdefault(char, []).append(label_index[phoneme])
    return labels, char2phonemes
|
155 |
+
|
156 |
+
|
157 |
+
def get_char_phoneme_labels(polyphonic_chars: List[List[str]]
                            ) -> Tuple[List[str], Dict[str, List[int]]]:
    """Build character-specific phoneme labels and their indices.

    Args:
        polyphonic_chars: ``(char, phoneme)`` pairs.

    Returns:
        ``(labels, char2phonemes)``: ``labels`` is the sorted list of
        distinct ``"<char> <phoneme>"`` strings; ``char2phonemes`` maps
        each character to the label indices of its pairs, in input order
        (duplicates kept).
    """
    labels = sorted({f'{char} {phoneme}' for char, phoneme in polyphonic_chars})
    # One dict lookup per pair instead of a linear labels.index() scan.
    label_index = {label: idx for idx, label in enumerate(labels)}
    char2phonemes = {}
    for char, phoneme in polyphonic_chars:
        char2phonemes.setdefault(char, []).append(
            label_index[f'{char} {phoneme}'])
    return labels, char2phonemes
|
GPT_SoVITS/text/g2pw/g2pw.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This code is modified from https://github.com/mozillazg/pypinyin-g2pW
|
2 |
+
|
3 |
+
import pickle
|
4 |
+
import os
|
5 |
+
|
6 |
+
from pypinyin.constants import RE_HANS
|
7 |
+
from pypinyin.core import Pinyin, Style
|
8 |
+
from pypinyin.seg.simpleseg import simple_seg
|
9 |
+
from pypinyin.converter import UltimateConverter
|
10 |
+
from pypinyin.contrib.tone_convert import to_tone
|
11 |
+
from .onnx_api import G2PWOnnxConverter
|
12 |
+
|
13 |
+
# Dictionary and cache files live next to this module.
current_file_path = os.path.dirname(__file__)

# Pickle cache of the merged dictionary (see get_dict).
CACHE_PATH = os.path.join(current_file_path, "polyphonic.pickle")

# Source dictionaries; the "fix" file is read second (see read_dict).
PP_DICT_PATH = os.path.join(current_file_path, "polyphonic.rep")
PP_FIX_DICT_PATH = os.path.join(current_file_path, "polyphonic-fix.rep")
|
17 |
+
|
18 |
+
|
19 |
+
class G2PWPinyin(Pinyin):
    """``Pinyin`` front end whose converter is backed by the g2pW ONNX model."""

    def __init__(self, model_dir='G2PWModel/', model_source=None,
                 enable_non_tradional_chinese=True,
                 v_to_u=False, neutral_tone_with_five=False, tone_sandhi=False, **kwargs):
        """Create the ONNX converter and wrap it in a ``Converter``.

        ``model_dir``, ``model_source`` and ``enable_non_tradional_chinese``
        are forwarded to ``G2PWOnnxConverter``; ``v_to_u``,
        ``neutral_tone_with_five`` and ``tone_sandhi`` are forwarded to
        ``Converter``. Extra keyword arguments are accepted and ignored.
        """
        onnx_converter = G2PWOnnxConverter(
            model_dir=model_dir,
            style='pinyin',
            model_source=model_source,
            enable_non_tradional_chinese=enable_non_tradional_chinese,
        )
        self._g2pw = onnx_converter
        self._converter = Converter(
            onnx_converter,
            v_to_u=v_to_u,
            neutral_tone_with_five=neutral_tone_with_five,
            tone_sandhi=tone_sandhi,
        )

    def get_seg(self, **kwargs):
        """Return the segmentation function (always ``simple_seg``)."""
        return simple_seg
|
37 |
+
|
38 |
+
|
39 |
+
class Converter(UltimateConverter):
    """``UltimateConverter`` that asks g2pW first and pypinyin as a fallback."""

    def __init__(self, g2pw_instance, v_to_u=False,
                 neutral_tone_with_five=False,
                 tone_sandhi=False, **kwargs):
        """Store the g2pW callable; all other options go to the base class."""
        super().__init__(
            v_to_u=v_to_u,
            neutral_tone_with_five=neutral_tone_with_five,
            tone_sandhi=tone_sandhi, **kwargs)

        self._g2pw = g2pw_instance

    def convert(self, words, style, heteronym, errors, strict, **kwargs):
        """Convert ``words`` to pinyin lists in the requested style.

        Text matching ``RE_HANS`` goes through ``_to_pinyin``, the
        ``post_pinyin`` hook and ``convert_styles``; anything else is
        handled by ``handle_nopinyin``. Duplicates and empty items are
        removed from the result.
        """
        if RE_HANS.match(words):
            pys = self._to_pinyin(words, style=style, heteronym=heteronym,
                                  errors=errors, strict=strict)
            post_data = self.post_pinyin(words, heteronym, pys)
            if post_data is not None:
                pys = post_data

            pys = self.convert_styles(
                pys, words, style, heteronym, errors, strict)
        else:
            pys = []
            py = self.handle_nopinyin(words, style=style, errors=errors,
                                      heteronym=heteronym, strict=strict)
            if py:
                pys.extend(py)

        return _remove_dup_and_empty(pys)

    def _to_pinyin(self, han, style, heteronym, errors, strict, **kwargs):
        """Return tone-marked pinyin for each character of ``han``."""
        g2pw_pinyin = self._g2pw(han)

        # No g2pW result at all: use pypinyin's original logic for the text.
        if not g2pw_pinyin:
            return super().convert(
                han, Style.TONE, heteronym, errors, strict, **kwargs)

        pinyins = []
        for i, item in enumerate(g2pw_pinyin[0]):
            if item is not None:
                pinyins.append([to_tone(item)])
                continue
            # Character not supported by g2pW: use pypinyin's original logic.
            py = super().convert(
                han[i], Style.TONE, heteronym, errors, strict, **kwargs)
            pinyins.extend(py)

        return pinyins
|
88 |
+
|
89 |
+
|
90 |
+
def _remove_dup_items(lst, remove_empty=False):
|
91 |
+
new_lst = []
|
92 |
+
for item in lst:
|
93 |
+
if remove_empty and not item:
|
94 |
+
continue
|
95 |
+
if item not in new_lst:
|
96 |
+
new_lst.append(item)
|
97 |
+
return new_lst
|
98 |
+
|
99 |
+
|
100 |
+
def _remove_dup_and_empty(lst_list):
    """De-duplicate each inner list and drop its empty items.

    An inner list that ends up empty is replaced by ``['']`` so the outer
    list keeps one entry per input entry.
    """
    return [
        _remove_dup_items(lst, remove_empty=True) or ['']
        for lst in lst_list
    ]
|
110 |
+
|
111 |
+
|
112 |
+
def cache_dict(polyphonic_dict, file_path):
    """Pickle ``polyphonic_dict`` to ``file_path``, overwriting any existing file."""
    payload = pickle.dumps(polyphonic_dict)
    with open(file_path, "wb") as cache_file:
        cache_file.write(payload)
|
115 |
+
|
116 |
+
|
117 |
+
def get_dict():
    """Return the polyphonic dictionary, using the pickle cache if present.

    When ``CACHE_PATH`` does not exist, the dictionary is built with
    ``read_dict`` and written to the cache before being returned.
    """
    if not os.path.exists(CACHE_PATH):
        polyphonic_dict = read_dict()
        cache_dict(polyphonic_dict, CACHE_PATH)
        return polyphonic_dict

    with open(CACHE_PATH, "rb") as cache_file:
        return pickle.load(cache_file)
|
126 |
+
|
127 |
+
|
128 |
+
def _load_polyphonic_file(path, polyphonic_dict):
    """Merge the ``key: value`` lines of ``path`` into ``polyphonic_dict``."""
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines instead of failing on the unpack below.
            if not line.strip():
                continue
            # Split on the first colon only, so a colon inside the value
            # does not break the key/value unpacking.
            key, value_str = line.split(':', 1)
            # NOTE(review): eval() runs arbitrary expressions from the data
            # file. It is kept to preserve behaviour; if the values are
            # always plain literals, ast.literal_eval would be the safe
            # replacement - confirm against the data files.
            polyphonic_dict[key.strip()] = eval(value_str.strip())


def read_dict():
    """Read the polyphonic dictionary from the two bundled files.

    ``PP_DICT_PATH`` is loaded first, then ``PP_FIX_DICT_PATH``; entries in
    the second file overwrite entries with the same key from the first.

    Returns:
        Dict mapping each key (stripped text before the first colon) to the
        evaluated value after it.
    """
    polyphonic_dict = {}
    _load_polyphonic_file(PP_DICT_PATH, polyphonic_dict)
    _load_polyphonic_file(PP_FIX_DICT_PATH, polyphonic_dict)
    return polyphonic_dict
|
145 |
+
|
146 |
+
|
147 |
+
def correct_pronunciation(word,word_pinyins):
    """Return the dictionary override for ``word``, if there is one.

    Args:
        word: Word to look up in the module-level ``pp_dict``.
        word_pinyins: Pinyin list to return when there is no override.
    """
    return pp_dict.get(word, word_pinyins)
|
152 |
+
|
153 |
+
|
154 |
+
pp_dict = get_dict()
|
GPT_SoVITS/text/g2pw/onnx_api.py
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This code is modified from https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw
|
2 |
+
# This code is modified from https://github.com/GitYCC/g2pW
|
3 |
+
|
4 |
+
import warnings
|
5 |
+
warnings.filterwarnings("ignore")
|
6 |
+
import json
|
7 |
+
import os
|
8 |
+
import zipfile,requests
|
9 |
+
from typing import Any
|
10 |
+
from typing import Dict
|
11 |
+
from typing import List
|
12 |
+
from typing import Tuple
|
13 |
+
|
14 |
+
import numpy as np
|
15 |
+
import onnxruntime
|
16 |
+
onnxruntime.set_default_logger_severity(3)
|
17 |
+
from opencc import OpenCC
|
18 |
+
from transformers import AutoTokenizer
|
19 |
+
from pypinyin import pinyin
|
20 |
+
from pypinyin import Style
|
21 |
+
|
22 |
+
from .dataset import get_char_phoneme_labels
|
23 |
+
from .dataset import get_phoneme_labels
|
24 |
+
from .dataset import prepare_onnx_input
|
25 |
+
from .utils import load_config
|
26 |
+
from ..zh_normalization.char_convert import tranditional_to_simplified
|
27 |
+
|
28 |
+
model_version = '1.1'
|
29 |
+
|
30 |
+
|
31 |
+
def predict(session, onnx_input: Dict[str, Any],
            labels: List[str]) -> Tuple[List[str], List[float]]:
    """Run the ONNX session and pick the best label for each sample.

    Args:
        session: Object with a ``run(output_names, feeds)`` method; the
            first output is read as a 2-D score array.
        onnx_input: Dict with the keys produced by ``prepare_onnx_input``.
        labels: Label for each column of the score array.

    Returns:
        ``(predictions, confidences)``: the arg-max label of every row and
        the score at that position.
    """
    # Two input names differ from the dict keys (singular vs. plural).
    feeds = {
        "input_ids": onnx_input['input_ids'],
        "token_type_ids": onnx_input['token_type_ids'],
        "attention_mask": onnx_input['attention_masks'],
        "phoneme_mask": onnx_input['phoneme_masks'],
        "char_ids": onnx_input['char_ids'],
        "position_ids": onnx_input['position_ids']
    }
    probs = session.run([], feeds)[0]

    best = np.argmax(probs, axis=1).tolist()
    all_preds = [labels[index] for index in best]
    all_confidences = [row[index] for index, row in zip(best, probs.tolist())]

    return all_preds, all_confidences
|
52 |
+
|
53 |
+
|
54 |
+
def download_and_decompress(model_dir: str='G2PWModel/'):
    """Make sure the g2pW model directory exists, downloading it if needed.

    When ``model_dir`` is missing, ``G2PWModel_1.1.zip`` is downloaded next to
    it, extracted, and the extracted ``G2PWModel_1.1`` folder is renamed to
    ``model_dir``.

    Args:
        model_dir: Directory expected to hold the model files. A trailing
            path separator is accepted.

    Returns:
        ``model_dir`` exactly as passed in.

    Raises:
        requests.HTTPError: If the download responds with an error status.
    """
    if not os.path.exists(model_dir):
        # normpath drops a trailing separator; without it
        # dirname('G2PWModel/') is 'G2PWModel' itself and the archive would be
        # written into the very directory that does not exist yet.
        target_dir = os.path.normpath(model_dir)
        parent_directory = os.path.dirname(target_dir) or os.curdir
        os.makedirs(parent_directory, exist_ok=True)
        zip_dir = os.path.join(parent_directory, "G2PWModel_1.1.zip")
        extract_dir = os.path.join(parent_directory, "G2PWModel_1.1")
        print("Downloading g2pw model...")
        modelscope_url = "https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip"
        with requests.get(modelscope_url, stream=True) as r:
            r.raise_for_status()
            with open(zip_dir, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        print("Extracting g2pw model...")
        with zipfile.ZipFile(zip_dir, "r") as zip_ref:
            zip_ref.extractall(parent_directory)

        # Rename to the directory the caller asked for so that the returned
        # path is the one that now exists.
        os.rename(extract_dir, target_dir)

    return model_dir
|
76 |
+
|
77 |
+
class G2PWOnnxConverter:
    """Per-character pronunciation lookup backed by the g2pW ONNX model.

    Calling an instance with a sentence (or a list of sentences) returns, for
    every sentence, a list with one entry per character. Characters in the
    polyphonic set are resolved by the model; characters in the monophonic
    table use that table; everything else takes the pypinyin result.
    """

    def __init__(self,
                 model_dir: str='G2PWModel/',
                 style: str='bopomofo',
                 model_source: str=None,
                 enable_non_tradional_chinese: bool=False):
        """Load the ONNX session, tokenizer and lookup tables.

        Args:
            model_dir: Directory holding the model files; it is downloaded
                when missing.
            style: ``'bopomofo'`` or ``'pinyin'``; selects how model and
                monophonic-table outputs are rendered.
            model_source: Tokenizer name or path; falls back to the
                ``model_source`` value of the loaded config when falsy.
            enable_non_tradional_chinese: When true, sentences are passed
                through OpenCC ``s2tw`` before processing.

        Raises:
            KeyError: If ``style`` is not one of the supported values.
        """
        uncompress_path = download_and_decompress(model_dir)

        sess_options = onnxruntime.SessionOptions()
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
        sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
        sess_options.intra_op_num_threads = 2
        model_path = os.path.join(uncompress_path, 'g2pW.onnx')
        try:
            self.session_g2pW = onnxruntime.InferenceSession(
                model_path,
                sess_options=sess_options,
                providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        except Exception:
            # Best-effort fallback to CPU only. Catching Exception rather
            # than everything lets Ctrl-C and SystemExit propagate.
            self.session_g2pW = onnxruntime.InferenceSession(
                model_path,
                sess_options=sess_options,
                providers=['CPUExecutionProvider'])
        self.config = load_config(
            config_path=os.path.join(uncompress_path, 'config.py'),
            use_default=True)

        self.model_source = model_source if model_source else self.config.model_source
        self.enable_opencc = enable_non_tradional_chinese

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_source)

        polyphonic_chars_path = os.path.join(uncompress_path,
                                             'POLYPHONIC_CHARS.txt')
        monophonic_chars_path = os.path.join(uncompress_path,
                                             'MONOPHONIC_CHARS.txt')
        self.polyphonic_chars = self._read_tsv(polyphonic_chars_path)
        # Characters listed in the model's polyphonic table that are never
        # sent to the model.
        self.non_polyphonic = {
            '一', '不', '和', '咋', '嗲', '剖', '差', '攢', '倒', '難', '奔', '勁', '拗',
            '肖', '瘙', '誒', '泊', '听', '噢'
        }
        # Characters dropped from the monophonic table.
        self.non_monophonic = {'似', '攢'}
        self.monophonic_chars = self._read_tsv(monophonic_chars_path)
        self.labels, self.char2phonemes = get_char_phoneme_labels(
            polyphonic_chars=self.polyphonic_chars
        ) if self.config.use_char_phoneme else get_phoneme_labels(
            polyphonic_chars=self.polyphonic_chars)

        self.chars = sorted(self.char2phonemes.keys())

        self.polyphonic_chars_new = set(self.chars) - self.non_polyphonic

        self.monophonic_chars_dict = {
            char: phoneme
            for char, phoneme in self.monophonic_chars
            if char not in self.non_monophonic
        }

        self.pos_tags = [
            'UNK', 'A', 'C', 'D', 'I', 'N', 'P', 'T', 'V', 'DE', 'SHI'
        ]

        with open(
                os.path.join(uncompress_path,
                             'bopomofo_to_pinyin_wo_tune_dict.json'),
                'r',
                encoding='utf-8') as fr:
            self.bopomofo_convert_dict = json.load(fr)
        self.style_convert_func = {
            'bopomofo': lambda x: x,
            'pinyin': self._convert_bopomofo_to_pinyin,
        }[style]

        with open(
                os.path.join(uncompress_path, 'char_bopomofo_dict.json'),
                'r',
                encoding='utf-8') as fr:
            self.char_bopomofo_dict = json.load(fr)

        if self.enable_opencc:
            self.cc = OpenCC('s2tw')

    @staticmethod
    def _read_tsv(path: str) -> List[List[str]]:
        """Read a UTF-8 text file and split each line on tabs."""
        with open(path, encoding='utf-8') as fr:
            content = fr.read()
        return [line.split('\t') for line in content.strip().split('\n')]

    def _convert_bopomofo_to_pinyin(self, bopomofo: str) -> str:
        """Convert a tone-suffixed bopomofo syllable to tone-suffixed pinyin.

        The last character must be a tone digit ``1``-``5``. Returns ``None``
        (after printing a warning) when the syllable without its tone is not
        in the conversion table.
        """
        tone = bopomofo[-1]
        assert tone in '12345'
        component = self.bopomofo_convert_dict.get(bopomofo[:-1])
        if component:
            return component + tone
        else:
            print(f'Warning: "{bopomofo}" cannot convert to pinyin')
            return None

    def __call__(self, sentences: List[str]) -> List[List[str]]:
        """Return one pronunciation list per input sentence.

        A single string is treated as a one-sentence batch.
        """
        if isinstance(sentences, str):
            sentences = [sentences]

        if self.enable_opencc:
            translated_sentences = []
            for sent in sentences:
                translated_sent = self.cc.convert(sent)
                # Results are indexed by character position, so the
                # conversion must not change the sentence length.
                assert len(translated_sent) == len(sent)
                translated_sentences.append(translated_sent)
            sentences = translated_sentences

        texts, query_ids, sent_ids, partial_results = self._prepare_data(
            sentences=sentences)
        if len(texts) == 0:
            # sentences no polyphonic words
            return partial_results

        onnx_input = prepare_onnx_input(
            tokenizer=self.tokenizer,
            labels=self.labels,
            char2phonemes=self.char2phonemes,
            chars=self.chars,
            texts=texts,
            query_ids=query_ids,
            use_mask=self.config.use_mask,
            window_size=None)

        preds, confidences = predict(
            session=self.session_g2pW,
            onnx_input=onnx_input,
            labels=self.labels)
        if self.config.use_char_phoneme:
            # Labels are "<char> <phoneme>" in this mode; keep the phoneme.
            preds = [pred.split(' ')[1] for pred in preds]

        results = partial_results
        for sent_id, query_id, pred in zip(sent_ids, query_ids, preds):
            results[sent_id][query_id] = self.style_convert_func(pred)

        return results

    def _prepare_data(
            self, sentences: List[str]
    ) -> Tuple[List[str], List[int], List[int], List[List[str]]]:
        """Split characters into model queries and directly resolved entries.

        Returns:
            ``(texts, query_ids, sent_ids, partial_results)``. The first three
            are parallel lists with one entry per polyphonic character: the
            sentence, the character index, and the sentence index.
            ``partial_results[s][i]`` holds the resolved pronunciation of
            character ``i`` of sentence ``s``, or ``None`` where the model
            still has to answer.
        """
        texts, query_ids, sent_ids, partial_results = [], [], [], []
        for sent_id, sent in enumerate(sentences):
            # pypinyin works better on Simplified than on Traditional Chinese
            sent_s = tranditional_to_simplified(sent)
            # NOTE(review): indexing below assumes pinyin() yields exactly one
            # entry per character of `sent` - confirm for input containing
            # runs of non-Chinese characters.
            pypinyin_result = pinyin(
                sent_s, neutral_tone_with_five=True, style=Style.TONE3)
            partial_result = [None] * len(sent)
            for i, char in enumerate(sent):
                if char in self.polyphonic_chars_new:
                    texts.append(sent)
                    query_ids.append(i)
                    sent_ids.append(sent_id)
                elif char in self.monophonic_chars_dict:
                    partial_result[i] = self.style_convert_func(
                        self.monophonic_chars_dict[char])
                else:
                    # Covers characters found in char_bopomofo_dict as well:
                    # both cases take the pypinyin reading.
                    partial_result[i] = pypinyin_result[i][0]

            partial_results.append(partial_result)
        return texts, query_ids, sent_ids, partial_results
|
GPT_SoVITS/text/g2pw/polyphonic-fix.rep
ADDED
The diff for this file is too large to render.
See raw diff
|
|
GPT_SoVITS/text/g2pw/polyphonic.rep
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
湖泊: ['hu2','po1']
|
2 |
+
地壳: ['di4','qiao4']
|
3 |
+
柏树: ['bai3','shu4']
|
4 |
+
曝光: ['bao4','guang1']
|
5 |
+
弹力: ['tan2','li4']
|
6 |
+
字帖: ['zi4','tie4']
|
7 |
+
口吃: ['kou3','chi1']
|
8 |
+
包扎: ['bao1','za1']
|
9 |
+
哪吒: ['ne2','zha1']
|
10 |
+
说服: ['shuo1','fu2']
|
11 |
+
识字: ['shi2','zi4']
|
12 |
+
骨头: ['gu3','tou5']
|
13 |
+
对称: ['dui4','chen4']
|
14 |
+
口供: ['kou3','gong4']
|
15 |
+
抹布: ['ma1','bu4']
|
16 |
+
露背: ['lu4','bei4']
|
17 |
+
圈养: ['juan4', 'yang3']
|
18 |
+
眼眶: ['yan3', 'kuang4']
|
19 |
+
品行: ['pin3','xing2']
|
20 |
+
颤抖: ['chan4','dou3']
|
21 |
+
差不多: ['cha4','bu5','duo1']
|
22 |
+
鸭绿江: ['ya1','lu4','jiang1']
|
23 |
+
撒切尔: ['sa4','qie4','er3']
|
24 |
+
比比皆是: ['bi3','bi3','jie1','shi4']
|
25 |
+
身无长物: ['shen1','wu2','chang2','wu4']
|
26 |
+
手里: ['shou2','li3']
|
27 |
+
关卡: ['guan1','qia3']
|
28 |
+
怀揣: ['huai2','chuai1']
|
29 |
+
挑剔: ['tiao1','ti4']
|
30 |
+
供称: ['gong4','cheng1']
|
31 |
+
作坊: ['zuo1', 'fang5']
|
32 |
+
中医: ['zhong1','yi1']
|
33 |
+
嚷嚷: ['rang1','rang5']
|
34 |
+
商厦: ['shang1','sha4']
|
35 |
+
大厦: ['da4','sha4']
|
36 |
+
刹车: ['sha1','che1']
|
37 |
+
嘚瑟: ['de4','se5']
|
38 |
+
朝鲜: ['chao2','xian3']
|
39 |
+
阿房宫: ['e1','pang2','gong1']
|
40 |
+
阿胶: ['e1','jiao1']
|
41 |
+
咖喱: ['ga1','li5']
|
42 |
+
时分: ['shi2','fen1']
|
43 |
+
蚌埠: ['beng4','bu4']
|
44 |
+
驯服: ['xun4','fu2']
|
45 |
+
幸免于难: ['xing4','mian3','yu2','nan4']
|
46 |
+
恶行: ['e4','xing2']
|
47 |
+
唉: ['ai4']
|
48 |
+
扎实: ['zha1','shi2']
|
49 |
+
干将: ['gan4','jiang4']
|
50 |
+
陈威行: ['chen2', 'wei1', 'hang2']
|
51 |
+
郭晟: ['guo1', 'sheng4']
|
52 |
+
中标: ['zhong4', 'biao1']
|
53 |
+
抗住: ['kang2', 'zhu4']
|
GPT_SoVITS/text/g2pw/utils.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""
|
15 |
+
Credits
|
16 |
+
This code is modified from https://github.com/GitYCC/g2pW
|
17 |
+
"""
|
18 |
+
import os
|
19 |
+
import re
|
20 |
+
|
21 |
+
|
22 |
+
def wordize_and_map(text: str):
    """Split text into words and build character/word index maps.

    Runs of ASCII letters and digits form one word, every other non-space
    character is a word of its own, and runs of the space character are
    skipped.

    Args:
        text: Input string.

    Returns:
        ``(words, text_to_word, word_to_text)`` where ``text_to_word[i]`` is
        the word index of character ``i`` (``None`` for spaces) and
        ``word_to_text[w]`` is the ``(start, end)`` character span of word
        ``w`` with ``end`` exclusive.
    """
    # Scan by offset instead of repeatedly re-slicing the remaining text,
    # which copied the tail of the string for every word.
    scanner = re.compile(r'( +)|([a-zA-Z0-9]+)')

    words = []
    index_map_from_text_to_word = []
    index_map_from_word_to_text = []

    pos = 0
    end = len(text)
    while pos < end:
        match = scanner.match(text, pos)
        if match is not None and match.group(1) is not None:
            index_map_from_text_to_word.extend([None] * (match.end() - pos))
            pos = match.end()
            continue

        # Either an ASCII alphanumeric run or a single other character.
        stop = match.end() if match is not None else pos + 1
        index_map_from_word_to_text.append((pos, stop))
        index_map_from_text_to_word.extend([len(words)] * (stop - pos))
        words.append(text[pos:stop])
        pos = stop

    return words, index_map_from_text_to_word, index_map_from_word_to_text
|
56 |
+
|
57 |
+
|
58 |
+
def tokenize_and_map(tokenizer, text: str):
    """Tokenize text word by word and map tokens to character spans.

    Args:
        tokenizer: Object exposing ``tokenize(word)`` that returns a list of
            token strings, with continuation pieces prefixed by ``##``.
        text: Input string.

    Returns:
        ``(tokens, text_to_token, token_to_text)`` where ``text_to_token[i]``
        is the token index covering character ``i`` (``None`` for spaces) and
        ``token_to_text[t]`` is the ``(start, end)`` character span of token
        ``t`` with ``end`` exclusive.
    """
    words, text2word, word2text = wordize_and_map(text=text)

    tokens = []
    index_map_from_token_to_text = []
    for word, (word_start, word_end) in zip(words, word2text):
        pieces = tokenizer.tokenize(word)

        if len(pieces) == 0 or pieces == ['[UNK]']:
            # An unknown word becomes one [UNK] token spanning the whole word.
            tokens.append('[UNK]')
            index_map_from_token_to_text.append((word_start, word_end))
            continue

        cursor = word_start
        for piece in pieces:
            # The "##" continuation marker is not part of the text itself.
            piece_len = len(re.sub(r'^##', '', piece))
            piece_end = cursor + piece_len
            index_map_from_token_to_text.append((cursor, piece_end))
            tokens.append(piece)
            cursor = piece_end

    # Reuse the word-level map in place: word indices become token indices,
    # space positions keep their None.
    index_map_from_text_to_token = text2word
    for token_index, span in enumerate(index_map_from_token_to_text):
        for char_pos in range(span[0], span[1]):
            index_map_from_text_to_token[char_pos] = token_index

    return tokens, index_map_from_text_to_token, index_map_from_token_to_text
|
84 |
+
|
85 |
+
|
86 |
+
def _load_config(config_path: os.PathLike):
    """Execute the Python file at ``config_path`` and return it as a module.

    NOTE: the file is run as ordinary Python code, so only trusted config
    files should be loaded.
    """
    import importlib.util
    spec = importlib.util.spec_from_file_location('__init__', config_path)
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)
    return config


# Fallback values applied by load_config(..., use_default=True) for settings
# that a config file does not define.
default_config_dict = {
    'manual_seed': 1313,
    'model_source': 'bert-base-chinese',
    'window_size': 32,
    'num_workers': 2,
    'use_mask': True,
    'use_char_phoneme': False,
    'use_conditional': True,
    'param_conditional': {
        'affect_location': 'softmax',
        'bias': True,
        'char-linear': True,
        'pos-linear': False,
        'char+pos-second': True,
        'char+pos-second_lowrank': False,
        'lowrank_size': 0,
        'char+pos-second_fm': False,
        'fm_size': 0,
        'fix_mode': None,
        'count_json': 'train.count.json'
    },
    'lr': 5e-5,
    'val_interval': 200,
    'num_iter': 10000,
    'use_focal': False,
    'param_focal': {
        'alpha': 0.0,
        'gamma': 0.7
    },
    'use_pos': True,
    # Key previously carried a trailing space, so `param_pos` was never
    # filled in or merged.
    'param_pos': {
        'weight': 0.1,
        'pos_joint_training': True,
        'train_pos_path': 'train.pos',
        'valid_pos_path': 'dev.pos',
        'test_pos_path': 'test.pos'
    }
}


def load_config(config_path: os.PathLike, use_default: bool=False):
    """Load a config module, optionally filling in missing defaults.

    Args:
        config_path: Path of the Python config file.
        use_default: When true, every key of ``default_config_dict`` that the
            module lacks is added; for dict-valued defaults that the module
            does define, only the missing sub-keys are added.

    Returns:
        The loaded module object.
    """
    import copy

    config = _load_config(config_path)
    if use_default:
        for attr, val in default_config_dict.items():
            if not hasattr(config, attr):
                # Copy so that later edits to a loaded config cannot alter
                # the shared defaults.
                setattr(config, attr, copy.deepcopy(val))
            elif isinstance(val, dict):
                d = getattr(config, attr)
                for dict_k, dict_v in val.items():
                    if dict_k not in d:
                        d[dict_k] = copy.deepcopy(dict_v)
    return config
|
GPT_SoVITS/text/hindi.py
ADDED
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from typing import List, Set
|
3 |
+
|
4 |
+
# Hindi text normalization rules
|
5 |
+
_hindi_numbers = "०१२३४५६७८९"
|
6 |
+
_english_numbers = "0123456789"
|
7 |
+
_number_map = str.maketrans(_hindi_numbers, _english_numbers)
|
8 |
+
|
9 |
+
# Common abbreviations and their expansions
|
10 |
+
_abbreviations = {
|
11 |
+
# Titles and honorifics
|
12 |
+
"डॉ": "डॉक्टर",
|
13 |
+
"श्री": "श्रीमान",
|
14 |
+
"श्रीमती": "श्रीमती",
|
15 |
+
"कु": "कुमारी",
|
16 |
+
"प्रो": "प्रोफेसर",
|
17 |
+
"चौ": "चौधरी",
|
18 |
+
"स्व": "स्वर्गीय",
|
19 |
+
|
20 |
+
# Common organizations
|
21 |
+
"भा": "भारत",
|
22 |
+
"सं": "संघ",
|
23 |
+
"वि": "विश्वविद्यालय",
|
24 |
+
"म": "महा",
|
25 |
+
|
26 |
+
# Common words
|
27 |
+
"क्र": "क्रमांक",
|
28 |
+
"रु": "रुपये",
|
29 |
+
"ज़ि": "ज़िला",
|
30 |
+
"उ": "उत्तर",
|
31 |
+
"द": "दक्षिण",
|
32 |
+
"पू": "पूर्व",
|
33 |
+
"प": "पश्चिम"
|
34 |
+
}
|
35 |
+
|
36 |
+
# Common conjunct consonants (consonant clusters)
|
37 |
+
_common_conjuncts = {
|
38 |
+
# क-based conjuncts
|
39 |
+
"क्क", "क्त", "क्र", "क्ल", "क्व", "क्ष", "क्स",
|
40 |
+
# ग-based conjuncts
|
41 |
+
"ग्र", "ग्ल", "ग्व", "ग्न", "ग्म",
|
42 |
+
# च-based conjuncts
|
43 |
+
"च्च", "च्छ", "च्य", "च्र",
|
44 |
+
# ज-based conjuncts
|
45 |
+
"ज्ज", "ज्ञ", "ज्य", "ज्र", "ज्व",
|
46 |
+
# त-based conjuncts
|
47 |
+
"त्त", "त्र", "त्य", "त्व", "त्न", "त्म",
|
48 |
+
# द-based conjuncts
|
49 |
+
"द्द", "द्य", "द्व", "द्र", "द्म", "द्ध",
|
50 |
+
# न-based conjuncts
|
51 |
+
"न्न", "न्त", "न्द", "न्य", "न्र", "न्व",
|
52 |
+
# प-based conjuncts
|
53 |
+
"प्प", "प्त", "प्र", "प्ल", "प्स",
|
54 |
+
# ब-based conjuncts
|
55 |
+
"ब्र", "ब्ल", "ब्ज",
|
56 |
+
# म-based conjuncts
|
57 |
+
"म्प", "म्ब", "म्म", "म्ल", "म्र",
|
58 |
+
# य-based conjuncts
|
59 |
+
"य्य", "य्र",
|
60 |
+
# र-based conjuncts (reph forms)
|
61 |
+
"र्क", "र्ग", "र्च", "र्ज", "र्त", "र्द", "र्प", "र्ब", "र्म", "र्य", "र्ल", "र्व", "र्श", "र्स", "र्ह",
|
62 |
+
# ल-based conjuncts
|
63 |
+
"ल्क", "ल्ग", "ल्ट", "ल्ड", "ल्प", "ल्म", "ल्ल", "ल्व",
|
64 |
+
# श-based conjuncts
|
65 |
+
"श्च", "श्न", "श्प", "श्म", "श्य", "श्र", "श्ल", "श्व",
|
66 |
+
# स-based conjuncts
|
67 |
+
"स्क", "स्ट", "स्त", "स्थ", "स्न", "स्प", "स्फ", "स्म", "स्य", "स्र", "स्व", "स्स",
|
68 |
+
# ह-based conjuncts
|
69 |
+
"ह्य", "ह्र", "ह्व", "ह्ल", "ह्न", "ह्म"
|
70 |
+
}
|
71 |
+
|
72 |
+
def _is_final_position(text: str, pos: int) -> bool:
    """Return True when ``pos`` is the last index of ``text`` or is directly
    followed by a space, comma, period, ``!``, ``?`` or hyphen."""
    if pos == len(text) - 1:
        return True
    follower = text[pos + 1]
    return follower in (' ', ',', '.', '!', '?', '-')
|
75 |
+
|
76 |
+
def _is_light_syllable(text: str, pos: int) -> bool:
    """Return True unless the character after ``pos`` is a dependent vowel
    sign (U+093E-U+094C) or the virama (U+094D).

    Positions at or beyond the last index always count as light.
    """
    if pos + 1 >= len(text):
        return True
    # U+093E..U+094C plus U+094D is one contiguous code point range.
    return not ('\u093E' <= text[pos + 1] <= '\u094D')
|
82 |
+
|
83 |
+
def get_schwa_deletion_positions(text: str) -> Set[int]:
    """Return the indices in ``text`` whose consonant drops its inherent vowel.

    A position is reported when it holds a consonant (U+0915-U+0939), the
    next character of the same whitespace-delimited word is also a consonant,
    and at least two characters follow it inside that word.

    Args:
        text: Devanagari text; words are separated by whitespace.

    Returns:
        Set of character offsets into ``text``.
    """
    # NOTE(review): earlier revisions also listed a word-final rule and a
    # consonant+virama rule, but both sat behind a guard that excluded them,
    # so they never fired. They are left out here to keep the output
    # unchanged - confirm whether they should be enabled.
    positions = set()

    # finditer supplies each word's real offset; text.find(word) returned the
    # first occurrence, which is wrong for repeated words and for words that
    # also appear inside an earlier word.
    for word_match in re.finditer(r'\S+', text):
        word = word_match.group()
        word_start = word_match.start()

        for i in range(len(word) - 2):
            if ('\u0915' <= word[i] <= '\u0939' and
                    '\u0915' <= word[i + 1] <= '\u0939'):
                positions.add(word_start + i)

    return positions
|
121 |
+
|
122 |
+
def normalize_hindi_text(text: str) -> str:
    """Normalize Hindi text before phoneme conversion.

    Steps, in order: translate Devanagari digits to ASCII digits, expand
    stand-alone abbreviations, collapse whitespace runs to one space, replace
    chandrabindu with anusvara, strip the nukta from the listed letters, and
    finally drop every character that is not Devanagari, whitespace or one of
    ``. , ! ? -``.

    Args:
        text: Raw input text.

    Returns:
        The normalized text with leading and trailing whitespace removed.
    """
    # NOTE(review): the final filter below also removes the ASCII digits
    # produced here - confirm whether digits are meant to survive.
    text = text.translate(_number_map)

    # Expand abbreviations only where they stand alone. `\b` is unsuitable:
    # vowel signs and the virama are not word characters for `re`, so a
    # boundary is seen between a consonant and its vowel sign and single
    # letters were expanded inside ordinary words.
    if _abbreviations:
        word_char = r'[\w\u0900-\u0963\u0971-\u097F]'
        alternatives = '|'.join(
            re.escape(abbr)
            for abbr in sorted(_abbreviations, key=len, reverse=True))
        pattern = rf'(?<!{word_char})(?:{alternatives})(?!{word_char})'
        # One pass with a callback, so an expansion is never re-expanded.
        text = re.sub(pattern, lambda m: _abbreviations[m.group(0)], text)

    # Remove extra spaces
    text = re.sub(r'\s+', ' ', text)

    # Normalize chandrabindu to anusvara
    text = text.replace('\u0901', '\u0902')

    # Strip the nukta from ka, kha, ga, ja, dda, ddha and pha. Both spellings
    # are handled: the precomposed letters U+0958-U+095E and the base letter
    # followed by the combining nukta U+093C.
    nukta_pairs = (
        ('\u0958', '\u0915'), ('\u0959', '\u0916'), ('\u095A', '\u0917'),
        ('\u095B', '\u091C'), ('\u095C', '\u0921'), ('\u095D', '\u0922'),
        ('\u095E', '\u092B'),
    )
    for precomposed, base in nukta_pairs:
        text = text.replace(precomposed, base).replace(base + '\u093C', base)

    # Remove any non-Devanagari characters except basic punctuation
    text = re.sub(r'[^\u0900-\u097F\s.,!?-]', '', text)

    return text.strip()
|
149 |
+
|
150 |
+
def hindi_to_phonemes(text: str) -> str:
    """Convert Hindi text to a space-separated phoneme string.

    The text is normalized first. Then, character by character:

    * a space becomes ``SP``; ``. , ! ? -`` are emitted unchanged;
    * a consonant (U+0915-U+0939) is emitted, followed by its vowel sign if
      one is attached, otherwise by the implicit vowel ``अ`` unless the
      position is a schwa-deletion position or a virama follows;
    * independent vowels (U+0904-U+0914), anusvara and visarga are emitted
      unchanged;
    * anything else, including the virama itself, is skipped.

    Args:
        text: Raw Hindi text.

    Returns:
        Phoneme symbols joined by single spaces.
    """
    text = normalize_hindi_text(text)
    # Computed once for the whole text; this was previously recomputed for
    # every consonant that had no vowel sign.
    schwa_deleted = get_schwa_deletion_positions(text)
    phonemes = []

    # NOTE(review): consonant clusters are emitted one consonant at a time.
    # The earlier lookup in the conjunct table compared a two-character
    # prefix against three-character entries and so never matched; emitting
    # whole conjuncts would need matching entries in the symbol table -
    # confirm before enabling.
    length = len(text)
    i = 0
    while i < length:
        char = text[i]

        # Spaces and punctuation
        if char in ' .,!?-':
            phonemes.append('SP' if char == ' ' else char)
            i += 1
            continue

        # Consonants
        if '\u0915' <= char <= '\u0939':
            phonemes.append(char)
            follower = text[i + 1] if i + 1 < length else ''

            if follower == '\u094D':
                # The virama suppresses the inherent vowel, also when it is
                # the last character of the text; skip over it.
                i += 2
            elif follower and '\u093E' <= follower <= '\u094C':
                phonemes.append(follower)
                i += 2
            else:
                if i not in schwa_deleted:
                    phonemes.append('अ')
                i += 1

        # Independent vowels, anusvara and visarga
        elif '\u0904' <= char <= '\u0914' or char in ('\u0902', '\u0903'):
            phonemes.append(char)
            i += 1

        else:
            i += 1

    return ' '.join(phonemes)
|
218 |
+
|
219 |
+
def get_phoneme_sequence(text: str) -> List[str]:
    """Return the phonemes of ``text`` as a list of symbols.

    This is the whitespace-split result of ``hindi_to_phonemes``.
    """
    return hindi_to_phonemes(text).split()
|
GPT_SoVITS/text/japanese.py
ADDED
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# modified from https://github.com/CjangCjengh/vits/blob/main/text/japanese.py
|
2 |
+
import re
|
3 |
+
import os
|
4 |
+
import hashlib
|
5 |
+
try:
    import pyopenjtalk
    current_file_path = os.path.dirname(__file__)

    def get_hash(fp: str) -> str:
        """Return the hexadecimal MD5 digest of the file at ``fp``."""
        hash_md5 = hashlib.md5()
        with open(fp, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    USERDIC_CSV_PATH = os.path.join(current_file_path, "ja_userdic", "userdict.csv")
    USERDIC_BIN_PATH = os.path.join(current_file_path, "ja_userdic", "user.dict")
    USERDIC_HASH_PATH = os.path.join(current_file_path, "ja_userdic", "userdict.md5")
    # Build the compiled user dictionary when it is missing, and rebuild it
    # when the CSV no longer matches the stored MD5.
    if os.path.exists(USERDIC_CSV_PATH):
        csv_hash = get_hash(USERDIC_CSV_PATH)
        # A missing hash file counts as a mismatch. Reading it
        # unconditionally used to raise here, which skipped registering an
        # already compiled dictionary.
        stored_hash = None
        if os.path.exists(USERDIC_HASH_PATH):
            with open(USERDIC_HASH_PATH, "r", encoding='utf-8') as f:
                stored_hash = f.read()
        if not os.path.exists(USERDIC_BIN_PATH) or csv_hash != stored_hash:
            pyopenjtalk.mecab_dict_index(USERDIC_CSV_PATH, USERDIC_BIN_PATH)
            with open(USERDIC_HASH_PATH, "w", encoding='utf-8') as f:
                f.write(csv_hash)

    if os.path.exists(USERDIC_BIN_PATH):
        pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)
except Exception:
    # Failed to load the user dictionary; carry on without it. The import is
    # repeated so that a missing pyopenjtalk still surfaces as an error.
    import pyopenjtalk
|
32 |
+
|
33 |
+
|
34 |
+
from text.symbols import punctuation
|
35 |
+
# Regular expression matching Japanese without punctuation marks:
|
36 |
+
_japanese_characters = re.compile(
|
37 |
+
r"[A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]"
|
38 |
+
)
|
39 |
+
|
40 |
+
# Regular expression matching non-Japanese characters or punctuation marks:
|
41 |
+
_japanese_marks = re.compile(
|
42 |
+
r"[^A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]"
|
43 |
+
)
|
44 |
+
|
45 |
+
# List of (symbol, Japanese) pairs for marks:
|
46 |
+
_symbols_to_japanese = [(re.compile("%s" % x[0]), x[1]) for x in [("%", "パーセント")]]
|
47 |
+
|
48 |
+
|
49 |
+
# List of (consonant, sokuon) pairs:
|
50 |
+
_real_sokuon = [
|
51 |
+
(re.compile("%s" % x[0]), x[1])
|
52 |
+
for x in [
|
53 |
+
(r"Q([↑↓]*[kg])", r"k#\1"),
|
54 |
+
(r"Q([↑↓]*[tdjʧ])", r"t#\1"),
|
55 |
+
(r"Q([↑↓]*[sʃ])", r"s\1"),
|
56 |
+
(r"Q([↑↓]*[pb])", r"p#\1"),
|
57 |
+
]
|
58 |
+
]
|
59 |
+
|
60 |
+
# List of (consonant, hatsuon) pairs:
|
61 |
+
_real_hatsuon = [
|
62 |
+
(re.compile("%s" % x[0]), x[1])
|
63 |
+
for x in [
|
64 |
+
(r"N([↑↓]*[pbm])", r"m\1"),
|
65 |
+
(r"N([↑↓]*[ʧʥj])", r"n^\1"),
|
66 |
+
(r"N([↑↓]*[tdn])", r"n\1"),
|
67 |
+
(r"N([↑↓]*[kg])", r"ŋ\1"),
|
68 |
+
]
|
69 |
+
]
|
70 |
+
|
71 |
+
|
72 |
+
def post_replace_ph(ph):
    """Map a punctuation symbol to its canonical form.

    Symbols without an entry in the table are returned unchanged.
    """
    # NOTE(review): some entries map a character to itself as written here;
    # presumably those keys were full-width forms - confirm against the
    # repository copy.
    rep_map = {
        ":": ",",
        ";": ",",
        ",": ",",
        "。": ".",
        "!": "!",
        "?": "?",
        "\n": ".",
        "·": ",",
        "、": ",",
        "...": "…",
    }
    return rep_map.get(ph, ph)
|
89 |
+
|
90 |
+
|
91 |
+
def replace_consecutive_punctuation(text):
    """Collapse each run of punctuation marks to its first mark.

    The marks are the characters of the imported ``punctuation`` collection.
    """
    char_class = ''.join(map(re.escape, punctuation))
    run_pattern = f'([{char_class}])([{char_class}])+'
    return re.sub(run_pattern, r'\1', text)
|
96 |
+
|
97 |
+
|
98 |
+
def symbols_to_japanese(text):
    """Replace each symbol listed in ``_symbols_to_japanese`` with its
    Japanese reading."""
    for pattern, reading in _symbols_to_japanese:
        text = pattern.sub(reading, text)
    return text
|
102 |
+
|
103 |
+
|
104 |
+
def preprocess_jap(text, with_prosody=False):
    """Turn Japanese text into a list of phoneme and mark symbols.

    Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html

    The text is split at every character matched by ``_japanese_marks``. Each
    piece that starts with a character matched by ``_japanese_characters`` is
    converted with pyopenjtalk, and the separating mark is emitted after it
    unless that mark is a plain space.

    Args:
        text: Input text.
        with_prosody: Use ``pyopenjtalk_g2p_prosody`` (dropping its first and
            last symbol) instead of ``pyopenjtalk.g2p``.

    Returns:
        List of symbols.
    """
    # Lower-casing affects English words only.
    text = symbols_to_japanese(text).lower()
    segments = _japanese_marks.split(text)
    marks = _japanese_marks.findall(text)

    symbols = []
    for idx, segment in enumerate(segments):
        if _japanese_characters.match(segment):
            if with_prosody:
                symbols.extend(pyopenjtalk_g2p_prosody(segment)[1:-1])
            else:
                symbols.extend(pyopenjtalk.g2p(segment).split(" "))

        if idx >= len(marks):
            continue
        mark = marks[idx]
        if mark == " ":
            # Skip plain spaces to avoid unexpected UNK symbols.
            continue
        symbols.append(mark.replace(" ", ""))
    return symbols
|
125 |
+
|
126 |
+
|
127 |
+
def text_normalize(text):
    """Normalize Japanese input text.

    For now this only collapses repeated punctuation, which avoids leaking
    the reference through duplicated marks.
    """
    # todo: jap text normalize
    return replace_consecutive_punctuation(text)
|
133 |
+
|
134 |
+
# Copied from espnet https://github.com/espnet/espnet/blob/master/espnet2/text/phoneme_tokenizer.py
|
135 |
+
def pyopenjtalk_g2p_prosody(text, drop_unvoiced_vowels=True):
    """Extract a phoneme + prosody symbol sequence from full-context labels.

    Follows `Prosodic features control by symbols as input of
    sequence-to-sequence acoustic modeling for neural TTS`
    (https://doi.org/10.1587/transinf.2020EDP7104) with r9y9's tweaks.

    Emitted prosody symbols: ``^`` for the leading silence, ``$`` or ``?``
    for the trailing silence, ``_`` for a pause, ``#`` for an accent phrase
    border, ``]`` for a pitch fall and ``[`` for a pitch rise.

    Args:
        text (str): Input text.
        drop_unvoiced_vowels (bool): Treat the unvoiced vowels ``A E I O U``
            as their lower-case counterparts.

    Returns:
        List[str]: Phoneme and prosody symbols.

    Examples:
        >>> pyopenjtalk_g2p_prosody("こんにちは。")
        ['^', 'k', 'o', '[', 'N', 'n', 'i', 'ch', 'i', 'w', 'a', '$']
    """
    labels = pyopenjtalk.make_label(pyopenjtalk.run_frontend(text))
    last = len(labels) - 1

    phones = []
    for n, lab_curr in enumerate(labels):
        # current phoneme
        p3 = re.search(r"\-(.*?)\+", lab_curr).group(1)
        if drop_unvoiced_vowels and p3 in "AEIOU":
            p3 = p3.lower()

        # silence is only expected at the very start and the very end
        if p3 == "sil":
            assert n == 0 or n == last
            if n == 0:
                phones.append("^")
            elif n == last:
                # the final label tells whether the sentence is a question
                e3 = _numeric_feature_by_regex(r"!(\d+)_", lab_curr)
                if e3 == 0:
                    phones.append("$")
                elif e3 == 1:
                    phones.append("?")
            continue

        if p3 == "pau":
            phones.append("_")
            continue

        phones.append(p3)

        # accent type and position (counted forward and backward)
        a1 = _numeric_feature_by_regex(r"/A:([0-9\-]+)\+", lab_curr)
        a2 = _numeric_feature_by_regex(r"\+(\d+)\+", lab_curr)
        a3 = _numeric_feature_by_regex(r"\+(\d+)/", lab_curr)

        # number of mora in the accent phrase
        f1 = _numeric_feature_by_regex(r"/F:(\d+)_", lab_curr)

        # Safe lookahead: the last label is handled by a branch above that
        # never reaches this point.
        a2_next = _numeric_feature_by_regex(r"\+(\d+)\+", labels[n + 1])

        if a3 == 1 and a2_next == 1 and p3 in "aeiouAEIOUNcl":
            # accent phrase border
            phones.append("#")
        elif a1 == 0 and a2_next == a2 + 1 and a2 != f1:
            # pitch falling
            phones.append("]")
        elif a2 == 1 and a2_next == 2:
            # pitch rising
            phones.append("[")

    return phones
|
209 |
+
|
210 |
+
# Copied from espnet https://github.com/espnet/espnet/blob/master/espnet2/text/phoneme_tokenizer.py
|
211 |
+
def _numeric_feature_by_regex(regex, s):
    """Return the integer captured by group 1 of ``regex`` in ``s``.

    Args:
        regex: Pattern with one capturing group around an integer.
        s: Text to search.

    Returns:
        ``int`` of the first match's group 1, or the sentinel ``-50`` when
        the pattern does not occur in ``s``.
    """
    found = re.search(regex, s)
    return -50 if found is None else int(found.group(1))
|
216 |
+
|
217 |
+
def g2p(norm_text, with_prosody=True):
    """Convert normalised text into a list of phoneme symbols.

    The text is run through ``preprocess_jap`` (``with_prosody`` is passed
    straight through) and every resulting item is mapped with
    ``post_replace_ph``.

    Returns:
        A list with one entry per item produced by ``preprocess_jap``.
    """
    # TODO: implement tones and word2ph
    return [post_replace_ph(ph) for ph in preprocess_jap(norm_text, with_prosody)]
|
222 |
+
|
223 |
+
|
224 |
+
if __name__ == "__main__":
    # Manual smoke run: print the phonemes for a mixed Latin/Japanese sentence.
    print(g2p("Hello.こんにちは!今日もNiCe天気ですね!tokyotowerに行きましょう!"))
|
GPT_SoVITS/text/korean.py
ADDED
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# reference: https://github.com/ORI-Muchim/MB-iSTFT-VITS-Korean/blob/main/text/korean.py
|
2 |
+
|
3 |
+
import re
|
4 |
+
from jamo import h2j, j2hcj
|
5 |
+
import ko_pron
|
6 |
+
from g2pk2 import G2p
|
7 |
+
|
8 |
+
from text.symbols2 import symbols
|
9 |
+
|
10 |
+
# This is a list of Korean classifiers preceded by pure Korean numerals.
|
11 |
+
_korean_classifiers = '군데 권 개 그루 닢 대 두 마리 모 모금 뭇 발 발짝 방 번 벌 보루 살 수 술 시 쌈 움큼 정 짝 채 척 첩 축 켤레 톨 통'
|
12 |
+
|
13 |
+
# List of (hangul, hangul divided) pairs:
|
14 |
+
_hangul_divided = [
    (re.compile(compound), parts)
    for compound, parts in (
        # The compound final consonants (ㄳ ㄵ ㄶ ㄺ ㄻ ㄼ ㄽ ㄾ ㄿ ㅀ ㅄ) are not
        # listed: their entries were disabled with the note
        # "g2pk2, A Syllable-ending Rule".
        ('ㅘ', 'ㅗㅏ'),
        ('ㅙ', 'ㅗㅐ'),
        ('ㅚ', 'ㅗㅣ'),
        ('ㅝ', 'ㅜㅓ'),
        ('ㅞ', 'ㅜㅔ'),
        ('ㅟ', 'ㅜㅣ'),
        ('ㅢ', 'ㅡㅣ'),
        ('ㅑ', 'ㅣㅏ'),
        ('ㅒ', 'ㅣㅐ'),
        ('ㅕ', 'ㅣㅓ'),
        ('ㅖ', 'ㅣㅔ'),
        ('ㅛ', 'ㅣㅗ'),
        ('ㅠ', 'ㅣㅜ'),
    )
]
|
40 |
+
|
41 |
+
# List of (Latin alphabet, hangul) pairs:
|
42 |
+
_latin_to_hangul = [
    # Case-insensitive single-letter patterns paired with their Hangul text.
    (re.compile(letter, re.IGNORECASE), hangul)
    for letter, hangul in (
        ('a', '에이'),
        ('b', '비'),
        ('c', '시'),
        ('d', '디'),
        ('e', '이'),
        ('f', '에프'),
        ('g', '지'),
        ('h', '에이치'),
        ('i', '아이'),
        ('j', '제이'),
        ('k', '케이'),
        ('l', '엘'),
        ('m', '엠'),
        ('n', '엔'),
        ('o', '오'),
        ('p', '피'),
        ('q', '큐'),
        ('r', '아르'),
        ('s', '에스'),
        ('t', '티'),
        ('u', '유'),
        ('v', '브이'),
        ('w', '더블유'),
        ('x', '엑스'),
        ('y', '와이'),
        ('z', '제트'),
    )
]
|
70 |
+
|
71 |
+
# List of (ipa, lazy ipa) pairs:
|
72 |
+
_ipa_to_lazy_ipa = [
    # Each IPA sequence is compiled case-insensitively and paired with its
    # simplified ("lazy") replacement; an empty replacement deletes the match.
    (re.compile(ipa, re.IGNORECASE), lazy)
    for ipa, lazy in (
        ('t͡ɕ', 'ʧ'),
        ('d͡ʑ', 'ʥ'),
        ('ɲ', 'n^'),
        ('ɕ', 'ʃ'),
        ('ʷ', 'w'),
        ('ɭ', 'l`'),
        ('ʎ', 'ɾ'),
        ('ɣ', 'ŋ'),
        ('ɰ', 'ɯ'),
        ('ʝ', 'j'),
        ('ʌ', 'ə'),
        ('ɡ', 'g'),
        ('\u031a', '#'),
        ('\u0348', '='),
        ('\u031e', ''),
        ('\u0320', ''),
        ('\u0339', ''),
    )
]
|
91 |
+
|
92 |
+
|
93 |
+
# 'ㅇㅡㄹ' or 'ㄹㅡㄹ', then exactly one space, then 'ㄹ'.
_G2PK2_EUL_RIEUL = re.compile('(ㅇㅡㄹ|ㄹㅡㄹ) ㄹ')


def fix_g2pk2_error(text):
    """Rewrite 'ㄹ' to 'ㄴ' when it follows 'ㅇㅡㄹ ' or 'ㄹㅡㄹ '.

    Every non-overlapping, left-to-right occurrence of 'ㅇㅡㄹ ㄹ' or
    'ㄹㅡㄹ ㄹ' has its final 'ㄹ' replaced by 'ㄴ'; all other text is
    returned unchanged.

    Args:
        text: Input string (the patterns are jamo sequences).

    Returns:
        The corrected string; '' for empty input.
    """
    # A single regex pass replaces the manual index loop, which rebuilt the
    # result one character at a time with ``+=``.
    return _G2PK2_EUL_RIEUL.sub(r'\1 ㄴ', text)
|
106 |
+
|
107 |
+
|
108 |
+
def latin_to_hangul(text):
    """Apply every substitution in the ``_latin_to_hangul`` table to ``text``.

    The table entries are applied one after another, in table order, and the
    fully substituted string is returned.
    """
    for pattern, hangul in _latin_to_hangul:
        text = pattern.sub(hangul, text)
    return text
|
112 |
+
|
113 |
+
|
114 |
+
def divide_hangul(text):
    """Decompose ``text`` with ``h2j``/``j2hcj`` and split compound jamo.

    After the jamo conversion, every substitution in the ``_hangul_divided``
    table is applied in table order.
    """
    jamo_text = j2hcj(h2j(text))
    for pattern, parts in _hangul_divided:
        jamo_text = pattern.sub(parts, jamo_text)
    return jamo_text
|
119 |
+
|
120 |
+
|
121 |
+
def hangul_number(num, sino=True):
    '''Spell out a string of decimal digits in Hangul.

    Reference https://github.com/Kyubyong/g2pK

    Args:
        num: The number as a string of digits; ',' separators are removed.
        sino: When true, every digit uses the 일/이/삼... names. When false,
            the ones place uses 한/두/세... and the tens place uses
            열/스물/서른...; all higher places still use 일/이/삼....

    Returns:
        The spelled-out number. '0' gives '영', '20' with ``sino=False``
        gives '스무', and an empty string gives ''. Numbers longer than 24
        digits are read digit by digit instead of raising.
    '''
    num = re.sub(',', '', num)

    if num == '0':
        return '영'
    if not sino and num == '20':
        return '스무'

    digits = '123456789'
    digit2name = dict(zip(digits, '일이삼사오육칠팔구'))
    digit2mod = dict(zip(digits, '한 두 세 네 다섯 여섯 일곱 여덟 아홉'.split()))
    digit2dec = dict(zip(digits, '열 스물 서른 마흔 쉰 예순 일흔 여든 아흔'.split()))

    # Unit inside a four-digit group, and the unit of each group of 10^4.
    small_units = ('', '십', '백', '천')
    big_units = ('', '만', '억', '조', '경', '해')

    if len(num) > 4 * len(big_units):
        # No unit names left for such a long number: read it digit by digit
        # rather than failing on an unassigned name.
        with_zero = dict(digit2name)
        with_zero['0'] = '영'
        return ''.join(with_zero.get(d, '') for d in num)

    spelledout = []
    for index, digit in enumerate(num):
        place = len(num) - index - 1
        group, within = divmod(place, 4)

        if digit == '0':
            # A zero closing a group still voices the group unit (만, 억...)
            # when one of the group's higher digits was spoken.
            if within == 0 and ''.join(spelledout[-3:]) != '':
                spelledout.append(big_units[group])
            else:
                spelledout.append('')
            continue

        if within:
            if not sino and place == 1:
                name = digit2dec.get(digit, '')
            else:
                # 십/백/천 never take a leading 일, in any group.
                stem = '' if digit == '1' else digit2name.get(digit, '')
                name = stem + small_units[within]
        elif not sino and place == 0:
            name = digit2mod.get(digit, '')
        elif group == 1 and digit == '1' and not any(spelledout):
            # A bare leading 1 in the 만 place is just '만' (10000 -> 만).
            # With higher digits before it the 일 is kept, so 210000 is
            # '이십일만' rather than '이십만'.
            name = '만'
        else:
            name = digit2name.get(digit, '') + big_units[group]
        spelledout.append(name)

    return ''.join(spelledout)
|
198 |
+
|
199 |
+
|
200 |
+
def number_to_hangul(text):
    '''Replace the digits in ``text`` with their Hangul reading.

    Reference https://github.com/Kyubyong/g2pK

    A run of digits (commas allowed) directly followed by Hangul is spelled
    with ``hangul_number``. ``sino=False`` is used when the first one or two
    following syllables are an entry of ``_korean_classifiers``; otherwise
    ``sino=True``. Any digit left over is then read one digit at a time.

    Returns:
        ``text`` with every ASCII digit replaced.
    '''
    # Compare against whole classifier entries. A substring test on the
    # space-separated string would also accept fragments such as '리' or '마'.
    classifiers = set(_korean_classifiers.split())

    def _spell(match):
        num, following = match.group(1), match.group(2)
        native = following[:2] in classifiers or following[0] in classifiers
        return hangul_number(num, sino=not native) + following

    # Substitute each match where it stands. Replacing by string value could
    # hit a shorter token inside a longer one (e.g. '1개' inside '11개').
    text = re.sub(r'(\d[\d,]*)([\uac00-\ud71f]+)', _spell, text)

    # digit by digit for remaining digits
    return text.translate(str.maketrans('0123456789', '영일이삼사오육칠팔구'))
|
216 |
+
|
217 |
+
|
218 |
+
def korean_to_lazy_ipa(text):
    """Convert Korean text to the simplified IPA defined by ``_ipa_to_lazy_ipa``.

    Steps: ``latin_to_hangul``, ``number_to_hangul``, romanise every Hangul
    run with ``ko_pron`` in 'ipa' mode, then apply the ``_ipa_to_lazy_ipa``
    substitutions in table order.
    """
    def _romanise(match):
        # Keep only the text before the first '] ~ [' separator in the
        # ko_pron result.
        return ko_pron.romanise(match.group(0), 'ipa').split('] ~ [')[0]

    text = number_to_hangul(latin_to_hangul(text))
    text = re.sub('[\uac00-\ud7af]+', _romanise, text)
    for pattern, lazy in _ipa_to_lazy_ipa:
        text = pattern.sub(lazy, text)
    return text
|
225 |
+
|
226 |
+
_g2p = G2p()


def korean_to_ipa(text):
    """Convert Korean text to IPA.

    The text goes through ``latin_to_hangul``, ``number_to_hangul``, the
    shared ``_g2p`` instance, ``fix_g2pk2_error`` and ``korean_to_lazy_ipa``
    in that order; finally 'ʧ' becomes 'tʃ' and 'ʥ' becomes 'dʑ'.
    """
    pipeline = (
        latin_to_hangul,
        number_to_hangul,
        _g2p,
        fix_g2pk2_error,
        korean_to_lazy_ipa,
    )
    for stage in pipeline:
        text = stage(text)
    return text.replace('ʧ', 'tʃ').replace('ʥ', 'dʑ')
|
234 |
+
|
235 |
+
def post_replace_ph(ph):
    """Map one phoneme or punctuation token onto the symbol inventory.

    The token is first translated through a small replacement table. If the
    result is a member of ``symbols`` it is returned, otherwise "停" is
    returned in its place.
    """
    rep_map = {
        ":": ",",
        ";": ",",
        ",": ",",
        "。": ".",
        "!": "!",
        "?": "?",
        "\n": ".",
        "·": ",",
        "、": ",",
        "...": "…",
        " ": "空",
    }
    ph = rep_map.get(ph, ph)
    return ph if ph in symbols else "停"
|
256 |
+
|
257 |
+
def g2p(text):
    """Convert Korean text into a list of symbols, one per character.

    Steps: ``latin_to_hangul``, the shared ``_g2p`` instance,
    ``divide_hangul`` and ``fix_g2pk2_error``. A '.' is appended when the
    string ends in a character in U+3131..U+3163, and every character is
    then passed through ``post_replace_ph``.
    """
    jamo = fix_g2pk2_error(divide_hangul(_g2p(latin_to_hangul(text))))
    jamo = re.sub(r'([\u3131-\u3163])$', r'\1.', jamo)
    # The result stays a list of symbols; it is not joined back into a string.
    return [post_replace_ph(ch) for ch in jamo]
|
GPT_SoVITS/text/opencpop-strict.txt
ADDED
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a AA a
|
2 |
+
ai AA ai
|
3 |
+
an AA an
|
4 |
+
ang AA ang
|
5 |
+
ao AA ao
|
6 |
+
ba b a
|
7 |
+
bai b ai
|
8 |
+
ban b an
|
9 |
+
bang b ang
|
10 |
+
bao b ao
|
11 |
+
bei b ei
|
12 |
+
ben b en
|
13 |
+
beng b eng
|
14 |
+
bi b i
|
15 |
+
bian b ian
|
16 |
+
biao b iao
|
17 |
+
bie b ie
|
18 |
+
bin b in
|
19 |
+
bing b ing
|
20 |
+
bo b o
|
21 |
+
bu b u
|
22 |
+
ca c a
|
23 |
+
cai c ai
|
24 |
+
can c an
|
25 |
+
cang c ang
|
26 |
+
cao c ao
|
27 |
+
ce c e
|
28 |
+
cei c ei
|
29 |
+
cen c en
|
30 |
+
ceng c eng
|
31 |
+
cha ch a
|
32 |
+
chai ch ai
|
33 |
+
chan ch an
|
34 |
+
chang ch ang
|
35 |
+
chao ch ao
|
36 |
+
che ch e
|
37 |
+
chen ch en
|
38 |
+
cheng ch eng
|
39 |
+
chi ch ir
|
40 |
+
chong ch ong
|
41 |
+
chou ch ou
|
42 |
+
chu ch u
|
43 |
+
chua ch ua
|
44 |
+
chuai ch uai
|
45 |
+
chuan ch uan
|
46 |
+
chuang ch uang
|
47 |
+
chui ch ui
|
48 |
+
chun ch un
|
49 |
+
chuo ch uo
|
50 |
+
ci c i0
|
51 |
+
cong c ong
|
52 |
+
cou c ou
|
53 |
+
cu c u
|
54 |
+
cuan c uan
|
55 |
+
cui c ui
|
56 |
+
cun c un
|
57 |
+
cuo c uo
|
58 |
+
da d a
|
59 |
+
dai d ai
|
60 |
+
dan d an
|
61 |
+
dang d ang
|
62 |
+
dao d ao
|
63 |
+
de d e
|
64 |
+
dei d ei
|
65 |
+
den d en
|
66 |
+
deng d eng
|
67 |
+
di d i
|
68 |
+
dia d ia
|
69 |
+
dian d ian
|
70 |
+
diao d iao
|
71 |
+
die d ie
|
72 |
+
ding d ing
|
73 |
+
diu d iu
|
74 |
+
dong d ong
|
75 |
+
dou d ou
|
76 |
+
du d u
|
77 |
+
duan d uan
|
78 |
+
dui d ui
|
79 |
+
dun d un
|
80 |
+
duo d uo
|
81 |
+
e EE e
|
82 |
+
ei EE ei
|
83 |
+
en EE en
|
84 |
+
eng EE eng
|
85 |
+
er EE er
|
86 |
+
fa f a
|
87 |
+
fan f an
|
88 |
+
fang f ang
|
89 |
+
fei f ei
|
90 |
+
fen f en
|
91 |
+
feng f eng
|
92 |
+
fo f o
|
93 |
+
fou f ou
|
94 |
+
fu f u
|
95 |
+
ga g a
|
96 |
+
gai g ai
|
97 |
+
gan g an
|
98 |
+
gang g ang
|
99 |
+
gao g ao
|
100 |
+
ge g e
|
101 |
+
gei g ei
|
102 |
+
gen g en
|
103 |
+
geng g eng
|
104 |
+
gong g ong
|
105 |
+
gou g ou
|
106 |
+
gu g u
|
107 |
+
gua g ua
|
108 |
+
guai g uai
|
109 |
+
guan g uan
|
110 |
+
guang g uang
|
111 |
+
gui g ui
|
112 |
+
gun g un
|
113 |
+
guo g uo
|
114 |
+
ha h a
|
115 |
+
hai h ai
|
116 |
+
han h an
|
117 |
+
hang h ang
|
118 |
+
hao h ao
|
119 |
+
he h e
|
120 |
+
hei h ei
|
121 |
+
hen h en
|
122 |
+
heng h eng
|
123 |
+
hong h ong
|
124 |
+
hou h ou
|
125 |
+
hu h u
|
126 |
+
hua h ua
|
127 |
+
huai h uai
|
128 |
+
huan h uan
|
129 |
+
huang h uang
|
130 |
+
hui h ui
|
131 |
+
hun h un
|
132 |
+
huo h uo
|
133 |
+
ji j i
|
134 |
+
jia j ia
|
135 |
+
jian j ian
|
136 |
+
jiang j iang
|
137 |
+
jiao j iao
|
138 |
+
jie j ie
|
139 |
+
jin j in
|
140 |
+
jing j ing
|
141 |
+
jiong j iong
|
142 |
+
jiu j iu
|
143 |
+
ju j v
|
144 |
+
jv j v
|
145 |
+
juan j van
|
146 |
+
jvan j van
|
147 |
+
jue j ve
|
148 |
+
jve j ve
|
149 |
+
jun j vn
|
150 |
+
jvn j vn
|
151 |
+
ka k a
|
152 |
+
kai k ai
|
153 |
+
kan k an
|
154 |
+
kang k ang
|
155 |
+
kao k ao
|
156 |
+
ke k e
|
157 |
+
kei k ei
|
158 |
+
ken k en
|
159 |
+
keng k eng
|
160 |
+
kong k ong
|
161 |
+
kou k ou
|
162 |
+
ku k u
|
163 |
+
kua k ua
|
164 |
+
kuai k uai
|
165 |
+
kuan k uan
|
166 |
+
kuang k uang
|
167 |
+
kui k ui
|
168 |
+
kun k un
|
169 |
+
kuo k uo
|
170 |
+
la l a
|
171 |
+
lai l ai
|
172 |
+
lan l an
|
173 |
+
lang l ang
|
174 |
+
lao l ao
|
175 |
+
le l e
|
176 |
+
lei l ei
|
177 |
+
leng l eng
|
178 |
+
li l i
|
179 |
+
lia l ia
|
180 |
+
lian l ian
|
181 |
+
liang l iang
|
182 |
+
liao l iao
|
183 |
+
lie l ie
|
184 |
+
lin l in
|
185 |
+
ling l ing
|
186 |
+
liu l iu
|
187 |
+
lo l o
|
188 |
+
long l ong
|
189 |
+
lou l ou
|
190 |
+
lu l u
|
191 |
+
luan l uan
|
192 |
+
lun l un
|
193 |
+
luo l uo
|
194 |
+
lv l v
|
195 |
+
lve l ve
|
196 |
+
ma m a
|
197 |
+
mai m ai
|
198 |
+
man m an
|
199 |
+
mang m ang
|
200 |
+
mao m ao
|
201 |
+
me m e
|
202 |
+
mei m ei
|
203 |
+
men m en
|
204 |
+
meng m eng
|
205 |
+
mi m i
|
206 |
+
mian m ian
|
207 |
+
miao m iao
|
208 |
+
mie m ie
|
209 |
+
min m in
|
210 |
+
ming m ing
|
211 |
+
miu m iu
|
212 |
+
mo m o
|
213 |
+
mou m ou
|
214 |
+
mu m u
|
215 |
+
na n a
|
216 |
+
nai n ai
|
217 |
+
nan n an
|
218 |
+
nang n ang
|
219 |
+
nao n ao
|
220 |
+
ne n e
|
221 |
+
nei n ei
|
222 |
+
nen n en
|
223 |
+
neng n eng
|
224 |
+
ni n i
|
225 |
+
nian n ian
|
226 |
+
niang n iang
|
227 |
+
niao n iao
|
228 |
+
nie n ie
|
229 |
+
nin n in
|
230 |
+
ning n ing
|
231 |
+
niu n iu
|
232 |
+
nong n ong
|
233 |
+
nou n ou
|
234 |
+
nu n u
|
235 |
+
nuan n uan
|
236 |
+
nun n un
|
237 |
+
nuo n uo
|
238 |
+
nv n v
|
239 |
+
nve n ve
|
240 |
+
o OO o
|
241 |
+
ou OO ou
|
242 |
+
pa p a
|
243 |
+
pai p ai
|
244 |
+
pan p an
|
245 |
+
pang p ang
|
246 |
+
pao p ao
|
247 |
+
pei p ei
|
248 |
+
pen p en
|
249 |
+
peng p eng
|
250 |
+
pi p i
|
251 |
+
pian p ian
|
252 |
+
piao p iao
|
253 |
+
pie p ie
|
254 |
+
pin p in
|
255 |
+
ping p ing
|
256 |
+
po p o
|
257 |
+
pou p ou
|
258 |
+
pu p u
|
259 |
+
qi q i
|
260 |
+
qia q ia
|
261 |
+
qian q ian
|
262 |
+
qiang q iang
|
263 |
+
qiao q iao
|
264 |
+
qie q ie
|
265 |
+
qin q in
|
266 |
+
qing q ing
|
267 |
+
qiong q iong
|
268 |
+
qiu q iu
|
269 |
+
qu q v
|
270 |
+
qv q v
|
271 |
+
quan q van
|
272 |
+
qvan q van
|
273 |
+
que q ve
|
274 |
+
qve q ve
|
275 |
+
qun q vn
|
276 |
+
qvn q vn
|
277 |
+
ran r an
|
278 |
+
rang r ang
|
279 |
+
rao r ao
|
280 |
+
re r e
|
281 |
+
ren r en
|
282 |
+
reng r eng
|
283 |
+
ri r ir
|
284 |
+
rong r ong
|
285 |
+
rou r ou
|
286 |
+
ru r u
|
287 |
+
rua r ua
|
288 |
+
ruan r uan
|
289 |
+
rui r ui
|
290 |
+
run r un
|
291 |
+
ruo r uo
|
292 |
+
sa s a
|
293 |
+
sai s ai
|
294 |
+
san s an
|
295 |
+
sang s ang
|
296 |
+
sao s ao
|
297 |
+
se s e
|
298 |
+
sen s en
|
299 |
+
seng s eng
|
300 |
+
sha sh a
|
301 |
+
shai sh ai
|
302 |
+
shan sh an
|
303 |
+
shang sh ang
|
304 |
+
shao sh ao
|
305 |
+
she sh e
|
306 |
+
shei sh ei
|
307 |
+
shen sh en
|
308 |
+
sheng sh eng
|
309 |
+
shi sh ir
|
310 |
+
shou sh ou
|
311 |
+
shu sh u
|
312 |
+
shua sh ua
|
313 |
+
shuai sh uai
|
314 |
+
shuan sh uan
|
315 |
+
shuang sh uang
|
316 |
+
shui sh ui
|
317 |
+
shun sh un
|
318 |
+
shuo sh uo
|
319 |
+
si s i0
|
320 |
+
song s ong
|
321 |
+
sou s ou
|
322 |
+
su s u
|
323 |
+
suan s uan
|
324 |
+
sui s ui
|
325 |
+
sun s un
|
326 |
+
suo s uo
|
327 |
+
ta t a
|
328 |
+
tai t ai
|
329 |
+
tan t an
|
330 |
+
tang t ang
|
331 |
+
tao t ao
|
332 |
+
te t e
|
333 |
+
tei t ei
|
334 |
+
teng t eng
|
335 |
+
ti t i
|
336 |
+
tian t ian
|
337 |
+
tiao t iao
|
338 |
+
tie t ie
|
339 |
+
ting t ing
|
340 |
+
tong t ong
|
341 |
+
tou t ou
|
342 |
+
tu t u
|
343 |
+
tuan t uan
|
344 |
+
tui t ui
|
345 |
+
tun t un
|
346 |
+
tuo t uo
|
347 |
+
wa w a
|
348 |
+
wai w ai
|
349 |
+
wan w an
|
350 |
+
wang w ang
|
351 |
+
wei w ei
|
352 |
+
wen w en
|
353 |
+
weng w eng
|
354 |
+
wo w o
|
355 |
+
wu w u
|
356 |
+
xi x i
|
357 |
+
xia x ia
|
358 |
+
xian x ian
|
359 |
+
xiang x iang
|
360 |
+
xiao x iao
|
361 |
+
xie x ie
|
362 |
+
xin x in
|
363 |
+
xing x ing
|
364 |
+
xiong x iong
|
365 |
+
xiu x iu
|
366 |
+
xu x v
|
367 |
+
xv x v
|
368 |
+
xuan x van
|
369 |
+
xvan x van
|
370 |
+
xue x ve
|
371 |
+
xve x ve
|
372 |
+
xun x vn
|
373 |
+
xvn x vn
|
374 |
+
ya y a
|
375 |
+
yan y En
|
376 |
+
yang y ang
|
377 |
+
yao y ao
|
378 |
+
ye y E
|
379 |
+
yi y i
|
380 |
+
yin y in
|
381 |
+
ying y ing
|
382 |
+
yo y o
|
383 |
+
yong y ong
|
384 |
+
you y ou
|
385 |
+
yu y v
|
386 |
+
yv y v
|
387 |
+
yuan y van
|
388 |
+
yvan y van
|
389 |
+
yue y ve
|
390 |
+
yve y ve
|
391 |
+
yun y vn
|
392 |
+
yvn y vn
|
393 |
+
za z a
|
394 |
+
zai z ai
|
395 |
+
zan z an
|
396 |
+
zang z ang
|
397 |
+
zao z ao
|
398 |
+
ze z e
|
399 |
+
zei z ei
|
400 |
+
zen z en
|
401 |
+
zeng z eng
|
402 |
+
zha zh a
|
403 |
+
zhai zh ai
|
404 |
+
zhan zh an
|
405 |
+
zhang zh ang
|
406 |
+
zhao zh ao
|
407 |
+
zhe zh e
|
408 |
+
zhei zh ei
|
409 |
+
zhen zh en
|
410 |
+
zheng zh eng
|
411 |
+
zhi zh ir
|
412 |
+
zhong zh ong
|
413 |
+
zhou zh ou
|
414 |
+
zhu zh u
|
415 |
+
zhua zh ua
|
416 |
+
zhuai zh uai
|
417 |
+
zhuan zh uan
|
418 |
+
zhuang zh uang
|
419 |
+
zhui zh ui
|
420 |
+
zhun zh un
|
421 |
+
zhuo zh uo
|
422 |
+
zi z i0
|
423 |
+
zong z ong
|
424 |
+
zou z ou
|
425 |
+
zu z u
|
426 |
+
zuan z uan
|
427 |
+
zui z ui
|
428 |
+
zun z un
|
429 |
+
zuo z uo
|
GPT_SoVITS/text/symbols.py
ADDED
@@ -0,0 +1,427 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
# punctuation = ['!', '?', '…', ",", ".","@"]#@是SP停顿
|
4 |
+
# "@" (the SP pause) is only present in the disabled variant of this list.
punctuation = ["!", "?", "…", ",", ".", "-"]
# A variant with an extra "SP4" entry exists but is disabled.
pu_symbols = [*punctuation, "SP", "SP2", "SP3", "UNK"]
pad = "_"
|
9 |
+
|
10 |
+
# Whitespace-separated so the table reads on one line.
c = "AA EE OO b c ch d f g h j k l m n p q r s sh t w x y z zh".split()
|
38 |
+
# Base entries without a tone digit.
v_without_tone = [
    "E", "En", "a", "ai", "an", "ang", "ao",
    "e", "ei", "en", "eng", "er",
    "i", "i0", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "ir", "iu",
    "o", "ong", "ou",
    "u", "ua", "uai", "uan", "uang", "ui", "un", "uo",
    "v", "van", "ve", "vn",
]

# Every base entry with tone digits 1-5 appended, grouped by tone
# ("E1" ... "vn1", "E2" ... "vn2", ...). Deriving the list from
# ``v_without_tone`` keeps the two tables from drifting apart.
v = [final + str(tone) for tone in range(1, 6) for final in v_without_tone]
|
277 |
+
|
278 |
+
# japanese
|
279 |
+
# The markers "[" (rising contour), "]" (falling contour), "$" (end marker)
# and "^" (start marker) are not part of this list.
ja_symbols = (
    "I N U a b by ch cl d dy e f g gy h hy i j k ky "
    "m my n ny o p py r ry s sh t ts u v w y z"
).split()
|
323 |
+
|
324 |
+
# Kept as a set, as in the original literal.
arpa = set(
    (
        "AH0 S AH1 EY2 AE2 EH0 OW2 UH0 NG B G AY0 M AA0 F AO0 ER2 UH1 IY1 AH2 "
        "DH IY0 EY1 IH0 K N W IY2 T AA1 ER1 EH2 OY0 UH2 UW1 Z AW2 AW1 V UW2 "
        "AA2 ER AW0 UW0 R OW1 EH1 ZH AE0 IH2 IH Y JH P AY1 EY0 OY2 TH HH D "
        "ER0 CH AO1 AE1 AO2 OY1 AY2 IH1 OW0 L SH"
    ).split()
)
|
397 |
+
|
398 |
+
# Hindi phonemes
|
399 |
+
hi_consonants = (
    ["क", "ख", "ग", "घ", "ङ"]
    + ["च", "छ", "ज", "झ", "ञ"]
    + ["ट", "ठ", "ड", "ढ", "ण"]
    + ["त", "थ", "द", "ध", "न"]
    + ["प", "फ", "ब", "भ", "म"]
    + ["य", "र", "ल", "व"]
    + ["श", "ष", "स", "ह"]
    + ["क्ष", "त्र", "ज्ञ"]
)

hi_vowels = (
    ["अ", "आ", "इ", "ई", "उ", "ऊ"]
    + ["ए", "ऐ", "ओ", "औ"]
    + ["ऋ", "ॠ"]
    + ["ं", "ः", "ँ"]  # anusvara, visarga, chandrabindu
    + ["्"]  # virama
    + ["ा", "ि", "ी", "ु", "ू"]
    + ["े", "ै", "ो", "ौ"]
    + ["ृ", "ॄ"]
    + ["ॉ", "ऑ"]  # candra o and o with candra
)

hi_numbers = list("०१२३४५६७८९")
|
423 |
+
|
424 |
+
# Union of the pad and every table above, de-duplicated and sorted.
symbols = sorted(
    {
        pad,
        *c,
        *v,
        *ja_symbols,
        *pu_symbols,
        *arpa,
        *hi_consonants,
        *hi_vowels,
        *hi_numbers,
    }
)

if __name__ == "__main__":
    print(len(symbols))
|
GPT_SoVITS/text/symbols2.py
ADDED
@@ -0,0 +1,444 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
# punctuation = ['!', '?', '…', ",", ".","@"]  # "@" is the SP pause
# Punctuation marks that are kept as symbols of their own.
punctuation = ["!", "?", "…", ",", "."]  # "@" is the SP pause (no longer part of this list)
punctuation.append("-")
# Punctuation plus the special tokens "SP"/"SP2"/"SP3" and "UNK".
# NOTE(review): presumably SP* are pause markers and UNK is the unknown-symbol placeholder — confirm.
pu_symbols = punctuation + ["SP", "SP2", "SP3", "UNK"]
# pu_symbols = punctuation + ["SP", 'SP2', 'SP3','SP4', "UNK"]
# Padding symbol; it is placed first when the symbol list is assembled below.
pad = "_"
|
9 |
+
|
10 |
+
c = [
|
11 |
+
"AA",
|
12 |
+
"EE",
|
13 |
+
"OO",
|
14 |
+
"b",
|
15 |
+
"c",
|
16 |
+
"ch",
|
17 |
+
"d",
|
18 |
+
"f",
|
19 |
+
"g",
|
20 |
+
"h",
|
21 |
+
"j",
|
22 |
+
"k",
|
23 |
+
"l",
|
24 |
+
"m",
|
25 |
+
"n",
|
26 |
+
"p",
|
27 |
+
"q",
|
28 |
+
"r",
|
29 |
+
"s",
|
30 |
+
"sh",
|
31 |
+
"t",
|
32 |
+
"w",
|
33 |
+
"x",
|
34 |
+
"y",
|
35 |
+
"z",
|
36 |
+
"zh",
|
37 |
+
]
|
38 |
+
v = [
|
39 |
+
"E1",
|
40 |
+
"En1",
|
41 |
+
"a1",
|
42 |
+
"ai1",
|
43 |
+
"an1",
|
44 |
+
"ang1",
|
45 |
+
"ao1",
|
46 |
+
"e1",
|
47 |
+
"ei1",
|
48 |
+
"en1",
|
49 |
+
"eng1",
|
50 |
+
"er1",
|
51 |
+
"i1",
|
52 |
+
"i01",
|
53 |
+
"ia1",
|
54 |
+
"ian1",
|
55 |
+
"iang1",
|
56 |
+
"iao1",
|
57 |
+
"ie1",
|
58 |
+
"in1",
|
59 |
+
"ing1",
|
60 |
+
"iong1",
|
61 |
+
"ir1",
|
62 |
+
"iu1",
|
63 |
+
"o1",
|
64 |
+
"ong1",
|
65 |
+
"ou1",
|
66 |
+
"u1",
|
67 |
+
"ua1",
|
68 |
+
"uai1",
|
69 |
+
"uan1",
|
70 |
+
"uang1",
|
71 |
+
"ui1",
|
72 |
+
"un1",
|
73 |
+
"uo1",
|
74 |
+
"v1",
|
75 |
+
"van1",
|
76 |
+
"ve1",
|
77 |
+
"vn1",
|
78 |
+
"E2",
|
79 |
+
"En2",
|
80 |
+
"a2",
|
81 |
+
"ai2",
|
82 |
+
"an2",
|
83 |
+
"ang2",
|
84 |
+
"ao2",
|
85 |
+
"e2",
|
86 |
+
"ei2",
|
87 |
+
"en2",
|
88 |
+
"eng2",
|
89 |
+
"er2",
|
90 |
+
"i2",
|
91 |
+
"i02",
|
92 |
+
"ia2",
|
93 |
+
"ian2",
|
94 |
+
"iang2",
|
95 |
+
"iao2",
|
96 |
+
"ie2",
|
97 |
+
"in2",
|
98 |
+
"ing2",
|
99 |
+
"iong2",
|
100 |
+
"ir2",
|
101 |
+
"iu2",
|
102 |
+
"o2",
|
103 |
+
"ong2",
|
104 |
+
"ou2",
|
105 |
+
"u2",
|
106 |
+
"ua2",
|
107 |
+
"uai2",
|
108 |
+
"uan2",
|
109 |
+
"uang2",
|
110 |
+
"ui2",
|
111 |
+
"un2",
|
112 |
+
"uo2",
|
113 |
+
"v2",
|
114 |
+
"van2",
|
115 |
+
"ve2",
|
116 |
+
"vn2",
|
117 |
+
"E3",
|
118 |
+
"En3",
|
119 |
+
"a3",
|
120 |
+
"ai3",
|
121 |
+
"an3",
|
122 |
+
"ang3",
|
123 |
+
"ao3",
|
124 |
+
"e3",
|
125 |
+
"ei3",
|
126 |
+
"en3",
|
127 |
+
"eng3",
|
128 |
+
"er3",
|
129 |
+
"i3",
|
130 |
+
"i03",
|
131 |
+
"ia3",
|
132 |
+
"ian3",
|
133 |
+
"iang3",
|
134 |
+
"iao3",
|
135 |
+
"ie3",
|
136 |
+
"in3",
|
137 |
+
"ing3",
|
138 |
+
"iong3",
|
139 |
+
"ir3",
|
140 |
+
"iu3",
|
141 |
+
"o3",
|
142 |
+
"ong3",
|
143 |
+
"ou3",
|
144 |
+
"u3",
|
145 |
+
"ua3",
|
146 |
+
"uai3",
|
147 |
+
"uan3",
|
148 |
+
"uang3",
|
149 |
+
"ui3",
|
150 |
+
"un3",
|
151 |
+
"uo3",
|
152 |
+
"v3",
|
153 |
+
"van3",
|
154 |
+
"ve3",
|
155 |
+
"vn3",
|
156 |
+
"E4",
|
157 |
+
"En4",
|
158 |
+
"a4",
|
159 |
+
"ai4",
|
160 |
+
"an4",
|
161 |
+
"ang4",
|
162 |
+
"ao4",
|
163 |
+
"e4",
|
164 |
+
"ei4",
|
165 |
+
"en4",
|
166 |
+
"eng4",
|
167 |
+
"er4",
|
168 |
+
"i4",
|
169 |
+
"i04",
|
170 |
+
"ia4",
|
171 |
+
"ian4",
|
172 |
+
"iang4",
|
173 |
+
"iao4",
|
174 |
+
"ie4",
|
175 |
+
"in4",
|
176 |
+
"ing4",
|
177 |
+
"iong4",
|
178 |
+
"ir4",
|
179 |
+
"iu4",
|
180 |
+
"o4",
|
181 |
+
"ong4",
|
182 |
+
"ou4",
|
183 |
+
"u4",
|
184 |
+
"ua4",
|
185 |
+
"uai4",
|
186 |
+
"uan4",
|
187 |
+
"uang4",
|
188 |
+
"ui4",
|
189 |
+
"un4",
|
190 |
+
"uo4",
|
191 |
+
"v4",
|
192 |
+
"van4",
|
193 |
+
"ve4",
|
194 |
+
"vn4",
|
195 |
+
"E5",
|
196 |
+
"En5",
|
197 |
+
"a5",
|
198 |
+
"ai5",
|
199 |
+
"an5",
|
200 |
+
"ang5",
|
201 |
+
"ao5",
|
202 |
+
"e5",
|
203 |
+
"ei5",
|
204 |
+
"en5",
|
205 |
+
"eng5",
|
206 |
+
"er5",
|
207 |
+
"i5",
|
208 |
+
"i05",
|
209 |
+
"ia5",
|
210 |
+
"ian5",
|
211 |
+
"iang5",
|
212 |
+
"iao5",
|
213 |
+
"ie5",
|
214 |
+
"in5",
|
215 |
+
"ing5",
|
216 |
+
"iong5",
|
217 |
+
"ir5",
|
218 |
+
"iu5",
|
219 |
+
"o5",
|
220 |
+
"ong5",
|
221 |
+
"ou5",
|
222 |
+
"u5",
|
223 |
+
"ua5",
|
224 |
+
"uai5",
|
225 |
+
"uan5",
|
226 |
+
"uang5",
|
227 |
+
"ui5",
|
228 |
+
"un5",
|
229 |
+
"uo5",
|
230 |
+
"v5",
|
231 |
+
"van5",
|
232 |
+
"ve5",
|
233 |
+
"vn5",
|
234 |
+
]
|
235 |
+
|
236 |
+
v_without_tone = [
|
237 |
+
"E",
|
238 |
+
"En",
|
239 |
+
"a",
|
240 |
+
"ai",
|
241 |
+
"an",
|
242 |
+
"ang",
|
243 |
+
"ao",
|
244 |
+
"e",
|
245 |
+
"ei",
|
246 |
+
"en",
|
247 |
+
"eng",
|
248 |
+
"er",
|
249 |
+
"i",
|
250 |
+
"i0",
|
251 |
+
"ia",
|
252 |
+
"ian",
|
253 |
+
"iang",
|
254 |
+
"iao",
|
255 |
+
"ie",
|
256 |
+
"in",
|
257 |
+
"ing",
|
258 |
+
"iong",
|
259 |
+
"ir",
|
260 |
+
"iu",
|
261 |
+
"o",
|
262 |
+
"ong",
|
263 |
+
"ou",
|
264 |
+
"u",
|
265 |
+
"ua",
|
266 |
+
"uai",
|
267 |
+
"uan",
|
268 |
+
"uang",
|
269 |
+
"ui",
|
270 |
+
"un",
|
271 |
+
"uo",
|
272 |
+
"v",
|
273 |
+
"van",
|
274 |
+
"ve",
|
275 |
+
"vn",
|
276 |
+
]
|
277 |
+
|
278 |
+
# japanese
|
279 |
+
ja_symbols = [
|
280 |
+
"I",
|
281 |
+
"N",
|
282 |
+
"U",
|
283 |
+
"a",
|
284 |
+
"b",
|
285 |
+
"by",
|
286 |
+
"ch",
|
287 |
+
"cl",
|
288 |
+
"d",
|
289 |
+
"dy",
|
290 |
+
"e",
|
291 |
+
"f",
|
292 |
+
"g",
|
293 |
+
"gy",
|
294 |
+
"h",
|
295 |
+
"hy",
|
296 |
+
"i",
|
297 |
+
"j",
|
298 |
+
"k",
|
299 |
+
"ky",
|
300 |
+
"m",
|
301 |
+
"my",
|
302 |
+
"n",
|
303 |
+
"ny",
|
304 |
+
"o",
|
305 |
+
"p",
|
306 |
+
"py",
|
307 |
+
"r",
|
308 |
+
"ry",
|
309 |
+
"s",
|
310 |
+
"sh",
|
311 |
+
"t",
|
312 |
+
"ts",
|
313 |
+
"u",
|
314 |
+
"v",
|
315 |
+
"w",
|
316 |
+
"y",
|
317 |
+
"z",
|
318 |
+
###楼下2个留到后面加
|
319 |
+
# "[", #上升调型
|
320 |
+
# "]", #下降调型
|
321 |
+
# "$", #结束符
|
322 |
+
# "^", #开始符
|
323 |
+
]
|
324 |
+
|
325 |
+
arpa = {
|
326 |
+
"AH0",
|
327 |
+
"S",
|
328 |
+
"AH1",
|
329 |
+
"EY2",
|
330 |
+
"AE2",
|
331 |
+
"EH0",
|
332 |
+
"OW2",
|
333 |
+
"UH0",
|
334 |
+
"NG",
|
335 |
+
"B",
|
336 |
+
"G",
|
337 |
+
"AY0",
|
338 |
+
"M",
|
339 |
+
"AA0",
|
340 |
+
"F",
|
341 |
+
"AO0",
|
342 |
+
"ER2",
|
343 |
+
"UH1",
|
344 |
+
"IY1",
|
345 |
+
"AH2",
|
346 |
+
"DH",
|
347 |
+
"IY0",
|
348 |
+
"EY1",
|
349 |
+
"IH0",
|
350 |
+
"K",
|
351 |
+
"N",
|
352 |
+
"W",
|
353 |
+
"IY2",
|
354 |
+
"T",
|
355 |
+
"AA1",
|
356 |
+
"ER1",
|
357 |
+
"EH2",
|
358 |
+
"OY0",
|
359 |
+
"UH2",
|
360 |
+
"UW1",
|
361 |
+
"Z",
|
362 |
+
"AW2",
|
363 |
+
"AW1",
|
364 |
+
"V",
|
365 |
+
"UW2",
|
366 |
+
"AA2",
|
367 |
+
"ER",
|
368 |
+
"AW0",
|
369 |
+
"UW0",
|
370 |
+
"R",
|
371 |
+
"OW1",
|
372 |
+
"EH1",
|
373 |
+
"ZH",
|
374 |
+
"AE0",
|
375 |
+
"IH2",
|
376 |
+
"IH",
|
377 |
+
"Y",
|
378 |
+
"JH",
|
379 |
+
"P",
|
380 |
+
"AY1",
|
381 |
+
"EY0",
|
382 |
+
"OY2",
|
383 |
+
"TH",
|
384 |
+
"HH",
|
385 |
+
"D",
|
386 |
+
"ER0",
|
387 |
+
"CH",
|
388 |
+
"AO1",
|
389 |
+
"AE1",
|
390 |
+
"AO2",
|
391 |
+
"OY1",
|
392 |
+
"AY2",
|
393 |
+
"IH1",
|
394 |
+
"OW0",
|
395 |
+
"L",
|
396 |
+
"SH",
|
397 |
+
}
|
398 |
+
|
399 |
+
ko_symbols='ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㄲㄸㅃㅆㅉㅏㅓㅗㅜㅡㅣㅐㅔ空停'
|
400 |
+
# ko_symbols='ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㄲㄸㅃㅆㅉㅏㅓㅗㅜㅡㅣㅐㅔ '
|
401 |
+
|
402 |
+
yue_symbols={'Yeot3', 'Yip1', 'Yyu3', 'Yeng4', 'Yut5', 'Yaan5', 'Ym5', 'Yaan6', 'Yang1', 'Yun4', 'Yon2', 'Yui5', 'Yun2', 'Yat3', 'Ye', 'Yeot1', 'Yoeng5', 'Yoek2', 'Yam2', 'Yeon6', 'Yu6', 'Yiu3', 'Yaang6', 'Yp5', 'Yai4', 'Yoek4', 'Yit6', 'Yam5', 'Yoeng6', 'Yg1', 'Yk3', 'Yoe4', 'Yam3', 'Yc', 'Yyu4', 'Yyut1', 'Yiu4', 'Ying3', 'Yip3', 'Yaap3', 'Yau3', 'Yan4', 'Yau1', 'Yap4', 'Yk6', 'Yok3', 'Yai1', 'Yeot6', 'Yan2', 'Yoek6', 'Yt1', 'Yoi1', 'Yit5', 'Yn4', 'Yaau3', 'Yau4', 'Yuk6', 'Ys', 'Yuk', 'Yin6', 'Yung6', 'Ya', 'You', 'Yaai5', 'Yau5', 'Yoi3', 'Yaak3', 'Yaat3', 'Ying2', 'Yok5', 'Yeng2', 'Yyut3', 'Yam1', 'Yip5', 'You1', 'Yam6', 'Yaa5', 'Yi6', 'Yek4', 'Yyu2', 'Yuk5', 'Yaam1', 'Yang2', 'Yai', 'Yiu6', 'Yin4', 'Yok4', 'Yot3', 'Yui2', 'Yeoi5', 'Yyun6', 'Yyu5', 'Yoi5', 'Yeot2', 'Yim4', 'Yeoi2', 'Yaan1', 'Yang6', 'Yong1', 'Yaang4', 'Yung5', 'Yeon1', 'Yin2', 'Ya3', 'Yaang3', 'Yg', 'Yk2', 'Yaau5', 'Yut1', 'Yt5', 'Yip4', 'Yung4', 'Yj', 'Yong3', 'Ya1', 'Yg6', 'Yaau6', 'Yit3', 'Yun3', 'Ying1', 'Yn2', 'Yg4', 'Yl', 'Yp3', 'Yn3', 'Yak1', 'Yang5', 'Yoe6', 'You2', 'Yap2', 'Yak2', 'Yt3', 'Yot5', 'Yim2', 'Yi1', 'Yn6', 'Yaat5', 'Yaam3', 'Yoek5', 'Ye3', 'Yeon4', 'Yaa2', 'Yu3', 'Yim6', 'Ym', 'Yoe3', 'Yaai2', 'Ym2', 'Ya6', 'Yeng6', 'Yik4', 'Yot4', 'Yaai4', 'Yyun3', 'Yu1', 'Yoeng1', 'Yaap2', 'Yuk3', 'Yoek3', 'Yeng5', 'Yeoi1', 'Yiu2', 'Yok1', 'Yo1', 'Yoek1', 'Yoeng2', 'Yeon5', 'Yiu1', 'Yoeng4', 'Yuk2', 'Yat4', 'Yg5', 'Yut4', 'Yan6', 'Yin3', 'Yaa6', 'Yap1', 'Yg2', 'Yoe5', 'Yt4', 'Ya5', 'Yo4', 'Yyu1', 'Yak3', 'Yeon2', 'Yong4', 'Ym1', 'Ye2', 'Yaang5', 'Yoi2', 'Yeng3', 'Yn', 'Yyut4', 'Yau', 'Yaak2', 'Yaan4', 'Yek2', 'Yin1', 'Yi5', 'Yoe2', 'Yei5', 'Yaat6', 'Yak5', 'Yp6', 'Yok6', 'Yei2', 'Yaap1', 'Yyut5', 'Yi4', 'Yim1', 'Yk5', 'Ye4', 'Yok2', 'Yaam6', 'Yat2', 'Yon6', 'Yei3', 'Yyu6', 'Yeot5', 'Yk4', 'Yai6', 'Yd', 'Yg3', 'Yei6', 'Yau2', 'Yok', 'Yau6', 'Yung3', 'Yim5', 'Yut6', 'Yit1', 'Yon3', 'Yat1', 'Yaam2', 'Yyut2', 'Yui6', 'Yt2', 'Yek6', 'Yt', 'Ye6', 'Yang3', 'Ying6', 'Yaau1', 'Yeon3', 'Yng', 'Yh', 
'Yang4', 'Ying5', 'Yaap6', 'Yoeng3', 'Yyun4', 'You3', 'Yan5', 'Yat5', 'Yot1', 'Yun1', 'Yi3', 'Yaa1', 'Yaap4', 'You6', 'Yaang2', 'Yaap5', 'Yaa3', 'Yaak6', 'Yeng1', 'Yaak1', 'Yo5', 'Yoi4', 'Yam4', 'Yik1', 'Ye1', 'Yai5', 'Yung1', 'Yp2', 'Yui4', 'Yaak4', 'Yung2', 'Yak4', 'Yaat4', 'Yeoi4', 'Yut2', 'Yin5', 'Yaau4', 'Yap6', 'Yb', 'Yaam4', 'Yw', 'Yut3', 'Yong2', 'Yt6', 'Yaai6', 'Yap5', 'Yik5', 'Yun6', 'Yaam5', 'Yun5', 'Yik3', 'Ya2', 'Yyut6', 'Yon4', 'Yk1', 'Yit4', 'Yak6', 'Yaan2', 'Yuk1', 'Yai2', 'Yik2', 'Yaat2', 'Yo3', 'Ykw', 'Yn5', 'Yaa', 'Ye5', 'Yu4', 'Yei1', 'Yai3', 'Yyun5', 'Yip2', 'Yaau2', 'Yiu5', 'Ym4', 'Yeoi6', 'Yk', 'Ym6', 'Yoe1', 'Yeoi3', 'Yon', 'Yuk4', 'Yaai3', 'Yaa4', 'Yot6', 'Yaang1', 'Yei4', 'Yek1', 'Yo', 'Yp', 'Yo6', 'Yp4', 'Yan3', 'Yoi', 'Yap3', 'Yek3', 'Yim3', 'Yz', 'Yot2', 'Yoi6', 'Yit2', 'Yu5', 'Yaan3', 'Yan1', 'Yon5', 'Yp1', 'Yong5', 'Ygw', 'Yak', 'Yat6', 'Ying4', 'Yu2', 'Yf', 'Ya4', 'Yon1', 'You4', 'Yik6', 'Yui1', 'Yaat1', 'Yeot4', 'Yi2', 'Yaai1', 'Yek5', 'Ym3', 'Yong6', 'You5', 'Yyun1', 'Yn1', 'Yo2', 'Yip6', 'Yui3', 'Yaak5', 'Yyun2'}
|
403 |
+
|
404 |
+
# Hindi consonants
|
405 |
+
hi_consonants = [
|
406 |
+
"क", "ख", "ग", "घ", "ङ",
|
407 |
+
"च", "छ", "ज", "झ", "ञ",
|
408 |
+
"ट", "ठ", "ड", "ढ", "ण",
|
409 |
+
"त", "थ", "द", "ध", "न",
|
410 |
+
"प", "फ", "ब", "भ", "म",
|
411 |
+
"य", "र", "ल", "व",
|
412 |
+
"श", "ष", "स", "ह",
|
413 |
+
"क्ष", "त्र", "ज्ञ"
|
414 |
+
]
|
415 |
+
|
416 |
+
# Hindi vowels and modifiers
|
417 |
+
hi_vowels = [
|
418 |
+
"अ", "आ", "इ", "ई", "उ", "ऊ",
|
419 |
+
"ए", "ऐ", "ओ", "औ",
|
420 |
+
"ऋ", "ॠ",
|
421 |
+
"ं", "ः", "ँ", # anusvara, visarga, chandrabindu
|
422 |
+
"्", # virama
|
423 |
+
"ा", "ि", "ी", "ु", "ू",
|
424 |
+
"े", "ै", "ो", "ौ",
|
425 |
+
"ृ", "ॄ",
|
426 |
+
"ॉ", "ऑ" # candra o and o with candra
|
427 |
+
]
|
428 |
+
|
429 |
+
# Hindi numbers
|
430 |
+
hi_numbers = ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"]
|
431 |
+
|
432 |
+
# Assemble the master symbol list: pad, the c and v lists, the Japanese symbols,
# punctuation/special tokens, the ARPAbet set, and the Hindi consonants, vowels and digits.
# NOTE(review): ko_symbols and yue_symbols are defined in this module but are not added
# to `symbols` here — confirm whether that omission is intended.
symbols = [pad] + c + v + ja_symbols + pu_symbols + list(arpa) + hi_consonants + hi_vowels + hi_numbers
# Drop duplicates (several lists share entries such as "a" or "b") and sort so the
# resulting order does not depend on set iteration order.
symbols = sorted(set(symbols))
# print(len(symbols))

# When run as a script, print the number of distinct symbols.
if __name__ == "__main__":
    print(len(symbols))
|
439 |
+
'''
|
440 |
+
粤语:
|
441 |
+
732-353=379
|
442 |
+
韩文+粤语:
|
443 |
+
732-322=410
|
444 |
+
'''
|
GPT_SoVITS/text/text_processing.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from typing import List, Optional
|
3 |
+
from .symbols import symbols
|
4 |
+
from .hindi import get_phoneme_sequence as hindi_to_phonemes
|
5 |
+
|
6 |
+
def is_hindi_text(text: str) -> bool:
    """Return True when *text* has at least one character in U+0900-U+097F (Devanagari)."""
    devanagari_hit = re.search(r'[\u0900-\u097F]', text)
    return devanagari_hit is not None
|
9 |
+
|
10 |
+
def text_to_sequence(text: str, language: Optional[str] = None) -> List[str]:
    """Convert *text* to the phonemes that are present in ``symbols``.

    Args:
        text: Input text.
        language: Language code. When ``None`` it is chosen automatically:
            ``'hi'`` if ``is_hindi_text(text)`` is true, otherwise ``'en'``.

    Returns:
        The phonemes, in order, with every phoneme missing from ``symbols``
        removed. A warning line is printed for each removed phoneme.
    """
    if language is None:
        # Auto-detection currently only distinguishes Hindi from everything else.
        language = 'hi' if is_hindi_text(text) else 'en'

    if language == 'hi':
        phonemes = hindi_to_phonemes(text)
    else:
        # Placeholder: every non-Hindi language yields a single 'UNK' token.
        phonemes = ['UNK']

    # First pass: report what is about to be dropped.
    unknown = [p for p in phonemes if p not in symbols]
    for p in unknown:
        print(f"Warning: phoneme '{p}' not in symbols list")

    # Second pass: keep only phonemes the symbol table knows.
    return [p for p in phonemes if p in symbols]
|
34 |
+
|
35 |
+
def sequence_to_text(sequence: List[str]) -> str:
    """Join the symbols of *sequence* into one string, separated by single spaces."""
    separator = ' '
    return separator.join(sequence)
|
GPT_SoVITS/text/tone_sandhi.py
ADDED
@@ -0,0 +1,807 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
from typing import List
|
15 |
+
from typing import Tuple
|
16 |
+
|
17 |
+
import jieba_fast as jieba
|
18 |
+
from pypinyin import lazy_pinyin
|
19 |
+
from pypinyin import Style
|
20 |
+
|
21 |
+
|
22 |
+
class ToneSandhi:
|
23 |
+
def __init__(self):
|
24 |
+
self.must_neural_tone_words = {
|
25 |
+
"麻烦",
|
26 |
+
"麻利",
|
27 |
+
"鸳鸯",
|
28 |
+
"高粱",
|
29 |
+
"骨头",
|
30 |
+
"骆驼",
|
31 |
+
"马虎",
|
32 |
+
"首饰",
|
33 |
+
"馒头",
|
34 |
+
"馄饨",
|
35 |
+
"风筝",
|
36 |
+
"难为",
|
37 |
+
"队伍",
|
38 |
+
"阔气",
|
39 |
+
"闺女",
|
40 |
+
"门道",
|
41 |
+
"锄头",
|
42 |
+
"铺盖",
|
43 |
+
"铃铛",
|
44 |
+
"铁匠",
|
45 |
+
"钥匙",
|
46 |
+
"里脊",
|
47 |
+
"里头",
|
48 |
+
"部分",
|
49 |
+
"那么",
|
50 |
+
"道士",
|
51 |
+
"造化",
|
52 |
+
"迷糊",
|
53 |
+
"连累",
|
54 |
+
"这么",
|
55 |
+
"这个",
|
56 |
+
"运气",
|
57 |
+
"过去",
|
58 |
+
"软和",
|
59 |
+
"转悠",
|
60 |
+
"踏实",
|
61 |
+
"跳蚤",
|
62 |
+
"跟头",
|
63 |
+
"趔趄",
|
64 |
+
"财主",
|
65 |
+
"豆腐",
|
66 |
+
"讲究",
|
67 |
+
"记性",
|
68 |
+
"记号",
|
69 |
+
"认识",
|
70 |
+
"规矩",
|
71 |
+
"见识",
|
72 |
+
"裁缝",
|
73 |
+
"补丁",
|
74 |
+
"衣裳",
|
75 |
+
"衣服",
|
76 |
+
"衙门",
|
77 |
+
"街坊",
|
78 |
+
"行李",
|
79 |
+
"行当",
|
80 |
+
"蛤蟆",
|
81 |
+
"蘑菇",
|
82 |
+
"薄荷",
|
83 |
+
"葫芦",
|
84 |
+
"葡萄",
|
85 |
+
"萝卜",
|
86 |
+
"荸荠",
|
87 |
+
"苗条",
|
88 |
+
"苗头",
|
89 |
+
"苍蝇",
|
90 |
+
"芝麻",
|
91 |
+
"舒服",
|
92 |
+
"舒坦",
|
93 |
+
"舌头",
|
94 |
+
"自在",
|
95 |
+
"膏药",
|
96 |
+
"脾气",
|
97 |
+
"脑袋",
|
98 |
+
"脊梁",
|
99 |
+
"能耐",
|
100 |
+
"胳膊",
|
101 |
+
"胭脂",
|
102 |
+
"胡萝",
|
103 |
+
"胡琴",
|
104 |
+
"胡同",
|
105 |
+
"聪明",
|
106 |
+
"耽误",
|
107 |
+
"耽搁",
|
108 |
+
"耷拉",
|
109 |
+
"耳朵",
|
110 |
+
"老爷",
|
111 |
+
"老实",
|
112 |
+
"老婆",
|
113 |
+
"老头",
|
114 |
+
"老太",
|
115 |
+
"翻腾",
|
116 |
+
"罗嗦",
|
117 |
+
"罐头",
|
118 |
+
"编辑",
|
119 |
+
"结实",
|
120 |
+
"红火",
|
121 |
+
"累赘",
|
122 |
+
"糨糊",
|
123 |
+
"糊涂",
|
124 |
+
"精神",
|
125 |
+
"粮食",
|
126 |
+
"簸箕",
|
127 |
+
"篱笆",
|
128 |
+
"算计",
|
129 |
+
"算盘",
|
130 |
+
"答应",
|
131 |
+
"笤帚",
|
132 |
+
"笑语",
|
133 |
+
"笑话",
|
134 |
+
"窟窿",
|
135 |
+
"窝囊",
|
136 |
+
"窗户",
|
137 |
+
"稳当",
|
138 |
+
"稀罕",
|
139 |
+
"称呼",
|
140 |
+
"秧歌",
|
141 |
+
"秀气",
|
142 |
+
"秀才",
|
143 |
+
"福气",
|
144 |
+
"祖宗",
|
145 |
+
"砚台",
|
146 |
+
"码头",
|
147 |
+
"石榴",
|
148 |
+
"石头",
|
149 |
+
"石匠",
|
150 |
+
"知识",
|
151 |
+
"眼睛",
|
152 |
+
"眯缝",
|
153 |
+
"眨巴",
|
154 |
+
"眉毛",
|
155 |
+
"相声",
|
156 |
+
"盘算",
|
157 |
+
"白净",
|
158 |
+
"痢疾",
|
159 |
+
"痛快",
|
160 |
+
"疟疾",
|
161 |
+
"疙瘩",
|
162 |
+
"疏忽",
|
163 |
+
"畜生",
|
164 |
+
"生意",
|
165 |
+
"甘蔗",
|
166 |
+
"琵琶",
|
167 |
+
"琢磨",
|
168 |
+
"琉璃",
|
169 |
+
"玻璃",
|
170 |
+
"玫瑰",
|
171 |
+
"玄乎",
|
172 |
+
"狐狸",
|
173 |
+
"状元",
|
174 |
+
"特务",
|
175 |
+
"牲口",
|
176 |
+
"牙碜",
|
177 |
+
"牌楼",
|
178 |
+
"爽快",
|
179 |
+
"爱人",
|
180 |
+
"热闹",
|
181 |
+
"烧饼",
|
182 |
+
"烟筒",
|
183 |
+
"烂糊",
|
184 |
+
"点心",
|
185 |
+
"炊帚",
|
186 |
+
"灯笼",
|
187 |
+
"火候",
|
188 |
+
"漂亮",
|
189 |
+
"滑溜",
|
190 |
+
"溜达",
|
191 |
+
"温和",
|
192 |
+
"清楚",
|
193 |
+
"消息",
|
194 |
+
"浪头",
|
195 |
+
"活泼",
|
196 |
+
"比方",
|
197 |
+
"正经",
|
198 |
+
"欺负",
|
199 |
+
"模糊",
|
200 |
+
"槟榔",
|
201 |
+
"棺材",
|
202 |
+
"棒槌",
|
203 |
+
"棉花",
|
204 |
+
"核桃",
|
205 |
+
"栅栏",
|
206 |
+
"柴火",
|
207 |
+
"架势",
|
208 |
+
"枕头",
|
209 |
+
"���杷",
|
210 |
+
"机灵",
|
211 |
+
"本事",
|
212 |
+
"木头",
|
213 |
+
"木匠",
|
214 |
+
"朋友",
|
215 |
+
"月饼",
|
216 |
+
"月亮",
|
217 |
+
"暖和",
|
218 |
+
"明白",
|
219 |
+
"时候",
|
220 |
+
"新鲜",
|
221 |
+
"故事",
|
222 |
+
"收拾",
|
223 |
+
"收成",
|
224 |
+
"提防",
|
225 |
+
"挖苦",
|
226 |
+
"挑剔",
|
227 |
+
"指甲",
|
228 |
+
"指头",
|
229 |
+
"拾掇",
|
230 |
+
"拳头",
|
231 |
+
"拨弄",
|
232 |
+
"招牌",
|
233 |
+
"招呼",
|
234 |
+
"抬举",
|
235 |
+
"护士",
|
236 |
+
"折腾",
|
237 |
+
"扫帚",
|
238 |
+
"打量",
|
239 |
+
"打算",
|
240 |
+
"打点",
|
241 |
+
"打扮",
|
242 |
+
"打听",
|
243 |
+
"打发",
|
244 |
+
"扎实",
|
245 |
+
"扁担",
|
246 |
+
"戒指",
|
247 |
+
"懒得",
|
248 |
+
"意识",
|
249 |
+
"意思",
|
250 |
+
"情形",
|
251 |
+
"悟性",
|
252 |
+
"怪物",
|
253 |
+
"思量",
|
254 |
+
"怎么",
|
255 |
+
"念头",
|
256 |
+
"念叨",
|
257 |
+
"快活",
|
258 |
+
"忙活",
|
259 |
+
"志气",
|
260 |
+
"心思",
|
261 |
+
"得罪",
|
262 |
+
"张罗",
|
263 |
+
"弟兄",
|
264 |
+
"开通",
|
265 |
+
"应酬",
|
266 |
+
"庄稼",
|
267 |
+
"干事",
|
268 |
+
"帮手",
|
269 |
+
"帐篷",
|
270 |
+
"希罕",
|
271 |
+
"师父",
|
272 |
+
"师傅",
|
273 |
+
"巴结",
|
274 |
+
"巴掌",
|
275 |
+
"差事",
|
276 |
+
"工夫",
|
277 |
+
"岁数",
|
278 |
+
"屁股",
|
279 |
+
"尾巴",
|
280 |
+
"少爷",
|
281 |
+
"小气",
|
282 |
+
"小伙",
|
283 |
+
"将就",
|
284 |
+
"对头",
|
285 |
+
"对付",
|
286 |
+
"寡妇",
|
287 |
+
"家伙",
|
288 |
+
"客气",
|
289 |
+
"实在",
|
290 |
+
"官司",
|
291 |
+
"学问",
|
292 |
+
"学生",
|
293 |
+
"字号",
|
294 |
+
"嫁妆",
|
295 |
+
"媳妇",
|
296 |
+
"媒人",
|
297 |
+
"婆家",
|
298 |
+
"娘家",
|
299 |
+
"委屈",
|
300 |
+
"姑娘",
|
301 |
+
"姐夫",
|
302 |
+
"妯娌",
|
303 |
+
"妥当",
|
304 |
+
"妖精",
|
305 |
+
"奴才",
|
306 |
+
"女婿",
|
307 |
+
"头发",
|
308 |
+
"太阳",
|
309 |
+
"大爷",
|
310 |
+
"大方",
|
311 |
+
"大意",
|
312 |
+
"大夫",
|
313 |
+
"多少",
|
314 |
+
"多么",
|
315 |
+
"外甥",
|
316 |
+
"壮实",
|
317 |
+
"地道",
|
318 |
+
"地方",
|
319 |
+
"在乎",
|
320 |
+
"困难",
|
321 |
+
"嘴巴",
|
322 |
+
"嘱咐",
|
323 |
+
"嘟囔",
|
324 |
+
"嘀咕",
|
325 |
+
"喜欢",
|
326 |
+
"喇嘛",
|
327 |
+
"喇叭",
|
328 |
+
"商量",
|
329 |
+
"唾沫",
|
330 |
+
"哑巴",
|
331 |
+
"哈欠",
|
332 |
+
"哆嗦",
|
333 |
+
"咳嗽",
|
334 |
+
"和尚",
|
335 |
+
"告诉",
|
336 |
+
"告示",
|
337 |
+
"含糊",
|
338 |
+
"吓唬",
|
339 |
+
"后头",
|
340 |
+
"名字",
|
341 |
+
"名堂",
|
342 |
+
"合同",
|
343 |
+
"吆喝",
|
344 |
+
"叫唤",
|
345 |
+
"口袋",
|
346 |
+
"厚道",
|
347 |
+
"厉害",
|
348 |
+
"千斤",
|
349 |
+
"包袱",
|
350 |
+
"包涵",
|
351 |
+
"匀称",
|
352 |
+
"勤快",
|
353 |
+
"动静",
|
354 |
+
"动弹",
|
355 |
+
"功夫",
|
356 |
+
"力气",
|
357 |
+
"前头",
|
358 |
+
"刺猬",
|
359 |
+
"刺激",
|
360 |
+
"别扭",
|
361 |
+
"利落",
|
362 |
+
"利索",
|
363 |
+
"利害",
|
364 |
+
"分析",
|
365 |
+
"出息",
|
366 |
+
"凑合",
|
367 |
+
"凉快",
|
368 |
+
"冷战",
|
369 |
+
"冤枉",
|
370 |
+
"冒失",
|
371 |
+
"养活",
|
372 |
+
"关系",
|
373 |
+
"先生",
|
374 |
+
"兄弟",
|
375 |
+
"便宜",
|
376 |
+
"使唤",
|
377 |
+
"佩服",
|
378 |
+
"作坊",
|
379 |
+
"体面",
|
380 |
+
"位置",
|
381 |
+
"似的",
|
382 |
+
"伙计",
|
383 |
+
"休息",
|
384 |
+
"什么",
|
385 |
+
"人家",
|
386 |
+
"亲戚",
|
387 |
+
"亲家",
|
388 |
+
"交情",
|
389 |
+
"云彩",
|
390 |
+
"事情",
|
391 |
+
"买卖",
|
392 |
+
"主意",
|
393 |
+
"丫头",
|
394 |
+
"丧气",
|
395 |
+
"两口",
|
396 |
+
"东西",
|
397 |
+
"东家",
|
398 |
+
"世故",
|
399 |
+
"不由",
|
400 |
+
"不在",
|
401 |
+
"下水",
|
402 |
+
"下巴",
|
403 |
+
"上头",
|
404 |
+
"上司",
|
405 |
+
"丈夫",
|
406 |
+
"丈人",
|
407 |
+
"一辈",
|
408 |
+
"那个",
|
409 |
+
"菩萨",
|
410 |
+
"父亲",
|
411 |
+
"母亲",
|
412 |
+
"咕噜",
|
413 |
+
"邋遢",
|
414 |
+
"费用",
|
415 |
+
"冤家",
|
416 |
+
"甜头",
|
417 |
+
"介绍",
|
418 |
+
"荒唐",
|
419 |
+
"大人",
|
420 |
+
"泥鳅",
|
421 |
+
"幸福",
|
422 |
+
"熟悉",
|
423 |
+
"计划",
|
424 |
+
"扑腾",
|
425 |
+
"蜡烛",
|
426 |
+
"姥爷",
|
427 |
+
"照顾",
|
428 |
+
"喉咙",
|
429 |
+
"吉他",
|
430 |
+
"弄堂",
|
431 |
+
"蚂蚱",
|
432 |
+
"凤凰",
|
433 |
+
"拖沓",
|
434 |
+
"寒碜",
|
435 |
+
"糟蹋",
|
436 |
+
"倒腾",
|
437 |
+
"报复",
|
438 |
+
"逻辑",
|
439 |
+
"盘缠",
|
440 |
+
"喽啰",
|
441 |
+
"牢骚",
|
442 |
+
"咖喱",
|
443 |
+
"扫把",
|
444 |
+
"惦记",
|
445 |
+
}
|
446 |
+
self.must_not_neural_tone_words = {
|
447 |
+
"男子",
|
448 |
+
"女子",
|
449 |
+
"分子",
|
450 |
+
"原子",
|
451 |
+
"量子",
|
452 |
+
"莲子",
|
453 |
+
"石子",
|
454 |
+
"瓜子",
|
455 |
+
"电子",
|
456 |
+
"人人",
|
457 |
+
"虎虎",
|
458 |
+
"幺幺",
|
459 |
+
"干嘛",
|
460 |
+
"学子",
|
461 |
+
"哈哈",
|
462 |
+
"数数",
|
463 |
+
"袅袅",
|
464 |
+
"局地",
|
465 |
+
"以下",
|
466 |
+
"娃哈哈",
|
467 |
+
"花花草草",
|
468 |
+
"留得",
|
469 |
+
"耕地",
|
470 |
+
"想想",
|
471 |
+
"熙熙",
|
472 |
+
"攘攘",
|
473 |
+
"卵子",
|
474 |
+
"死死",
|
475 |
+
"冉冉",
|
476 |
+
"恳恳",
|
477 |
+
"佼佼",
|
478 |
+
"吵吵",
|
479 |
+
"打打",
|
480 |
+
"考考",
|
481 |
+
"整整",
|
482 |
+
"莘莘",
|
483 |
+
"落地",
|
484 |
+
"算子",
|
485 |
+
"家家户户",
|
486 |
+
"青青",
|
487 |
+
}
|
488 |
+
self.punc = ":,;。?!“”‘’':,;.?!"
|
489 |
+
|
490 |
+
# the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041
|
491 |
+
# e.g.
|
492 |
+
# word: "家里"
|
493 |
+
# pos: "s"
|
494 |
+
# finals: ['ia1', 'i3']
|
495 |
+
def _neural_sandhi(self, word: str, pos: str, finals: List[str]) -> List[str]:
    """Apply neutral-tone rules to ``finals``, rewriting the tone digit to "5".

    Args:
        word: The word being processed; ``finals`` is indexed in step with its characters.
        pos: Part-of-speech tag of ``word`` (jieba-style tags such as "n", "v", "ul").
        finals: Finals ending in a tone digit, e.g. ``['ia1', 'i3']``. Entries are
            modified in place by the first group of rules.

    Returns:
        The finals after all rules, rebuilt as a new list from the two sub-word slices.
    """
    # reduplication words for n. and v. e.g. 奶奶, 试试, 旺旺
    # A character equal to its predecessor gets tone 5 when the POS tag starts with
    # n/v/a and the word is not in the exemption list.
    for j, item in enumerate(word):
        if (
            j - 1 >= 0
            and item == word[j - 1]
            and pos[0] in {"n", "v", "a"}
            and word not in self.must_not_neural_tone_words
        ):
            finals[j] = finals[j][:-1] + "5"
    # Position of "个" (-1 when absent); used by the measure-word rule below.
    ge_idx = word.find("个")
    # Word ends in one of the listed sentence-final particles.
    if len(word) >= 1 and word[-1] in "吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶":
        finals[-1] = finals[-1][:-1] + "5"
    # Word ends in 的 / 地 / 得.
    elif len(word) >= 1 and word[-1] in "的地得":
        finals[-1] = finals[-1][:-1] + "5"
    # e.g. 走了, 看着, 去过
    elif len(word) == 1 and word in "了着过" and pos in {"ul", "uz", "ug"}:
        finals[-1] = finals[-1][:-1] + "5"
    # Suffix 们 / 子 on a word tagged r or n, unless the word is exempted.
    elif (
        len(word) > 1
        and word[-1] in "们子"
        and pos in {"r", "n"}
        and word not in self.must_not_neural_tone_words
    ):
        finals[-1] = finals[-1][:-1] + "5"
    # e.g. 桌上, 地下, 家里
    elif len(word) > 1 and word[-1] in "上下里" and pos in {"s", "l", "f"}:
        finals[-1] = finals[-1][:-1] + "5"
    # e.g. 上来, 下去
    elif len(word) > 1 and word[-1] in "来去" and word[-2] in "上下进出回过起开":
        finals[-1] = finals[-1][:-1] + "5"
    # "个" used as a measure word: preceded by a numeric character or one of the
    # listed characters, or the word is exactly "个".
    elif (
        ge_idx >= 1
        and (word[ge_idx - 1].isnumeric() or word[ge_idx - 1] in "几有两半多各整每做是")
    ) or word == "个":
        finals[ge_idx] = finals[ge_idx][:-1] + "5"
    else:
        # Fallback: the whole word, or its last two characters, is in the lexicon.
        if (
            word in self.must_neural_tone_words
            or word[-2:] in self.must_neural_tone_words
        ):
            finals[-1] = finals[-1][:-1] + "5"

    # Second pass: split the word in two and apply the lexicon check to each part.
    word_list = self._split_word(word)
    # Slice the finals at the length of the first sub-word so each part lines up.
    finals_list = [finals[: len(word_list[0])], finals[len(word_list[0]) :]]
    # Note: the loop variable reuses (shadows) the ``word`` parameter from here on.
    for i, word in enumerate(word_list):
        # conventional neural in Chinese
        if (
            word in self.must_neural_tone_words
            or word[-2:] in self.must_neural_tone_words
        ):
            finals_list[i][-1] = finals_list[i][-1][:-1] + "5"
    # Concatenate the two slices back into one flat list.
    finals = sum(finals_list, [])
    return finals
|
550 |
+
|
551 |
+
def _bu_sandhi(self, word: str, finals: List[str]) -> List[str]:
|
552 |
+
# e.g. 看不懂
|
553 |
+
if len(word) == 3 and word[1] == "不":
|
554 |
+
finals[1] = finals[1][:-1] + "5"
|
555 |
+
else:
|
556 |
+
for i, char in enumerate(word):
|
557 |
+
# "不" before tone4 should be bu2, e.g. 不怕
|
558 |
+
if char == "不" and i + 1 < len(word) and finals[i + 1][-1] == "4":
|
559 |
+
finals[i] = finals[i][:-1] + "2"
|
560 |
+
return finals
|
561 |
+
|
562 |
+
def _yi_sandhi(self, word: str, finals: List[str]) -> List[str]:
|
563 |
+
# "一" in number sequences, e.g. 一零零, 二一零
|
564 |
+
if word.find("一") != -1 and all(
|
565 |
+
[item.isnumeric() for item in word if item != "一"]
|
566 |
+
):
|
567 |
+
return finals
|
568 |
+
# "一" between reduplication words shold be yi5, e.g. 看一看
|
569 |
+
elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
|
570 |
+
finals[1] = finals[1][:-1] + "5"
|
571 |
+
# when "一" is ordinal word, it should be yi1
|
572 |
+
elif word.startswith("第一"):
|
573 |
+
finals[1] = finals[1][:-1] + "1"
|
574 |
+
else:
|
575 |
+
for i, char in enumerate(word):
|
576 |
+
if char == "一" and i + 1 < len(word):
|
577 |
+
# "一" before tone4 should be yi2, e.g. 一段
|
578 |
+
if finals[i + 1][-1] == "4":
|
579 |
+
finals[i] = finals[i][:-1] + "2"
|
580 |
+
# "一" before non-tone4 should be yi4, e.g. 一天
|
581 |
+
else:
|
582 |
+
# "一" 后面如果是标点,还读一声
|
583 |
+
if word[i + 1] not in self.punc:
|
584 |
+
finals[i] = finals[i][:-1] + "4"
|
585 |
+
return finals
|
586 |
+
|
587 |
+
def _split_word(self, word: str) -> List[str]:
    """Split *word* into two parts around its shortest ``jieba.cut_for_search`` piece.

    If the shortest piece is a prefix of *word*, the result is
    ``[piece, remainder]``; otherwise it is ``[leading part, piece]``, where the
    leading part is *word* with ``len(piece)`` characters cut off the end.
    """
    # Stable sort: among equally short pieces, the first one produced wins.
    pieces = sorted(jieba.cut_for_search(word), key=len)
    shortest = pieces[0]
    if word.startswith(shortest):
        return [shortest, word[len(shortest) :]]
    return [word[: -len(shortest)], shortest]
|
599 |
+
|
600 |
+
def _three_sandhi(self, word: str, finals: List[str]) -> List[str]:
    """Apply third-tone rules, rewriting selected tone-3 finals to tone 2.

    Args:
        word: The word being processed; its length selects the rule set
            (2, 3 or 4 characters — other lengths are returned unchanged).
        finals: Finals ending in a tone digit; entries may be modified in place.

    Returns:
        The adjusted finals. For 3-character words that are not all tone 3 and
        for 4-character words this is a newly built list.
    """
    # Two tone-3 syllables: the first becomes tone 2.
    if len(word) == 2 and self._all_tone_three(finals):
        finals[0] = finals[0][:-1] + "2"
    elif len(word) == 3:
        # The split decides which syllables belong together.
        word_list = self._split_word(word)
        if self._all_tone_three(finals):
            # disyllabic + monosyllabic, e.g. 蒙古/包
            if len(word_list[0]) == 2:
                finals[0] = finals[0][:-1] + "2"
                finals[1] = finals[1][:-1] + "2"
            # monosyllabic + disyllabic, e.g. 纸/老虎
            elif len(word_list[0]) == 1:
                finals[1] = finals[1][:-1] + "2"
        else:
            # Slice the finals at the length of the first sub-word.
            finals_list = [finals[: len(word_list[0])], finals[len(word_list[0]) :]]
            if len(finals_list) == 2:
                for i, sub in enumerate(finals_list):
                    # e.g. 所有/人
                    if self._all_tone_three(sub) and len(sub) == 2:
                        finals_list[i][0] = finals_list[i][0][:-1] + "2"
                    # e.g. 好/喜欢
                    # Tone 3 at the end of the first part meets tone 3 at the start
                    # of the second part: the first part's last syllable becomes tone 2.
                    elif (
                        i == 1
                        and not self._all_tone_three(sub)
                        and finals_list[i][0][-1] == "3"
                        and finals_list[0][-1][-1] == "3"
                    ):
                        finals_list[0][-1] = finals_list[0][-1][:-1] + "2"
                    # Flatten the two slices back into one list.
                    finals = sum(finals_list, [])
    # split idiom into two words whose length is 2
    elif len(word) == 4:
        finals_list = [finals[:2], finals[2:]]
        finals = []
        for sub in finals_list:
            # Each half is treated like a 2-character word.
            if self._all_tone_three(sub):
                sub[0] = sub[0][:-1] + "2"
            finals += sub

    return finals
|
639 |
+
|
640 |
+
def _all_tone_three(self, finals: List[str]) -> bool:
|
641 |
+
return all(x[-1] == "3" for x in finals)
|
642 |
+
|
643 |
+
# merge "不" and the word behind it
|
644 |
+
# if don't merge, "不" sometimes appears alone according to jieba, which may occur sandhi error
|
645 |
+
def _merge_bu(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
646 |
+
new_seg = []
|
647 |
+
last_word = ""
|
648 |
+
for word, pos in seg:
|
649 |
+
if last_word == "不":
|
650 |
+
word = last_word + word
|
651 |
+
if word != "不":
|
652 |
+
new_seg.append((word, pos))
|
653 |
+
last_word = word[:]
|
654 |
+
if last_word == "不":
|
655 |
+
new_seg.append((last_word, "d"))
|
656 |
+
last_word = ""
|
657 |
+
return new_seg
|
658 |
+
|
659 |
+
# function 1: merge "一" and reduplication words in it's left and right, e.g. "听","一","听" ->"听一听"
|
660 |
+
# function 2: merge single "一" and the word behind it
|
661 |
+
# if don't merge, "一" sometimes appears alone according to jieba, which may occur sandhi error
|
662 |
+
# e.g.
|
663 |
+
# input seg: [('听', 'v'), ('一', 'm'), ('听', 'v')]
|
664 |
+
# output seg: [['听一听', 'v']]
|
665 |
+
def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
666 |
+
new_seg = []
|
667 |
+
# function 1
|
668 |
+
for i, (word, pos) in enumerate(seg):
|
669 |
+
if (
|
670 |
+
i - 1 >= 0
|
671 |
+
and word == "一"
|
672 |
+
and i + 1 < len(seg)
|
673 |
+
and seg[i - 1][0] == seg[i + 1][0]
|
674 |
+
and seg[i - 1][1] == "v"
|
675 |
+
and seg[i + 1][1] == "v"
|
676 |
+
):
|
677 |
+
new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0]
|
678 |
+
else:
|
679 |
+
if (
|
680 |
+
i - 2 >= 0
|
681 |
+
and seg[i - 1][0] == "一"
|
682 |
+
and seg[i - 2][0] == word
|
683 |
+
and pos == "v"
|
684 |
+
and seg[i - 2][1] == "v"
|
685 |
+
):
|
686 |
+
continue
|
687 |
+
else:
|
688 |
+
new_seg.append([word, pos])
|
689 |
+
seg = new_seg
|
690 |
+
new_seg = []
|
691 |
+
# function 2
|
692 |
+
for i, (word, pos) in enumerate(seg):
|
693 |
+
if new_seg and new_seg[-1][0] == "一":
|
694 |
+
new_seg[-1][0] = new_seg[-1][0] + word
|
695 |
+
else:
|
696 |
+
new_seg.append([word, pos])
|
697 |
+
return new_seg
|
698 |
+
|
699 |
+
# the first and the second words are all_tone_three
|
700 |
+
def _merge_continuous_three_tones(
|
701 |
+
self, seg: List[Tuple[str, str]]
|
702 |
+
) -> List[Tuple[str, str]]:
|
703 |
+
new_seg = []
|
704 |
+
sub_finals_list = [
|
705 |
+
lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
|
706 |
+
for (word, pos) in seg
|
707 |
+
]
|
708 |
+
assert len(sub_finals_list) == len(seg)
|
709 |
+
merge_last = [False] * len(seg)
|
710 |
+
for i, (word, pos) in enumerate(seg):
|
711 |
+
if (
|
712 |
+
i - 1 >= 0
|
713 |
+
and self._all_tone_three(sub_finals_list[i - 1])
|
714 |
+
and self._all_tone_three(sub_finals_list[i])
|
715 |
+
and not merge_last[i - 1]
|
716 |
+
):
|
717 |
+
# if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
|
718 |
+
if (
|
719 |
+
not self._is_reduplication(seg[i - 1][0])
|
720 |
+
and len(seg[i - 1][0]) + len(seg[i][0]) <= 3
|
721 |
+
):
|
722 |
+
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
723 |
+
merge_last[i] = True
|
724 |
+
else:
|
725 |
+
new_seg.append([word, pos])
|
726 |
+
else:
|
727 |
+
new_seg.append([word, pos])
|
728 |
+
|
729 |
+
return new_seg
|
730 |
+
|
731 |
+
def _is_reduplication(self, word: str) -> bool:
|
732 |
+
return len(word) == 2 and word[0] == word[1]
|
733 |
+
|
734 |
+
# the last char of first word and the first char of second word is tone_three
|
735 |
+
def _merge_continuous_three_tones_2(
|
736 |
+
self, seg: List[Tuple[str, str]]
|
737 |
+
) -> List[Tuple[str, str]]:
|
738 |
+
new_seg = []
|
739 |
+
sub_finals_list = [
|
740 |
+
lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
|
741 |
+
for (word, pos) in seg
|
742 |
+
]
|
743 |
+
assert len(sub_finals_list) == len(seg)
|
744 |
+
merge_last = [False] * len(seg)
|
745 |
+
for i, (word, pos) in enumerate(seg):
|
746 |
+
if (
|
747 |
+
i - 1 >= 0
|
748 |
+
and sub_finals_list[i - 1][-1][-1] == "3"
|
749 |
+
and sub_finals_list[i][0][-1] == "3"
|
750 |
+
and not merge_last[i - 1]
|
751 |
+
):
|
752 |
+
# if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
|
753 |
+
if (
|
754 |
+
not self._is_reduplication(seg[i - 1][0])
|
755 |
+
and len(seg[i - 1][0]) + len(seg[i][0]) <= 3
|
756 |
+
):
|
757 |
+
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
758 |
+
merge_last[i] = True
|
759 |
+
else:
|
760 |
+
new_seg.append([word, pos])
|
761 |
+
else:
|
762 |
+
new_seg.append([word, pos])
|
763 |
+
return new_seg
|
764 |
+
|
765 |
+
def _merge_er(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
766 |
+
new_seg = []
|
767 |
+
for i, (word, pos) in enumerate(seg):
|
768 |
+
if i - 1 >= 0 and word == "儿" and seg[i - 1][0] != "#":
|
769 |
+
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
770 |
+
else:
|
771 |
+
new_seg.append([word, pos])
|
772 |
+
return new_seg
|
773 |
+
|
774 |
+
def _merge_reduplication(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
775 |
+
new_seg = []
|
776 |
+
for i, (word, pos) in enumerate(seg):
|
777 |
+
if new_seg and word == new_seg[-1][0]:
|
778 |
+
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
|
779 |
+
else:
|
780 |
+
new_seg.append([word, pos])
|
781 |
+
return new_seg
|
782 |
+
|
783 |
+
def pre_merge_for_modify(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Run all pre-merge passes over a segmentation before tone modification.

    The passes run in this order: ``_merge_bu``, ``_merge_yi``,
    ``_merge_reduplication``, ``_merge_continuous_three_tones``,
    ``_merge_continuous_three_tones_2`` and ``_merge_er``. The ``_merge_yi``
    and the two three-tone passes are best-effort: if one of them raises, a
    message is printed and the segmentation from the previous pass is kept.

    Args:
        seg: Segmentation result as ``(word, pos)`` pairs.

    Returns:
        The merged segmentation.
    """
    seg = self._merge_bu(seg)
    # ``except Exception`` (not a bare ``except``) keeps the best-effort
    # behaviour while letting KeyboardInterrupt / SystemExit propagate.
    try:
        seg = self._merge_yi(seg)
    except Exception:
        print("_merge_yi failed")
    seg = self._merge_reduplication(seg)
    try:
        seg = self._merge_continuous_three_tones(seg)
    except Exception:
        print("_merge_continuous_three_tones failed")
    try:
        seg = self._merge_continuous_three_tones_2(seg)
    except Exception:
        print("_merge_continuous_three_tones_2 failed")

    seg = self._merge_er(seg)
    return seg
|
801 |
+
|
802 |
+
def modified_tone(self, word: str, pos: str, finals: List[str]) -> List[str]:
    """Apply the sandhi passes to the finals of one word, in a fixed order.

    Order: ``_bu_sandhi``, ``_yi_sandhi``, ``_neural_sandhi``,
    ``_three_sandhi``; each pass receives the output of the previous one.

    Args:
        word: The word being processed.
        pos: Part-of-speech tag of the word (used by ``_neural_sandhi``).
        finals: Finals of the word with tone digits.

    Returns:
        The finals after all four passes.
    """
    after_bu = self._bu_sandhi(word, finals)
    after_yi = self._yi_sandhi(word, after_bu)
    after_neutral = self._neural_sandhi(word, pos, after_yi)
    return self._three_sandhi(word, after_neutral)
|
GPT_SoVITS/text/zh_normalization/README.md
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Supported NSW (Non-Standard-Word) Normalization
|
2 |
+
|
3 |
+
|NSW type|raw|normalized|
|
4 |
+
|:--|:-|:-|
|
5 |
+
|serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九|
|
6 |
+
|cardinal|这块黄金重达324.75克<br>我们班的最高总分为583分|这块黄金重达三百二十四点七五克<br>我们班的最高总分为五百八十三分|
|
7 |
+
|numeric range |12\~23<br>-1.5\~2|十二到二十三<br>负一点五到二|
|
8 |
+
|date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日, 她弟弟出生于一九九五年三月一日|
|
9 |
+
|time|等会请在12:05请通知我|等会请在十二点零五分请通知我|
|
10 |
+
|temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度|
|
11 |
+
|fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票|
|
12 |
+
|percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨|
|
13 |
+
|money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五,三十四点五元,二十点一万|
|
14 |
+
|telephone|这是固话0421-33441122<br>这是手机+86 18544139121|这是固话零四二一三三四四一一二二<br>这是手机八六一八五四四一三九一二一|
|
15 |
+
## References
|
16 |
+
[Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files)
|
GPT_SoVITS/text/zh_normalization/__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
from text.zh_normalization.text_normlization import *
|
GPT_SoVITS/text/zh_normalization/char_convert.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Traditional and simplified Chinese conversion, a simplified character may correspond to multiple traditional characters.
|
16 |
+
"""
|
17 |
+
simplified_charcters = '制咖片型超声盘鉴定仔点他命书歌粉巾字帐恤手指记忆棒形转弯沟光○〇㐄㐅㐆㐌㐖毒㐜㐡㐤㐰㐺㑇㑳㒳㒸㔾㗂㗎㝵㞎㞙㞞以㢲㢴㤅㥁㥯㨗㫺㬎㮎㮚㮸㲋㲱㲾㳮涧㵪㶸㷖㷭㹢㹴犬㺢狓㺵碗㽮㿝䍃䔢䖟䖸䗈䗥䗪䝓射䥯䦉䯝鲃鱼䲔䳗鹅䵹鼄䶑一对应映射丁不识下儿子做二休世丘之貉并中台原则串为甚谓干净了百事无成八变五十些人得道鸡升天代如并来去个国政策劲幽灵在欧洲游荡接样萝卜坑侧化传价元论醇共再准刀两断切分耕耘收获钱货物向看旧就绪险刻千金动劳永逸匙零夜半卡通回复返影踪反常态口咬气句话同吐快吹周味呼诺呜品红锅哄而散起唱和问三知生熟团漆黑火糟堆场空块面塌糊涂尘染壁厢夔已足多情露水大早到晚夫妻当关万莫开失古恨套所料既往孔见提师要家主审寸阴难买斗牛小撮部阵局展身层巴掌帆风顺席地带过年计于春头载四季期被蛇怕井绳度愿式份弹顷深前律径心意念差愁孤行俱全房厅交遮打技长把抓死拿眼泪鼻涕钥锁折段抿拍即合扫排掬挥拨拥上入击洞掷揽改故辙败文值名斑方面旁族日秋餐隔雅里终父旦时晌会霎间晃暴寒曝更月望垠际朝夕本正经利杯羹东西板枝独秀根筋杆进条龙服务概模次函数又性程总付步脚印趋登毛拔呵氧氮碳决雌雄波未平派谎言流清楚白准溜烟潭有获闻是处降琴鹤甲病发可拾沙目然了直以相眨穿睹瞥瞬矢的解石鸟神教秉虔诚秘种窝蜂穷窍笑置笔苟勾销抹杀煞等奖箍节吃箭仇双雕诗筹箩筐系列纸级士官统丝毫挂维网尽线微吭响股脑胎脉承腔臂力致效资源址器举功投般说讲规贸易叶障着慎满皆输号木电池衣倾钟高低视仁觉醒览遗角银币触溃九鼎蔽抄出驷马追重语破贫洗贯走路安蹴至几蹶振跃役胆汗较辈轮辞赞退六连遍递边针血锤音错门思闪真倒项栽雾类保护川先惊乍体哄鳞爪鸣滴泡邻域党专鼓作齐炒丑烯亥克内酯冬加奴卯肝炎基尺梁街裤镐客宠庭巳汝昌烷玲磊糖肇酉醛啷青县韪良香骨鲷丂七集河市弦喜嘴张舌堵区工业姊妹星架构巧彩扭歪拼凑余热曜武州爷浮屠美乡老阶树荤素碎落能魄鳃鳗珠丄丅丆万俟丈尚摸母娘量管群亚虎必我堂令申件装伏位博侠义界表女墟台戏臭皮匠胜诸葛亮赛顶倍催请运算包立叉戟离疫苗土史志演围揭瓦晒夷姑婆帝村宝烂尖杉碱屉桌山岔岛由纪峡坝库镇废从德后拗汤治旬食明昧曹朋友框栏极权幂曲归依猫民氟硼氯磷铁江侗自旅法司洋浦梅园温暖湾焦班幸用田略番叠皇炮捶硝苯酸腺苷棱草镜穗跳远索锦纲聚氰胺联店胚膲爱色堇紫罗兰芝茶饭菱云虫藏藩乱叛苏亲债凳学座恐恋柱测肌腹衩锥系貂企乌跪叩军车农题迭都甘油屯奏键短阿姨陪姐只顾茅庐槽驾魂鲜鹿页其菜单乘任供势午齿汉组织吊调泻唇坡城报坟外夸将尉建筑岸岗公床扬新剑升杭林栗校楼标款汽社浣海商馆剧院钢华港机械广媒环球融第医科证券综财乐育游涨犹岭疏瘾睑确兵领导缴肢膛船艾瑟尔苍蔡虞效衫覆访诉课谕议轨述野钩限敌鞋颌颔颚饶首龈站例修凡划垂届属崽颏厨拜挫摆放旋削棋榻槛礼沉注滑营狱画确仪聘花葬诏员跌辖周达酒锚闸陷陆雨雪飞威丌于丹久乏予理评产亢卑亦乎舞己悲矩圆词害志但住佞佳便俗信票案幅翁倦伦假偏倚斜亏鬼敲停备伤脾胃仅此像俭匮免宜穴焉戴兼容许冻伯仲负彼昼皂轩轾实刊划颠卫战哥比省非好黄饰别拘束掩奶睬选择摇扰烦苦枚写协厌及格受欢迎约只估侵犯割状告或缺抗拒挽撤救药喻磨灭端倪少逆逾越避靠适吉誉吝玉含延咎歹听啻渊善谋均匀堪忍够太惹妙妥妨孕症孝术室完纳推冠积宣疑辩栗碴称屈挠屑干涉衡待很忙恶忿怎么怠急耻恭息悦惑惜惟想愉愧怍慌愤启懂懈怀材才紧招认扣抵拉舍也罢插揣冒搭撞南墙扩核支攻敢雷攀敬里吗需景智暇曾罪遇朽枉止况竞争辱求愈渝溶济左右袒困补爽特寂寞示弱找谢畏强疾徐痛痒冤符眠睦瞅董何厚云措活疲羞者轻玻璃祥兆禁���稂莠稳佛换答简结果盟绝缕途给谈否羁翼耐肖胫毋宁兴舒若菲莱痕迹窠臼虚衰脸兔撒鹰棺范该详讳抬泰让须眉象众赀账费灰赖奇虑训辍辨菽麦辛近送透逞徒速续逮捕遂遑违逊斧钺艰醉锈随观弃显饱脂肪使丏丐帮丒且慢末丕替桃宗王尊凉爵各图屋脊粮署录坛吾禄职胄袭君厦丗北壑桐疹损逢陵鹬丙寅戌氨腈唑纶辰酮脱氢酶醚丞丢现掉纱帽弄扯炮碗丠両丣坐存激肩臻蒂莲悖序驱丨丩丫挺杈髻鬟细介俄伊犁京尼布订普渡央委监察检查剂圈设警队斯督剩震境航舶革防托播促质版蝾螈锋研艺历残消频谱精密制造陲邮候埔坚压坜凹汇执府究邦俘摄寮彬狼岳肺肿庸英讯诊埋粒胞括控码韩暑枪枢砥澳哇牟寿甸钻探篇签缀缝继耳肯照妇埃悬璧轴柜台辣搁浅邪跑纤阮阳私囊魔丮丰姿采丱烧丳丵丶丷丸参寨朗桂瑞砂衷霞貌凤仆舰因嫌宰峰干络牌持旨祭祷簿编罚宾办丼丿乀乂乃乄仰慕盛旷留考验阔乆乇么丑麽乊湖燃乑乒乓乕乖僻忤戾离谬迕乗危肥劫除隙浪婿乙炔肠酰吡咯盐乚乛乜嘢卿玄宫尾狐龟塔嶷兄弟泉章霄钉耙乞扎哀怜恕讨乢乣乤乥乧乨乩童乪乫乭乳晕汁液瑶浆牙癌突窦罩腐胶猪酪蛋糕菌瘤乴乵乶乷乸乹乺乼乾俸冰嘉哕嚎坤妈尸垒旱枯涸俐渴潮涩煸豆燥爹瘦瘪癣瞪袋脆姜贝隆馏乿亀亁叫咕攘扔搞男砸窜蓬麻亃亄亅却亇迟典今临繁累卵奉婚聪躬巨与迁添裂副宿岁怪恶尕仑愣杆硅硫钛铀锰芑杂异钠砷胂磺琥珀舱棍簧胡茬盗浩盆贩
郎腿亍洪亐互欠助勉惠操斥诿系户译亓墓碑刑铃卅渠缤纷斗米旗宪钒灯徽瘟祖拳福谷丰脏腑绑肉腌苓蕴桥铺霸颜闹判喷冈底蛙陉矿亖亘亜罕们娜桑那努哈喀弗烈曼松森杜氏杯奥琛敦戊穆圣裔汇薛孙亟亡佚虏羊牢奋释卷卸契媾感额睫缠谊趾塞挤纽阻还配驰庄亨洛祚亪享津沪畿郊慈菴枇杷膏亭阁锃丽亳亶亹诛初责翻疯偶杰丛稠妖拖寰居吸授慧蜗吞壮魅狗矛盾益渣患忧稀描猿梦暂涯畜祸缘沸搜引擎臣横纭谁混援蒸兽狮税剖亻亼亽亡什献刹邡么仂仃仄仆富怨仈仉毕昔晨壳绍仍仏仒仕宦仗欺恃腰叹叹炬梓讫施仙后琼逝仚仝仞仟悔仡佬偿填泊拓扑簇羔购顿钦佩发棻阃驭养亿儆尤借帧赈凌叙帖李柔刚沃眦睚戒讹取飨读仨仫仮著泳卧躺韶夏裁仳仵唯贤凭钓诞仿似宋佛讽伀硕盼鹅伄儅伈伉俪柯始娃迈戈坦堡帕茨萨庙玛莉莎藤霍姆伋伍奢胥廷芳豪伎俩侍汛勒希羲雏伐憩整谟闲闲伕伙伴颐伜伝伢叔恒兹恩翰伱伲侣伶俜悧鼬伸懒缩喇叭伹伺伻伽倻辐伾似佃伫布乔妮墨佉卢佌贷劣廉昂档浓矮伞洼缓耗胸谷迷挡率龋宅沫舍疗佐贰佑占优据铧尝呢须鲁晓佗佘余坪寺瓜铳僧蒙芒陀龛哼呕坊奸孽弊揖祟茧缚誓贼佝偻瞀佟你夺赶佡佢佣佤佧贾佪佫佯佰佱洁绩酿肴佴卷佶佷佸佹佺佻佼佽佾具唤窘坏娱怒慨硬习惯聋膨胀蔓骇贵痹侀侁侂侃侄侅鸿燕侇侈糜靡侉侌妾侏儒仓鼠侐侑侔仑侘侚链侜偎傍钴循柳葫芦附価侮骂蔑侯岩截蚀局贴壶嬛宴捷携桶笺酌俣狭膝狄俅俉俊俏俎俑俓俔谚俚俛黎健呈固墒增守康箱湿祐镖镳杠盒靖膜龄俞豹猎噪孚封札筒托衍鸽剪撰稿炼厂禊练缮葺俯瞰撑冲效俳俴俵俶俷俺备俾伥倂倅储卒惶敷猝逃颉蓄崇隐倌倏忽刺蜡烛噍嚼坍扁抽毙葱楣灌灶粪背薮卖赔闭霉腾倓倔幸倘倜傥倝借箸挹浇阅倡狂倢倣値倥偬倨傲倩匡嗣冲柝珍倬倭寇猩倮倶倷倹勤赞偁偃充伪吏嗓寐惺扮拱芫茜藉虢钞偈伟晶偌宕距析滤殿疼瘫注颇偓偕鸭歇滞偝偟偢忘怡旺偨偩逼偫偭偯偰偱偲侦缉蹄偷减惰漏窥窃偸偺迹傀儡傅傈僳骂篱傎奎琳迪叟芭傒傔傕伧悉荒傜傞傢傣芽逼佣婢傮睨寄檄诵谣颂伛担辜弓惨蒿悼疤傺傻屄臆巢泄箧羡盖轧颓傿㑩僄僇佥僊働僎侨僔僖僚僝伪僣僤侥僦猴偾僩僬僭僮僯僰雇僵殖签静僾僿征陇儁侬儃儇侩朴薄儊儋儌儍傧儓俦侪拟尽儜儞儤儦儩汰哉寡渥裕酷儭儱罐儳儵儹傩俨儽兀臬臲鹫允勋勋宙宵帅憝彝谐嫂阋畅沛溢盈饥赫凶悍狠猛顽愚妣斩秦遣鞭耀敏荣槃泽爆碟磁秃缆辉霁卤朵娄孜烽酱勃汀箕裘钳耶蒙蕾彻兑软遭黜兎児韵媳爸兕觥兖兙兛兜售鍪肚兝兞兟兡兢兣樽殓涅睡禀籍赘泌啡肽奸幕涵涝熵疚眷稃衬讧赴焕椒歼植跏没试误猜栖窗肋袖颊兪卦撇胡岐廓轿疸枫茴珑厕秩募勺吨寓斤历亩迫筷厘最淫螺韬兮宽匪筛襄赢轭复兲诈刃堰戎痞蚁饷它冀铸冂冃円冇冉册嫁厉砺竭醮冏牧冑冓冔冕冖冗冘冞冢窄抑诬冥冫烘菇蛰冷凝坨橇淇淋炭饼砖碛窖醋雕雹霜冱冶炉艳嘲峻滩淡漠煖飕饮冼冽凃凄怆梗凅凇净凊凋敝蒙凔凛遵汞脢凞几凢処凰凯凵凶焰凸折刷纹预丧喽奔巡榜殡芙蓉租笼辑鞘萃凼锯镬刁蛮刂娩崩批拆摊掰蘖骤歧颗秒袂赃勿嘱忌磋琢肤刈羽刎讼戮舂桨艇刓刖霹雳刜创犊刡恙墅帜筵致劫劫刨昏默攸尿欲熏润薰圭删刮痧铲刱刲刳刴刵踏磅戳柏槐绣芹苋猬舟铭鹄鹜劫剁剃辫刭锉履铅克剌姻咽哨廊掠桅沿召瞻翅赵卜渺茫郭剒剔剕沥剚愎毅讷才剜剥啄采剞剟剡剣剤䌽剐肾驶黏剰袍剀紊铲剸剺剽剿劁劂札劈啪柴扳啦刘奭姥夼昫涓熙禅禹锡翔雁鹗刽刿弩柄蜻蛉劒劓劖劘劙澜篑赏矶釜晋甜薪逐劦熔纣虐赤囚劬劭労劵效劻劼劾峭艮勅勇励勍勐腊脖庞漫饲荡粥辄勖勗勘骄馁碌泮雇捐竹骑殊阱绩朴恳谨剿勧勩勯勰劢勋勷劝惩慰诫谏勹芡践阑匁庇拯粟扎袱裹饺匆遽匈匉匊匋匍匐茎匏匕妆痰脓蛹斋苑烤蹈塘羌熊阀螳螂疆碚竿纬荷茵邙魏匚匜匝匟扶稷匣匦拢匸匹耦匽匾匿卂叮疮禧轸堤棚迢钧炼卄卆遐卉瓷盲瓶当胱腱裸卋卌卍卐怯污贱鄙龌龊陋卓溪唐梯渔陈枣泥漳浔涧梨芬谯赡辕迦郑単驴弈洽鳌卛占筮卝卞卟吩啉屎翠厄卣卨卪卬卮榫袄玺绶钮蚤惧殆笃耸卲帘帙绕恤卼卽厂厎厓厔厖厗奚厘厍厜厝谅厕厤厥厪腻孢厮厰厳厣厹厺粕垢芜菁厼厾叁悟茸薯叄吵笄悌哺讥坫垄弧芯杠潜婴刍袁诘贪谍煽馈驳収岳缔灾贿骗叚叡吻拦蘑蜜诀燧玩砚筝椎蔺铜逗骊另觅叨唠谒杵姓喊嚷嚣咚咛塑寻恼憎擦只泣渗蝠叱吒咄咤喝籀黛舵舷叵叶铎懿昭穰苴辽叻叼吁堑嫖赌瞧爬众抒吅吆夥卺橡涤抱纵摩郡唁坠扇篮膀袜颈吋忾谘酬哭妓媛暗表缰迩妃羿絮蕃浑拐葵暮隅吔吖啶嗪戚吜啬噬咽吟哦咏吠吧唧嗒咐吪隽咀征燐苞茹钙哧吮吰吱嘎吲哚吴栋娇窟孟箫忠晗淞阖闾趼宇呐睛嘘拂捧疵熄竽笛糠吼吽呀吕韦蒙呃呆笨呇贡呉罄呋喃呎呏呔呠呡痴呣呤呦呧瑛眩扒晬淑姬瑜璇鹃呪呫哔嚅嗫呬呯呰呱呲咧噌钝呴呶呷呸呺呻哱咻啸噜吁坎坷逻呿咁咂咆哮咇咈咋蟹煦珅蔼咍咑咒诅咔哒嚓咾哝哩喱咗咠咡咢咣咥咦咨嗟询咩咪咫啮啮咭咮咱咲咳呛嗽咴啕咸咹咺呙喉咿婉恸悯赋矜绿茗蓝哂抢瞒哆嗦啰噻啾滨彗哋哌哎唷哟哏哐哞哢哤哪里哫啼喘哰哲萎蚌哳咩哽哿呗唅唆唈唉唎唏哗尧棣殇璜睿肃唔睇唕吣唞唣喳唪唬唰喏唲唳唵嘛唶唸唹唻唼唾唿啁啃鹦鹉啅埠栈榷祺铺鞅飙啊啍啎啐啓啕啖啗啜哑祈啢衔啤啥啫啱啲啵啺饥啽噶昆沁喁喂喆裙喈咙喋喌喎喑喒喓喔粗喙幛庆滋鹊喟喣喤喥喦喧骚喨喩梆吃葡萄喭驼挑吓碰枞瓣纯疱
藻趟铬喵営喹喺喼喿嗀嗃嗄嗅嗈嗉嗊嗍嗐嗑嗔诟嗕嗖嗙嗛嗜痂癖嗝嗡嗤嗥嗨唢嗬嗯嗰嗲嗵叽嗷嗹嗾嗿嘀嘁嘂嘅惋嘈峪禾荫啀嘌嘏嘐嘒啯啧嘚唛嘞嘟囔嘣嘥嘦嘧嘬嘭这谑严敞馋松哓嘶嗥呒虾嘹嘻啴嘿噀噂噅噇噉噎噏噔噗噘噙噚咝噞噢噤蝉皿噩噫噭嗳噱哙噳嚏涌洒欲巫霏噷噼嚃嚄嚆抖哜尝嚔苏嚚嚜嚞嚟呖嚬嚭嚮嚯亸喾饬按竣苛嚵嘤啭冁呓膪谦囍囒囓囗囘萧酚飘溅谛囝溯眸纥銮鹘囟殉囡団囤囥囧囨囱囫囵囬囮囯囲図囶囷囸囹圄圉拟囻囿圀圂圃圊粹蠹赦圌垦圏滚鲱凿枘圕圛圜圞坯埂壤骸炕祠窑豚绅魠鲮鳖圧握圩圪垯圬圮圯炸岬幔毯祇窨菩溉圳圴圻圾坂坆沾坋坌舛壈昆垫墩椅坒坓坩埚坭坰坱坳坴坵坻坼杨挣涎帘垃垈垌垍垓垔垕垗垚垛垝垣垞垟垤垧垮垵垺垾垿埀畔埄埆埇埈埌殃隍埏埒埕埗埜垭埤埦埧埭埯埰埲埳埴埵埶绋埸培怖桩础辅埼埽堀诃侄庑堃堄摧磐贞韧砌堈堉垩堋堌堍堎垴堙堞堠礁堧堨舆堭堮蜓摘堲堳堽堿塁塄塈煤茔棵塍垲埘塓绸塕鸦沽虱塙冢塝缪塡坞埙塥塩塬塱场螨塼塽塾塿墀墁墈墉墐夯増毁墝墠墦渍钵墫墬堕墰墺墙橱壅壆壊壌壎壒榨蒜壔壕壖圹垆壜壝垅壡壬壭壱売壴壹壻壸寝壿夂夅夆変夊夌漱邑夓腕泄甥御骼夗夘夙衮瑙妊娠醣枭珊莺鹭戗幻魇夤蹀秘擂鸫姚宛闺屿庾挞拇賛蛤裨菠氅漓捞湄蚊霆鲨箐篆篷荆肆舅荔鲆巷惭骰辟邱镕镰阪漂烩鲵鲽鳄鸨胪鹏妒峨谭枰晏玑癸祝秤竺牡籁恢罡蝼蝎赐绒御梭夬夭砣榆怙枕夶夹馅奄崛葩谲奈贺祀赠奌奂奓奕䜣詝奘奜奠奡奣陶奨奁魁奫奬奰娲孩贬隶酥宄狡猾她姹嫣妁毡荼皋膻蝇嫔妄妍嫉媚娆妗趣妚妞妤碍妬娅妯娌妲妳妵妺姁姅姉姗姒姘姙姜姝姞姣姤姧姫姮娥姱姸姺姽婀娀诱慑胁娉婷娑娓娟娣娭娯娵娶娸娼婊婐婕婞婤婥溪孺婧婪婬婹婺婼婽媁媄媊媕媞媟媠媢媬媮妫媲媵媸媺媻媪眯媿嫄嫈袅嫏嫕妪嫘嫚嫜嫠嫡嫦嫩嫪毐嫫嫬嫰妩嫺娴嫽嫿妫嬃嬅嬉耍婵痴艳嬔嬖嬗嫱袅嫒嬢嬷嬦嬬嬭幼嬲嬴婶嬹嬾嬿孀娘孅娈孏曰癫屏孑孓雀孖斟篓谜摺孛矻鸠崮轲祜鸾孥邈毓棠膑孬孭孰孱孳孵泛罔衔孻孪宀宁冗拙株薇掣抚琪瓿榴谧弥宊濂祁瑕宍宏碁宓邸谳実潢町宥宧宨宬徵崎骏掖阙臊煮禽蚕宸豫寀寁寥寃檐庶寎暄碜寔寖寘寙寛寠苫寤肘洱滥蒗陕核寪弘绰螽宝擅疙瘩晷対檐専尃尅赎绌缭畴衅尌峙醌襟痲碧屁昊槌淘恵瀑牝畑莓缸羚觑蔻脏躁尔尓锐尗尙尜尟尢��尨尪尬尭尰擒尲尶尴尸尹潽蠖蛾尻扣梢蚴鳍脬蹲屇屌蚵屐屃挪屖屘屙屛屝屡屣峦嶂岩舄屧屦屩屪屃屮戍驻钾崖嵛巅旮旯楂榄榉芋茱萸靛麓屴屹屺屼岀岊岌岍阜岑彭巩岒岝岢岚岣岧岨岫岱岵岷峁峇峋峒峓峞峠嵋峨峰峱岘峹峿崀崁崆祯崋崌崃岖昆崒崔嵬巍萤颢崚崞崟崠峥巆崤崦崧殂岽崱崳崴崶崿嵂嵇嵊泗嵌嵎嵒嵓岁嵙嵞嵡嵩嵫嵯嵴嵼嵾嵝崭崭晴嶋嶌嶒嶓嵚崂嶙嶝嶞峤嶡嶢峄嶨嶭嶮嶰嶲岙嵘巂巃巇巉岿巌巓巘巛滇芎巟巠弋回巣巤炊擘蜥蟒蛊觋巰蜀彦淖杏茂甫楞巻巽帼巿帛斐鲫蕊帑帔帗帚琉汶帟帡帣帨裙帯帰帷帹暆帏幄帮幋幌幏帻幙帮幞幠幡幢幦幨幩幪帱幭幯幰遥蹉跎馀庚鉴幵幷稚邃庀庁広庄庈庉笠庋跋庖牺庠庤庥鲸庬庱庳庴庵馨衢庹庿廃厩廆廋廌廎廏廐廑廒荫廖廛厮搏锣廞弛袤廥廧廨廪廱绵踵髓廸迫瓯邺廻廼廾廿躔弁皱弇弌弍弎弐弑吊诡憾荐弝弢弣弤弨弭弮弰弪霖繇焘斌旭溥骞弶弸弼弾彀彄别累纠强彔彖彘彟彟陌彤贻彧绘虹彪炳雕蔚鸥彰瘅彲彳彴仿彷徉徨彸彽踩敛旆徂徇徊渭畲铉裼従筌徘徙徜徕膳苏萌渐徬徭醺徯徳徴潘徻徼忀瘁胖燎怦悸颤扉犀澎湃砰恍惚绞隘忉惮挨饿忐忑忒忖応忝忞耿忡忪忭忮忱忸怩忻悠懑怏遏怔怗怚怛怞怼黍讶怫怭懦怱怲恍怵惕怸怹恁恂恇恉恌恏恒恓恔恘恚恛恝恞恟恠恣恧眄恪恫恬澹恰恿悀悁悃悄悆悊悐悒晦悚悛悜悝悤您悩悪悮悰悱凄恻德悴怅惘闷悻悾惄愫钟蒐惆惇惌惎惏惓惔惙惛耄惝疟浊恿惦德恽惴蠢惸拈愀愃愆愈愊愍愐愑愒愓愔愕恪氓蠢騃昵惬赧悫愬愮愯恺愼慁恿慅慆慇霭慉慊愠慝慥怄怂慬慱悭慴慵慷戚焚憀灼郁憃惫憋憍眺捏轼愦憔憖憙憧憬憨憪憭怃憯憷憸憹憺懃懅懆邀懊懋怿懔懐懞懠懤懥恹懫懮懰懱毖懵遁梁雍忏懽戁戄戆戉戋戕戛戝戛戠戡戢戣戤戥戦戬戭戯轰戱披菊牖戸戹戺戻卯戽锹扂楔扃扆扈扊杖牵绢铐镯赉扐搂搅烊盹瞌跟趸镲靶鼾払扗玫腮扛扞扠扡扢盔押扤扦扱罾揄绥鞍郤窾扻扼扽抃抆抈抉抌抏瞎抔缳缢擞抜拗択抨摔歉蹿牾抶抻搐泵菸拃拄拊髀抛拌脯拎拏拑擢秧沓曳挛迂拚拝拠拡拫拭拮踢拴拶拷攒拽掇芥橐簪摹疔挈瓢骥捺蹻挌挍挎挐拣挓挖掘浚挙揍聩挲挶挟挿捂捃捄捅捆捉捋胳膊揎捌捍捎躯蛛捗捘捙捜捥捩扪捭据捱捻捼捽掀掂抡臀膘掊掎掏掐笙掔掗掞棉芍掤搪阐掫掮掯揉掱掲掽掾揃揅揆搓揌诨揕揗揘揜揝揞揠揥揩揪揫橥遒麈揰揲揵揶揸背揺搆搉搊搋搌搎搔搕撼橹捣搘搠搡搢搣搤搥搦搧搨搬楦裢讪赸掏搰搲搳搴揾搷搽搾搿摀摁摂摃摎掴摒摓跤摙摛掼摞摠摦喉羯摭摮挚摰摲抠摴抟摷掺摽撂撃撅稻撊撋挦锏泼撕撙撚㧑挢撢掸撦撅撩撬撱朔揿蚍蜉挝捡擀掳闯擉缶觚擐擕擖擗擡擣擤澡腚擧擨擩擫擭摈拧撷擸撸擽擿攃摅撵攉攥攐攓撄搀撺每攩攫辔澄攮攰攲攴轶攷砭讦攽碘敁敃敇敉叙敎筏敔敕敖闰诲敜煌敧敪敳敹敺敻敿斁衽斄牒绉诌斉斎斓鹑谰驳鳢斒筲斛斝斞斠斡斢斨斫斮晾沂潟颖绛邵斲斸釳於琅斾斿旀旗旃旄涡旌旎旐旒旓旖旛旝旟旡旣浴旰獭魃旴时旻旼旽昀昃昄昇昉晰躲澈熹皎皓矾昑昕昜昝
昞昡昤晖笋昦昨是昱昳昴昶昺昻晁蹇隧蔬髦晄晅晒晛晜晞晟晡晢晤晥曦晩萘莹顗晿暁暋暌暍暐暔暕煅旸暝暠暡曚暦暨暪朦胧昵暲殄冯暵暸暹暻暾曀晔昙曈曌曏曐暧曘曙曛叠昽曩骆曱甴肱曷牍禺锟曽沧耽朁朅朆杪栓夸竟粘绦朊膺朏朐朓朕朘朙瞄觐溘饔飧朠朢朣栅椆淀虱朩朮朰朱炆璋钰炽鹮朳槿朵朾朿杅杇杌陧欣钊湛漼楷瀍煜玟缨翱肇舜贽适逵杓杕杗杙荀蘅杝杞脩珓筊杰榔狍閦颦缅莞杲杳眇杴杶杸杻杼枋枌枒枓衾葄翘纾逋枙狸桠枟槁枲枳枴枵枷枸橼枹枻柁柂柃柅柈柊柎某柑橘柒柘柙柚柜柞栎柟柢柣柤柩柬柮柰柲橙柶柷柸柺査柿栃栄栒栔栘栝栟柏栩栫栭栱栲栳栴檀栵栻桀骜桁镁桄桉桋桎梏椹葚桓桔桕桜桟桫椤桭杯桯桲桴桷桹湘溟梃梊梍梐潼栀枧梜梠梡梣梧梩梱梲梳梴梵梹棁棃樱棐棑棕榈簑绷蓑枨棘棜棨棩棪棫棬棯棰棱棳棸棹椁棼碗椄苕椈椊椋椌椐椑椓椗検椤椪椰椳椴椵椷椸椽椿楀匾楅篪楋楍楎楗楘楙楛楝楟楠楢楥桢楩楪楫楬楮楯楰梅楸楹楻楽榀榃榊榎槺榕榖榘榛狉莽搒笞榠榡榤榥榦榧杩榭榰榱梿霰榼榾桤槊闩槎槑槔槖様槜槢槥椠槪槭椮槱槲槻槼槾樆樊樏樑樕樗樘樛樟樠樧樨権樲樴樵猢狲桦樻罍樾樿橁橄橆桡笥龠橕橚橛辆椭橤橧竖膈跨橾橿檩檃檇柽檍檎檑檖檗桧槚檠樯檨檫檬梼槟檴檵柠棹櫆櫌栉櫜椟櫡槠栌枥榇栊櫹棂茄櫽欀欂欃欐欑栾欙棂溴欨欬欱欵欶欷歔欸欹欻欼欿歁歃歆艎歈歊莳蝶歓歕歘歙歛歜欤歠蹦诠镶蹒跚升陟歩歮歯歰歳歴璞歺瞑歾殁夭殈殍殑殗殜殙殛殒殢殣殥殪殚僵殰殳荃殷殸殹蛟殻肴谤殴毈毉喂毎���蕈毗毘毚茛邓毧毬毳毷毹毽毾毵牦氄氆靴氉氊氇氍氐聊氕氖気氘氙氚氛氜氝氡汹焊痉氤氲氥氦铝锌氪烃氩铵痤汪浒漉痘盂碾菖蒲蕹蛭螅氵冰氹氺氽烫氾氿渚汆汊汋汍汎汏汐汔汕褟汙汚汜蓠沼秽蔑汧汨汩汭汲汳汴堤汾沄沅沆瀣沇沈葆浸沦湎溺痼疴沌沍沏沐沔沕沘浜畹砾沚沢沬沭沮沰沱灢沴沷籽沺烹濡洄泂肛泅泆涌肓泐泑泒泓泔泖泙泚泜泝泠漩馍涛粼泞藓鳅泩泫泭泯铢泱泲洇洊泾琵琶荽蓟箔洌洎洏洑潄濯洙洚洟洢洣洧洨洩痢滔洫洮洳洴洵洸洹洺洼洿淌蜚浄浉浙赣渫浠浡浤浥淼瀚浬浭翩萍浯浰蜃淀苔蛞蝓蜇螵蛸煲鲤浃浼浽溦涂涊涐涑涒涔滂莅涘涙涪涫涬涮涴涶涷涿淄淅淆淊凄黯淓淙涟淜淝淟淠淢淤渌淦淩猥藿亵淬淮淯淰淳诣涞纺淸淹炖癯绮渇済渉渋渓渕涣渟渢滓渤澥渧渨渮渰渲渶渼湅湉湋湍湑湓湔黔湜湝浈湟湢湣湩湫湮麟湱湲湴涅満沩溍溎溏溛舐漭溠溤溧驯溮溱溲溳溵溷溻溼溽溾滁滃滉滊荥滏稽滕滘汇滝滫滮羼耷卤滹浐煎漈漊漎绎漕漖漘漙沤漜漪漾漥漦漯漰溆漶漷濞潀颍潎潏潕潗潚潝潞潠潦祉疡潲潵滗潸潺潾涠澁澂澃澉澌澍澐澒澔澙渑澣澦澧澨澫澬浍澰澴澶澼熏郁濆濇濈濉濊貊濔疣濜濠濩觞浚濮盥潍濲泺瀁滢渎渖瀌浏瀒瀔濒泸瀛潇潆瀡潴泷濑瀬弥潋瀳瀵瀹瀺瀼沣滠灉灋灒漓灖灏灞灠滦灥灨滟灪蜴灮烬獴灴灸灺炁炅鱿炗炘炙炤炫疽烙钎炯炰炱炲炴炷毁炻烀烋瘴鲳烓烔焙烜烝烳饪烺焃焄耆焌焐焓焗焜焞焠焢焮焯焱焼煁煃煆煇煊熠煍熬煐炜煕暖熏硷霾煚煝煟煠茕矸煨琐炀萁煳煺煻熀熅熇熉罴荧穹炝熘熛熜稔谙烁熤熨熯熰眶蚂颎熳熸熿燀烨燂燄盏燊燋燏燔隼燖焖燠燡灿燨燮燹燻燽燿爇爊爓爚爝爟爨蟾爯爰为爻丬爿牀牁牂牄牋窗牏牓窗釉牚腩蒡虻牠虽蛎牣牤牮牯牲牳牴牷牸牼绊牿靬犂犄犆犇犉犍犎犒荦犗犛犟犠犨犩犪犮犰狳犴犵犺狁甩狃狆狎狒獾狘狙黠狨狩狫狴狷狺狻豕狈蜘猁猇猈猊猋猓猖獗猗猘狰狞犸猞猟獕猭猱猲猳猷猸猹猺玃獀獃獉獍獏獐獒毙獙獚獜獝獞獠獢獣獧鼇蹊狯猃獬豸狝獯鬻獳犷猕猡玁菟玅玆玈珉糁禛郅玍玎玓瓅玔玕玖玗玘玞玠玡玢玤玥玦珏瑰玭玳瑁玶玷玹玼珂珇珈瑚珌馐馔珔珖珙珛珞珡珣珥珧珩珪佩珶珷珺珽琀琁陨玡琇琖琚琠琤琦琨琫琬琭琮琯琰琱琲琅琴珐珲瑀瑂瑄瑉玮瑑瑔瑗瑢瑭瑱瑲瑳瑽瑾瑿璀璨璁璅璆璈琏璊璐璘璚璝璟璠璡璥瑷璩璪璫璯璲玙璸璺璿瓀璎瓖瓘瓒瓛脐瓞瓠瓤瓧瓩瓮瓰瓱瓴瓸瓻瓼甀甁甃甄甇甋甍甎甏甑甒甓甔瓮甖甗饴蔗甙诧钜粱盎锈团甡褥産甪甬甭甮宁铠甹甽甾甿畀畁畇畈畊畋畎畓畚畛畟鄂畤畦畧荻畯畳畵畷畸畽畾疃叠疋疍疎箪疐疒疕疘疝疢疥疧疳疶疿痁痄痊痌痍痏痐痒痔痗瘢痚痠痡痣痦痩痭痯痱痳痵痻痿瘀痖瘃瘈瘉瘊瘌瘏瘐痪瘕瘖瘙瘚瘛疭瘜瘝瘗瘠瘥瘨瘭瘆瘯瘰疬瘳疠瘵瘸瘺瘘瘼癃痨痫癈癎癐癔癙癜癠疖症癞蟆癪瘿痈発踔绀蔫酵皙砬砒翎翳蔹钨镴皑鹎驹暨粤褶皀皁荚皃镈皈皌皋皒朱皕皖皘皜皝皞皤皦皨皪皫皭糙绽皴皲皻皽盅盋碗盍盚盝踞盦盩秋千盬盭眦睁瞤盯盱眙裰盵盻睐眂眅眈眊県眑眕眚眛眞眢眣眭眳眴眵眹瞓眽郛睃睅睆睊睍睎困睒睖睙睟睠睢睥睪睾睯睽睾眯瞈瞋瞍逛瞏瞕瞖眍䁖瞟瞠瞢瞫瞭瞳瞵瞷瞹瞽阇瞿眬矉矍铄矔矗矙瞩矞矟矠矣矧矬矫矰矱硪碇磙罅舫阡、矼矽礓砃砅砆砉砍砑砕砝砟砠砢砦砧砩砫砮砳艏砵砹砼硇硌硍硎硏硐硒硜硖砗磲茚钡硭硻硾碃碉碏碣碓碔碞碡碪碫碬砀碯碲砜碻礴磈磉磎硙磔磕磖磛磟磠磡磤磥蹭磪磬磴磵磹磻硗礀硚礅礌礐礚礜礞礤礧礮砻礲礵礽礿祂祄祅祆禳祊祍祏祓祔祕祗祘祛祧祫祲祻祼饵脔锢禂禇禋祦禔祎隋禖禘禚禜禝禠祃禢禤禥禨禫祢禴禸秆秈秊闱飒秋秏秕笈蘵赁秠秣秪秫秬秭秷秸稊稌稍稑稗稙稛稞稬秸稲稹稼颡稿穂穄穇穈穉穋稣贮穏穜穟秾穑穣穤穧穨穭穮穵穸窿阒窀窂窅窆窈窕窊窋窌窒窗窔窞窣窬黩蹙
窑窳窴窵窭窸窗竁竃竈竑竜并竦竖篦篾笆鲛竾笉笊笎笏笐靥笓笤箓笪笫笭笮笰笱笲笳笵笸笻筀筅筇筈筎筑筘筠筤筥筦笕筒筭箸筰筱筳筴宴筸箂个箊箎箑箒箘箙箛箜篌箝箠箬镞箯箴箾篁筼筜篘篙篚篛篜篝篟篠篡篢篥篧篨篭篰篲筚篴篶篹篼箦簁簃簆簉簋簌簏簜簟簠簥簦簨簬簰簸簻籊藤籒籓籔签籚篯箨籣籥籧笾簖籫籯芾麴籵籸籹籼粁秕粋粑粔粝粛粞粢粧粨粲粳稗粻粽辟粿糅糆糈糌糍糒糔萼糗蛆蹋糢糨糬粽糯糱籴粜糸糺紃蹼鲣霉纡纨绔纫闽襻紑纰纮锭鸢鹞纴紞紟扎紩紬绂绁纻紽紾绐絁絃絅経絍绗絏缡褵絓絖絘絜绚絣螯絪絫聒絰絵绝絺絻絿綀绡綅绠绨绣綌綍綎捆綖綘継続缎绻綦綪线綮綯绾罟蝽綷縩绺绫緁绲緅緆缁绯緌緎総緑绱緖缃缄缂绵缗緤褓缌纂緪緰缑缈缏缇縁縃縄萦缙缒縏缣縕缞縚缜缟缛縠縡縢縦绦縯縰骋缧縳纤缦絷缥縻衙縿繄缫繈繊繋繐缯繖繘繙繠缋繣繨缰缲繸繻缱纁纆纇缬缵纩纑纕缵纙纚纛缾罃罆坛罋罂罎罏罖罘罛罝罠罣罥罦罨罫罭锾罳罶罹罻罽罿羂羃羇芈蕉51鸵羑羖羌羜羝羢羣羟羧羭羮羰羱羵羶羸藜鲐翀翃翅翊翌翏翕翛翟翡翣翥翦跹翪翫翚翮翯翱翽翾翿板饕鸹锨耋耇耎耏专耒耜耔耞耡耤耨耩耪耧耰鬓耵聍聃聆聎聝聡聦聱聴聂聼阈聿肄肏肐肕腋肙肜肟肧胛肫肬肭肰肴肵肸肼胊胍胏胑胔胗胙胝胠铨胤胦胩胬胭胯胰胲胴胹胻胼胾脇脘脝脞脡脣脤脥脧脰脲脳腆腊腌臜腍腒腓胨腜腠脶腥腧腬腯踝蹬镣腴腶蠕诽膂腽嗉膇膋膔腘膗膙膟黐膣膦膫膰膴膵膷脍臃臄臇臈臌臐臑臓膘臖臙臛臝臞臧蓐诩臽臾臿舀舁鳑鲏舋舎舔舗馆舝舠舡舢舨舭舲舳舴舸舺艁艄艅艉艋艑艕艖艗艘艚艜艟艣舣艨艩舻艬艭荏艴艳艸艹艻艿芃芄芊萰陂藭芏芔芘芚蕙芟芣芤茉芧芨芩芪芮芰鲢芴芷芸荛豢芼芿苄苒苘苙苜蓿苠苡苣荬苤苎苪镑苶苹苺苻苾茀茁范蠡萣茆茇茈茌茍茖茞茠茢茥茦菰茭茯茳藨茷藘茼荁荄荅荇荈菅蜢鸮荍荑荘豆荵荸荠莆莒莔莕莘莙莚莛莜莝莦莨菪莩莪莭莰莿菀菆菉菎菏菐菑菓菔芲菘菝菡菢菣菥蓂菧菫毂蓥菶菷菹醢菺菻菼菾萅萆苌萋萏萐萑萜萩萱萴莴扁萻葇葍葎葑荭葖葙葠葥苇葧葭药葳葴葶葸葹葽蒄蒎莼茏薹莅蒟蒻蒢蒦蒨蒭藁蒯蒱鉾蒴蒹蒺蒽荪蓁蓆蓇蓊蓌蓍蓏蓓蓖蓧蓪蓫荜跣藕苁蓰蓱莼蓷蓺蓼蔀蔂蔃蔆蔇蔉蔊蔋蔌蔎蔕蔘蔙蒌蔟锷蒋雯茑蔯蔳麻蔵蔸蔾荨蒇蕋蕍荞蕐蕑芸莸蕖蕗蕝蕞蕠蕡蒉蕣蕤蕨蕳蓣蕸蕺蕻薀薁薃薅薆荟薉芗薏薐蔷薖薘剃谔钗薜薠薢薤薧薨薫薬薳薶薷薸薽薾薿藄藇藋荩藐藙藚藟藦藳藴苈藷藾蘀蘁蕲苹蘗蘘蘝蘤蘧蘩蘸蘼虀虆虍蟠虒虓虖虡虣虥虩虬虰蛵蛇虷鳟虺虼蚆蚈蚋蚓蚔蚖蚘蚜蚡蚣蚧蚨蚩蚪蚯蚰蜒蚱蚳蚶蚹蚺蚻蚿蛀蛁蛄蛅蝮蛌蛍蛐蟮蛑蛓蛔蛘蛚蛜蛡蛣蜊蛩蛱蜕螫蜅蚬蜈蝣蜋蜍蜎蜑蠊蜛饯蜞蜣蜨蜩蜮蜱蜷蜺蜾蜿蝀蝃蝋蝌蝍蝎蝏蝗蝘蝙蝝鲼蝡蝤蝥猿蝰虻蝲蝴蝻螃蠏蛳螉螋螒螓螗螘螙螚蟥螟螣螥螬螭䗖螾螀蟀蟅蝈蟊蟋蟑蟓蟛蟜蟟蟢虮蟨蟪蟭蛲蟳蛏蟷蟺蟿蠁蠂蠃虿蠋蛴蠓蚝蠗蠙蠚蠛蠜蠧蟏蠩蜂蠮蠰蠲蠵蠸蠼蠽衁衄衄衇衈衉衋衎衒同衖胡衞裳钩衭衲衵衹衺衿袈裟袗袚袟袢袪袮袲袴袷袺袼褙袽裀裉袅裋夹裍裎裒裛裯裱裲裴裾褀褂褉褊裈褎褐褒褓褔褕袆褚褡褢褦褧褪褫袅褯褰褱裆褛褽褾襁褒襆裥襉襋襌襏襚襛襜裣襞襡襢褴襦襫襬襭襮襕襶襼襽襾覂覃覅霸覉覊覌覗觇覚覜觍觎覧覩觊觏覰観觌觔觕觖觜觽觝觡酲觩觫觭觱觳觯觷觼觾觿言赅讣訇訏訑訒诂讬訧訬訳訹证訾詀詅诋毁詈詊讵詑诒诐詗诎察詨诜詶詸詹詻诙诖誂誃诔锄诓誋诳诶悖誙诮诰誧説読誯谇訚谄谆諆諌诤诹诼諕谂谀諝谝諟喧谥諴諵谌谖誊謆謇歌謍謏謑谡谥謡謦謪谪讴謷謼谩哗譅譆譈譊讹譒撰谮鑫譞噪譩谵譬譱譲谴譸譹谫讅讆詟䜩雠讐谗谶讙谠讟谽豁豉豇岂豊豋豌豏豔豞豖豗豜豝豣豦豨豭豱豳豵豶豷豺豻貅貆狸猊貔貘䝙貜貤餍贳餸贶贲赂賏赊赇赒賝赓赕賨赍斗賮賵賸赚赙赜赟贉赆赑贕赝赬赭赱赳迄趁趂趄趐趑趒趔趡趦趫趮趯趱趴趵趷趹趺趿跁跂跅跆踬跄跐跕跖跗跙跛跦跧跩跫跬跮跱跲跴跺跼跽踅踆踈踉踊踒踖踘踜踟躇蹰踠踡踣踤踥踦踧跷踫踮逾踱踊踶踹踺踼踽躞蹁蹂躏蹎蹐蹓蹔跸蹚蹜蹝迹蹠蹡蹢跶蹧蹩蹪蹯鞠蹽躃躄躅踌跻躐踯跞躘躙躗躝躠蹑躜躧躩躭躰躬躶軃軆辊軏轫軘軜軝腭転軥軨軭軱轱辘軷轵轺軽軿輀輂辇辂辁輈挽輗辄辎辋輠輤輬輭輮辏輴輵輶輹輼辗辒轇轏轑轒辚轕轖轗轘轙轝轞轹轳罪辣辞辵辶辺込辿迅迋迍麿迓迣迤逦迥迨迮迸迺迻迿逄逅逌逍逑逓迳逖逡逭逯逴逶逹遄遅侦遘遛遝遢遨遫遯遰遴绕遹遻邂邅邉邋邎邕邗邘邛邠邢邧邨邯郸邰邲邳邴邶邷邽邾邿郃郄郇郈郔郕郗郙郚郜郝郞郏郠郢郪郫郯郰郲郳郴郷郹郾郿鄀鄄郓鄇鄈鄋鄍鄎鄏鄐鄑邹邬鄕郧鄗鄘鄚鄜鄞鄠鄢鄣鄤鄦鄩鄫鄬鄮鄯鄱郐鄷鄹邝鄻鄾鄿酃酅酆酇郦酊酋酎酏酐酣酔酕醄酖酗酞酡酢酤酩酴酹酺醁醅醆醊醍醐醑醓醖醝酝醡醤醨醪醭醯醰酦醲醴醵醸醹醼醽醾釂酾酽釆釈鲈镏阊钆钇钌钯钋鼢鼹钐钏釪釬釭釱钍釸钕钫鈃钭鈆鈇钚鈊鈌钤钣鈒鈤钬钪鈬铌铈钶铛钹铍钸钿鉄鉆铊铇鉌铋鉏铂钷铆钵鉥钲鉨钼钽鉱鉲鉶铰铒鉼铪銍銎铣銕镂铫铦铑铷銤铱铟銧铥铕铯銭銰焊銶锑锉汞鋂锒鋆鋈鋊铤鋍铗鋐鋑鋕鋘鋙锊锓锔锇铓鋭铖锆锂铽鋳鋹鋺鉴镚钎錀锞锖锫锩錍铔锕錔锱铮锛錞锬锜錤錩錬録铼錼锝钔锴鍉镀鍏鍐铡鍚锻锽锸锲锘鍫鍭鍱鍴锶鍹锗针锺锿镅鎉鎋鎌鎍鎏鎒鎓鎗镉鎚鎞镃鎤铩锼鎭鎯镒镍鎴镓��
鎹镎镟鏊镆镠镝鏖铿锵鏚镗镘镛鏠鏦錾镤鏸镪鏻鏽鏾铙鐄鐇鐏铹镦镡鐗馗镫镢镨鐡锎镄鐩镌鐬鐱镭鐶鐻鐽镱鑀鑅镔鑐鑕鑚鑛鑢鑤镥鑪镧鑯鑱鑴鑵镊镢钃镻闫闬闶闳閒闵閗閟阂関合閤哄阆閲阉閺阎阏阍阌暗闉阕阗闑闒闿闘闚阚闟闠闤闼阞阢阤阨阬阯阹阼阽陁陑陔陛陜陡陥陬骘陴険陼陾阴隃隈隒隗隞隠隣隤隩隮隰颧隳隷隹雂雈雉雊雎雑雒雗雘雚雝雟雩雰雱驿霂霅霈霊沾霒霓霙霝霢霣霤霨霩霪霫霮靁叇叆靑靓靣腼靪靮靰靳靷靸靺靼靿鞀鞃鞄鞍鞗鞙鞚鞝鞞鞡鞣鞨鞫鞬鞮鞶鞹鞾鞑韅鞯驮韍韎韔韖韘韝韫韡韣韭韭韱韹韺頀刮頄顸顼頍颀颃颁頖頞頠頫頬颅頯頲颕頼悴顋顑颙颛颜顕顚顜颟顣颥颞飐飑台飓颸飏飖颽颾颿飀飂飚飌翻飡飣饲飥饨饫飮飧飶餀餂饸饹餇餈饽哺馂餖餗餚馄馃餟餠餤餧餩餪餫糊餮糇餲饧馎糕饩馈馊馌馒饇馑馓膳饎饐饘饟馕馘馥馝馡馣骝骡馵馹駃駄駅駆駉駋驽駓驵駗骀驸駜骂骈駪駬骃駴骎駹駽駾騂騄骓騆騉騋骒骐麟騑騒験騕骛騠騢騣騤騧骧騵驺骟騺蓦骖骠骢驆驈骅驌骁驎骣驒驔驖驙驦驩驫骺鲠骫骭肮骱骴骶骷髅骾髁髂髄髆膀髇髑髌髋髙髝髞髟髡髣髧髪髫髭髯髲髳髹髺髽髾鬁鬃鬅鬈鬋鬎鬏鬐鬑鬒鬖鬗鬘鬙鬠鬣斗鬫鬬阄鬯鬰鬲鬵鬷魆魈魊魋魍魉魑魖鳔魛魟魣魦魨魬鲂魵魸鮀鲅鮆鲧鲇鲍鲋鮓鲒鲕鮟鱇鮠鮦鮨鲔鲑鮶鮸鮿鲧鯄鯆鲩鯈鲻鯕鲭鲞鯙鯠鲲鯥鲰鲶鳀鯸鳊鲗䲠鹣鳇鰋鳄鳆鰕鰛鰜鲥鰤鳏鰦鳎鳐鳁鳓鰶鲦鲡鰼鰽鱀鱄鳙鱆鳕鱎鱐鳝鳝鳜鲟鲎鱠鳣鱨鲚鱮鱲鱵鱻鲅鳦凫鳯鳲鳷鳻鴂鴃鴄鸩鴈鴎鸰鴔鴗鸳鸯鸲鹆鸱鴠鴢鸪鴥鸸鹋鴳鸻鴷鴽鵀鵁鸺鹁鵖鵙鹈鹕鹅鵟鵩鹌鵫鵵鵷鵻鹍鶂鶊鶏鶒鹙鶗鶡鶤鶦鶬鶱鹟鶵鶸鶹鹡鶿鹚鷁鷃鷄鷇䴘䴘鷊鷏鹧鷕鹥鸷鷞鷟鸶鹪鹩鷩鷫鷭鹇鹇鸴鷾䴙鸂鸇䴙鸏鸑鸒鸓鸬鹳鸜鹂鹸咸鹾麀麂麃麄麇麋麌麐麑麒麚麛麝麤麸面麫麮麯麰麺麾黁黈黉黢黒黓黕黙黝黟黥黦黧黮黰黱黪黶黹黻黼黾鼋鼂鼃鼅鼈鼍鼏鼐鼒冬鼖鼙鼚鼛鼡鼩鼱鼪鼫鼯鼷鼽齁齆齇齈齉齌赍齑龀齕齗龅齚龇齞龃龉龆齢出齧齩齮齯齰齱齵齾厐龑龒龚龖龘龝龡龢龤'
|
18 |
+
|
19 |
+
traditional_characters = '制咖片型超聲盤鑒定仔點他命書歌粉巾字帳恤手指記憶棒形轉彎溝光○〇㐄㐅㐆㐌㐖毒㐜㐡㐤㐰㐺㑇㑳㒳㒸㔾㗂㗎㝵㞎㞙㞞㠯㢲㢴㤅㥁㥯㨗㫺㬎㮎㮚㮸㲋㲱㲾㳮㵎㵪㶸㷖㷭㹢㹴犬㺢狓㺵㼝㽮㿝䍃䔢䖟䖸䗈䗥䗪䝓䠶䥯䦉䯝䰾魚䲔䳗䳘䵹鼄䶑一對應映射丁不識下兒子做二休世丘之貉並中台原則串為甚謂乾淨了百事無成八變五十些人得道雞升天代如併來去個國政策勁幽靈在歐洲遊蕩接樣蘿蔔坑側化傳價元論醇共再准刀兩斷切分耕耘收穫錢貨物向看舊就緒險刻千金動勞永逸匙零夜半卡通回復返影蹤反常態口咬氣句話同吐快吹周味呼諾嗚品紅鍋哄而散起唱和問三知生熟團漆黑火糟堆場空塊麵塌糊塗塵染壁廂夔已足多情露水大早到晚夫妻當關萬莫開失古恨套所料既往孔見提師要家主審寸陰難買鬥牛小撮部陣局展身層巴掌帆風順席地帶過年計於春頭載四季期被蛇怕井繩度願式份彈頃深前律徑心意念差愁孤行俱全房廳交遮打技長把抓死拿眼淚鼻涕鑰鎖折段抿拍即合掃排掬揮撥擁上入擊洞擲攬改故轍敗文值名斑方面旁族日秋餐隔雅里終父旦時晌會霎間晃暴寒曝更月望垠際朝夕本正經利杯羹東西板枝獨秀根筋桿進條龍服務概模次函數又性程總付步腳印趨登毛拔呵氧氮碳決雌雄波未平派謊言流清楚白準溜煙潭有獲聞是處降琴鶴甲病發可拾沙目然瞭直以相眨穿睹瞥瞬矢的解石鳥神教秉虔誠秘種窩蜂窮竅笑置筆苟勾銷抹殺煞等獎箍節吃箭仇雙鵰詩籌籮筐系列紙級士官統絲毫掛維網盡線微吭響股腦胎脈承腔臂力致效資源址器舉功投般說講規貿易葉障著慎滿皆輸號木電池衣傾鐘高低視仁覺醒覽遺角銀幣觸潰九鼎蔽抄出駟馬追重語破貧洗貫走路安蹴至幾蹶振躍役膽汗較輩輪辭贊退六連遍遞邊針血錘音錯門思閃真倒項栽霧類保護川先驚乍體鬨鱗爪鳴滴泡鄰域黨專鼓作齊炒丑烯亥克內酯冬加奴卯肝炎基尺梁街褲鎬客寵庭巳汝昌烷玲磊糖肇酉醛啷青縣韙良香骨鯛丂七集河市弦喜嘴張舌堵區工業姊妹星架構巧彩扭歪拼湊餘熱曜武州爺浮屠美鄉老階樹葷素碎落能魄鰓鰻珠丄丅丆万俟丈尚摸母娘量管群亞虎必我堂令申件裝伏位博俠義界表女墟臺戲臭皮匠勝諸葛亮賽頂倍催請運算包立叉戟離疫苗土史志演圍揭瓦曬夷姑婆帝村寶爛尖杉鹼屜桌山岔島由紀峽壩庫鎮廢從德後拗湯治旬食明昧曹朋友框欄極權冪曲歸依貓民氟硼氯磷鐵江侗自旅法司洋浦梅園溫暖灣焦班幸用田略番疊皇炮捶硝苯酸腺苷稜草鏡穗跳遠索錦綱聚氰胺聯店胚膲愛色堇紫羅蘭芝茶飯菱雲蟲藏藩亂叛蘇親債凳學座恐戀柱測肌腹衩錐係貂企烏跪叩軍車農題迭都甘油屯奏鍵短阿姨陪姐隻顧茅廬槽駕魂鮮鹿頁其菜單乘任供勢午齒漢組織吊調瀉唇坡城報墳外夸將尉建築岸崗公床揚新劍昇杭林栗校樓標款汽社浣海商館劇院鋼華港機械廣媒環球融第醫科證券綜財樂育游漲猶嶺疏癮瞼確兵領導繳肢膛船艾瑟爾蒼蔡虞傚衫覆訪訴課諭議軌述野鉤限敵鞋頜頷顎饒首齦站例修凡劃垂屆屬崽頦廚拜挫擺放旋削棋榻檻禮沉注滑營獄畫确儀聘花葬詔員跌轄週達酒錨閘陷陸雨雪飛威丌于丹久乏予理評產亢卑亦乎舞己悲矩圓詞害誌但住佞佳便俗信票案幅翁倦倫假偏倚斜虧鬼敲停備傷脾胃僅此像儉匱免宜穴焉戴兼容許凍伯仲負彼晝皂軒輊實刊划顛衛戰哥比省非好黃飾別拘束掩奶睬選擇搖擾煩苦枚寫協厭及格受歡迎約只估侵犯割狀告或缺抗拒挽撤救藥喻磨滅端倪少逆逾越避靠適吉譽吝玉含延咎歹聽啻淵善謀均勻堪忍夠太惹妙妥妨孕症孝術室完納推冠積宣疑辯慄碴稱屈撓屑干涉衡待很忙惡忿怎麼怠急恥恭息悅惑惜惟想愉愧怍慌憤啟懂懈懷材才緊招認扣抵拉捨也罷插揣冒搭撞南牆擴核支攻敢雷攀敬裡嗎需景智暇曾罪遇朽枉止況競爭辱求癒渝溶濟左右袒困補爽特寂寞示弱找謝畏強疾徐痛癢冤符眠睦瞅董何厚云措活疲羞者輕玻璃祥兆禁移稂莠穩佛換答簡結果盟絕縷途給談否羈翼耐肖脛毋寧興舒若菲萊痕跡窠臼虛衰臉兔撒鷹棺範該詳諱抬泰讓鬚眉象眾貲賬費灰賴奇慮訓輟辨菽麥辛近送透逞徒速續逮捕遂遑違遜斧鉞艱醉鏽隨觀棄顯飽脂肪使丏丐幫丒且慢末丕替桃宗王尊涼爵各圖屋脊糧署錄壇吾祿職胄襲君廈丗北壑桐疹損逢陵鷸丙寅戌氨腈唑綸辰酮脫氫酶醚丞丟現掉紗帽弄扯砲碗丠両丣坐存激肩臻蒂蓮悖序驅丨丩丫挺杈髻鬟細介俄伊犁京尼布訂普渡央委監察檢查劑圈設警隊斯督剩震境航舶革防托播促質版蠑螈鋒研藝歷殘消頻譜精密製造陲郵候埔堅壓壢凹匯執府究邦俘攝寮彬狼嶽肺腫庸英訊診埋粒胞括控碼韓暑槍樞砥澳哇牟壽甸鑽探篇簽綴縫繼耳肯照婦埃懸璧軸櫃檯辣擱淺邪跑纖阮陽私囊魔丮丰姿采丱燒丳丵丶丷丸參寨朗桂瑞砂衷霞貌鳳僕艦因嫌宰峰幹絡牌持旨祭禱簿編罰賓辦丼丿乀乂乃乄仰慕盛曠留考驗闊乆乇么醜麼乊湖燃乑乒乓乕乖僻忤戾离謬迕乗危肥劫除隙浪婿乙炔腸酰吡咯鹽乚乛乜嘢卿玄宮尾狐龜塔嶷兄弟泉章霄釘耙乞扎哀憐恕討乢乣乤乥乧乨乩童乪乫乭乳暈汁液瑤漿牙癌突竇罩腐膠豬酪蛋糕菌瘤乴乵乶乷乸乹乺乼乾俸冰嘉噦嚎坤媽屍壘旱枯涸俐渴潮澀煸豆燥爹瘦癟癬瞪袋脆薑貝隆餾乿亀亁叫咕攘扔搞男砸竄蓬麻亃亄亅卻亇遲典今臨繁累卵奉婚聰躬巨與遷添裂副宿歲怪噁尕崙愣杆硅硫鈦鈾錳芑雜異鈉砷胂磺琥珀艙棍簧胡茬盜浩盆販
郎腿亍洪亐互欠助勉惠操斥諉繫戶譯亓墓碑刑鈴卅渠繽紛斗米旗憲釩燈徽瘟祖拳福穀豐臟腑綁肉醃苓蘊橋鋪霸顏鬧判噴岡底蛙陘礦亖亙亜罕們娜桑那努哈喀弗烈曼松森杜氏盃奧琛敦戊穆聖裔彙薛孫亟亡佚虜羊牢奮釋卷卸契媾感額睫纏誼趾塞擠紐阻還配馳莊亨洛祚亪享津滬畿郊慈菴枇杷膏亭閣鋥麗亳亶亹誅初責翻瘋偶傑叢稠妖拖寰居吸授慧蝸吞壯魅狗矛盾益渣患憂稀描猿夢暫涯畜禍緣沸搜引擎臣橫紜誰混援蒸獸獅稅剖亻亼亽亾什獻剎邡麽仂仃仄仆富怨仈仉畢昔晨殼紹仍仏仒仕宦仗欺恃腰嘆歎炬梓訖施仙后瓊逝仚仝仞仟悔仡佬償填泊拓撲簇羔購頓欽佩髮棻閫馭養億儆尤藉幀賑凌敘帖李柔剛沃眥睚戒訛取饗讀仨仫仮著泳臥躺韶夏裁仳仵唯賢憑釣誕仿似宋彿諷伀碩盼鵝伄儅伈伉儷柯始娃邁戈坦堡帕茨薩廟瑪莉莎藤霍姆伋伍奢胥廷芳豪伎倆侍汛勒希羲雛伐憩整謨閑閒伕伙伴頤伜伝伢叔恆茲恩翰伱伲侶伶俜悧鼬伸懶縮喇叭伹伺伻伽倻輻伾佀佃佇佈喬妮墨佉盧佌貸劣廉昂檔濃矮傘窪緩耗胸谷迷擋率齲宅沫舍療佐貳佑佔優據鏵嘗呢須魯曉佗佘余坪寺瓜銃僧蒙芒陀龕哼嘔坊姦孽弊揖祟繭縛誓賊佝僂瞀佟你奪趕佡佢佣佤佧賈佪佫佯佰佱潔績釀餚佴捲佶佷佸佹佺佻佼佽佾具喚窘壞娛怒慨硬習慣聾膨脹蔓駭貴痺侀侁侂侃侄侅鴻燕侇侈糜靡侉侌妾侏儒倉鼠侐侑侔侖侘侚鏈侜偎傍鈷循柳葫蘆附価侮罵蔑侯岩截蝕侷貼壺嬛宴捷攜桶箋酌俁狹膝狄俅俉俊俏俎俑俓俔諺俚俛黎健呈固墒增守康箱濕祐鏢鑣槓盒靖膜齡俞豹獵噪孚封札筒託衍鴿剪撰稿煉廠禊練繕葺俯瞰撐衝俲俳俴俵俶俷俺俻俾倀倂倅儲卒惶敷猝逃頡蓄崇隱倌倏忽刺蠟燭噍嚼坍扁抽斃蔥楣灌灶糞背藪賣賠閉霉騰倓倔倖倘倜儻倝借箸挹澆閱倡狂倢倣値倥傯倨��倩匡嗣沖柝珍倬倭寇猩倮倶倷倹勤讚偁偃充偽吏嗓寐惺扮拱芫茜藉虢鈔偈偉晶偌宕距析濾殿疼癱註頗偓偕鴨歇滯偝偟偢忘怡旺偨偩偪偫偭偯偰偱偲偵緝蹄偷減惰漏窺竊偸偺迹傀儡傅傈僳傌籬傎奎琳迪叟芭傒傔傕傖悉荒傜傞傢傣芽逼傭婢傮睨寄檄誦謠頌傴擔辜弓慘蒿悼疤傺傻屄臆巢洩篋羨蓋軋頹傿儸僄僇僉僊働僎僑僔僖僚僝僞僣僤僥僦猴僨僩僬僭僮僯僰僱僵殖籤靜僾僿征隴儁儂儃儇儈朴薄儊儋儌儍儐儓儔儕儗儘儜儞儤儦儩汰哉寡渥裕酷儭儱罐儳儵儹儺儼儽兀臬臲鷲允勛勳宙宵帥憝彞諧嫂鬩暢沛溢盈飢赫兇悍狠猛頑愚妣斬秦遣鞭耀敏榮槃澤爆碟磁禿纜輝霽鹵朵婁孜烽醬勃汀箕裘鉗耶懞蕾徹兌軟遭黜兎児韻媳爸兕觥兗兙兛兜售鍪肚兝兞兟兡兢兣樽殮涅睡稟籍贅泌啡肽奸幕涵澇熵疚眷稃襯訌赴煥椒殲植跏沒試誤猜棲窗肋袖頰兪卦撇鬍岐廓轎疸楓茴瓏廁秩募勺噸寓斤曆畝迫筷釐最淫螺韜兮寬匪篩襄贏軛複兲詐刃堰戎痞蟻餉它冀鑄冂冃円冇冉冊嫁厲礪竭醮冏牧冑冓冔冕冖冗冘冞冢窄抑誣冥冫烘菇蟄冷凝坨橇淇淋炭餅磚磧窖醋雕雹霜冱冶爐艷嘲峻灘淡漠煖颼飲冼冽凃凄愴梗凅凇凈凊凋敝濛凔凜遵汞脢凞几凢処凰凱凵凶焰凸摺刷紋預喪嘍奔巡榜殯芙蓉租籠輯鞘萃凼鋸鑊刁蠻刂娩崩批拆攤掰櫱驟歧顆秒袂贓勿囑忌磋琢膚刈羽刎訟戮舂槳艇刓刖霹靂刜創犢刡恙墅幟筵緻刦刧刨昏默攸尿慾薰潤薰圭刪刮痧鏟刱刲刳刴刵踏磅戳柏槐繡芹莧蝟舟銘鵠鶩刼剁剃辮剄剉履鉛剋剌姻咽哨廊掠桅沿召瞻翅趙卜渺茫郭剒剔剕瀝剚愎毅訥纔剜剝啄採剞剟剡剣剤綵剮腎駛黏剰袍剴紊剷剸剺剽剿劁劂劄劈啪柴扳啦劉奭姥夼昫涓熙禪禹錫翔雁鶚劊劌弩柄蜻蛉劒劓劖劘劙瀾簣賞磯釜晉甜薪逐劦熔紂虐赤囚劬劭労劵効劻劼劾峭艮勅勇勵勍勐臘脖龐漫飼盪粥輒勖勗勘驕餒碌泮雇捐竹騎殊阱勣樸懇謹勦勧勩勯勰勱勲勷勸懲慰誡諫勹芡踐闌匁庇拯粟紮袱裹餃匆遽匈匉匊匋匍匐莖匏匕妝痰膿蛹齋苑烤蹈塘羌熊閥螳螂疆碚竿緯荷茵邙魏匚匜匝匟扶稷匣匭攏匸匹耦匽匾匿卂叮瘡禧軫堤棚迢鈞鍊卄卆遐卉瓷盲瓶噹胱腱裸卋卌卍卐怯污賤鄙齷齪陋卓溪唐梯漁陳棗泥漳潯澗梨芬譙贍轅迦鄭単驢弈洽鰲卛占筮卝卞卟吩啉屎翠厄卣卨卪卬卮榫襖璽綬鈕蚤懼殆篤聳卲帘帙繞卹卼卽厂厎厓厔厖厗奚厘厙厜厝諒厠厤厥厪膩孢厮厰厳厴厹厺粕垢蕪菁厼厾叁悟茸薯叄吵笄悌哺譏坫壟弧芯杠潛嬰芻袁詰貪諜煽饋駁収岳締災賄騙叚叡吻攔蘑蜜訣燧玩硯箏椎藺銅逗驪另覓叨嘮謁杵姓喊嚷囂咚嚀塑尋惱憎擦祇泣滲蝠叱吒咄咤喝籀黛舵舷叵叶鐸懿昭穰苴遼叻叼吁塹嫖賭瞧爬衆抒吅吆夥巹橡滌抱縱摩郡唁墜扇籃膀襪頸吋愾諮酬哭妓媛暗錶韁邇妃羿絮蕃渾拐葵暮隅吔吖啶嗪戚吜嗇噬嚥吟哦詠吠吧唧嗒咐吪雋咀徵燐苞茹鈣哧吮吰吱嘎吲哚吳棟嬌窟孟簫忠晗淞闔閭趼宇吶睛噓拂捧疵熄竽笛糠吼吽呀呂韋矇呃呆笨呇貢呉罄呋喃呎呏呔呠呡癡呣呤呦呧瑛眩扒晬淑姬瑜璇鵑呪呫嗶嚅囁呬呯呰呱呲咧噌鈍呴呶呷呸呺呻哱咻嘯嚕籲坎坷邏呿咁咂咆哮咇咈咋蟹煦珅藹咍咑咒詛咔噠嚓咾噥哩喱咗咠咡咢咣咥咦咨嗟詢咩咪咫嚙齧咭咮咱咲咳嗆嗽咴咷咸咹咺咼喉咿婉慟憫賦矜綠茗藍哂搶瞞哆嗦囉噻啾濱彗哋哌哎唷喲哏哐哞哢哤哪裏哫啼喘哰哲萎蚌哳哶哽哿唄唅唆唈唉唎唏嘩堯棣殤璜睿肅唔睇唕唚唞唣喳唪唬唰喏唲唳唵嘛唶唸唹唻唼唾唿啁啃鸚鵡啅埠棧榷祺舖鞅飆啊啍啎啐啓啕啖啗啜啞祈啢啣啤啥啫啱啲啵啺饑啽噶崑沁喁喂喆裙喈嚨喋喌喎喑喒喓喔粗喙幛慶滋鵲喟喣喤喥喦喧騷喨喩梆喫葡萄喭駝挑嚇碰樅瓣純
皰藻趟鉻喵営喹喺喼喿嗀嗃嗄嗅嗈嗉嗊嗍嗐嗑嗔詬嗕嗖嗙嗛嗜痂癖嗝嗡嗤嗥嗨嗩嗬嗯嗰嗲嗵嘰嗷嗹嗾嗿嘀嘁嘂嘅惋嘈峪禾蔭嘊嘌嘏嘐嘒嘓嘖嘚嘜嘞嘟囔嘣嘥嘦嘧嘬嘭這謔嚴敞饞鬆嘵嘶嘷嘸蝦嘹嘻嘽嘿噀噂噅噇噉噎噏噔噗噘噙噚噝噞噢噤蟬皿噩噫噭噯噱噲噳嚏涌灑欲巫霏噷噼嚃嚄嚆抖嚌嚐嚔囌嚚嚜嚞嚟嚦嚬嚭嚮嚯嚲嚳飭按竣苛嚵嚶囀囅囈膪謙囍囒囓囗囘蕭酚飄濺諦囝溯眸紇鑾鶻囟殉囡団囤囥囧囨囪囫圇囬囮囯囲図囶囷囸囹圄圉擬囻囿圀圂圃圊粹蠹赦圌墾圏滾鯡鑿枘圕圛圜圞坯埂壤骸炕祠窯豚紳魠鯪鱉圧握圩圪垯圬圮圯炸岬幔毯祇窨菩溉圳圴圻圾坂坆沾坋坌舛壈昆墊墩椅坒坓坩堝坭坰坱坳坴坵坻坼楊掙涎簾垃垈垌垍垓垔垕垗垚垛垝垣垞垟垤垧垮垵垺垾垿埀畔埄埆埇埈埌殃隍埏埒埕埗埜埡埤埦埧埭埯埰埲埳埴埵埶紼埸培怖樁礎輔埼埽堀訶姪廡堃堄摧磐貞韌砌堈堉堊堋堌堍堎堖堙堞堠礁堧堨輿堭堮蜓摘堲堳堽堿塁塄塈煤塋棵塍塏塒塓綢���鴉沽虱塙塚塝繆塡塢塤塥塩塬塱塲蟎塼塽塾塿墀墁墈墉墐夯増毀墝墠墦漬缽墫墬墮墰墺墻櫥壅壆壊壌壎壒榨蒜壔壕壖壙壚壜壝壠壡壬壭壱売壴壹壻壼寢壿夂夅夆変夊夌漱邑夓腕泄甥禦骼夗夘夙袞瑙妊娠醣梟珊鶯鷺戧幻魘夤蹀祕擂鶇姚宛閨嶼庾撻拇賛蛤裨菠氅漓撈湄蚊霆鯊箐篆篷荊肆舅荔鮃巷慚骰辟邱鎔鐮阪漂燴鯢鰈鱷鴇臚鵬妒峨譚枰晏璣癸祝秤竺牡籟恢罡螻蠍賜絨御梭夬夭砣榆怙枕夶夾餡奄崛葩譎奈賀祀贈奌奐奓奕訢詝奘奜奠奡奣陶奨奩魁奫奬奰媧孩貶隸酥宄狡猾她奼嫣妁氈荼皋膻蠅嬪妄妍嫉媚嬈妗趣妚妞妤礙妬婭妯娌妲妳妵妺姁姅姉姍姒姘姙姜姝姞姣姤姧姫姮娥姱姸姺姽婀娀誘懾脅娉婷娑娓娟娣娭娯娵娶娸娼婊婐婕婞婤婥谿孺婧婪婬婹婺婼婽媁媄媊媕媞媟媠媢媬媮媯媲媵媸媺媻媼眯媿嫄嫈嫋嫏嫕嫗嫘嫚嫜嫠嫡嫦嫩嫪毐嫫嫬嫰嫵嫺嫻嫽嫿嬀嬃嬅嬉耍嬋痴豔嬔嬖嬗嬙嬝嬡嬢嬤嬦嬬嬭幼嬲嬴嬸嬹嬾嬿孀孃孅孌孏曰癲屏孑孓雀孖斟簍謎摺孛矻鳩崮軻祜鸞孥邈毓棠臏孬孭孰孱孳孵泛罔銜孻孿宀宁宂拙株薇掣撫琪瓿榴謐彌宊濂祁瑕宍宏碁宓邸讞実潢町宥宧宨宬徵崎駿掖闕臊煮禽蠶宸豫寀寁寥寃簷庶寎暄磣寔寖寘寙寛寠苫寤肘洱濫蒗陝覈寪弘綽螽寳擅疙瘩晷対檐専尃尅贖絀繚疇釁尌峙醌襟痲碧屁昊槌淘恵瀑牝畑莓缸羚覷蔻髒躁尒尓銳尗尙尜尟尢尥尨尪尬尭尰擒尲尶尷尸尹潽蠖蛾尻釦梢蚴鰭脬蹲屇屌蚵屐屓挪屖屘屙屛屝屢屣巒嶂巖舄屧屨屩屪屭屮戍駐鉀崖嵛巔旮旯楂欖櫸芋茱萸靛麓屴屹屺屼岀岊岌岍阜岑彭鞏岒岝岢嵐岣岧岨岫岱岵岷峁峇峋峒峓峞峠嵋峩峯峱峴峹峿崀崁崆禎崋崌崍嶇崐崒崔嵬巍螢顥崚崞崟崠崢巆崤崦崧殂崬崱崳崴崶崿嵂嵇嵊泗嵌嵎嵒嵓嵗嵙嵞嵡嵩嵫嵯嵴嵼嵾嶁嶃嶄晴嶋嶌嶒嶓嶔嶗嶙嶝嶞嶠嶡嶢嶧嶨嶭嶮嶰嶲嶴嶸巂巃巇巉巋巌巓巘巛滇芎巟巠弋迴巣巤炊擘蜥蟒蠱覡巰蜀彥淖杏茂甫楞巻巽幗巿帛斐鯽蕊帑帔帗帚琉汶帟帡帣帨帬帯帰帷帹暆幃幄幇幋幌幏幘幙幚幞幠幡幢幦幨幩幪幬幭幯幰遙蹉跎餘庚鑑幵幷稚邃庀庁広庄庈庉笠庋跋庖犧庠庤庥鯨庬庱庳庴庵馨衢庹庿廃廄廆廋廌廎廏廐廑廒廕廖廛廝搏鑼廞弛袤廥廧廨廩廱綿踵髓廸廹甌鄴廻廼廾廿躔弁皺弇弌弍弎弐弒弔詭憾薦弝弢弣弤弨弭弮弰弳霖繇燾斌旭溥騫弶弸弼弾彀彄彆纍糾彊彔彖彘彟彠陌彤貽彧繪虹彪炳彫蔚鷗彰癉彲彳彴彷彷徉徨彸彽踩斂旆徂徇徊渭畬鉉裼従筌徘徙徜徠膳甦萌漸徬徭醺徯徳徴潘徻徼忀瘁胖燎怦悸顫扉犀澎湃砰恍惚絞隘忉憚挨餓忐忑忒忖応忝忞耿忡忪忭忮忱忸怩忻悠懣怏遏怔怗怚怛怞懟黍訝怫怭懦怱怲怳怵惕怸怹恁恂恇恉恌恏恒恓恔恘恚恛恝恞恟恠恣恧眄恪恫恬澹恰恿悀悁悃悄悆悊悐悒晦悚悛悜悝悤您悩悪悮悰悱悽惻悳悴悵惘悶悻悾惄愫鍾蒐惆惇惌惎惏惓惔惙惛耄惝瘧濁惥惦惪惲惴惷惸拈愀愃愆愈愊愍愐愑愒愓愔愕愙氓蠢騃昵愜赧愨愬愮愯愷愼慁慂慅慆慇靄慉慊慍慝慥慪慫慬慱慳慴慵慷慼焚憀灼鬱憃憊憋憍眺捏軾憒憔憖憙憧憬憨憪憭憮憯憷憸憹憺懃懅懆邀懊懋懌懍懐懞懠懤懥懨懫懮懰懱毖懵遁樑雍懺懽戁戄戇戉戔戕戛戝戞戠戡戢戣戤戥戦戩戭戯轟戱披菊牖戸戹戺戻戼戽鍬扂楔扃扆扈扊杖牽絹銬鐲賚扐摟攪烊盹瞌跟躉鑔靶鼾払扗玫腮扛扞扠扡扢盔押扤扦扱罾揄綏鞍郤窾扻扼扽抃抆抈抉抌抏瞎抔繯縊擻抜抝択抨摔歉躥牾抶抻搐泵菸拃拄拊髀拋拌脯拎拏拑擢秧沓曳攣迂拚拝拠拡拫拭拮踢拴拶拷攢拽掇芥橐簪摹疔挈瓢驥捺蹻挌挍挎挐揀挓挖掘浚挙揍聵挲挶挾挿捂捃捄捅捆捉捋胳膊揎捌捍捎軀蛛捗捘捙捜捥捩捫捭据捱捻捼捽掀掂掄臀膘掊掎掏掐笙掔掗掞棉芍掤搪闡掫掮掯揉掱掲掽掾揃揅揆搓揌諢揕揗揘揜揝揞揠揥揩揪揫櫫遒麈揰揲揵揶揸揹揺搆搉搊搋搌搎搔搕撼櫓搗搘搠搡搢搣搤搥搦搧搨搬楦褳訕赸搯搰搲搳搴搵搷搽搾搿摀摁摂摃摎摑摒摓跤摙摛摜摞摠摦睺羯摭摮摯摰摲摳摴摶摷摻摽撂撃撅稻撊撋撏鐧潑撕撙撚撝撟撢撣撦撧撩撬撱朔撳蚍蜉撾撿擀擄闖擉缶觚擐擕擖擗擡擣擤澡腚擧擨擩擫擭擯擰擷擸擼擽擿攃攄攆攉攥攐攓攖攙攛每攩攫轡澄攮攰攲攴軼攷砭訐攽碘敁敃敇敉敍敎筏敔敕敖閏誨敜煌敧敪敱敹敺敻敿斁衽斄牒縐謅斉斎斕鶉讕駮鱧斒筲斛斝斞斠斡斢斨斫斮晾沂潟穎絳邵斲斸釳於琅斾斿旀旂旃旄渦旌旎旐旒旓旖旛旝旟旡旣浴旰獺魃旴旹旻旼旽昀昃昄昇昉晰躲澈熹皎皓礬昑昕
昜昝昞昡昤暉筍昦昨昰昱昳昴昶昺昻晁蹇隧蔬髦晄晅晒晛晜晞晟晡晢晤晥曦晩萘瑩顗晿暁暋暌暍暐暔暕煅暘暝暠暡曚暦暨暪朦朧暱暲殄馮暵暸暹暻暾曀曄曇曈曌曏曐曖曘曙曛曡曨曩駱曱甴肱曷牘禺錕曽滄耽朁朅朆杪栓誇竟粘絛朊膺朏朐朓朕朘朙瞄覲溘饔飧朠朢朣柵椆澱蝨朩朮朰朱炆璋鈺熾鹮朳槿朶朾朿杅杇杌隉欣釗湛漼楷瀍煜玟纓翱肈舜贄适逵杓杕杗杙荀蘅杝杞脩珓筊杰榔狍閦顰緬莞杲杳眇杴杶杸杻杼枋枌枒枓衾葄翹紓逋枙狸椏枟槁枲枳枴枵枷枸櫞枹枻柁柂柃柅柈柊柎某柑橘柒柘柙柚柜柞櫟柟柢柣柤柩柬柮柰柲橙柶柷柸柺査柿栃栄栒栔栘栝栟栢栩栫栭栱栲栳栴檀栵栻桀驁桁鎂桄桉桋桎梏椹葚桓桔桕桜桟桫欏桭桮桯桲桴桷桹湘溟梃梊梍梐潼梔梘梜梠梡梣梧梩梱梲梳梴梵梹棁棃櫻棐棑棕櫚簑繃蓑棖棘棜棨棩棪棫棬棯棰棱棳棸棹槨棼椀椄苕椈椊椋椌椐椑椓椗検椤椪椰椳椴椵椷椸椽椿楀楄楅篪楋楍楎楗楘楙楛楝楟楠楢楥楨楩楪楫楬楮楯楰楳楸楹楻楽榀榃榊榎槺榕榖榘榛狉莽榜笞榠榡榤榥榦榧榪榭榰榱槤霰榼榾榿槊閂槎槑槔槖様槜槢槥槧槪槭槮槱槲槻槼槾樆樊樏樑樕樗樘樛樟樠樧樨権樲樴樵猢猻樺樻罍樾樿橁橄橆橈笥龠橕橚橛輛橢橤橧豎膈跨橾橿檁檃檇檉檍檎檑檖檗檜檟檠檣檨檫檬檮檳檴檵檸櫂櫆櫌櫛櫜櫝櫡櫧櫨櫪櫬櫳櫹櫺茄櫽欀欂欃欐欑欒欙欞溴欨欬欱欵欶欷歔欸欹欻欼欿歁歃歆艎歈歊蒔蝶歓歕歘歙歛歜歟歠蹦詮鑲蹣跚陞陟歩歮歯歰歳歴璞歺瞑歾歿殀殈殍殑殗殜殙殛殞殢殣殥殪殫殭殰殳荃殷殸殹蛟殻殽謗毆毈毉餵毎毑蕈毗毘毚茛鄧毧毬毳毷毹毽毾毿氂氄氆靴氉氊氌氍氐聊氕氖気氘氙氚氛氜氝氡洶焊痙氤氳氥氦鋁鋅氪烴氬銨痤汪滸漉痘盂碾菖蒲蕹蛭螅氵氷氹氺氽燙氾氿渚汆汊汋汍汎汏汐汔汕褟汙汚汜蘺沼穢衊汧汨汩汭汲汳汴隄汾沄沅沆瀣沇沈葆浸淪湎溺痼痾沌沍沏沐沔沕沘浜畹礫沚沢沬沭沮沰沱灢沴沷籽沺烹濡洄泂肛泅泆湧肓泐泑泒泓泔泖泙泚泜泝泠漩饃濤粼濘蘚鰍泩泫泭泯銖泱泲洇洊涇琵琶荽薊箔洌洎洏洑潄濯洙洚洟洢洣洧洨洩痢滔洫洮洳洴洵洸洹洺洼洿淌蜚浄浉浙贛渫浠浡浤浥淼瀚浬浭翩萍浯浰蜃淀苔蛞蝓蜇螵蛸煲鯉浹浼浽溦涂涊涐涑涒涔滂涖涘涙涪涫涬涮涴涶涷涿淄淅淆淊淒黯淓淙漣淜淝淟淠淢淤淥淦淩猥藿褻淬淮淯淰淳詣淶紡淸淹燉癯綺渇済渉渋渓渕渙渟渢滓渤澥渧渨渮渰渲渶渼湅湉湋湍湑湓湔黔湜湝湞湟湢湣湩湫湮麟湱湲湴湼満溈溍溎溏溛舐漭溠溤溧馴溮溱溲溳溵溷溻溼溽溾滁滃滉滊滎滏稽滕滘滙滝滫滮羼耷滷滹滻煎漈漊漎繹漕漖漘漙漚漜漪漾漥漦漯漰漵漶漷濞潀潁潎潏潕潗潚潝潞潠潦祉瘍潲潵潷潸潺潾潿澁澂澃澉澌澍澐澒澔澙澠澣澦澧澨澫澬澮澰澴澶澼熏郁濆濇濈濉濊貊濔疣濜濠濩觴濬濮盥濰濲濼瀁瀅瀆瀋瀌瀏瀒瀔瀕瀘瀛瀟瀠瀡瀦瀧瀨瀬瀰瀲瀳瀵瀹瀺瀼灃灄灉灋灒灕灖灝灞灠灤灥灨灩灪蜴灮燼獴灴灸灺炁炅魷炗炘炙炤炫疽烙釺炯炰炱炲炴炷燬炻烀烋瘴鯧烓烔焙烜烝烳飪烺焃焄耆焌焐焓焗焜焞焠焢焮焯焱焼煁煃煆煇煊熠煍熬煐煒煕煗燻礆霾煚煝煟煠煢矸煨瑣煬萁煳煺煻熀熅熇熉羆熒穹熗熘熛熜稔諳爍熤熨熯熰眶螞熲熳熸熿燀燁燂燄盞燊燋燏燔隼燖燜燠燡燦燨燮燹燻燽燿爇爊爓爚爝爟爨蟾爯爰爲爻爿爿牀牁牂牄牋牎牏牓牕釉牚腩蒡虻牠雖蠣牣牤牮牯牲牳牴牷牸牼絆牿靬犂犄犆犇犉犍犎犒犖犗犛犟犠犨犩犪犮犰狳犴犵犺狁甩狃狆狎狒獾狘狙黠狨狩狫狴狷狺狻豕狽蜘猁猇猈猊猋猓猖獗猗猘猙獰獁猞猟獕猭猱猲猳猷猸猹猺玃獀獃獉獍獏獐獒獘獙獚獜獝獞獠獢獣獧鼇蹊獪獫獬豸獮獯鬻獳獷獼玀玁菟玅玆玈珉糝禛郅玍玎玓瓅玔玕玖玗玘玞玠玡玢玤玥玦玨瑰玭玳瑁玶玷玹玼珂珇珈瑚珌饈饌珔珖珙珛珞珡珣珥珧珩珪珮珶珷珺珽琀琁隕琊琇琖琚琠琤琦琨琫琬琭琮琯琰琱琲瑯琹琺琿瑀瑂瑄瑉瑋瑑瑔瑗瑢瑭瑱瑲瑳瑽瑾瑿璀璨璁璅璆璈璉璊璐璘璚璝璟璠璡璥璦璩璪璫璯璲璵璸璺璿瓀瓔瓖瓘瓚瓛臍瓞瓠瓤瓧瓩瓮瓰瓱瓴瓸瓻瓼甀甁甃甄甇甋甍甎甏甑甒甓甔甕甖甗飴蔗甙詫鉅粱盎銹糰甡褥産甪甬甭甮甯鎧甹甽甾甿畀畁畇畈畊畋畎畓畚畛畟鄂畤畦畧荻畯畳畵畷畸畽畾疃疉疋疍疎簞疐疒疕疘疝疢疥疧疳疶疿痁痄痊痌痍痏痐痒痔痗瘢痚痠痡痣痦痩痭痯痱痳痵痻痿瘀瘂瘃瘈瘉瘊瘌瘏瘐瘓瘕瘖瘙瘚瘛瘲瘜瘝瘞瘠瘥瘨瘭瘮瘯瘰癧瘳癘瘵瘸瘺瘻瘼癃癆癇癈癎癐癔癙癜癠癤癥癩蟆癪癭癰発踔紺蔫酵皙砬砒翎翳蘞鎢鑞皚鵯駒鱀粵褶皀皁莢皃鎛皈皌皐皒硃皕皖皘皜皝皞皤皦皨皪皫皭糙綻皴皸皻皽盅盋盌盍盚盝踞盦盩鞦韆盬盭眦睜瞤盯盱眙裰盵盻睞眂眅眈眊県眑眕眚眛眞眢眣眭眳眴眵眹瞓眽郛睃睅睆睊睍睎睏睒睖睙睟睠睢睥睪睪睯睽睾瞇瞈瞋瞍逛瞏瞕瞖瞘瞜瞟瞠瞢瞫瞭瞳瞵瞷瞹瞽闍瞿矓矉矍鑠矔矗矙矚矞矟矠矣矧矬矯矰矱硪碇磙��舫阡、矼矽礓砃砅砆砉砍砑砕砝砟砠砢砦砧砩砫砮砳艏砵砹砼硇硌硍硎硏硐硒硜硤硨磲茚鋇硭硻硾碃碉碏碣碓碔碞碡碪碫碬碭碯碲碸碻礡磈磉磎磑磔磕磖磛磟磠磡磤磥蹭磪磬磴磵磹磻磽礀礄礅礌礐礚礜礞礤礧礮礱礲礵礽礿祂祄祅祆禳祊祍祏祓祔祕祗祘祛祧祫祲祻祼餌臠錮禂禇禋禑禔禕隋禖禘禚禜禝禠禡禢禤禥禨禫禰禴禸稈秈秊闈颯秌秏秕笈蘵賃秠秣秪秫秬秭秷秸稊稌稍稑稗稙稛稞稬稭稲稹稼顙稾穂穄穇穈穉穋穌貯穏穜穟穠穡穣穤穧穨穭穮穵穸窿闃窀窂窅窆窈窕窊窋窌窒窓窔窞窣窬黷
蹙窰窳窴窵窶窸窻竁竃竈竑竜竝竦竪篦篾笆鮫竾笉笊笎笏笐靨笓笤籙笪笫笭笮笰笱笲笳笵笸笻筀筅筇筈筎筑筘筠筤筥筦筧筩筭筯筰筱筳筴讌筸箂箇箊箎箑箒箘箙箛箜篌箝箠箬鏃箯箴箾篁篔簹篘篙篚篛篜篝篟篠篡篢篥篧篨篭篰篲篳篴篶篹篼簀簁簃簆簉簋簌簏簜簟簠簥簦簨簬簰簸簻籊籐籒籓籔籖籚籛籜籣籥籧籩籪籫籯芾麴籵籸籹籼粁粃粋粑粔糲粛粞粢粧粨粲粳粺粻粽闢粿糅糆糈糌糍糒糔萼糗蛆蹋糢糨糬糭糯糱糴糶糸糺紃蹼鰹黴紆紈絝紉閩襻紑紕紘錠鳶鷂紝紞紟紥紩紬紱紲紵紽紾紿絁絃絅経絍絎絏縭褵絓絖絘絜絢絣螯絪絫聒絰絵絶絺絻絿綀綃綅綆綈綉綌綍綎綑綖綘継続緞綣綦綪綫綮綯綰罟蝽綷縩綹綾緁緄緅緆緇緋緌緎総緑緔緖緗緘緙緜緡緤緥緦纂緪緰緱緲緶緹縁縃縄縈縉縋縏縑縕縗縚縝縞縟縠縡縢縦縧縯縰騁縲縳縴縵縶縹縻衙縿繄繅繈繊繋繐繒繖繘繙繠繢繣繨繮繰繸繻繾纁纆纇纈纉纊纑纕纘纙纚纛缾罃罆罈罋罌罎罏罖罘罛罝罠罣罥罦罨罫罭鍰罳罶罹罻罽罿羂羃羇羋蕉51鴕羑羖羗羜羝羢羣羥羧羭羮羰羱羵羶羸藜鮐翀翃翄翊翌翏翕翛翟翡翣翥翦躚翪翫翬翮翯翺翽翾翿闆饕鴰鍁耋耇耎耏耑耒耜耔耞耡耤耨耩耪耬耰鬢耵聹聃聆聎聝聡聦聱聴聶聼閾聿肄肏肐肕腋肙肜肟肧胛肫肬肭肰肴肵肸肼胊胍胏胑胔胗胙胝胠銓胤胦胩胬胭胯胰胲胴胹胻胼胾脇脘脝脞脡脣脤脥脧脰脲脳腆腊腌臢腍腒腓腖腜腠腡腥腧腬腯踝蹬鐐腴腶蠕誹膂膃膆膇膋膔膕膗膙膟黐膣膦膫膰膴膵膷膾臃臄臇臈臌臐臑臓臕臖臙臛臝臞臧蓐詡臽臾臿舀舁鰟鮍舋舎舔舗舘舝舠舡舢舨舭舲舳舴舸舺艁艄艅艉艋艑艕艖艗艘艚艜艟艣艤艨艩艫艬艭荏艴艶艸艹艻艿芃芄芊萰陂藭芏芔芘芚蕙芟芣芤茉芧芨芩芪芮芰鰱芴芷芸蕘豢芼芿苄苒苘苙苜蓿苠苡苣蕒苤苧苪鎊苶苹苺苻苾茀茁范蠡萣茆茇茈茌茍茖茞茠茢茥茦菰茭茯茳藨茷藘茼荁荄荅荇荈菅蜢鴞荍荑荘荳荵荸薺莆莒莔莕莘莙莚莛莜莝莦莨菪莩莪莭莰莿菀菆菉菎菏菐菑菓菔菕菘菝菡菢菣菥蓂菧菫轂鎣菶菷菹醢菺菻菼菾萅萆萇萋萏萐萑萜萩萱萴萵萹萻葇葍葎葑葒葖葙葠葥葦葧葭葯葳葴葶葸葹葽蒄蒎蒓蘢薹蒞蒟蒻蒢蒦蒨蒭藁蒯蒱鉾蒴蒹蒺蒽蓀蓁蓆蓇蓊蓌蓍蓏蓓蓖蓧蓪蓫蓽跣藕蓯蓰蓱蓴蓷蓺蓼蔀蔂蔃蔆蔇蔉蔊蔋蔌蔎蔕蔘蔙蔞蔟鍔蔣雯蔦蔯蔳蔴蔵蔸蔾蕁蕆蕋蕍蕎蕐蕑蕓蕕蕖蕗蕝蕞蕠蕡蕢蕣蕤蕨蕳蕷蕸蕺蕻薀薁薃薅薆薈薉薌薏薐薔薖薘薙諤釵薜薠薢薤薧薨薫薬薳薶薷薸薽薾薿藄藇藋藎藐藙藚藟藦藳藴藶藷藾蘀蘁蘄蘋蘗蘘蘝蘤蘧蘩蘸蘼虀虆虍蟠虒虓虖虡虣虥虩虯虰蛵虵虷鱒虺虼蚆蚈蚋蚓蚔蚖蚘蚜蚡蚣蚧蚨蚩蚪蚯蚰蜒蚱蚳蚶蚹蚺蚻蚿蛀蛁蛄蛅蝮蛌蛍蛐蟮蛑蛓蛔蛘蛚蛜蛡蛣蜊蛩蛺蛻螫蜅蜆蜈蝣蜋蜍蜎蜑蠊蜛餞蜞蜣蜨蜩蜮蜱蜷蜺蜾蜿蝀蝃蝋蝌蝍蝎蝏蝗蝘蝙蝝鱝蝡蝤蝥蝯蝰蝱蝲蝴蝻螃蠏螄螉螋螒螓螗螘螙螚蟥螟螣螥螬螭螮螾螿蟀蟅蟈蟊蟋蟑蟓蟛蟜蟟蟢蟣蟨蟪蟭蟯蟳蟶蟷蟺蟿蠁蠂蠃蠆蠋蠐蠓蠔蠗蠙蠚蠛蠜蠧蠨蠩蠭蠮蠰蠲蠵蠸蠼蠽衁衂衄衇衈衉衋衎衒衕衖衚衞裳鈎衭衲衵衹衺衿袈裟袗袚袟袢袪袮袲袴袷袺袼褙袽裀裉裊裋裌裍裎裒裛裯裱裲裴裾褀褂褉褊褌褎褐褒褓褔褕褘褚褡褢褦褧褪褫褭褯褰褱襠褸褽褾襁襃襆襇襉襋襌襏襚襛襜襝襞襡襢襤襦襫襬襭襮襴襶襼襽襾覂覃覅覇覉覊覌覗覘覚覜覥覦覧覩覬覯覰観覿觔觕觖觜觽觝觡酲觩觫觭觱觳觶觷觼觾觿言賅訃訇訏訑訒詁託訧訬訳訹証訾詀詅詆譭詈詊詎詑詒詖詗詘詧詨詵詶詸詹詻詼詿誂誃誄鋤誆誋誑誒誖誙誚誥誧説読誯誶誾諂諄諆諌諍諏諑諕諗諛諝諞諟諠諡諴諵諶諼謄謆謇謌謍謏謑謖謚謡謦謪謫謳謷謼謾譁譅譆譈譊譌譒譔譖鑫譞譟譩譫譬譱譲譴譸譹譾讅讆讋讌讎讐讒讖讙讜讟谽豁豉豇豈豊豋豌豏豔豞豖豗豜豝豣豦豨豭豱豳豵豶豷豺豻貅貆貍貎貔貘貙貜貤饜貰餸貺賁賂賏賒賕賙賝賡賧賨賫鬭賮賵賸賺賻賾贇贉贐贔贕贗赬赭赱赳迄趁趂趄趐趑趒趔趡趦趫趮趯趲趴趵趷趹趺趿跁跂跅跆躓蹌跐跕跖跗跙跛跦跧跩跫跬跮跱跲跴跺跼跽踅踆踈踉踊踒���踘踜踟躇躕踠踡踣踤踥踦踧蹺踫踮踰踱踴踶踹踺踼踽躞蹁蹂躪蹎蹐蹓蹔蹕蹚蹜蹝蹟蹠蹡蹢躂蹧蹩蹪蹯鞠蹽躃躄躅躊躋躐躑躒躘躙躛躝躠躡躦躧躩躭躰躳躶軃軆輥軏軔軘軜軝齶転軥軨軭軱軲轆軷軹軺軽軿輀輂輦輅輇輈輓輗輙輜輞輠輤輬輭輮輳輴輵輶輹輼輾轀轇轏轑轒轔轕轖轗轘轙轝轞轢轤辠辢辤辵辶辺込辿迅迋迍麿迓迣迤邐迥迨迮迸迺迻迿逄逅逌逍逑逓逕逖逡逭逯逴逶逹遄遅遉遘遛遝遢遨遫遯遰遴遶遹遻邂邅邉邋邎邕邗邘邛邠邢邧邨邯鄲邰邲邳邴邶邷邽邾邿郃郄郇郈郔郕郗郙郚郜郝郞郟郠郢郪郫郯郰郲郳郴郷郹郾郿鄀鄄鄆鄇鄈鄋鄍鄎鄏鄐鄑鄒鄔鄕鄖鄗鄘鄚鄜鄞鄠鄢鄣鄤鄦鄩鄫鄬鄮鄯鄱鄶鄷鄹鄺鄻鄾鄿酃酅酆酇酈酊酋酎酏酐酣酔酕醄酖酗酞酡酢酤酩酴酹酺醁醅醆醊醍醐醑醓醖醝醞醡醤醨醪醭醯醰醱醲醴醵醸醹醼醽醾釂釃釅釆釈鱸鎦閶釓釔釕鈀釙鼢鼴釤釧釪釬釭釱釷釸釹鈁鈃鈄鈆鈇鈈鈊鈌鈐鈑鈒鈤鈥鈧鈬鈮鈰鈳鐺鈸鈹鈽鈿鉄鉆鉈鉋鉌鉍鉏鉑鉕鉚鉢鉥鉦鉨鉬鉭鉱鉲鉶鉸鉺鉼鉿銍銎銑銕鏤銚銛銠銣銤銥銦銧銩銪銫銭銰銲銶銻銼銾鋂鋃鋆鋈鋊鋌鋍鋏鋐鋑鋕鋘鋙鋝鋟鋦鋨鋩鋭鋮鋯鋰鋱鋳鋹鋺鋻鏰鐱錀錁錆錇錈錍錏錒錔錙錚錛錞錟錡錤錩錬録錸錼鍀鍆鍇鍉鍍鍏鍐鍘鍚鍛鍠鍤鍥鍩鍫鍭鍱鍴鍶鍹鍺鍼鍾鎄鎇鎉鎋鎌鎍鎏鎒鎓鎗鎘鎚鎞鎡鎤鎩鎪鎭鎯鎰鎳鎴
鎵鎸鎹鎿鏇鏊鏌鏐鏑鏖鏗鏘鏚鏜鏝鏞鏠鏦鏨鏷鏸鏹鏻鏽鏾鐃鐄鐇鐏鐒鐓鐔鐗馗鐙鐝鐠鐡鐦鐨鐩鐫鐬鐱鐳鐶鐻鐽鐿鑀鑅鑌鑐鑕鑚鑛鑢鑤鑥鑪鑭鑯鑱鑴鑵鑷钁钃镻閆閈閌閎閒閔閗閟閡関閤閤閧閬閲閹閺閻閼閽閿闇闉闋闐闑闒闓闘闚闞闟闠闤闥阞阢阤阨阬阯阹阼阽陁陑陔陛陜陡陥陬騭陴険陼陾隂隃隈隒隗隞隠隣隤隩隮隰顴隳隷隹雂雈雉雊雎雑雒雗雘雚雝雟雩雰雱驛霂霅霈霊霑霒霓霙霝霢霣霤霨霩霪霫霮靁靆靉靑靚靣靦靪靮靰靳靷靸靺靼靿鞀鞃鞄鞌鞗鞙鞚鞝鞞鞡鞣鞨鞫鞬鞮鞶鞹鞾韃韅韉馱韍韎韔韖韘韝韞韡韣韭韮韱韹韺頀颳頄頇頊頍頎頏頒頖頞頠頫頬顱頯頲頴頼顇顋顑顒顓顔顕顚顜顢顣顬顳颭颮颱颶颸颺颻颽颾颿飀飂飈飌飜飡飣飤飥飩飫飮飱飶餀餂餄餎餇餈餑餔餕餖餗餚餛餜餟餠餤餧餩餪餫餬餮餱餲餳餺餻餼餽餿饁饅饇饉饊饍饎饐饘饟饢馘馥馝馡馣騮騾馵馹駃駄駅駆駉駋駑駓駔駗駘駙駜駡駢駪駬駰駴駸駹駽駾騂騄騅騆騉騋騍騏驎騑騒験騕騖騠騢騣騤騧驤騵騶騸騺驀驂驃驄驆驈驊驌驍驎驏驒驔驖驙驦驩驫骺鯁骫骭骯骱骴骶骷髏骾髁髂髄髆髈髐髑髕髖髙髝髞髟髡髣髧髪髫髭髯髲髳髹髺髽髾鬁鬃鬅鬈鬋鬎鬏鬐鬑鬒鬖鬗鬘鬙鬠鬣鬪鬫鬬鬮鬯鬰鬲鬵鬷魆魈魊魋魍魎魑魖鰾魛魟魣魦魨魬魴魵魸鮀鮁鮆鮌鮎鮑鮒鮓鮚鮞鮟鱇鮠鮦鮨鮪鮭鮶鮸鮿鯀鯄鯆鯇鯈鯔鯕鯖鯗鯙鯠鯤鯥鯫鯰鯷鯸鯿鰂鰆鶼鰉鰋鰐鰒鰕鰛鰜鰣鰤鰥鰦鰨鰩鰮鰳鰶鰷鱺鰼鰽鱀鱄鱅鱆鱈鱎鱐鱓鱔鱖鱘鱟鱠鱣鱨鱭鱮鱲鱵鱻鲅鳦鳧鳯鳲鳷鳻鴂鴃鴄鴆鴈鴎鴒鴔鴗鴛鴦鴝鵒鴟鴠鴢鴣鴥鴯鶓鴳鴴鴷鴽鵀鵁鵂鵓鵖鵙鵜鶘鵞鵟鵩鵪鵫鵵鵷鵻鵾鶂鶊鶏鶒鶖鶗鶡鶤鶦鶬鶱鶲鶵鶸鶹鶺鶿鷀鷁鷃鷄鷇鷈鷉鷊鷏鷓鷕鷖鷙鷞鷟鷥鷦鷯鷩鷫鷭鷳鷴鷽鷾鷿鸂鸇鸊鸏鸑鸒鸓鸕鸛鸜鸝鹸鹹鹺麀麂麃麄麇麋麌麐麑麒麚麛麝麤麩麪麫麮麯麰麺麾黁黈黌黢黒黓黕黙黝黟黥黦黧黮黰黱黲黶黹黻黼黽黿鼂鼃鼅鼈鼉鼏鼐鼒鼕鼖鼙鼚鼛鼡鼩鼱鼪鼫鼯鼷鼽齁齆齇齈齉齌齎齏齔齕齗齙齚齜齞齟齬齠齢齣齧齩齮齯齰齱齵齾龎龑龒龔龖龘龝龡龢龤'
|
20 |
+
|
21 |
+
# NOTE(review): this assertion compares the simplified string's length with
# itself, so it can never fail. It was presumably meant to compare against
# len(traditional_characters) -- confirm the two tables really have equal
# length before tightening it, since the loop below pairs them by index.
assert len(simplified_charcters) == len(simplified_charcters)

# Character-level lookup tables built by pairing the two strings position by
# position. When a character occurs more than once, the last pairing wins.
s2t_dict = {}
t2s_dict = {}
for i, item in enumerate(simplified_charcters):
    s2t_dict[item] = traditional_characters[i]
    t2s_dict[traditional_characters[i]] = item
|
28 |
+
|
29 |
+
|
30 |
+
def tranditional_to_simplified(text: str) -> str:
    """Convert ``text`` character by character using ``t2s_dict``.

    Characters without an entry in ``t2s_dict`` are copied through unchanged.
    """
    converted = []
    for char in text:
        converted.append(t2s_dict.get(char, char))
    return "".join(converted)
|
33 |
+
|
34 |
+
|
35 |
+
def simplified_to_traditional(text: str) -> str:
    """Convert ``text`` character by character using ``s2t_dict``.

    Characters without an entry in ``s2t_dict`` are copied through unchanged.
    """
    converted = []
    for char in text:
        converted.append(s2t_dict.get(char, char))
    return "".join(converted)
|
38 |
+
|
39 |
+
|
40 |
+
if __name__ == "__main__":
    # Manual demo: print a traditional-Chinese sentence, its simplified
    # conversion, and the result of converting that back to traditional.
    text = "一般是指存取一個應用程式啟動時始終顯示在網站或網頁瀏覽器中的一個或多個初始網頁等畫面存在的站點"
    print(text)
    text_simple = tranditional_to_simplified(text)
    print(text_simple)
    text_traditional = simplified_to_traditional(text_simple)
    print(text_traditional)
|
GPT_SoVITS/text/zh_normalization/chronology.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
import re
|
15 |
+
|
16 |
+
from .num import DIGITS
|
17 |
+
from .num import num2str
|
18 |
+
from .num import verbalize_cardinal
|
19 |
+
from .num import verbalize_digit
|
20 |
+
|
21 |
+
|
22 |
+
def _time_num2str(num_string: str) -> str:
    """Verbalize a minute/second field, keeping a leading zero audible.

    The leading zeros are stripped before the number is verbalized; if the
    field started with '0', the word for zero is put back in front.
    """
    spoken = num2str(num_string.lstrip('0'))
    return DIGITS['0'] + spoken if num_string.startswith('0') else spoken
|
28 |
+
|
29 |
+
|
30 |
+
# One clock reading: hour, minute and an optional ":second" suffix.
# It contributes four capture groups (hour, minute, ":ss", ss).
_CLOCK_PATTERN = (r'([0-1]?[0-9]|2[0-3])'
                  r':([0-5][0-9])'
                  r'(:([0-5][0-9]))?')

# A single point in time.
RE_TIME = re.compile(_CLOCK_PATTERN)

# A time range such as 8:30-12:30: two clock readings joined by "~" or "-".
RE_TIME_RANGE = re.compile(_CLOCK_PATTERN + r'(~|-)' + _CLOCK_PATTERN)
|
43 |
+
|
44 |
+
|
45 |
+
def _verbalize_clock(hour: str, minute: str, second) -> str:
    """Verbalize one hour/minute/second triple captured by the time regexes."""
    spoken = f"{num2str(hour)}点"
    if minute.lstrip('0'):
        # Exactly half past is read as "半" instead of "三十分".
        if int(minute) == 30:
            spoken += "半"
        else:
            spoken += f"{_time_num2str(minute)}分"
    # ``second`` is None when the optional ":ss" part did not match.
    if second and second.lstrip('0'):
        spoken += f"{_time_num2str(second)}秒"
    return spoken


def replace_time(match) -> str:
    """Verbalize a match of ``RE_TIME`` or ``RE_TIME_RANGE``.

    Args:
        match (re.Match): a match with 4 groups (single time) or 9 groups
            (range; groups 6, 7 and 9 hold the end hour, minute and second).
    Returns:
        str: the spoken form; a range is rendered as "<start>至<end>".
    """
    # Only the range pattern has more than five capture groups.
    is_range = len(match.groups()) > 5

    result = _verbalize_clock(match.group(1), match.group(2), match.group(4))
    if is_range:
        # The end of the range is verbalized from its own minute field; the
        # previous code tested the start minute when deciding on "半".
        result += "至"
        result += _verbalize_clock(
            match.group(6), match.group(7), match.group(9))
    return result
|
85 |
+
|
86 |
+
|
87 |
+
# Dates written with 年 / 月 / 日 (or 号); month and day are optional.
RE_DATE = re.compile(
    r'(\d{4}|\d{2})年'  # group 1: year (4 or 2 digits)
    r'((0?[1-9]|1[0-2])月)?'  # group 3: month
    r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')  # group 5: day, group 9: suffix


def replace_date(match) -> str:
    """Verbalize a match of ``RE_DATE``.

    Args:
        match (re.Match)
    Returns:
        str: year read digit by digit, month and day read as cardinals; the
        day keeps whichever suffix (group 9) was matched.
    """
    year, month, day = match.group(1, 3, 5)
    pieces = []
    if year:
        pieces.append(f"{verbalize_digit(year)}年")
    if month:
        pieces.append(f"{verbalize_cardinal(month)}月")
    if day:
        pieces.append(f"{verbalize_cardinal(day)}{match.group(9)}")
    return "".join(pieces)
|
110 |
+
|
111 |
+
|
112 |
+
# Numeric dates with a four-digit year, two-digit month and two-digit day.
# The separator may be "-", " ", "/" or ".", and the back-reference forces
# the same separator on both sides of the month.
RE_DATE2 = re.compile(
    r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')


def replace_date2(match) -> str:
    """Verbalize a match of ``RE_DATE2``.

    Args:
        match (re.Match)
    Returns:
        str: year read digit by digit followed by 年, then month and day
        read as cardinals followed by 月 and 日.
    """
    year, month, day = match.group(1, 3, 4)
    pieces = []
    if year:
        pieces.append(f"{verbalize_digit(year)}年")
    if month:
        pieces.append(f"{verbalize_cardinal(month)}月")
    if day:
        pieces.append(f"{verbalize_cardinal(day)}日")
    return "".join(pieces)
|
GPT_SoVITS/text/zh_normalization/constants.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
import re
|
15 |
+
import string
|
16 |
+
|
17 |
+
from pypinyin.constants import SUPPORT_UCS4
|
18 |
+
|
19 |
+
# Full-width <-> half-width conversion tables.
# Every table maps code point -> code point so it can be passed directly to
# ``str.translate``, which looks characters up by their ordinal.

# Full-width ASCII letters -> half-width (52 entries).
F2H_ASCII_LETTERS = {
    ord(char) + 65248: ord(char)
    for char in string.ascii_letters
}

# Half-width ASCII letters -> full-width.
H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}

# Full-width digits -> half-width (10 entries).
F2H_DIGITS = {ord(char) + 65248: ord(char) for char in string.digits}
# Half-width digits -> full-width.
H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}

# Full-width punctuation -> half-width (32 entries).
F2H_PUNCTUATIONS = {ord(char) + 65248: ord(char) for char in string.punctuation}
# Half-width punctuation -> full-width.
H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}

# Space (1 entry). The ideographic space U+3000 does not follow the 65248
# offset, so it gets its own table. The keys must be ordinals: with string
# keys ``str.translate`` never finds an entry and leaves the text unchanged.
F2H_SPACE = {ord('\u3000'): ord(' ')}
H2F_SPACE = {ord(' '): ord('\u3000')}
|
42 |
+
|
43 |
+
# 非"有拼音的汉字"的字符串,可用于NSW提取
|
44 |
+
# Character ranges excluded from RE_NSW on every build.
_NSW_EXCLUDED_RANGES = (
    r'\u3007'  # the character "〇"
    r'\u3400-\u4dbf'  # CJK extension A
    r'\u4e00-\u9fff'  # CJK basic block
    r'\uf900-\ufaff'  # CJK compatibility ideographs
)
if SUPPORT_UCS4:
    # Supplementary-plane ranges, added only when pypinyin reports UCS4
    # support.
    _NSW_EXCLUDED_RANGES += (
        r'\U00020000-\U0002A6DF'  # CJK extension B
        r'\U0002A703-\U0002B73F'  # CJK extension C (range starts at 2A703)
        r'\U0002B740-\U0002B81D'  # CJK extension D
        r'\U0002F80A-\U0002FA1F'  # CJK compatibility supplement (from 2F80A)
    )

# Matches a run of one or more characters outside the ranges above; the
# original note says it can be used to extract non-standard words (NSW).
RE_NSW = re.compile(r'(?:[^' + _NSW_EXCLUDED_RANGES + r'])+')
|
GPT_SoVITS/text/zh_normalization/num.py
ADDED
@@ -0,0 +1,318 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""
|
15 |
+
Rules to verbalize numbers into Chinese characters.
|
16 |
+
https://zh.wikipedia.org/wiki/中文数字#現代中文
|
17 |
+
"""
|
18 |
+
import re
|
19 |
+
from collections import OrderedDict
|
20 |
+
from typing import List
|
21 |
+
|
22 |
+
# ASCII digit character -> Chinese numeral.
DIGITS = dict(zip('0123456789', '零一二三四五六七八九'))

# Power of ten -> Chinese unit word, in ascending order of magnitude.
UNITS = OrderedDict([
    (1, '十'),
    (2, '百'),
    (3, '千'),
    (4, '万'),
    (8, '亿'),
])
|
30 |
+
|
31 |
+
# Regex alternation of measure words (quantifiers), wrapped in one outer
# capturing group. It is appended to the number pattern that builds
# RE_POSITIVE_QUANTIFIERS below.
COM_QUANTIFIERS = '(处|台|架|枚|趟|幅|平|方|堵|间|床|株|批|项|例|列|篇|栋|注|亩|封|艘|把|目|套|段|人|所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|十|)吨|(亿|千万|百万|万|千|百|)块|角|毛|分)'
|
32 |
+
|
33 |
+
# Fractions: optional minus sign, numerator, "/", denominator.
RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')


def replace_frac(match) -> str:
    """Verbalize a match of ``RE_FRAC``.

    Args:
        match (re.Match)
    Returns:
        str: "<denominator>分之<numerator>", prefixed with "负" when the
        minus sign matched.
    """
    minus, top, bottom = match.group(1, 2, 3)
    prefix = "负" if minus else ""
    spoken_top = num2str(top)
    spoken_bottom = num2str(bottom)
    return f"{prefix}{spoken_bottom}分之{spoken_top}"
|
52 |
+
|
53 |
+
|
54 |
+
# Percentages: optional minus sign, integer or decimal number, "%".
RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')


def replace_percentage(match) -> str:
    """Verbalize a match of ``RE_PERCENTAGE``.

    Args:
        match (re.Match)
    Returns:
        str: "百分之<number>", prefixed with "负" when the minus sign matched.
    """
    prefix = "负" if match.group(1) else ""
    spoken = num2str(match.group(2))
    return f"{prefix}百分之{spoken}"
|
71 |
+
|
72 |
+
|
73 |
+
# Integers carrying an explicit minus sign, e.g. -10.
RE_INTEGER = re.compile(r'(-)(\d+)')


def replace_negative_num(match) -> str:
    """Verbalize a match of ``RE_INTEGER``.

    Args:
        match (re.Match)
    Returns:
        str: the verbalized digits, prefixed with "负" when group 1 matched.
    """
    prefix = "负" if match.group(1) else ""
    spoken = num2str(match.group(2))
    return f"{prefix}{spoken}"
|
91 |
+
|
92 |
+
|
93 |
+
# Serial-number style unsigned digit strings of three or more digits,
# e.g. 00078.
RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')


def replace_default_num(match):
    """Read the matched digit string one digit at a time.

    Args:
        match (re.Match)
    Returns:
        str: result of ``verbalize_digit`` with ``alt_one=True``.
    """
    return verbalize_digit(match.group(0), alt_one=True)
|
107 |
+
|
108 |
+
|
109 |
+
# Arithmetic (add / subtract / multiply / divide / equals).
# An operand is a signed integer or decimal, a bare ".ddd" decimal, or a
# single ASCII letter; each may be followed by superscript characters.
_ASMD_SUPERSCRIPTS = r'[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*'
_ASMD_OPERAND = (r'((-?)((\d+)(\.\d+)?' + _ASMD_SUPERSCRIPTS + r')'
                 r'|(\.\d+' + _ASMD_SUPERSCRIPTS + r')'
                 r'|([A-Za-z]' + _ASMD_SUPERSCRIPTS + r'))')
# Groups: 1 = left operand, 8 = operator, 9 = right operand.
RE_ASMD = re.compile(_ASMD_OPERAND + r'([\+\-\×÷=])' + _ASMD_OPERAND)

# Operator symbol -> spoken word.
asmd_map = {
    '+': '加',
    '-': '减',
    '×': '乘',
    '÷': '除',
    '=': '等于',
}


def replace_asmd(match) -> str:
    """Replace the operator of an ``RE_ASMD`` match with its spoken word.

    Args:
        match (re.Match)
    Returns:
        str: left operand, spoken operator and right operand concatenated;
        the operands themselves are left untouched.
    """
    left, operator, right = match.group(1, 8, 9)
    return left + asmd_map[operator] + right
|
132 |
+
|
133 |
+
|
134 |
+
# Exponents written with superscript characters.
RE_POWER = re.compile(r'[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]+')

# Superscript character -> plain character.
power_map = dict(zip('⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ', '0123456789xyn'))


def replace_power(match) -> str:
    """Rewrite a run of superscripts as "的<plain characters>次方".

    Args:
        match (re.Match)
    Returns:
        str
    """
    exponent = "".join(power_map[mark] for mark in match.group(0))
    return "的" + exponent + "次方"
|
165 |
+
|
166 |
+
|
167 |
+
# Number expressions.
# Second alternative shared by the two number patterns: a bare decimal such
# as ".22" with no integer part.
_BARE_DECIMAL_ALTERNATIVE = r'|(\.(\d+))'

# Decimals: a signed number with a mandatory fractional part, or a bare one.
RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' + _BARE_DECIMAL_ALTERNATIVE)
# Positive integer, an optional approximation marker, then a quantifier.
RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" + COM_QUANTIFIERS)
# Any number: a signed integer or decimal, or a bare decimal.
RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' + _BARE_DECIMAL_ALTERNATIVE)
|
173 |
+
|
174 |
+
|
175 |
+
def replace_positive_quantifier(match) -> str:
    """Verbalize a match of ``RE_POSITIVE_QUANTIFIERS``.

    Args:
        match (re.Match)
    Returns:
        str: spoken number, optional approximation marker and the quantifier.
        A "+" marker is spoken as "多", and a number that verbalizes to
        exactly "二" is replaced by "两".
    """
    digits, marker, quantifier = match.group(1, 2, 3)
    if marker == "+":
        marker = "多"
    elif not marker:
        # No marker matched (None): contribute nothing to the output.
        marker = ""
    spoken = num2str(digits)
    if spoken == "二":
        spoken = "两"
    return f"{spoken}{marker}{quantifier}"
|
192 |
+
|
193 |
+
|
194 |
+
def replace_number(match) -> str:
    """Verbalize a match of ``RE_NUMBER`` or ``RE_DECIMAL_NUM``.

    Args:
        match (re.Match)
    Returns:
        str: the spoken number; a bare decimal (group 5, e.g. ".22") is
        verbalized as is, otherwise the sign and number are handled
        separately and a matched minus sign is spoken as "负".
    """
    bare_decimal = match.group(5)
    if bare_decimal:
        return num2str(bare_decimal)
    prefix = "负" if match.group(1) else ""
    spoken = num2str(match.group(2))
    return f"{prefix}{spoken}"
|
211 |
+
|
212 |
+
|
213 |
+
# Numeric ranges such as 3-5 or 1.5~2.
# Group 1 is the start of the range and group 6 is the end; each bound uses
# the same signed integer-or-decimal shape as the first alternative of
# RE_NUMBER. The lookbehind and lookahead reject candidates that touch
# another digit or an arithmetic operator.
RE_RANGE = re.compile(
    r"""
    (?<![\d\+\-\×÷=])      # 使用反向前瞻以确保数字范围之前没有其他数字和操作符
    ((-?)((\d+)(\.\d+)?))  # 匹配范围起始的负数或正数(整数或小数)
    [-~]                   # 匹配范围分隔符
    ((-?)((\d+)(\.\d+)?))  # 匹配范围结束的负数或正数(整数或小数)
    (?![\d\+\-\×÷=])       # 使用正向前瞻以确保数字范围之后没有其他数字和操作符
    """, re.VERBOSE)


def replace_range(match) -> str:
    """Verbalize a match of ``RE_RANGE`` as "<start>到<end>".

    Args:
        match (re.Match)
    Returns:
        str
    """
    start = RE_NUMBER.sub(replace_number, match.group(1))
    end = RE_NUMBER.sub(replace_number, match.group(6))
    return f"{start}到{end}"
|
238 |
+
|
239 |
+
|
240 |
+
# "~" between two quantities that each carry a unit, e.g. 5cm~10cm.
_TO_RANGE_NUMBER = r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))'
_TO_RANGE_UNIT = r'(%|°C|℃|度|摄氏度|cm2|cm²|cm3|cm³|cm|db|ds|kg|km|m2|m²|m³|m3|ml|m|mm|s)'
_TO_RANGE_QUANTITY = _TO_RANGE_NUMBER + _TO_RANGE_UNIT
RE_TO_RANGE = re.compile(_TO_RANGE_QUANTITY + r'[~]' + _TO_RANGE_QUANTITY)


def replace_to_range(match) -> str:
    """Replace every "~" in the matched text with "至".

    Args:
        match (re.Match)
    Returns:
        str
    """
    return '至'.join(match.group(0).split('~'))
|
253 |
+
|
254 |
+
|
255 |
+
def _get_value(value_string: str, use_zero: bool=True) -> List[str]:
    """Break a digit string into a list of numeral and unit symbols.

    Args:
        value_string: digit string, possibly with leading zeros.
        use_zero: when the string reduces to a single non-zero digit that
            was preceded by zeros, emit the zero numeral before the digit.
    Returns:
        List[str]: the symbols in reading order; empty if every digit is 0.
    """
    significant = value_string.lstrip('0')
    if not significant:
        return []

    if len(significant) == 1:
        digit = [DIGITS[significant]]
        had_leading_zero = len(significant) < len(value_string)
        return [DIGITS['0']] + digit if use_zero and had_leading_zero else digit

    # Split at the largest unit whose power is below the number of
    # significant digits. The split is applied to the unstripped string, so
    # the recursive calls still see interior zeros.
    width = len(significant)
    split_at = next(
        power for power in reversed(UNITS.keys()) if power < width)
    head = value_string[:-split_at]
    tail = value_string[-split_at:]
    return _get_value(head) + [UNITS[split_at]] + _get_value(tail)
|
271 |
+
|
272 |
+
|
273 |
+
def verbalize_cardinal(value_string: str) -> str:
    """Verbalize a digit string as a cardinal number.

    An empty string yields an empty string, and a string made only of zeros
    yields the zero numeral.
    """
    if not value_string:
        return ''

    digits = value_string.lstrip('0')
    if not digits:
        # "000" and "0" are both read as a single zero.
        return DIGITS['0']

    symbols = _get_value(digits)
    # A leading "一十" is abbreviated to "十".
    if symbols[:2] == [DIGITS['1'], UNITS[1]]:
        symbols = symbols[1:]
    return ''.join(symbols)
|
288 |
+
|
289 |
+
|
290 |
+
def verbalize_digit(value_string: str, alt_one=False) -> str:
    """Read a digit string one digit at a time.

    Args:
        value_string: string whose characters are all keys of ``DIGITS``.
        alt_one: when true, every "一" in the result is replaced by "幺".
    """
    spoken = ''.join(map(DIGITS.__getitem__, value_string))
    return spoken.replace("一", "幺") if alt_one else spoken
|
296 |
+
|
297 |
+
|
298 |
+
def num2str(value_string: str) -> str:
    """Verbalize an unsigned integer or decimal string.

    The integer part is read as a cardinal number and the fractional part
    digit by digit after "点"; trailing zeros of the fraction are dropped.

    Args:
        value_string: digits with at most one ".", e.g. "12", "3.20", ".22".
    Returns:
        str: the spoken form.
    Raises:
        ValueError: if *value_string* contains more than one ".".
    """
    parts = value_string.split('.')
    if len(parts) > 2:
        # The message used to carry a stray "$" in front of the value.
        raise ValueError(
            f"The value string: '{value_string}' has more than one point in it."
        )
    integer = parts[0]
    decimal = parts[1] if len(parts) == 2 else ''

    result = verbalize_cardinal(integer)

    decimal = decimal.rstrip('0')
    if decimal:
        # '.22' is verbalized as '零点二二'
        # '3.20' is verbalized as '三点二'
        result = result if result else "零"
        result += '点' + verbalize_digit(decimal)
    return result
|
GPT_SoVITS/text/zh_normalization/phonecode.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
import re
|
15 |
+
|
16 |
+
from .num import verbalize_digit
|
17 |
+
|
18 |
+
# Normalization of landline and mobile phone numbers.
# Mobile prefixes (source: http://www.jihaoba.com/news/show/13680)
# China Mobile: 139, 138, 137, 136, 135, 134, 159, 158, 157, 150, 151, 152,
#               188, 187, 182, 183, 184, 178, 198
# China Unicom: 130, 131, 132, 156, 155, 186, 185, 176
# China Telecom: 133, 153, 189, 180, 181, 177
RE_MOBILE_PHONE = re.compile(
    r"(?<!\d)((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})(?!\d)")
RE_TELEPHONE = re.compile(
    r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})(?!\d)")

# Nationwide service numbers starting with 400.
RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"(400)(-)?\d{3}(-)?\d{4}")


def phone2str(phone_string: str, mobile=True) -> str:
    """Read a phone number digit by digit, with a separator between parts.

    Args:
        phone_string: the matched number text.
        mobile: when true, "+" is stripped from both ends and the text is
            split on whitespace; otherwise it is split on "-".
    Returns:
        str: each part verbalized with ``alt_one=True`` and then joined.
    """
    if mobile:
        chunks = phone_string.strip('+').split()
    else:
        chunks = phone_string.split('-')
    spoken_chunks = [verbalize_digit(chunk, alt_one=True) for chunk in chunks]
    return ','.join(spoken_chunks)
|
44 |
+
|
45 |
+
|
46 |
+
def replace_phone(match) -> str:
    """Verbalize a landline-style match by calling ``phone2str`` with
    ``mobile=False``.

    Args:
        match (re.Match)
    Returns:
        str
    """
    matched_text = match.group(0)
    return phone2str(matched_text, mobile=False)
|
54 |
+
|
55 |
+
|
56 |
+
def replace_mobile(match) -> str:
    """Verbalize a mobile-number match by calling ``phone2str`` in its
    default (mobile) mode.

    Args:
        match (re.Match)
    Returns:
        str
    """
    matched_text = match.group(0)
    return phone2str(matched_text)
|
GPT_SoVITS/text/zh_normalization/quantifier.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
import re
|
15 |
+
|
16 |
+
from .num import num2str
|
17 |
+
|
18 |
+
# Temperature expressions. A temperature changes how the minus sign is
# read: -3°C is spoken as "零下三度".
# Groups: 1 = sign, 2 = number, 3 = optional fractional part, 4 = unit.
RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)')

# Unit abbreviation -> spoken Chinese unit name.
measure_dict = {
    "cm2": "平方厘米",
    "cm²": "平方厘米",
    "cm3": "立方厘米",
    "cm³": "立方厘米",
    "cm": "厘米",
    "db": "分贝",
    "ds": "毫秒",
    "kg": "千克",
    "km": "千米",
    "m2": "平方米",
    "m²": "平方米",
    "m³": "立方米",
    "m3": "立方米",
    "ml": "毫升",
    "m": "米",
    "mm": "毫米",
    "s": "秒"
}


def replace_temperature(match) -> str:
    """Verbalize a match of ``RE_TEMPERATURE``.

    Args:
        match (re.Match)
    Returns:
        str: "零下" when the sign matched, then the spoken number, then the
        unit: "摄氏度" if that is what was written, "度" for every other
        spelling.
    """
    sign = "零下" if match.group(1) else ""
    temperature = num2str(match.group(2))
    # The unit is group 4. Group 3 is the fractional part of the number, so
    # reading the unit from it meant "摄氏度" was never recognized.
    unit = "摄氏度" if match.group(4) == "摄氏度" else "度"
    return f"{sign}{temperature}{unit}"


def replace_measure(sentence) -> str:
    """Replace every unit abbreviation of ``measure_dict`` in *sentence*.

    Longer abbreviations are replaced first so that a short one cannot
    consume part of a longer one ("mm" used to be rewritten by the "m" rule,
    which ran before it). ``sorted`` is stable, so abbreviations of equal
    length keep their dictionary order.
    """
    for q_notation in sorted(measure_dict, key=len, reverse=True):
        sentence = sentence.replace(q_notation, measure_dict[q_notation])
    return sentence
|
GPT_SoVITS/text/zh_normalization/text_normlization.py
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
import re
|
15 |
+
from typing import List
|
16 |
+
|
17 |
+
from .char_convert import tranditional_to_simplified
|
18 |
+
from .chronology import RE_DATE
|
19 |
+
from .chronology import RE_DATE2
|
20 |
+
from .chronology import RE_TIME
|
21 |
+
from .chronology import RE_TIME_RANGE
|
22 |
+
from .chronology import replace_date
|
23 |
+
from .chronology import replace_date2
|
24 |
+
from .chronology import replace_time
|
25 |
+
from .constants import F2H_ASCII_LETTERS
|
26 |
+
from .constants import F2H_DIGITS
|
27 |
+
from .constants import F2H_SPACE
|
28 |
+
from .num import RE_DECIMAL_NUM
|
29 |
+
from .num import RE_DEFAULT_NUM
|
30 |
+
from .num import RE_FRAC
|
31 |
+
from .num import RE_INTEGER
|
32 |
+
from .num import RE_NUMBER
|
33 |
+
from .num import RE_PERCENTAGE
|
34 |
+
from .num import RE_POSITIVE_QUANTIFIERS
|
35 |
+
from .num import RE_RANGE
|
36 |
+
from .num import RE_TO_RANGE
|
37 |
+
from .num import RE_ASMD
|
38 |
+
from .num import RE_POWER
|
39 |
+
from .num import replace_default_num
|
40 |
+
from .num import replace_frac
|
41 |
+
from .num import replace_negative_num
|
42 |
+
from .num import replace_number
|
43 |
+
from .num import replace_percentage
|
44 |
+
from .num import replace_positive_quantifier
|
45 |
+
from .num import replace_range
|
46 |
+
from .num import replace_to_range
|
47 |
+
from .num import replace_asmd
|
48 |
+
from .num import replace_power
|
49 |
+
from .phonecode import RE_MOBILE_PHONE
|
50 |
+
from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
|
51 |
+
from .phonecode import RE_TELEPHONE
|
52 |
+
from .phonecode import replace_mobile
|
53 |
+
from .phonecode import replace_phone
|
54 |
+
from .quantifier import RE_TEMPERATURE
|
55 |
+
from .quantifier import replace_measure
|
56 |
+
from .quantifier import replace_temperature
|
57 |
+
|
58 |
+
|
59 |
+
class TextNormalizer():
    """Rule-based normalizer that rewrites numbers, dates, times, phone
    numbers, units and symbols in Chinese text into their spoken form."""

    # Single-character rewrites applied by ``_post_replace``. No replacement
    # text contains a key of this table, so one ``str.translate`` pass gives
    # the same result as applying the rewrites one after another.
    _POST_REPLACEMENTS = str.maketrans({
        '/': '每',
        '①': '一',
        '②': '二',
        '③': '三',
        '④': '四',
        '⑤': '五',
        '⑥': '六',
        '⑦': '七',
        '⑧': '八',
        '⑨': '九',
        '⑩': '十',
        'α': '阿尔法',
        'β': '贝塔',
        'γ': '伽玛',
        'Γ': '伽玛',
        'δ': '德尔塔',
        'Δ': '德尔塔',
        'ε': '艾普西龙',
        'ζ': '捷塔',
        'η': '依塔',
        'θ': '西塔',
        'Θ': '西塔',
        'ι': '艾欧塔',
        'κ': '喀帕',
        'λ': '拉姆达',
        'Λ': '拉姆达',
        'μ': '缪',
        'ν': '拗',
        'ξ': '克西',
        'Ξ': '克西',
        'ο': '欧米克伦',
        'π': '派',
        'Π': '派',
        'ρ': '肉',
        'ς': '西格玛',
        'Σ': '西格玛',
        'σ': '西格玛',
        'τ': '套',
        'υ': '宇普西龙',
        'φ': '服艾',
        'Φ': '服艾',
        'χ': '器',
        'ψ': '普赛',
        'Ψ': '普赛',
        'ω': '欧米伽',
        'Ω': '欧米伽',
        # Fallback for arithmetic symbols left over by the earlier rules.
        '+': '加',
        '-': '减',
        '×': '乘',
        '÷': '除',
        '=': '等',
    })

    def __init__(self):
        # A sentence-ending punctuation mark, optionally followed by a
        # closing quotation mark.
        self.SENTENCE_SPLITOR = re.compile(r'([:、,;。?!,;?!][”’]?)')

    def _split(self, text: str, lang="zh") -> List[str]:
        """Split long text into sentences at sentence-splitting punctuation.

        Args:
            text (str): The input text.
            lang: for "zh", spaces and a fixed set of special characters are
                removed before splitting.
        Returns:
            List[str]: Sentences, each keeping its closing punctuation.
        """
        if lang == "zh":
            # Pure-Chinese text only: drop spaces and special characters.
            text = text.replace(" ", "")
            text = re.sub(r'[——《》【】<>{}()()#&@“”^_|\\]', '', text)
        # Put a newline after every splitter, then cut on the newlines.
        marked = self.SENTENCE_SPLITOR.sub(r'\1\n', text).strip()
        return [piece.strip() for piece in re.split(r'\n+', marked)]

    def _post_replace(self, sentence: str) -> str:
        """Spell out leftover symbols, then delete special characters.

        "/", circled numbers, Greek letters and arithmetic operators are
        replaced by Chinese words; the characters in the final filter
        pattern are removed.
        """
        sentence = sentence.translate(self._POST_REPLACEMENTS)
        # Same filter as in ``_split`` plus "-" and "=".
        return re.sub(r'[-——《》【】<=>{}()()#&@“”^_|\\]', '', sentence)

    def normalize_sentence(self, sentence: str) -> str:
        """Apply every normalization rule to one sentence, in a fixed order."""
        # Basic character conversions.
        sentence = tranditional_to_simplified(sentence)
        for table in (F2H_ASCII_LETTERS, F2H_DIGITS, F2H_SPACE):
            sentence = sentence.translate(table)

        # Dates, times (ranges before single times), "~" ranges between
        # quantities with units, and temperatures.
        leading_rules = (
            (RE_DATE, replace_date),
            (RE_DATE2, replace_date2),
            (RE_TIME_RANGE, replace_time),
            (RE_TIME, replace_time),
            (RE_TO_RANGE, replace_to_range),
            (RE_TEMPERATURE, replace_temperature),
        )
        for pattern, handler in leading_rules:
            sentence = pattern.sub(handler, sentence)
        sentence = replace_measure(sentence)

        # Arithmetic: repeat until no operator pattern is left, because one
        # pass cannot rewrite operators that share an operand.
        while RE_ASMD.search(sentence):
            sentence = RE_ASMD.sub(replace_asmd, sentence)

        # Powers, fractions, percentages, phone numbers, ranges, and then
        # the generic number rules.
        trailing_rules = (
            (RE_POWER, replace_power),
            (RE_FRAC, replace_frac),
            (RE_PERCENTAGE, replace_percentage),
            (RE_MOBILE_PHONE, replace_mobile),
            (RE_TELEPHONE, replace_phone),
            (RE_NATIONAL_UNIFORM_NUMBER, replace_phone),
            (RE_RANGE, replace_range),
            (RE_INTEGER, replace_negative_num),
            (RE_DECIMAL_NUM, replace_number),
            (RE_POSITIVE_QUANTIFIERS, replace_positive_quantifier),
            (RE_DEFAULT_NUM, replace_default_num),
            (RE_NUMBER, replace_number),
        )
        for pattern, handler in trailing_rules:
            sentence = pattern.sub(handler, sentence)

        return self._post_replace(sentence)

    def normalize(self, text: str) -> List[str]:
        """Split *text* into sentences and normalize each one."""
        return [self.normalize_sentence(piece) for piece in self._split(text)]
|