Alyosha11 committed on
Commit
9fd672f
·
verified ·
1 Parent(s): e0cb627

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. GPT_SoVITS/download.py +5 -0
  3. GPT_SoVITS/download_hubert.py +52 -0
  4. GPT_SoVITS/export_torch_script.py +832 -0
  5. GPT_SoVITS/extract_hubert.py +150 -0
  6. GPT_SoVITS/inference_cli.py +55 -0
  7. GPT_SoVITS/inference_gui.py +310 -0
  8. GPT_SoVITS/inference_webui.py +772 -0
  9. GPT_SoVITS/inference_webui_fast.py +336 -0
  10. GPT_SoVITS/onnx_export.py +344 -0
  11. GPT_SoVITS/prepare_data.py +66 -0
  12. GPT_SoVITS/pretrained_models/.gitignore +2 -0
  13. GPT_SoVITS/pretrained_models/README.md +5 -0
  14. GPT_SoVITS/process_ckpt.py +31 -0
  15. GPT_SoVITS/s1_train.py +113 -0
  16. GPT_SoVITS/s2_train.py +610 -0
  17. GPT_SoVITS/text/.gitignore +3 -0
  18. GPT_SoVITS/text/__init__.py +27 -0
  19. GPT_SoVITS/text/cantonese.py +209 -0
  20. GPT_SoVITS/text/chinese.py +211 -0
  21. GPT_SoVITS/text/chinese2.py +308 -0
  22. GPT_SoVITS/text/cleaner.py +91 -0
  23. GPT_SoVITS/text/cmudict-fast.rep +0 -0
  24. GPT_SoVITS/text/cmudict.rep +0 -0
  25. GPT_SoVITS/text/engdict-hot.rep +3 -0
  26. GPT_SoVITS/text/english.py +374 -0
  27. GPT_SoVITS/text/g2pw/__init__.py +1 -0
  28. GPT_SoVITS/text/g2pw/dataset.py +166 -0
  29. GPT_SoVITS/text/g2pw/g2pw.py +154 -0
  30. GPT_SoVITS/text/g2pw/onnx_api.py +241 -0
  31. GPT_SoVITS/text/g2pw/polyphonic-fix.rep +0 -0
  32. GPT_SoVITS/text/g2pw/polyphonic.rep +53 -0
  33. GPT_SoVITS/text/g2pw/utils.py +145 -0
  34. GPT_SoVITS/text/hindi.py +222 -0
  35. GPT_SoVITS/text/japanese.py +226 -0
  36. GPT_SoVITS/text/korean.py +265 -0
  37. GPT_SoVITS/text/opencpop-strict.txt +429 -0
  38. GPT_SoVITS/text/symbols.py +427 -0
  39. GPT_SoVITS/text/symbols2.py +444 -0
  40. GPT_SoVITS/text/text_processing.py +37 -0
  41. GPT_SoVITS/text/tone_sandhi.py +807 -0
  42. GPT_SoVITS/text/zh_normalization/README.md +16 -0
  43. GPT_SoVITS/text/zh_normalization/__init__.py +14 -0
  44. GPT_SoVITS/text/zh_normalization/char_convert.py +46 -0
  45. GPT_SoVITS/text/zh_normalization/chronology.py +134 -0
  46. GPT_SoVITS/text/zh_normalization/constants.py +62 -0
  47. GPT_SoVITS/text/zh_normalization/num.py +318 -0
  48. GPT_SoVITS/text/zh_normalization/phonecode.py +63 -0
  49. GPT_SoVITS/text/zh_normalization/quantifier.py +63 -0
  50. GPT_SoVITS/text/zh_normalization/text_normlization.py +175 -0
.gitattributes CHANGED
@@ -702,3 +702,9 @@ data8/wavs/119.wav filter=lfs diff=lfs merge=lfs -text
702
  data8/wavs/122.wav filter=lfs diff=lfs merge=lfs -text
703
  data8/wavs/135.wav filter=lfs diff=lfs merge=lfs -text
704
  data8/wavs/159.wav filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
702
  data8/wavs/122.wav filter=lfs diff=lfs merge=lfs -text
703
  data8/wavs/135.wav filter=lfs diff=lfs merge=lfs -text
704
  data8/wavs/159.wav filter=lfs diff=lfs merge=lfs -text
705
+ data8/wavs/190.wav filter=lfs diff=lfs merge=lfs -text
706
+ data8/wavs/118.wav filter=lfs diff=lfs merge=lfs -text
707
+ data8/wavs/129.wav filter=lfs diff=lfs merge=lfs -text
708
+ data8/wavs/174.wav filter=lfs diff=lfs merge=lfs -text
709
+ data8/wavs/152.wav filter=lfs diff=lfs merge=lfs -text
710
+ data8/wavs/13.wav filter=lfs diff=lfs merge=lfs -text
GPT_SoVITS/download.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
"""Instantiate the G2PW pinyin converter once from the repository root.

NOTE(review): judging by the file name, constructing ``G2PWPinyin`` is
presumably what fetches/initialises the G2PW model files -- confirm against
``text.g2pw``.
"""
import os
import sys

# Make the current working directory importable so that ``text`` resolves.
now_dir = os.getcwd()
sys.path.insert(0, now_dir)

from text.g2pw import G2PWPinyin

g2pw = G2PWPinyin(
    model_dir="GPT_SoVITS/text/G2PWModel",
    model_source="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
    v_to_u=False,
    neutral_tone_with_five=True,
)
GPT_SoVITS/download_hubert.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import torch
4
+ from pathlib import Path
5
+ from tqdm import tqdm
6
+
7
def download_file(url, dest_path):
    """Download ``url`` to ``dest_path`` while showing a tqdm progress bar.

    The payload is streamed into ``dest_path + ".part"`` and only renamed to
    ``dest_path`` once the transfer finished, so an interrupted or failed
    download never leaves a truncated file behind that a later
    ``os.path.exists`` check would mistake for a complete one.

    Args:
        url: HTTP(S) address of the file.
        dest_path: Final location of the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the file cannot be written or renamed.
    """
    part_path = f"{dest_path}.part"
    completed = False
    try:
        # The context manager releases the connection even on errors.
        with requests.get(url, stream=True, timeout=30) as response:
            # Do not store an HTML error page as if it were the model file.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))

            with open(part_path, 'wb') as f, tqdm(
                desc=os.path.basename(dest_path),
                total=total_size,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as pbar:
                for data in response.iter_content(chunk_size=64 * 1024):
                    pbar.update(f.write(data))
        completed = True
    finally:
        # Clean up the partial file on any failure (including Ctrl+C).
        if not completed and os.path.exists(part_path):
            os.remove(part_path)

    os.replace(part_path, dest_path)
22
+
23
def download_hubert():
    """Fetch the Chinese HuBERT files that are not present locally yet.

    Files are stored in ``<project root>/pretrained_models/chinese-hubert-base``
    where the project root is the parent of the directory containing this
    script. Files that already exist are skipped.

    NOTE(review): other code in this repository reads the model from
    ``GPT_SoVITS/pretrained_models/chinese-hubert-base``; confirm that the
    target directory used here is the intended one.
    """
    script_path = os.path.abspath(__file__)
    # Two levels up from this file: the parent of the GPT_SoVITS directory.
    root_dir = os.path.dirname(os.path.dirname(script_path))

    model_dir = os.path.join(root_dir, "pretrained_models", "chinese-hubert-base")
    os.makedirs(model_dir, exist_ok=True)

    # Local file name -> remote location.
    files = {
        "config.json": "https://huggingface.co/TencentGameMate/chinese-hubert-base/raw/main/config.json",
        "preprocessor_config.json": "https://huggingface.co/TencentGameMate/chinese-hubert-base/raw/main/preprocessor_config.json",
        "pytorch_model.bin": "https://huggingface.co/TencentGameMate/chinese-hubert-base/resolve/main/pytorch_model.bin",
    }

    print("Downloading Chinese Hubert model...")
    for filename, url in files.items():
        dest_path = os.path.join(model_dir, filename)
        if os.path.exists(dest_path):
            print(f"{filename} already exists, skipping...")
            continue
        print(f"Downloading {filename}...")
        download_file(url, dest_path)

    print("Download complete.")


if __name__ == "__main__":
    download_hubert()
GPT_SoVITS/export_torch_script.py ADDED
@@ -0,0 +1,832 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_model.py
2
+ # reference: https://github.com/lifeiteng/vall-e
3
+ import argparse
4
+ from typing import Optional
5
+ from my_utils import load_audio
6
+ from text import cleaned_text_to_sequence
7
+ import torch
8
+ import torchaudio
9
+
10
+ from torch import IntTensor, LongTensor, Tensor, nn
11
+ from torch.nn import functional as F
12
+
13
+ from transformers import AutoModelForMaskedLM, AutoTokenizer
14
+ from feature_extractor import cnhubert
15
+
16
+ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
17
+ from module.models_onnx import SynthesizerTrn
18
+
19
+ from inference_webui import get_phones_and_bert
20
+
21
+ import os
22
+ import soundfile
23
+
24
# Default hyper-parameters. The EOS id equals ``vocab_size - 1``, the same
# relation that T2SModel asserts for a loaded checkpoint.
default_config = dict(
    embedding_dim=512,
    hidden_dim=512,
    num_head=8,
    num_layers=12,
    num_codebook=8,
    p_dropout=0.0,
    vocab_size=1024 + 1,
    phoneme_vocab_size=512,
    EOS=1024,
)
35
+
36
def get_raw_t2s_model(dict_s1) -> Text2SemanticLightningModule:
    """Rebuild the text-to-semantic Lightning module from a checkpoint dict.

    Args:
        dict_s1: Loaded checkpoint with a ``"config"`` entry and a
            ``"weight"`` state dict. ``config["model"]["dropout"]`` is
            converted to ``float`` in place.

    Returns:
        The module with weights loaded, switched to eval mode.
    """
    cfg = dict_s1["config"]
    model_cfg = cfg["model"]
    model_cfg["dropout"] = float(model_cfg["dropout"])

    module = Text2SemanticLightningModule(cfg, "****", is_train=False)
    module.load_state_dict(dict_s1["weight"])
    return module.eval()
43
+
44
@torch.jit.script
def logits_to_probs(
    logits,
    previous_tokens: Optional[torch.Tensor] = None,
    temperature: float = 1.0,
    top_k: Optional[int] = None,
    top_p: Optional[int] = None,
    repetition_penalty: float = 1.0,
):
    """Turn next-token logits into a probability distribution.

    Steps, in order: repetition penalty, nucleus (top-p) filtering,
    temperature scaling, top-k filtering, softmax over the last dimension.

    Args:
        logits: 2-D logits (rows are indexed along dim 0, vocabulary along
            dim 1). Modified in place when the repetition penalty is applied.
        previous_tokens: Token ids already generated; used as gather/scatter
            index along dim 1.
        temperature: Divisor for the logits, clamped to at least ``1e-5``.
        top_k: Keep only the ``top_k`` largest logits per row.
        top_p: Cumulative-probability threshold; filtering only happens for
            values below ``1.0``.
            NOTE(review): annotated as ``int``; a float annotation looks
            intended, but the scripted caller passes the integer ``1``.
        repetition_penalty: ``1.0`` disables the penalty.

    Returns:
        Probabilities with the same shape as ``logits``.
    """
    if previous_tokens is not None and repetition_penalty != 1.0:
        token_ids = previous_tokens.long()
        seen = torch.gather(logits, dim=1, index=token_ids)
        # Negative logits are multiplied, non-negative ones divided, so the
        # penalty always pushes an already used token towards "less likely".
        penalized = torch.where(
            seen < 0, seen * repetition_penalty, seen / repetition_penalty
        )
        logits.scatter_(dim=1, index=token_ids, src=penalized)

    if top_p is not None and top_p < 1.0:
        ranked_logits, ranked_ids = torch.sort(logits, descending=True)
        cumulative = torch.cumsum(
            torch.nn.functional.softmax(ranked_logits, dim=-1), dim=-1
        )
        drop_ranked = cumulative > top_p
        drop_ranked[:, 0] = False  # keep at least one option
        # Map the mask from sorted order back to vocabulary order.
        drop = drop_ranked.scatter(dim=1, index=ranked_ids, src=drop_ranked)
        logits = logits.masked_fill(drop, -float("Inf"))

    logits = logits / max(temperature, 1e-5)

    if top_k is not None:
        top_values, _ = torch.topk(logits, min(top_k, logits.size(-1)))
        # Smallest logit that is still inside the top-k set of each row.
        threshold = top_values[:, -1].unsqueeze(-1)
        logits = torch.where(logits < threshold, -float("Inf"), logits)

    return torch.nn.functional.softmax(logits, dim=-1)
86
+
87
@torch.jit.script
def multinomial_sample_one_no_sync(probs_sort):
    """Draw one index per row from ``probs_sort`` without a CUDA sync.

    Uses the exponential-race trick: with independent ``q_i ~ Exp(1)``,
    ``argmax_i(p_i / q_i)`` is distributed according to ``p``. The noise must
    therefore be strictly positive; Gaussian noise (which is signed) would
    flip the ordering for negative draws and would not sample from ``p``.

    Args:
        probs_sort: Probabilities; sampling happens along the last dimension.

    Returns:
        Sampled indices as ``torch.int`` with the last dimension kept (size 1).
    """
    # Inverse-CDF sampling of Exp(1): -log(U) with U ~ Uniform[0, 1).
    # The clamp keeps -log finite when U is exactly 0.
    uniform = torch.rand_like(probs_sort).clamp_min(1e-20)
    q = -torch.log(uniform)
    return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
92
+
93
@torch.jit.script
def sample(
    logits,
    previous_tokens,
    temperature: float = 1.0,
    top_k: Optional[int] = None,
    top_p: Optional[int] = None,
    repetition_penalty: float = 1.0,
):
    """Sample the next token from ``logits``.

    All filtering options are forwarded unchanged to ``logits_to_probs``; the
    resulting distribution is then sampled with
    ``multinomial_sample_one_no_sync``.

    Returns:
        A tuple ``(idx_next, probs)``: the sampled token index and the
        probability distribution it was drawn from.
    """
    probs = logits_to_probs(
        logits=logits,
        previous_tokens=previous_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )
    return multinomial_sample_one_no_sync(probs), probs
107
+
108
+
109
@torch.jit.script
def spectrogram_torch(y:Tensor, n_fft:int, sampling_rate:int, hop_size:int, win_size:int, center:bool=False):
    """Compute a linear magnitude spectrogram with a Hann window.

    The signal is reflect-padded by ``(n_fft - hop_size) / 2`` samples on both
    sides before the STFT.

    Args:
        y: Waveform of shape (batch, samples).
        n_fft: FFT size.
        sampling_rate: Unused; kept for interface compatibility.
        hop_size: Hop length between frames.
        win_size: Window length.
        center: Forwarded to ``torch.stft``.

    Returns:
        Real tensor (batch, n_fft // 2 + 1, frames) holding
        ``sqrt(re^2 + im^2 + 1e-6)``.
    """
    hann_window = torch.hann_window(win_size, device=y.device, dtype=y.dtype)
    pad = int((n_fft - hop_size) / 2)
    # Reflect padding needs a channel dimension, hence unsqueeze/squeeze.
    y = torch.nn.functional.pad(y.unsqueeze(1), (pad, pad), mode="reflect").squeeze(1)
    # ``return_complex=False`` is deprecated; request the complex result and
    # view it as (..., 2) real/imag pairs, which is the same layout.
    spec = torch.view_as_real(
        torch.stft(
            y,
            n_fft,
            hop_length=hop_size,
            win_length=win_size,
            window=hann_window,
            center=center,
            pad_mode="reflect",
            normalized=False,
            onesided=True,
            return_complex=True,
        )
    )
    # The small epsilon keeps the square root differentiable at zero.
    return torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
132
+
133
+
134
class DictToAttrRecursive(dict):
    """A ``dict`` whose entries are also reachable as attributes.

    Nested plain dicts are converted recursively, so ``cfg.model.dim`` works
    for ``{"model": {"dim": ...}}``. Every value set through attribute access
    is stored both as a dict item and as an instance attribute.
    """

    def __init__(self, input_dict):
        super().__init__(input_dict)
        for key, value in input_dict.items():
            # __setattr__ wraps nested dicts and writes both the item and the
            # attribute, so a single call per key is sufficient.
            setattr(self, key, value)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails.
        try:
            return self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found") from None

    def __setattr__(self, key, value):
        if isinstance(value, dict):
            value = DictToAttrRecursive(value)
        super(DictToAttrRecursive, self).__setitem__(key, value)
        super().__setattr__(key, value)

    def __delattr__(self, item):
        try:
            del self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found") from None
        # Also drop the mirrored instance attribute; otherwise ``obj.item``
        # would still resolve after the deletion.
        self.__dict__.pop(item, None)
160
+
161
@torch.jit.script
class T2SMLP:
    """Two-layer feed-forward network operating on raw weight tensors.

    Computes ``linear(relu(linear(x, w1, b1)), w2, b2)``.
    """

    def __init__(self, w1, b1, w2, b2):
        self.w1 = w1
        self.b1 = b1
        self.w2 = w2
        self.b2 = b2

    def forward(self, x):
        hidden = F.relu(F.linear(x, self.w1, self.b1))
        return F.linear(hidden, self.w2, self.b2)
173
+
174
@torch.jit.script
class T2SBlock:
    """One post-norm transformer layer working directly on weight tensors.

    The layer is: fused QKV projection -> multi-head attention -> output
    projection -> residual + layer norm -> MLP -> residual + layer norm.
    ``process_prompt`` handles the whole prompt and returns the key/value
    projections for caching; ``decode_next_token`` appends to that cache.
    """

    def __init__(
        self,
        num_heads: int,
        hidden_dim: int,
        mlp: T2SMLP,
        qkv_w,
        qkv_b,
        out_w,
        out_b,
        norm_w1,
        norm_b1,
        norm_eps1: float,
        norm_w2,
        norm_b2,
        norm_eps2: float,
    ):
        self.num_heads = num_heads
        self.mlp = mlp
        self.hidden_dim: int = hidden_dim

        # Fused query/key/value projection and attention output projection.
        self.qkv_w = qkv_w
        self.qkv_b = qkv_b
        self.out_w = out_w
        self.out_b = out_b

        # Layer norm after attention (1) and after the MLP (2).
        self.norm_w1 = norm_w1
        self.norm_b1 = norm_b1
        self.norm_eps1 = norm_eps1
        self.norm_w2 = norm_w2
        self.norm_b2 = norm_b2
        self.norm_eps2 = norm_eps2

        # Scalar ``False`` used to locate non-padded positions.
        self.false = torch.tensor(False, dtype=torch.bool)

    @torch.jit.ignore
    def to_mask(self, x:torch.Tensor, padding_mask:Optional[torch.Tensor]):
        """Zero out padded positions of ``x``; a no-op without a mask.

        A boolean mask marks the positions to zero; any other dtype is
        multiplied onto ``x``.
        """
        if padding_mask is None:
            return x
        if padding_mask.dtype == torch.bool:
            return x.masked_fill(padding_mask, 0)
        return x * padding_mask

    def process_prompt(self, x:torch.Tensor, attn_mask : torch.Tensor, padding_mask:Optional[torch.Tensor]=None):
        """Run the layer over the full prompt.

        Args:
            x: Input of shape (batch, seq, hidden_dim).
            attn_mask: Boolean mask; it is inverted before being handed to
                ``scaled_dot_product_attention``.
            padding_mask: Optional mask of padded positions, indexed as
                ``padding_mask[i, :, 0]`` per batch item.

        Returns:
            ``(x, k_cache, v_cache)``: the layer output and the (masked) key
            and value projections of shape (batch, seq, hidden_dim).
        """
        qkv = F.linear(self.to_mask(x, padding_mask), self.qkv_w, self.qkv_b)
        q, k, v = qkv.chunk(3, dim=-1)

        batch_size = q.shape[0]
        q_len = q.shape[1]
        kv_len = k.shape[1]

        q = self.to_mask(q, padding_mask)
        k_cache = self.to_mask(k, padding_mask)
        v_cache = self.to_mask(v, padding_mask)

        # (batch, seq, hidden) -> (batch, heads, seq, head_dim)
        q = q.view(batch_size, q_len, self.num_heads, -1).transpose(1, 2)
        k = k_cache.view(batch_size, kv_len, self.num_heads, -1).transpose(1, 2)
        v = v_cache.view(batch_size, kv_len, self.num_heads, -1).transpose(1, 2)

        attn = F.scaled_dot_product_attention(q, k, v, ~attn_mask)

        # Merge the heads back: (batch, heads, seq, head_dim) -> (batch, seq, hidden)
        attn = attn.permute(2, 0, 1, 3).reshape(batch_size*q_len, self.hidden_dim)
        attn = attn.view(q_len, batch_size, self.hidden_dim).transpose(1, 0)
        attn = F.linear(self.to_mask(attn, padding_mask), self.out_w, self.out_b)

        if padding_mask is not None:
            # Apply residual/norm/MLP only to the non-padded positions of
            # each batch item and write the result back in place.
            for i in range(batch_size):
                if self.false.device!= padding_mask.device:
                    self.false = self.false.to(padding_mask.device)
                idx = torch.where(padding_mask[i,:,0]==self.false)[0]
                x_item = x[i,idx,:].unsqueeze(0)
                attn_item = attn[i,idx,:].unsqueeze(0)
                x_item = x_item + attn_item
                x_item = F.layer_norm(
                    x_item, [self.hidden_dim], self.norm_w1, self.norm_b1, self.norm_eps1
                )
                x_item = x_item + self.mlp.forward(x_item)
                x_item = F.layer_norm(
                    x_item, [self.hidden_dim], self.norm_w2, self.norm_b2, self.norm_eps2
                )
                x[i,idx,:] = x_item.squeeze(0)
            x = self.to_mask(x, padding_mask)
        else:
            x = x + attn
            x = F.layer_norm(
                x, [self.hidden_dim], self.norm_w1, self.norm_b1, self.norm_eps1
            )
            x = x + self.mlp.forward(x)
            x = F.layer_norm(
                x, [self.hidden_dim], self.norm_w2, self.norm_b2, self.norm_eps2
            )
        return x, k_cache, v_cache

    def decode_next_token(self, x:torch.Tensor, k_cache:torch.Tensor, v_cache:torch.Tensor):
        """Run the layer for new positions, extending the key/value cache.

        Args:
            x: New input of shape (batch, new_len, hidden_dim).
            k_cache: Cached keys, (batch, cached_len, hidden_dim).
            v_cache: Cached values, same shape as ``k_cache``.

        Returns:
            ``(x, k_cache, v_cache)`` with both caches grown along dim 1.
        """
        q, k, v = F.linear(x, self.qkv_w, self.qkv_b).chunk(3, dim=-1)

        k_cache = torch.cat([k_cache, k], dim=1)
        v_cache = torch.cat([v_cache, v], dim=1)

        batch_size = q.shape[0]
        q_len = q.shape[1]
        kv_len = k_cache.shape[1]

        q = q.view(batch_size, q_len, self.num_heads, -1).transpose(1, 2)
        k = k_cache.view(batch_size, kv_len, self.num_heads, -1).transpose(1, 2)
        v = v_cache.view(batch_size, kv_len, self.num_heads, -1).transpose(1, 2)

        # No mask is passed to the attention call here.
        attn = F.scaled_dot_product_attention(q, k, v)

        attn = attn.permute(2, 0, 1, 3).reshape(batch_size*q_len, self.hidden_dim)
        attn = attn.view(q_len, batch_size, self.hidden_dim).transpose(1, 0)
        attn = F.linear(attn, self.out_w, self.out_b)

        x = x + attn
        x = F.layer_norm(
            x, [self.hidden_dim], self.norm_w1, self.norm_b1, self.norm_eps1
        )
        x = x + self.mlp.forward(x)
        x = F.layer_norm(
            x, [self.hidden_dim], self.norm_w2, self.norm_b2, self.norm_eps2
        )
        return x, k_cache, v_cache
309
+
310
@torch.jit.script
class T2STransformer:
    """Stack of ``T2SBlock`` layers with one key/value cache entry per layer."""

    def __init__(self, num_blocks : int, blocks: list[T2SBlock]):
        self.num_blocks : int = num_blocks
        self.blocks = blocks

    def process_prompt(
        self, x:torch.Tensor, attn_mask : torch.Tensor,padding_mask : Optional[torch.Tensor]=None):
        """Feed the prompt through every layer and collect the caches.

        Returns:
            ``(x, k_cache, v_cache)`` where the caches are lists holding one
            tensor per layer, in layer order.
        """
        k_cache : list[torch.Tensor] = []
        v_cache : list[torch.Tensor] = []
        for i in range(self.num_blocks):
            x, layer_k, layer_v = self.blocks[i].process_prompt(x, attn_mask, padding_mask)
            k_cache.append(layer_k)
            v_cache.append(layer_v)
        return x, k_cache, v_cache

    def decode_next_token(
        self, x:torch.Tensor,
        k_cache: list[torch.Tensor],
        v_cache: list[torch.Tensor]):
        """Feed new positions through every layer, updating the caches in place.

        Returns:
            ``(x, k_cache, v_cache)``; the list objects are the ones passed in,
            with each entry replaced by the extended cache.
        """
        for i in range(self.num_blocks):
            x, layer_k, layer_v = self.blocks[i].decode_next_token(x, k_cache[i], v_cache[i])
            k_cache[i] = layer_k
            v_cache[i] = layer_v
        return x, k_cache, v_cache
333
+
334
class VitsModel(nn.Module):
    """Wraps the SoVITS synthesizer loaded from a checkpoint file.

    Args:
        vits_path: Path to a checkpoint containing ``"config"`` and
            ``"weight"`` entries.
    """

    def __init__(self, vits_path):
        super().__init__()
        # Load onto the CPU so that checkpoints saved on a GPU can be opened
        # on machines without one; callers move the module with ``.to(device)``.
        dict_s2 = torch.load(vits_path, map_location="cpu")
        hps = dict_s2["config"]
        # A text embedding with 322 rows is treated as a v1 model,
        # anything else as v2.
        if dict_s2['weight']['enc_p.text_embedding.weight'].shape[0] == 322:
            hps["model"]["version"] = "v1"
        else:
            hps["model"]["version"] = "v2"

        self.hps = DictToAttrRecursive(hps)
        self.hps.model.semantic_frame_rate = "25hz"
        self.vq_model = SynthesizerTrn(
            self.hps.data.filter_length // 2 + 1,
            self.hps.train.segment_size // self.hps.data.hop_length,
            n_speakers=self.hps.data.n_speakers,
            **self.hps.model
        )
        self.vq_model.eval()
        # strict=False: keys that do not match the model are ignored.
        self.vq_model.load_state_dict(dict_s2["weight"], strict=False)

    def forward(self, text_seq, pred_semantic, ref_audio, speed=1.0):
        """Synthesize audio and return element ``[0, 0]`` of the model output.

        The reference audio is first converted to a magnitude spectrogram
        using the STFT settings stored in the checkpoint config.
        """
        refer = spectrogram_torch(
            ref_audio,
            self.hps.data.filter_length,
            self.hps.data.sampling_rate,
            self.hps.data.hop_length,
            self.hps.data.win_length,
            center=False
        )
        return self.vq_model(pred_semantic, text_seq, refer, speed)[0, 0]
366
+
367
class T2SModel(nn.Module):
    """Scriptable autoregressive text-to-semantic decoder.

    Re-uses the embeddings, positional encodings and prediction head of a
    trained ``Text2SemanticLightningModule`` and repackages its transformer
    layers as ``T2SBlock`` objects so that decoding runs with a key/value
    cache.
    """

    def __init__(self,raw_t2s:Text2SemanticLightningModule):
        super().__init__()
        source = raw_t2s.model

        self.model_dim = source.model_dim
        self.embedding_dim = source.embedding_dim
        self.num_head = source.num_head
        self.num_layers = source.num_layers
        self.vocab_size = source.vocab_size
        self.phoneme_vocab_size = source.phoneme_vocab_size
        self.EOS:int = int(source.EOS)
        self.norm_first = source.norm_first
        # The EOS token must be the last entry of the vocabulary; forward()
        # relies on this when it slices the last logit away.
        assert self.EOS == self.vocab_size - 1
        self.hz = 50

        self.bert_proj = source.bert_proj
        self.ar_text_embedding = source.ar_text_embedding
        self.ar_text_position = source.ar_text_position
        self.ar_audio_embedding = source.ar_audio_embedding
        self.ar_audio_position = source.ar_audio_position

        # Repackage every encoder layer's weights into a T2SBlock.
        blocks = []
        h = source.h
        for i in range(self.num_layers):
            layer = h.layers[i]
            t2smlp = T2SMLP(
                layer.linear1.weight,
                layer.linear1.bias,
                layer.linear2.weight,
                layer.linear2.bias,
            )
            blocks.append(
                T2SBlock(
                    self.num_head,
                    self.model_dim,
                    t2smlp,
                    layer.self_attn.in_proj_weight,
                    layer.self_attn.in_proj_bias,
                    layer.self_attn.out_proj.weight,
                    layer.self_attn.out_proj.bias,
                    layer.norm1.weight,
                    layer.norm1.bias,
                    layer.norm1.eps,
                    layer.norm2.weight,
                    layer.norm2.bias,
                    layer.norm2.eps,
                )
            )

        self.t2s_transformer = T2STransformer(self.num_layers, blocks)

        self.ar_predict_layer = source.ar_predict_layer
        self.max_sec = raw_t2s.config["data"]["max_sec"]
        self.top_k = int(raw_t2s.config["inference"]["top_k"])
        # Maximum number of generated tokens before decoding is stopped.
        self.early_stop_num = torch.LongTensor([self.hz * self.max_sec])

    def forward(self,prompts:LongTensor, ref_seq:LongTensor, text_seq:LongTensor, ref_bert:torch.Tensor, text_bert:torch.Tensor):
        """Generate semantic tokens for ``text_seq`` given a reference prompt.

        Args:
            prompts: Semantic tokens of the reference audio, (1, prompt_len).
            ref_seq: Phoneme ids of the reference text, (1, n_ref).
            text_seq: Phoneme ids of the target text, (1, n_text).
            ref_bert: BERT features of the reference text; transposed before
                being concatenated along dim 1.
            text_bert: BERT features of the target text, same layout.

        Returns:
            The last ``idx`` tokens of the sequence with an extra leading
            dimension; the final token is overwritten with 0.
        """
        bert = torch.cat([ref_bert.T, text_bert.T], 1).unsqueeze(0)
        all_phoneme_ids = torch.cat([ref_seq, text_seq], 1)

        x = self.ar_text_embedding(all_phoneme_ids)
        x = x + self.bert_proj(bert.transpose(1, 2))
        x:torch.Tensor = self.ar_text_position(x)

        early_stop_num = self.early_stop_num

        y = prompts

        x_len = x.shape[1]
        x_attn_mask = torch.zeros((x_len, x_len), dtype=torch.bool)

        y_emb = self.ar_audio_embedding(y)
        y_len = y_emb.shape[1]
        prefix_len = y.shape[1]
        y_pos = self.ar_audio_position(y_emb)
        xy_pos = torch.concat([x, y_pos], dim=1)

        bsz = x.shape[0]
        src_len = x_len + y_len
        # Text rows: the all-False text->text mask is extended with all-True
        # text->audio columns, shape (x, x + y).
        x_attn_mask_pad = F.pad(
            x_attn_mask,
            (0, y_len),
            value=True,
        )
        # Audio rows: strictly upper-triangular audio->audio mask, extended on
        # the left with all-False audio->text columns, shape (y, x + y).
        y_attn_mask = F.pad(
            torch.triu(torch.ones(y_len, y_len, dtype=torch.bool), diagonal=1),
            (x_len, 0),
            value=False,
        )
        xy_attn_mask = torch.concat([x_attn_mask_pad, y_attn_mask], dim=0)\
            .unsqueeze(0)\
            .expand(bsz*self.num_head, -1, -1)\
            .view(bsz, self.num_head, src_len, src_len)\
            .to(device=x.device, dtype=torch.bool)

        idx = 0

        xy_dec, k_cache, v_cache = self.t2s_transformer.process_prompt(xy_pos, xy_attn_mask, None)

        logits = self.ar_predict_layer(xy_dec[:, -1])
        # Drop the last (EOS) logit so the first step cannot end decoding.
        logits = logits[:, :-1]
        samples = sample(logits, y, top_k=self.top_k, top_p=1, repetition_penalty=1.35, temperature=1.0)[0]
        y = torch.concat([y, samples], dim=1)
        y_emb = self.ar_audio_embedding(y[:, -1:])
        xy_pos = y_emb * self.ar_audio_position.x_scale + self.ar_audio_position.alpha * self.ar_audio_position.pe[:, y_len + idx].to(dtype=y_emb.dtype,device=y_emb.device)

        stop = False
        for idx in range(1, 1500):
            xy_dec, k_cache, v_cache = self.t2s_transformer.decode_next_token(xy_pos, k_cache, v_cache)
            logits = self.ar_predict_layer(xy_dec[:, -1])

            # At least 10 tokens must be predicted before stopping is
            # allowed (0.4s).
            if(idx<11):
                logits = logits[:, :-1]

            samples = sample(logits, y, top_k=self.top_k, top_p=1, repetition_penalty=1.35, temperature=1.0)[0]

            y = torch.concat([y, samples], dim=1)

            if early_stop_num != -1 and (y.shape[1] - prefix_len) > early_stop_num:
                stop = True
            if torch.argmax(logits, dim=-1)[0] == self.EOS or samples[0, 0] == self.EOS:
                stop = True
            if stop:
                if y.shape[1] == 0:
                    y = torch.concat([y, torch.zeros_like(samples)], dim=1)
                break

            y_emb = self.ar_audio_embedding(y[:, -1:])
            xy_pos = y_emb * self.ar_audio_position.x_scale + self.ar_audio_position.alpha * self.ar_audio_position.pe[:, y_len + idx].to(dtype=y_emb.dtype,device=y_emb.device)

        # Overwrite the final token with 0.
        y[0,-1] = 0

        return y[:, -idx:].unsqueeze(0)
515
+
516
# BERT checkpoint location; can be overridden through the ``bert_path``
# environment variable.
bert_path = os.getenv(
    "bert_path", "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
)

# Tell the cnhubert feature extractor where its checkpoint lives.
cnhubert_base_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
cnhubert.cnhubert_base_path = cnhubert_base_path
521
+
522
@torch.jit.script
def build_phone_level_feature(res:Tensor, word2ph:IntTensor):
    """Expand per-token features to per-phone features.

    Row ``i`` of ``res`` is repeated ``word2ph[i]`` times and all repeated
    rows are concatenated in order.

    Args:
        res: Features of shape (tokens, feature_dim).
        word2ph: Number of phones per token, shape (tokens,).

    Returns:
        Tensor of shape (sum(word2ph), feature_dim).
    """
    pieces = []
    for i in range(word2ph.shape[0]):
        pieces.append(res[i].repeat(word2ph[i].item(), 1))
    return torch.cat(pieces, dim=0)
531
+
532
class MyBertModel(torch.nn.Module):
    """Wraps a BERT model so that it returns phone-level features."""

    def __init__(self, bert_model):
        super().__init__()
        self.bert = bert_model

    def forward(self, input_ids:torch.Tensor, attention_mask:torch.Tensor, token_type_ids:torch.Tensor, word2ph:IntTensor):
        """Return the third-to-last hidden state expanded by ``word2ph``.

        The first and last token positions of the first batch item are
        dropped before the expansion.
        """
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # outputs[1] is indexed as a sequence of per-layer hidden states.
        hidden_states = outputs[1]
        res = torch.cat(hidden_states[-3:-2], -1)[0][1:-1]
        return build_phone_level_feature(res, word2ph)
542
+
543
class SSLModel(torch.nn.Module):
    """Extracts self-supervised content features with the cnhubert model."""

    def __init__(self):
        super().__init__()
        self.ssl = cnhubert.get_model().model

    def forward(self, ref_audio_16k)-> torch.Tensor:
        """Return the model's ``last_hidden_state`` with dims 1 and 2 swapped."""
        hidden = self.ssl(ref_audio_16k)["last_hidden_state"]
        return hidden.transpose(1, 2)
551
+
552
class ExportSSLModel(torch.nn.Module):
    """Export wrapper around an SSL model that also exposes resampling."""

    def __init__(self,ssl:SSLModel):
        super().__init__()
        self.ssl = ssl

    def forward(self, ref_audio:torch.Tensor):
        """Delegate to the wrapped SSL model."""
        return self.ssl(ref_audio)

    @torch.jit.export
    def resample(self,ref_audio:torch.Tensor,src_sr:int,dst_sr:int)->torch.Tensor:
        """Resample ``ref_audio`` from ``src_sr`` to ``dst_sr`` as float32."""
        return resamplex(ref_audio,src_sr,dst_sr).float()
564
+
565
def export_bert(output_path):
    """Trace the BERT feature extractor and save it as ``bert_model.pt``.

    A fixed sample sentence provides the example inputs for tracing; the
    sequence dimensions are marked as dynamic beforehand.

    Args:
        output_path: Directory that receives ``bert_model.pt``.
    """
    tokenizer = AutoTokenizer.from_pretrained(bert_path)

    text = "叹息声一声接着一声传出,木兰对着房门织布.听不见织布机织布的声音,只听见木兰在叹息.问木兰在想什么?问木兰在惦记什么?木兰答道,我也没有在想什么,也没有在惦记什么."
    encoded = tokenizer(text, return_tensors="pt")

    # Example alignment: one phone for punctuation, two for everything else.
    punctuation = [',','。',':','?',",",".","?"]
    word2ph = [1 if c in punctuation else 2 for c in text]

    bert_model = AutoModelForMaskedLM.from_pretrained(bert_path,output_hidden_states=True,torchscript=True)
    my_bert_model = MyBertModel(bert_model)

    ref_bert_inputs = {
        'input_ids': encoded['input_ids'],
        'attention_mask': encoded['attention_mask'],
        'token_type_ids': encoded['token_type_ids'],
        'word2ph': torch.Tensor(word2ph).int(),
    }

    # Dimension of each input that varies with the text length.
    dynamic_dims = {
        'input_ids': 1,
        'attention_mask': 1,
        'token_type_ids': 1,
        'word2ph': 0,
    }
    for name, dim in dynamic_dims.items():
        torch._dynamo.mark_dynamic(ref_bert_inputs[name], dim)

    my_bert_model = torch.jit.trace(my_bert_model,example_kwarg_inputs=ref_bert_inputs)
    output_path = os.path.join(output_path, "bert_model.pt")
    my_bert_model.save(output_path)
    print('#### exported bert ####')
597
+
598
def export(gpt_path, vits_path, ref_audio_path, ref_text, output_path, export_bert_and_ssl=False, device='cpu'):
    """Export the GPT-SoVITS pipeline to TorchScript.

    Writes ``gpt_sovits_model.pt`` into ``output_path``; with
    ``export_bert_and_ssl`` it additionally writes ``ssl_model.pt`` and
    ``bert_model.pt``.

    Args:
        gpt_path: Text-to-semantic (GPT) checkpoint.
        vits_path: SoVITS checkpoint.
        ref_audio_path: Reference audio used as example input for tracing.
        ref_text: Transcript of the reference audio.
        output_path: Output directory; created when missing.
        export_bert_and_ssl: Also export the SSL and BERT models.
        device: Device on which the combined model is traced.
    """
    if os.path.exists(output_path):
        print(f"目录已存在: {output_path}")
    else:
        os.makedirs(output_path)
        print(f"目录已创建: {output_path}")

    ref_audio = torch.tensor([load_audio(ref_audio_path, 16000)]).float()
    ssl = SSLModel()
    if export_bert_and_ssl:
        s = ExportSSLModel(torch.jit.trace(ssl,example_inputs=(ref_audio)))
        ssl_path = os.path.join(output_path, "ssl_model.pt")
        torch.jit.script(s).save(ssl_path)
        print('#### exported ssl ####')
        export_bert(output_path)
    else:
        s = ExportSSLModel(ssl)

    print(f"device: {device}")

    # Example inputs: the reference text plus a fixed sample sentence.
    ref_seq_id, ref_bert_T, _ = get_phones_and_bert(ref_text,"all_zh",'v2')
    ref_seq = torch.LongTensor([ref_seq_id]).to(device)
    ref_bert = ref_bert_T.T.to(ref_seq.device)
    text_seq_id, text_bert_T, _ = get_phones_and_bert("这是一条测试语音,说什么无所谓,只是给它一个例子","all_zh",'v2')
    text_seq = torch.LongTensor([text_seq_id]).to(device)
    text_bert = text_bert_T.T.to(text_seq.device)

    ssl_content = ssl(ref_audio).to(device)

    vits = VitsModel(vits_path).to(device)
    vits.eval()

    # Load onto the CPU first so that checkpoints saved on a GPU can be
    # opened anywhere; the module is moved to ``device`` right after.
    dict_s1 = torch.load(gpt_path, map_location="cpu")
    raw_t2s = get_raw_t2s_model(dict_s1).to(device)
    print('#### get_raw_t2s_model ####')
    print(raw_t2s.config)
    t2s_m = T2SModel(raw_t2s)
    t2s_m.eval()
    t2s = torch.jit.script(t2s_m).to(device)
    print('#### script t2s_m ####')

    print("vits.hps.data.sampling_rate:",vits.hps.data.sampling_rate)
    gpt_sovits = GPT_SoVITS(t2s,vits).to(device)
    gpt_sovits.eval()

    ref_audio_sr = s.resample(ref_audio,16000,32000).to(device)

    # Mark the length-dependent dimensions as dynamic before tracing.
    torch._dynamo.mark_dynamic(ssl_content, 2)
    torch._dynamo.mark_dynamic(ref_audio_sr, 1)
    torch._dynamo.mark_dynamic(ref_seq, 1)
    torch._dynamo.mark_dynamic(text_seq, 1)
    torch._dynamo.mark_dynamic(ref_bert, 0)
    torch._dynamo.mark_dynamic(text_bert, 0)

    with torch.no_grad():
        gpt_sovits_export = torch.jit.trace(
            gpt_sovits,
            example_inputs=(
                ssl_content,
                ref_audio_sr,
                ref_seq,
                text_seq,
                ref_bert,
                text_bert))

        gpt_sovits_path = os.path.join(output_path, "gpt_sovits_model.pt")
        gpt_sovits_export.save(gpt_sovits_path)
        print('#### exported gpt_sovits ####')
670
+
671
@torch.jit.script
def parse_audio(ref_audio):
    """Resample 48 kHz audio to 16 kHz and 32 kHz.

    Returns:
        ``(ref_audio_16k, ref_audio_sr)`` as float tensors: the 16 kHz and
        the 32 kHz version, in that order.
    """
    audio_16k = torchaudio.functional.resample(ref_audio, 48000, 16000).float()
    audio_32k = torchaudio.functional.resample(ref_audio, 48000, 32000).float()
    return audio_16k, audio_32k
676
+
677
@torch.jit.script
def resamplex(ref_audio:torch.Tensor,src_sr:int,dst_sr:int)->torch.Tensor:
    """Resample ``ref_audio`` from ``src_sr`` to ``dst_sr`` and cast to float."""
    resampled = torchaudio.functional.resample(ref_audio, src_sr, dst_sr)
    return resampled.float()
680
+
681
class GPT_SoVITS(nn.Module):
    """End-to-end pipeline: text-to-semantic decoding followed by synthesis."""

    def __init__(self, t2s:T2SModel,vits:VitsModel):
        super().__init__()
        self.t2s = t2s
        self.vits = vits

    def forward(self, ssl_content:torch.Tensor, ref_audio_sr:torch.Tensor, ref_seq:Tensor, text_seq:Tensor, ref_bert:Tensor, text_bert:Tensor, speed=1.0):
        """Synthesize audio for ``text_seq`` in the voice of the reference.

        The reference's SSL features are quantized into prompt tokens, the
        text-to-semantic model continues them for the target text, and the
        synthesizer turns the predicted tokens into audio.
        """
        codes = self.vits.vq_model.extract_latent(ssl_content)
        # Element [0, 0] of the codes, with a leading dimension added back.
        prompts = codes[0, 0].unsqueeze(0)

        pred_semantic = self.t2s(prompts, ref_seq, text_seq, ref_bert, text_bert)
        return self.vits(text_seq, pred_semantic, ref_audio_sr, speed)
695
+
696
def test():
    """Manual smoke test for previously exported TorchScript models.

    Parses the same command-line options as the exporter, loads the scripted
    BERT, SSL and GPT-SoVITS modules from hard-coded ``onnx/...`` paths onto
    CUDA, compares the scripted BERT output with ``get_phones_and_bert`` and
    finally synthesizes a fixed Chinese sentence into ``out.wav``.

    NOTE(review): ``--output_path``, ``gpt_path`` and ``vits_path`` are read
    but never used here, and ``bert_path`` must already exist as a global.
    """
    parser = argparse.ArgumentParser(description="GPT-SoVITS Command Line Tool")
    parser.add_argument('--gpt_model', required=True, help="Path to the GPT model file")
    parser.add_argument('--sovits_model', required=True, help="Path to the SoVITS model file")
    parser.add_argument('--ref_audio', required=True, help="Path to the reference audio file")
    parser.add_argument('--ref_text', required=True, help="Path to the reference text file")
    parser.add_argument('--output_path', required=True, help="Path to the output directory")


    args = parser.parse_args()
    gpt_path = args.gpt_model
    vits_path = args.sovits_model
    ref_audio_path = args.ref_audio
    ref_text = args.ref_text

    tokenizer = AutoTokenizer.from_pretrained(bert_path)
    # bert_model = AutoModelForMaskedLM.from_pretrained(bert_path,output_hidden_states=True,torchscript=True)
    # bert = MyBertModel(bert_model)
    my_bert = torch.jit.load("onnx/bert_model.pt",map_location='cuda')

    # dict_s1 = torch.load(gpt_path, map_location="cuda")
    # raw_t2s = get_raw_t2s_model(dict_s1)
    # t2s = T2SModel(raw_t2s)
    # t2s.eval()
    # t2s = torch.jit.load("onnx/xw/t2s_model.pt",map_location='cuda')

    # vits_path = "SoVITS_weights_v2/xw_e8_s216.pth"
    # vits = VitsModel(vits_path)
    # vits.eval()

    # ssl = ExportSSLModel(SSLModel()).to('cuda')
    # ssl.eval()
    ssl = torch.jit.load("onnx/by/ssl_model.pt",map_location='cuda')

    # gpt_sovits = GPT_SoVITS(t2s,vits)
    gpt_sovits = torch.jit.load("onnx/by/gpt_sovits_model.pt",map_location='cuda')

    # Reference text -> phoneme ids and BERT features (transposed before use).
    ref_seq_id,ref_bert_T,ref_norm_text = get_phones_and_bert(ref_text,"all_zh",'v2')
    ref_seq = torch.LongTensor([ref_seq_id])
    ref_bert = ref_bert_T.T.to(ref_seq.device)
    # text_seq_id,text_bert_T,norm_text = get_phones_and_bert("昨天晚上看见征兵文书,知道君主在大规模征兵,那么多卷征兵文册,每一卷上都有父亲的名字.","all_zh",'v2')
    text = "昨天晚上看见征兵文书,知道君主在大规模征兵,那么多卷征兵文册,每一卷上都有父亲的名字."

    text_seq_id,text_bert_T,norm_text = get_phones_and_bert(text,"all_zh",'v2')

    # Build the inputs of the scripted BERT by hand: one entry for each listed
    # punctuation character, two for every other character.
    test_bert = tokenizer(text, return_tensors="pt")
    word2ph = []
    for c in text:
        if c in [',','。',':','?',"?",",","."]:
            word2ph.append(1)
        else:
            word2ph.append(2)
    test_bert['word2ph'] = torch.Tensor(word2ph).int()

    test_bert = my_bert(
        test_bert['input_ids'].to('cuda'),
        test_bert['attention_mask'].to('cuda'),
        test_bert['token_type_ids'].to('cuda'),
        test_bert['word2ph'].to('cuda')
    )

    text_seq = torch.LongTensor([text_seq_id])
    text_bert = text_bert_T.T.to(text_seq.device)

    # Compare the reference BERT features with the scripted model's output.
    print('text_bert:',text_bert.shape,text_bert)
    print('test_bert:',test_bert.shape,test_bert)
    print(torch.allclose(text_bert.to('cuda'),test_bert))

    print('text_seq:',text_seq.shape)
    print('text_bert:',text_bert.shape,text_bert.type())

    #[1,N]
    ref_audio = torch.tensor([load_audio(ref_audio_path, 16000)]).float().to('cuda')
    print('ref_audio:',ref_audio.shape)

    ref_audio_sr = ssl.resample(ref_audio,16000,32000)
    print('start ssl')
    ssl_content = ssl(ref_audio)

    # Print each input shape, then move the remaining CPU tensors to CUDA.
    print('start gpt_sovits:')
    print('ssl_content:',ssl_content.shape)
    print('ref_audio_sr:',ref_audio_sr.shape)
    print('ref_seq:',ref_seq.shape)
    ref_seq=ref_seq.to('cuda')
    print('text_seq:',text_seq.shape)
    text_seq=text_seq.to('cuda')
    print('ref_bert:',ref_bert.shape)
    ref_bert=ref_bert.to('cuda')
    print('text_bert:',text_bert.shape)
    text_bert=text_bert.to('cuda')

    with torch.no_grad():
        # Note: the scripted-BERT features (test_bert) are passed here, not text_bert.
        audio = gpt_sovits(ssl_content, ref_audio_sr, ref_seq, text_seq, ref_bert, test_bert)
    print('start write wav')
    soundfile.write("out.wav", audio.detach().cpu().numpy(), 32000)
791
+
792
+
793
+ import text
794
+ import json
795
+
796
def export_symbel(version='v2'):
    """Dump the symbol-to-id table of the given version to ``onnx/symbols_<v>.json``.

    ``version == 'v1'`` writes ``text._symbol_to_id_v1`` to
    ``onnx/symbols_v1.json``; any other value writes ``text._symbol_to_id_v2``
    to ``onnx/symbols_v2.json``.
    """
    if version=='v1':
        symbols, target = text._symbol_to_id_v1, "onnx/symbols_v1.json"
    else:
        symbols, target = text._symbol_to_id_v2, "onnx/symbols_v2.json"
    with open(target, "w") as file:
        json.dump(symbols, file, indent=4)
805
+
806
def main():
    """Parse the export command line and forward the options to ``export``."""
    parser = argparse.ArgumentParser(description="GPT-SoVITS Command Line Tool")

    # Mandatory options, registered in the same order as they appear in --help.
    required_options = (
        ('--gpt_model', "Path to the GPT model file"),
        ('--sovits_model', "Path to the SoVITS model file"),
        ('--ref_audio', "Path to the reference audio file"),
        ('--ref_text', "Path to the reference text file"),
        ('--output_path', "Path to the output directory"),
    )
    for flag, description in required_options:
        parser.add_argument(flag, required=True, help=description)
    parser.add_argument('--export_common_model', action='store_true', help="Export Bert and SSL model")
    parser.add_argument('--device', help="Device to use")

    options = parser.parse_args()
    export(
        gpt_path=options.gpt_model,
        vits_path=options.sovits_model,
        ref_audio_path=options.ref_audio,
        ref_text=options.ref_text,
        output_path=options.output_path,
        device=options.device,
        export_bert_and_ssl=options.export_common_model,
    )
826
+
827
+ import inference_webui
828
if __name__ == "__main__":
    # Override the inference_webui module globals before exporting:
    # disable half precision and use float32 as the working dtype.
    inference_webui.is_half=False
    inference_webui.dtype=torch.float32
    main()
    # test()
GPT_SoVITS/extract_hubert.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import torch
4
+ import numpy as np
5
+ import traceback
6
+ from scipy.io import wavfile
7
+ import librosa
8
+ from pathlib import Path
9
+ from time import time as ttime
10
+ import shutil
11
+ from tools.my_utils import load_audio, clean_path
12
+ from feature_extractor import cnhubert
13
+
14
def my_save(fea, path, i_part):
    """Save ``fea`` to ``path`` through a temporary file in the working directory.

    Fix issue: torch.save doesn't support chinese path. The object is first
    written to a temporary file and then moved to its destination.

    Args:
        fea: object to serialize with ``torch.save``.
        path: destination file path; may be a bare file name.
        i_part: suffix mixed into the temporary file name.
    """
    target_dir = os.path.dirname(path)
    name = os.path.basename(path)
    tmp_path = f"{ttime()}{i_part}.pth"
    torch.save(fea, tmp_path)
    # os.path.join keeps a bare file name relative; the previous
    # f"{dir}/{name}" turned it into "/name" (filesystem root).
    shutil.move(tmp_path, os.path.join(target_dir, name))
21
+
22
def extract_hubert_features(data_dir="data8", exp_dir="logs/s2"):
    """Extract Hubert features for stage 2 training.

    Reads ``<exp_dir>/2-name2text.txt`` (four tab-separated fields per line,
    the first being the wav name), loads each wav from ``<exp_dir>/5-wav32k``,
    writes a rescaled 32 kHz int16 wav to ``<exp_dir>/5-wav32k`` and the Hubert
    ``last_hidden_state`` to ``<exp_dir>/4-cnhubert/<wav_name>.pt``.

    Files whose features contain NaN in half precision are retried once in
    float32.

    Args:
        data_dir: currently unused; kept for backward compatibility.
        exp_dir: experiment directory, relative to the project root.
    """
    # Get project root directory (parent of GPT_SoVITS)
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    # Convert relative paths to absolute
    exp_dir = os.path.join(root_dir, exp_dir)

    inp_text = os.path.join(exp_dir, "2-name2text.txt")
    inp_wav_dir = os.path.join(exp_dir, "5-wav32k")
    i_part = "0"
    all_parts = "1"
    opt_dir = exp_dir
    # NOTE(review): this resolves to <root>/pretrained_models/..., not
    # <root>/GPT_SoVITS/pretrained_models/... - confirm the intended location.
    cnhubert.cnhubert_base_path = os.path.join(root_dir, "pretrained_models", "chinese-hubert-base")
    is_half = torch.cuda.is_available()

    print("Starting Hubert feature extraction...")
    print(f"Input text file: {inp_text}")
    print(f"Input wav directory: {inp_wav_dir}")
    print(f"Output directory: {opt_dir}")

    hubert_dir = f"{opt_dir}/4-cnhubert"
    # NOTE(review): same directory as inp_wav_dir, so the rescaled wav
    # overwrites the input file - confirm this is intended.
    wav32dir = f"{opt_dir}/5-wav32k"
    os.makedirs(opt_dir, exist_ok=True)
    os.makedirs(hubert_dir, exist_ok=True)
    os.makedirs(wav32dir, exist_ok=True)

    maxx = 0.95
    alpha = 0.5
    if torch.cuda.is_available():
        device = "cuda:0"
    else:
        device = "cpu"

    print(f"Loading Hubert model from: {cnhubert.cnhubert_base_path}")
    model = cnhubert.get_model()
    if is_half:
        model = model.half().to(device)
    else:
        model = model.to(device)

    # (wav_name, wav_path) pairs whose features contained NaN.
    nan_fails = []

    def name2go(wav_name, wav_path):
        """Process one wav; reads ``model`` and ``is_half`` from the enclosing scope."""
        print(f"Processing: {wav_name} from {wav_path}")
        hubert_path = f"{hubert_dir}/{wav_name}.pt"
        if os.path.exists(hubert_path):
            print(f"Skipping {wav_name} - already processed")
            return

        if not os.path.exists(wav_path):
            print(f"Error: WAV file not found: {wav_path}")
            return

        tmp_audio = load_audio(wav_path, 32000)
        if tmp_audio is None:
            print(f"Error: Failed to load audio: {wav_path}")
            return

        tmp_max = np.abs(tmp_audio).max()
        if tmp_max > 2.2:
            print(f"{wav_name}-filtered,{tmp_max}")
            return

        # Blend peak-normalized audio with the raw signal (weight alpha); the
        # first version is scaled for int16 output, the second feeds the model.
        tmp_audio32 = (tmp_audio / tmp_max * (maxx * alpha * 32768)) + ((1 - alpha) * 32768) * tmp_audio
        tmp_audio32b = (tmp_audio / tmp_max * (maxx * alpha * 1145.14)) + ((1 - alpha) * 1145.14) * tmp_audio
        tmp_audio = librosa.resample(tmp_audio32b, orig_sr=32000, target_sr=16000)

        tensor_wav16 = torch.from_numpy(tmp_audio)
        if is_half:
            tensor_wav16 = tensor_wav16.half().to(device)
        else:
            tensor_wav16 = tensor_wav16.to(device)

        ssl = model.model(tensor_wav16.unsqueeze(0))["last_hidden_state"].transpose(1, 2).cpu()

        if np.isnan(ssl.detach().numpy()).sum() != 0:
            nan_fails.append((wav_name, wav_path))
            print(f"nan filtered:{wav_name}")
            return

        wavfile.write(
            f"{wav32dir}/{wav_name}",
            32000,
            tmp_audio32.astype("int16"),
        )
        my_save(ssl, hubert_path, i_part)
        print(f"Successfully processed {wav_name}")

    print(f"Reading text file: {inp_text}")
    with open(inp_text, "r", encoding="utf8") as f:
        lines = f.read().strip("\n").split("\n")
    print(f"Found {len(lines)} lines in text file")

    for line in lines[int(i_part)::int(all_parts)]:
        try:
            print(f"Processing line: {line}")
            # Exactly four tab-separated fields are required; only the name is used.
            wav_name, _, _, _ = line.split("\t")
            wav_name = os.path.basename(clean_path(wav_name))
            name2go(wav_name, f"{inp_wav_dir}/{wav_name}")
        except Exception:
            print(f"Error processing line: {line}")
            print(traceback.format_exc())

    if nan_fails and is_half:
        print("Retrying failed files in float32 mode...")
        is_half = False
        model = model.float()
        # Iterate over a snapshot: name2go appends to nan_fails when a file is
        # NaN again, which would otherwise extend this loop forever.
        for failed_name, failed_path in list(nan_fails):
            try:
                name2go(failed_name, failed_path)
            except Exception:
                print(f"Error retrying {failed_name}")
                print(traceback.format_exc())

    print("Hubert feature extraction complete.")
148
+
149
# Script entry point: run the extraction with the default directories.
if __name__ == "__main__":
    extract_hubert_features()
GPT_SoVITS/inference_cli.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import soundfile as sf
4
+
5
+ from tools.i18n.i18n import I18nAuto
6
+ from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav
7
+
8
+ i18n = I18nAuto()
9
+
10
def synthesize(GPT_model_path, SoVITS_model_path, ref_audio_path, ref_text_path, ref_language, target_text_path, target_language, output_path):
    """Synthesize the target text and write it to ``<output_path>/output.wav``.

    Args:
        GPT_model_path: GPT weights passed to ``change_gpt_weights``.
        SoVITS_model_path: SoVITS weights passed to ``change_sovits_weights``.
        ref_audio_path: path of the reference audio file.
        ref_text_path: UTF-8 text file containing the reference transcript.
        ref_language: language label of the reference, translated via ``i18n``.
        target_text_path: UTF-8 text file containing the text to synthesize.
        target_language: language label of the target, translated via ``i18n``.
        output_path: output directory; created if it does not exist.
    """
    # Read reference text
    with open(ref_text_path, 'r', encoding='utf-8') as file:
        ref_text = file.read()

    # Read target text
    with open(target_text_path, 'r', encoding='utf-8') as file:
        target_text = file.read()

    # Change model weights
    change_gpt_weights(gpt_path=GPT_model_path)
    change_sovits_weights(sovits_path=SoVITS_model_path)

    # Synthesize audio
    synthesis_result = get_tts_wav(ref_wav_path=ref_audio_path,
                                   prompt_text=ref_text,
                                   prompt_language=i18n(ref_language),
                                   text=target_text,
                                   text_language=i18n(target_language), top_p=1, temperature=1)

    result_list = list(synthesis_result)

    if not result_list:
        # Nothing was produced; say so instead of exiting silently.
        print("No audio was generated.")
        return

    # Only the last yielded (sampling_rate, audio) pair is saved.
    last_sampling_rate, last_audio_data = result_list[-1]
    if output_path:
        # sf.write does not create missing directories.
        os.makedirs(output_path, exist_ok=True)
    output_wav_path = os.path.join(output_path, "output.wav")
    sf.write(output_wav_path, last_audio_data, last_sampling_rate)
    print(f"Audio saved to {output_wav_path}")
37
+
38
def main():
    """Parse the command line and run ``synthesize`` with the given options."""
    parser = argparse.ArgumentParser(description="GPT-SoVITS Command Line Tool")

    # (flag, help text, allowed choices or None); every option is required.
    option_specs = (
        ('--gpt_model', "Path to the GPT model file", None),
        ('--sovits_model', "Path to the SoVITS model file", None),
        ('--ref_audio', "Path to the reference audio file", None),
        ('--ref_text', "Path to the reference text file", None),
        ('--ref_language', "Language of the reference audio", ["中文", "英文", "日文"]),
        ('--target_text', "Path to the target text file", None),
        ('--target_language', "Language of the target text", ["中文", "英文", "日文", "中英混合", "日英混合", "多语种混合"]),
        ('--output_path', "Path to the output directory", None),
    )
    for flag, description, allowed in option_specs:
        if allowed is None:
            parser.add_argument(flag, required=True, help=description)
        else:
            parser.add_argument(flag, required=True, choices=allowed, help=description)

    options = parser.parse_args()

    synthesize(
        options.gpt_model,
        options.sovits_model,
        options.ref_audio,
        options.ref_text,
        options.ref_language,
        options.target_text,
        options.target_language,
        options.output_path,
    )
52
+
53
# Script entry point for the command-line synthesizer.
if __name__ == '__main__':
    main()
55
+
GPT_SoVITS/inference_gui.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from PyQt5.QtCore import QEvent
4
+ from PyQt5.QtWidgets import QApplication, QMainWindow, QLabel, QLineEdit, QPushButton, QTextEdit
5
+ from PyQt5.QtWidgets import QGridLayout, QVBoxLayout, QWidget, QFileDialog, QStatusBar, QComboBox
6
+ import soundfile as sf
7
+
8
+ from tools.i18n.i18n import I18nAuto
9
+ i18n = I18nAuto()
10
+
11
+ from inference_webui import gpt_path, sovits_path, change_gpt_weights, change_sovits_weights, get_tts_wav
12
+
13
+
14
class GPTSoVITSGUI(QMainWindow):
    """PyQt5 main window for GPT-SoVITS inference.

    The window collects the model files, the reference audio and text, the
    target text and an output folder, then calls ``get_tts_wav`` and writes
    the last produced chunk to ``<output folder>/output.wav``.
    """

    # Paths of the currently loaded weights; start as the inference_webui defaults.
    GPT_Path = gpt_path
    SoVITS_Path = sovits_path

    def __init__(self):
        """Build all widgets, wire their signals and lay them out."""
        super().__init__()

        self.setWindowTitle('GPT-SoVITS GUI')
        self.setGeometry(800, 450, 950, 850)

        self.setStyleSheet("""
            QWidget {
                background-color: #a3d3b1;
            }

            QTabWidget::pane {
                background-color: #a3d3b1;
            }

            QTabWidget::tab-bar {
                alignment: left;
            }

            QTabBar::tab {
                background: #8da4bf;
                color: #ffffff;
                padding: 8px;
            }

            QTabBar::tab:selected {
                background: #2a3f54;
            }

            QLabel {
                color: #000000;
            }

            QPushButton {
                background-color: #4CAF50;
                color: white;
                padding: 8px;
                border: 1px solid #4CAF50;
                border-radius: 4px;
            }

            QPushButton:hover {
                background-color: #45a049;
                border: 1px solid #45a049;
                box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.1);
            }
        """)

        license_text = (
            "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. "
            "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
        license_label = QLabel(license_text)
        license_label.setWordWrap(True)

        # GPT model selection
        self.GPT_model_label = QLabel("选择GPT模型:")
        self.GPT_model_input = QLineEdit()
        self.GPT_model_input.setPlaceholderText("拖拽或选择文件")
        self.GPT_model_input.setText(self.GPT_Path)
        self.GPT_model_input.setReadOnly(True)
        self.GPT_model_button = QPushButton("选择GPT模型文件")
        self.GPT_model_button.clicked.connect(self.select_GPT_model)

        # SoVITS model selection
        self.SoVITS_model_label = QLabel("选择SoVITS模型:")
        self.SoVITS_model_input = QLineEdit()
        self.SoVITS_model_input.setPlaceholderText("拖拽或选择文件")
        self.SoVITS_model_input.setText(self.SoVITS_Path)
        self.SoVITS_model_input.setReadOnly(True)
        self.SoVITS_model_button = QPushButton("选择SoVITS模型文件")
        self.SoVITS_model_button.clicked.connect(self.select_SoVITS_model)

        # Reference audio
        self.ref_audio_label = QLabel("上传参考音频:")
        self.ref_audio_input = QLineEdit()
        self.ref_audio_input.setPlaceholderText("拖拽或选择文件")
        self.ref_audio_input.setReadOnly(True)
        self.ref_audio_button = QPushButton("选择音频文件")
        self.ref_audio_button.clicked.connect(self.select_ref_audio)

        # Reference text (typed directly or loaded from a file)
        self.ref_text_label = QLabel("参考音频文本:")
        self.ref_text_input = QLineEdit()
        self.ref_text_input.setPlaceholderText("直接输入文字或上传文本")
        self.ref_text_button = QPushButton("上传文本")
        self.ref_text_button.clicked.connect(self.upload_ref_text)

        # Reference language
        self.ref_language_label = QLabel("参考音频语言:")
        self.ref_language_combobox = QComboBox()
        self.ref_language_combobox.addItems(["中文", "英文", "日文", "中英混合", "日英混合", "多语种混合"])
        self.ref_language_combobox.setCurrentText("多语种混合")

        # Target text (typed directly or loaded from a file)
        self.target_text_label = QLabel("合成目标文本:")
        self.target_text_input = QLineEdit()
        self.target_text_input.setPlaceholderText("直接输入文字或上传文本")
        self.target_text_button = QPushButton("上传文本")
        self.target_text_button.clicked.connect(self.upload_target_text)

        # Target language
        self.target_language_label = QLabel("合成音频语言:")
        self.target_language_combobox = QComboBox()
        self.target_language_combobox.addItems(["中文", "英文", "日文", "中英混合", "日英混合", "多语种混合"])
        self.target_language_combobox.setCurrentText("多语种混合")

        # Output folder
        self.output_label = QLabel("输出音频路径:")
        self.output_input = QLineEdit()
        self.output_input.setPlaceholderText("拖拽或选择文件")
        self.output_input.setReadOnly(True)
        self.output_button = QPushButton("选择文件夹")
        self.output_button.clicked.connect(self.select_output_path)

        # Read-only log area
        self.output_text = QTextEdit()
        self.output_text.setReadOnly(True)

        self.add_drag_drop_events([
            self.GPT_model_input,
            self.SoVITS_model_input,
            self.ref_audio_input,
            self.ref_text_input,
            self.target_text_input,
            self.output_input,
        ])

        self.synthesize_button = QPushButton("合成")
        self.synthesize_button.clicked.connect(self.synthesize)

        self.clear_output_button = QPushButton("清空输出")
        self.clear_output_button.clicked.connect(self.clear_output)

        self.status_bar = QStatusBar()

        main_layout = QVBoxLayout()

        # Created without a parent: a QMainWindow already owns a layout, so
        # QGridLayout(self) only triggered a Qt "already has a layout" warning.
        # The grid is attached through main_layout.addLayout below.
        input_layout = QGridLayout()
        input_layout.setSpacing(10)

        input_layout.addWidget(license_label, 0, 0, 1, 3)

        input_layout.addWidget(self.GPT_model_label, 1, 0)
        input_layout.addWidget(self.GPT_model_input, 2, 0, 1, 2)
        input_layout.addWidget(self.GPT_model_button, 2, 2)

        input_layout.addWidget(self.SoVITS_model_label, 3, 0)
        input_layout.addWidget(self.SoVITS_model_input, 4, 0, 1, 2)
        input_layout.addWidget(self.SoVITS_model_button, 4, 2)

        input_layout.addWidget(self.ref_audio_label, 5, 0)
        input_layout.addWidget(self.ref_audio_input, 6, 0, 1, 2)
        input_layout.addWidget(self.ref_audio_button, 6, 2)

        input_layout.addWidget(self.ref_language_label, 7, 0)
        input_layout.addWidget(self.ref_language_combobox, 8, 0, 1, 1)
        input_layout.addWidget(self.ref_text_label, 9, 0)
        input_layout.addWidget(self.ref_text_input, 10, 0, 1, 2)
        input_layout.addWidget(self.ref_text_button, 10, 2)

        input_layout.addWidget(self.target_language_label, 11, 0)
        input_layout.addWidget(self.target_language_combobox, 12, 0, 1, 1)
        input_layout.addWidget(self.target_text_label, 13, 0)
        input_layout.addWidget(self.target_text_input, 14, 0, 1, 2)
        input_layout.addWidget(self.target_text_button, 14, 2)

        input_layout.addWidget(self.output_label, 15, 0)
        input_layout.addWidget(self.output_input, 16, 0, 1, 2)
        input_layout.addWidget(self.output_button, 16, 2)

        main_layout.addLayout(input_layout)

        output_layout = QVBoxLayout()
        output_layout.addWidget(self.output_text)
        main_layout.addLayout(output_layout)

        main_layout.addWidget(self.synthesize_button)

        main_layout.addWidget(self.clear_output_button)

        main_layout.addWidget(self.status_bar)

        self.central_widget = QWidget()
        self.central_widget.setLayout(main_layout)
        self.setCentralWidget(self.central_widget)

    def dragEnterEvent(self, event):
        """Accept drags that carry URLs."""
        if event.mimeData().hasUrls():
            event.acceptProposedAction()

    def dropEvent(self, event):
        """Put the dropped path(s) into the reference-audio field.

        Several dropped files are joined with ", ".
        """
        if event.mimeData().hasUrls():
            file_paths = [url.toLocalFile() for url in event.mimeData().urls()]
            if len(file_paths) == 1:
                self.update_ref_audio(file_paths[0])
            else:
                self.update_ref_audio(", ".join(file_paths))

    def add_drag_drop_events(self, widgets):
        """Enable drops on each widget and route its events through ``eventFilter``."""
        for widget in widgets:
            widget.setAcceptDrops(True)
            widget.installEventFilter(self)

    def eventFilter(self, obj, event):
        """Accept URL drag-enter/drop events, then defer to the default filter."""
        if event.type() in (QEvent.DragEnter, QEvent.Drop):
            mime_data = event.mimeData()
            if mime_data.hasUrls():
                event.acceptProposedAction()

        return super().eventFilter(obj, event)

    def select_GPT_model(self):
        """Let the user pick a ``*.ckpt`` file and show it in the GPT field."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择GPT模型文件", "", "GPT Files (*.ckpt)")
        if file_path:
            self.GPT_model_input.setText(file_path)

    def select_SoVITS_model(self):
        """Let the user pick a ``*.pth`` file and show it in the SoVITS field."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择SoVITS模型文件", "", "SoVITS Files (*.pth)")
        if file_path:
            self.SoVITS_model_input.setText(file_path)

    def select_ref_audio(self):
        """Let the user pick a wav/mp3 file as reference audio."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择参考音频文件", "", "Audio Files (*.wav *.mp3)")
        if file_path:
            self.update_ref_audio(file_path)

    def upload_ref_text(self):
        """Load the reference text from a UTF-8 ``*.txt`` file."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
        if file_path:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
            self.ref_text_input.setText(content)

    def upload_target_text(self):
        """Load the target text from a UTF-8 ``*.txt`` file."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择文本文件", "", "Text Files (*.txt)")
        if file_path:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
            self.target_text_input.setText(content)

    def select_output_path(self):
        """Let the user pick the output folder with a directory-only dialog."""
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        options |= QFileDialog.ShowDirsOnly

        folder_dialog = QFileDialog()
        folder_dialog.setOptions(options)
        folder_dialog.setFileMode(QFileDialog.Directory)

        if folder_dialog.exec_():
            folder_path = folder_dialog.selectedFiles()[0]
            self.output_input.setText(folder_path)

    def update_ref_audio(self, file_path):
        """Show ``file_path`` in the reference-audio field."""
        self.ref_audio_input.setText(file_path)

    def clear_output(self):
        """Clear the log area."""
        self.output_text.clear()

    def synthesize(self):
        """Run inference with the current form values and save ``output.wav``."""
        GPT_model_path = self.GPT_model_input.text()
        SoVITS_model_path = self.SoVITS_model_input.text()
        ref_audio_path = self.ref_audio_input.text()
        language_combobox = self.ref_language_combobox.currentText()
        language_combobox = i18n(language_combobox)
        ref_text = self.ref_text_input.text()
        target_language_combobox = self.target_language_combobox.currentText()
        target_language_combobox = i18n(target_language_combobox)
        target_text = self.target_text_input.text()
        output_path = self.output_input.text()

        # Reload weights only when the selected file differs from the loaded one.
        if GPT_model_path != self.GPT_Path:
            change_gpt_weights(gpt_path=GPT_model_path)
            self.GPT_Path = GPT_model_path
        if SoVITS_model_path != self.SoVITS_Path:
            change_sovits_weights(sovits_path=SoVITS_model_path)
            self.SoVITS_Path = SoVITS_model_path

        synthesis_result = get_tts_wav(ref_wav_path=ref_audio_path,
                                       prompt_text=ref_text,
                                       prompt_language=language_combobox,
                                       text=target_text,
                                       text_language=target_language_combobox)

        result_list = list(synthesis_result)

        if result_list:
            # Only the last yielded (sampling_rate, audio) pair is saved.
            last_sampling_rate, last_audio_data = result_list[-1]
            output_wav_path = os.path.join(output_path, "output.wav")
            sf.write(output_wav_path, last_audio_data, last_sampling_rate)

            result = "Audio saved to " + output_wav_path

            self.status_bar.showMessage("合成完成!输出路径:" + output_wav_path, 5000)
            self.output_text.append("处理结果:\n" + result)
304
+
305
+
306
# Script entry point: create the Qt application, show the main window and
# exit with the event loop's return code.
if __name__ == '__main__':
    app = QApplication(sys.argv)
    mainWin = GPTSoVITSGUI()
    mainWin.show()
    sys.exit(app.exec_())
GPT_SoVITS/inference_webui.py ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
'''
Recognize as mixed Chinese-English
Recognize as mixed Japanese-English
Multilingual: segment the text and detect each language
Recognize everything as Chinese
Recognize everything as English
Recognize everything as Japanese
'''
import logging
import traceback

# Silence noisy third-party loggers: only errors are shown.
logging.getLogger("markdown_it").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("httpcore").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)
logging.getLogger("asyncio").setLevel(logging.ERROR)
logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
logging.getLogger("multipart.multipart").setLevel(logging.ERROR)
import LangSegment, os, re, sys, json
import pdb
import torch

# Best effort: replace gradio's analytics version check with a no-op.
# NOTE(review): the bare except also hides errors other than ImportError.
try:
    import gradio.analytics as analytics
    analytics.version_check = lambda:None
except:...

# Model version, taken from the "version" environment variable (default "v2").
version=os.environ.get("version","v2")
# Candidate pretrained weights; index 0 is the v2 file, index 1 the older one.
pretrained_sovits_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2G488k.pth"]
pretrained_gpt_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"]

# Keep only the candidates that exist on disk (_[0] = GPT, _[-1] = SoVITS).
_ =[[],[]]
for i in range(2):
    if os.path.exists(pretrained_gpt_name[i]):
        _[0].append(pretrained_gpt_name[i])
    if os.path.exists(pretrained_sovits_name[i]):
        _[-1].append(pretrained_sovits_name[i])
pretrained_gpt_name,pretrained_sovits_name = _



# Create ./weight.json with empty GPT/SoVITS tables on first run.
if os.path.exists(f"./weight.json"):
    pass
else:
    with open(f"./weight.json", 'w', encoding="utf-8") as file:json.dump({'GPT':{},'SoVITS':{}},file)

# Resolve the weights to load: environment variable first, then the
# weight.json entry for this version, then the existing pretrained files.
with open(f"./weight.json", 'r', encoding="utf-8") as file:
    weight_data = file.read()
    weight_data=json.loads(weight_data)
    gpt_path = os.environ.get(
        "gpt_path", weight_data.get('GPT',{}).get(version,pretrained_gpt_name))
    sovits_path = os.environ.get(
        "sovits_path", weight_data.get('SoVITS',{}).get(version,pretrained_sovits_name))
    # The fallback is a list; take its first entry.
    # NOTE(review): raises IndexError if no pretrained file was found.
    if isinstance(gpt_path,list):
        gpt_path = gpt_path[0]
    if isinstance(sovits_path,list):
        sovits_path = sovits_path[0]

# gpt_path = os.environ.get(
#     "gpt_path", pretrained_gpt_name
# )
# sovits_path = os.environ.get("sovits_path", pretrained_sovits_name)
cnhubert_base_path = os.environ.get(
    "cnhubert_base_path", "GPT_SoVITS/pretrained_models/chinese-hubert-base"
)
bert_path = os.environ.get(
    "bert_path", "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
)
# Port of the inference web UI (default 9872).
infer_ttswebui = os.environ.get("infer_ttswebui", 9872)
infer_ttswebui = int(infer_ttswebui)
is_share = os.environ.get("is_share", "False")
# SECURITY NOTE(review): eval() runs the environment variable as Python code;
# only safe while the launcher fully controls this value.
is_share = eval(is_share)
if "_CUDA_VISIBLE_DEVICES" in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
# Half precision only when requested and CUDA is available.
# SECURITY NOTE(review): same eval() concern as for is_share above.
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
punctuation = set(['!', '?', '…', ',', '.', '-'," "])
78
import gradio as gr
from transformers import AutoModelForMaskedLM, AutoTokenizer
import numpy as np
import librosa
from feature_extractor import cnhubert

# Point the cnhubert module at the configured model directory.
cnhubert.cnhubert_base_path = cnhubert_base_path

from module.models import SynthesizerTrn
from AR.models.t2s_lightning_module import Text2SemanticLightningModule
from text import cleaned_text_to_sequence
from text.cleaner import clean_text
from time import time as ttime
from module.mel_processing import spectrogram_torch
from tools.my_utils import load_audio
from tools.i18n.i18n import I18nAuto, scan_language_list

# UI language: the last command-line argument wins when it is a known
# language code, otherwise the "language" environment variable (default "Auto").
language=os.environ.get("language","Auto")
language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
i18n = I18nAuto(language=language)

# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # Ensure this is also set when the inference UI is launched directly.

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Maps the translated UI label to the internal language code.
dict_language_v1 = {
    i18n("中文"): "all_zh",  # treat everything as Chinese
    i18n("英文"): "en",  # treat everything as English (unchanged)
    i18n("日文"): "all_ja",  # treat everything as Japanese
    i18n("中英混合"): "zh",  # mixed Chinese-English (unchanged)
    i18n("日英混合"): "ja",  # mixed Japanese-English (unchanged)
    i18n("多语种混合"): "auto",  # multilingual: segment and detect languages
}
dict_language_v2 = {
    i18n("中文"): "all_zh",  # treat everything as Chinese
    i18n("英文"): "en",  # treat everything as English (unchanged)
    i18n("日文"): "all_ja",  # treat everything as Japanese
    i18n("粤语"): "all_yue",  # treat everything as Cantonese
    i18n("韩文"): "all_ko",  # treat everything as Korean
    i18n("中英混合"): "zh",  # mixed Chinese-English (unchanged)
    i18n("日英混合"): "ja",  # mixed Japanese-English (unchanged)
    i18n("粤英混合"): "yue",  # mixed Cantonese-English (unchanged)
    i18n("韩英混合"): "ko",  # mixed Korean-English (unchanged)
    i18n("多语种混合"): "auto",  # multilingual: segment and detect languages
    i18n("多语种混合(粤语)"): "auto_yue",  # multilingual (Cantonese): segment and detect languages
}
dict_language = dict_language_v1 if version =='v1' else dict_language_v2

# Load the BERT tokenizer and masked-LM model once at import time.
tokenizer = AutoTokenizer.from_pretrained(bert_path)
bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)
if is_half == True:
    bert_model = bert_model.half().to(device)
else:
    bert_model = bert_model.to(device)
135
+
136
+
137
def get_bert_feature(text, word2ph):
    """Return phone-level BERT features for ``text``.

    The text is run through the module-level ``tokenizer`` / ``bert_model``;
    the third-from-last hidden layer is taken, and its first and last
    positions are dropped (presumably the special tokens — TODO confirm).
    Row ``i`` of what remains is repeated ``word2ph[i]`` times.

    Args:
        text: Normalised text; ``len(text)`` must equal ``len(word2ph)``.
        word2ph: Number of phones for each character of ``text``.

    Returns:
        A CPU tensor, transposed so that phones run along the last axis.

    Raises:
        AssertionError: If ``word2ph`` and ``text`` differ in length.
    """
    with torch.no_grad():
        encoded = tokenizer(text, return_tensors="pt")
        for name in encoded:
            encoded[name] = encoded[name].to(device)
        outputs = bert_model(**encoded, output_hidden_states=True)
        char_features = torch.cat(outputs["hidden_states"][-3:-2], -1)[0].cpu()[1:-1]
    assert len(word2ph) == len(text)
    # Expand each character's feature row to one row per phone.
    per_phone = [
        char_features[index].repeat(count, 1)
        for index, count in enumerate(word2ph)
    ]
    return torch.cat(per_phone, dim=0).T
151
+
152
+
153
class DictToAttrRecursive(dict):
    """Dict whose keys can also be read, written and deleted as attributes.

    Nested plain dicts are wrapped recursively, so ``cfg.a.b`` works for
    ``{"a": {"b": 1}}``. Every value is stored twice: as a dict item (so
    ``**cfg`` and ``cfg["a"]`` work) and as an instance attribute.
    """

    def __init__(self, input_dict):
        super().__init__(input_dict)
        for key, value in input_dict.items():
            # __setattr__ wraps nested dicts and stores the value both as an
            # item and as an attribute, so a single call is sufficient.
            setattr(self, key, value)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails.
        try:
            return self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")

    def __setattr__(self, key, value):
        if isinstance(value, dict):
            value = DictToAttrRecursive(value)
        super(DictToAttrRecursive, self).__setitem__(key, value)
        super().__setattr__(key, value)

    def __delattr__(self, item):
        try:
            del self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")
        # Also drop the mirrored instance attribute; otherwise the deleted
        # key would still be readable through normal attribute lookup.
        self.__dict__.pop(item, None)
179
+
180
+
181
# Self-supervised speech model obtained from cnhubert; get_tts_wav() feeds it the reference
# audio. Cast to half precision when is_half is set, then moved to the selected device.
ssl_model = cnhubert.get_model()
if is_half == True:
    ssl_model = ssl_model.half().to(device)
else:
    ssl_model = ssl_model.to(device)
186
+
187
+
188
def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
    """Load a SoVITS checkpoint and make it the active synthesizer.

    Rebinds the module globals ``vq_model``, ``hps``, ``version`` and
    ``dict_language`` and records the path under ``["SoVITS"][version]`` in
    ``./weight.json``. The checkpoint is treated as "v1" when its
    text-embedding table has 322 rows and as "v2" otherwise.

    Args:
        sovits_path: Path of the checkpoint to load.
        prompt_language: Currently selected reference-language label, or None.
        text_language: Currently selected target-language label, or None.

    Returns:
        None when either language is None. Otherwise six gradio update dicts:
        the two language-dropdown choice lists, then the prompt text, prompt
        language, target text and target language. A language label that the
        new model does not offer is reset to Chinese and its text is cleared.
    """
    global vq_model, hps, version, dict_language
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    checkpoint = torch.load(sovits_path, map_location="cpu")
    hps = checkpoint["config"]
    hps = DictToAttrRecursive(hps)
    hps.model.semantic_frame_rate = "25hz"
    embedding_rows = checkpoint['weight']['enc_p.text_embedding.weight'].shape[0]
    hps.model.version = "v1" if embedding_rows == 322 else "v2"
    version = hps.model.version
    vq_model = SynthesizerTrn(
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model
    )
    if "pretrained" not in sovits_path:
        del vq_model.enc_q
    if is_half == True:
        vq_model = vq_model.half().to(device)
    else:
        vq_model = vq_model.to(device)
    vq_model.eval()
    print(vq_model.load_state_dict(checkpoint["weight"], strict=False))
    dict_language = dict_language_v1 if version == 'v1' else dict_language_v2

    # Remember the selected checkpoint for this model version.
    with open("./weight.json") as f:
        data = json.loads(f.read())
        data["SoVITS"][version] = sovits_path
    with open("./weight.json", "w") as f:
        f.write(json.dumps(data))

    if prompt_language is None or text_language is None:
        return None

    if prompt_language in dict_language:
        prompt_text_update = {'__type__':'update'}
        prompt_language_update = {'__type__':'update', 'value':prompt_language}
    else:
        prompt_text_update = {'__type__':'update', 'value':''}
        prompt_language_update = {'__type__':'update', 'value':i18n("中文")}
    if text_language in dict_language:
        text_update = {'__type__':'update'}
        text_language_update = {'__type__':'update', 'value':text_language}
    else:
        text_update = {'__type__':'update', 'value':''}
        text_language_update = {'__type__':'update', 'value':i18n("中文")}
    prompt_choices = {'__type__':'update', 'choices':list(dict_language.keys())}
    text_choices = {'__type__':'update', 'choices':list(dict_language.keys())}
    return prompt_choices, text_choices, prompt_text_update, prompt_language_update, text_update, text_language_update
232
+
233
+
234
+
235
+ change_sovits_weights(sovits_path)
236
+
237
+
238
def change_gpt_weights(gpt_path):
    """Load a GPT (text-to-semantic) checkpoint and make it the active model.

    Rebinds the module globals ``hz`` (fixed at 50), ``max_sec``,
    ``t2s_model`` and ``config``, prints the parameter count, and records the
    path under ``["GPT"][version]`` in ``./weight.json``.

    Args:
        gpt_path: Path of the checkpoint to load.
    """
    global hz, max_sec, t2s_model, config
    hz = 50
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    checkpoint = torch.load(gpt_path, map_location="cpu")
    config = checkpoint["config"]
    max_sec = config["data"]["max_sec"]
    t2s_model = Text2SemanticLightningModule(config, "****", is_train=False)
    t2s_model.load_state_dict(checkpoint["weight"])
    if is_half == True:
        t2s_model = t2s_model.half()
    t2s_model = t2s_model.to(device)
    t2s_model.eval()
    param_count = sum(param.nelement() for param in t2s_model.parameters())
    print("Number of parameter: %.2fM" % (param_count / 1e6))

    # Remember the selected checkpoint for the current model version.
    with open("./weight.json") as f:
        data = json.loads(f.read())
        data["GPT"][version] = gpt_path
    with open("./weight.json", "w") as f:
        f.write(json.dumps(data))
257
+
258
+
259
+ change_gpt_weights(gpt_path)
260
+
261
+
262
def get_spepc(hps, filename):
    """Load an audio file and return its spectrogram.

    The audio is loaded at ``hps.data.sampling_rate``. If its peak absolute
    value exceeds 1 it is divided by ``min(2, peak)`` before the spectrogram
    is computed with the STFT settings from ``hps.data``.

    Args:
        hps: Hyper-parameter object exposing ``data.sampling_rate``,
            ``data.filter_length``, ``data.hop_length`` and ``data.win_length``.
        filename: Path of the audio file.

    Returns:
        The result of ``spectrogram_torch`` for a batch containing one waveform.
    """
    waveform = torch.FloatTensor(load_audio(filename, int(hps.data.sampling_rate)))
    peak = waveform.abs().max()
    if peak > 1:
        waveform /= min(2, peak)
    return spectrogram_torch(
        waveform.unsqueeze(0),
        hps.data.filter_length,
        hps.data.sampling_rate,
        hps.data.hop_length,
        hps.data.win_length,
        center=False,
    )
278
+
279
def clean_text_inf(text, language, version):
    """Clean ``text`` and convert its phones to an id sequence.

    Returns:
        ``(phone_ids, word2ph, norm_text)``: the phones from ``clean_text``
        passed through ``cleaned_text_to_sequence``, plus the other two
        values that ``clean_text`` returned.
    """
    phone_symbols, word2ph, norm_text = clean_text(text, language, version)
    return cleaned_text_to_sequence(phone_symbols, version), word2ph, norm_text
283
+
284
# Tensor dtype matching the precision the models were loaded with.
dtype=torch.float16 if is_half == True else torch.float32


def get_bert_inf(phones, word2ph, norm_text, language):
    """Return the BERT feature matrix for one text segment.

    Chinese ("zh" / "all_zh") gets real features from ``get_bert_feature``;
    every other language gets a zero matrix with 1024 rows and one column per
    phone. The result is on the module-level ``device``.
    """
    if language.replace("all_","") == "zh":
        return get_bert_feature(norm_text, word2ph).to(device)#.to(dtype)
    zeros_dtype = torch.float16 if is_half == True else torch.float32
    return torch.zeros((1024, len(phones)), dtype=zeros_dtype).to(device)
296
+
297
+
298
# Sentence/clause delimiters. Both the ASCII and the fullwidth CJK forms of
# the comma, question mark, exclamation mark and colon are listed.
# NOTE(review): the previous literal listed ",", "?", "!" and ":" twice, which
# suggests the fullwidth forms were lost to character normalisation; they are
# restored here — confirm against the upstream file.
splits = {",", "。", "?", "!", ",", ".", "?", "!", "~", ":", ":", "—", "…"}


def get_first(text):
    """Return the part of ``text`` before its first delimiter, stripped.

    The delimiters are the characters in the module-level ``splits`` set.
    If ``text`` contains no delimiter the whole (stripped) text is returned.
    """
    pattern = "[" + "".join(re.escape(sep) for sep in splits) + "]"
    return re.split(pattern, text)[0].strip()
305
+
306
+ from text import chinese
307
def get_phones_and_bert(text,language,version,final=False):
    """Convert ``text`` into phoneme ids and a matching BERT feature matrix.

    Single-language codes ("en", "all_zh", "all_ja", "all_ko", "all_yue")
    process the text as a whole. Chinese or Cantonese text containing Latin
    letters is upper-cased, normalised and re-processed in the corresponding
    mixed mode. Mixed codes ("zh", "ja", "ko", "yue", "auto", "auto_yue")
    segment the text with LangSegment and process each segment separately.

    Args:
        text: Text to convert.
        language: Internal language code (a value of ``dict_language``).
        version: Model version, forwarded to the text frontend.
        final: Internal flag that stops the short-text retry from recursing.

    Returns:
        ``(phones, bert, norm_text)``: the phoneme id list, the BERT features
        cast to the module-level ``dtype``, and the normalised text.

    Raises:
        ValueError: If ``language`` is not one of the supported codes.
    """
    if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}:
        language = language.replace("all_","")
        if language == "en":
            LangSegment.setfilters(["en"])
            formattext = " ".join(tmp["text"] for tmp in LangSegment.getTexts(text))
        else:
            # Chinese, Japanese and Korean share ideographs and cannot be told
            # apart reliably, so the user's language choice is trusted.
            formattext = text
        # Collapse runs of spaces into one. The needle must be TWO spaces:
        # replacing a single space with a single space never terminates.
        while "  " in formattext:
            formattext = formattext.replace("  ", " ")
        if language == "zh":
            if re.search(r'[A-Za-z]', formattext):
                formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
                formattext = chinese.mix_text_normalize(formattext)
                return get_phones_and_bert(formattext,"zh",version)
            else:
                phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
                bert = get_bert_feature(norm_text, word2ph).to(device)
        elif language == "yue" and re.search(r'[A-Za-z]', formattext):
            formattext = re.sub(r'[a-z]', lambda x: x.group(0).upper(), formattext)
            formattext = chinese.mix_text_normalize(formattext)
            return get_phones_and_bert(formattext,"yue",version)
        else:
            phones, word2ph, norm_text = clean_text_inf(formattext, language, version)
            # Non-Chinese text gets an all-zero BERT matrix.
            bert = torch.zeros(
                (1024, len(phones)),
                dtype=torch.float16 if is_half == True else torch.float32,
            ).to(device)
    elif language in {"zh", "ja", "ko", "yue", "auto", "auto_yue"}:
        textlist=[]
        langlist=[]
        LangSegment.setfilters(["zh","ja","en","ko"])
        if language == "auto":
            for tmp in LangSegment.getTexts(text):
                langlist.append(tmp["lang"])
                textlist.append(tmp["text"])
        elif language == "auto_yue":
            for tmp in LangSegment.getTexts(text):
                # Segments detected as Chinese are read as Cantonese.
                if tmp["lang"] == "zh":
                    tmp["lang"] = "yue"
                langlist.append(tmp["lang"])
                textlist.append(tmp["text"])
        else:
            for tmp in LangSegment.getTexts(text):
                if tmp["lang"] == "en":
                    langlist.append(tmp["lang"])
                else:
                    # CJK ideographs are ambiguous; trust the user's choice.
                    langlist.append(language)
                textlist.append(tmp["text"])
        print(textlist)
        print(langlist)
        phones_list = []
        bert_list = []
        norm_text_list = []
        for segment_text, lang in zip(textlist, langlist):
            phones, word2ph, norm_text = clean_text_inf(segment_text, lang, version)
            bert = get_bert_inf(phones, word2ph, norm_text, lang)
            phones_list.append(phones)
            norm_text_list.append(norm_text)
            bert_list.append(bert)
        bert = torch.cat(bert_list, dim=1)
        phones = sum(phones_list, [])
        norm_text = ''.join(norm_text_list)
    else:
        # Previously this fell through to an UnboundLocalError on `phones`.
        raise ValueError(f"Unsupported language code: {language!r}")

    # Very short inputs are retried once with a leading "." prepended.
    if not final and len(phones) < 6:
        return get_phones_and_bert("." + text,language,version,final=True)

    return phones,bert.to(dtype),norm_text
378
+
379
+
380
def merge_short_text_in_array(texts, threshold):
    """Concatenate consecutive strings until each chunk reaches ``threshold``.

    Strings are accumulated in order; once the accumulated length is at
    least ``threshold`` the chunk is emitted. A leftover shorter than the
    threshold is appended to the last emitted chunk, or emitted on its own
    when nothing was emitted. Lists with fewer than two items are returned
    unchanged (the same object).
    """
    if len(texts) < 2:
        return texts
    merged = []
    pending = ""
    for piece in texts:
        pending = pending + piece
        if len(pending) < threshold:
            continue
        merged.append(pending)
        pending = ""
    if pending:
        if merged:
            merged[-1] += pending
        else:
            merged.append(pending)
    return merged
396
+
397
## ref_wav_path+prompt_text+prompt_language+text (single)+text_language+top_k+top_p+temperature
# cache_tokens={}  # no eviction mechanism implemented yet
# Predicted semantic tokens of the previous synthesis, keyed by sentence index only.
cache= {}
def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, how_to_cut=i18n("不切"), top_k=20, top_p=0.6, temperature=0.6, ref_free=False,speed=1,if_freeze=False,inp_refs=None):
    """Synthesize speech for ``text`` in the voice of a reference recording.

    Generator that yields exactly once: ``(sampling_rate, int16_audio)``, where the
    audio is every synthesized sentence followed by 0.3 s of silence, concatenated.

    Args:
        ref_wav_path: Path of the reference audio.
        prompt_text: Transcript of the reference audio; None or empty switches to
            reference-text-free mode.
        prompt_language: UI label of the transcript language (key of ``dict_language``).
        text: Text to synthesize; each line is synthesized separately.
        text_language: UI label of the target language (key of ``dict_language``).
        how_to_cut: UI label of the sentence-cutting method.
        top_k, top_p, temperature: Sampling parameters passed to the GPT model.
        ref_free: Ignore ``prompt_text`` and the reference audio's semantic tokens.
        speed: Speed factor passed to the decoder.
        if_freeze: Reuse the semantic tokens cached for the same sentence index.
        inp_refs: Optional uploaded files (objects with ``.name``) used as timbre
            references in place of ``ref_wav_path``.

    Raises:
        OSError: If the reference audio, loaded at 16 kHz, has fewer than 48000 or
            more than 160000 samples (outside 3-10 s).
    """
    global cache
    # A missing reference audio or target text only triggers a UI warning here;
    # execution continues.
    if ref_wav_path:pass
    else:gr.Warning(i18n('请上传参考音频'))
    if text:pass
    else:gr.Warning(i18n('请填入推理文本'))
    # Timing samples: t[0] is the reference-processing time, followed by three
    # entries per sentence (text frontend, GPT inference, decoding).
    t = []
    if prompt_text is None or len(prompt_text) == 0:
        ref_free = True
    t0 = ttime()
    # Translate UI labels into internal language codes.
    prompt_language = dict_language[prompt_language]
    text_language = dict_language[text_language]


    if not ref_free:
        prompt_text = prompt_text.strip("\n")
        # Make sure the reference text ends with a delimiter.
        if (prompt_text[-1] not in splits): prompt_text += "。" if prompt_language != "en" else "."
        print(i18n("实际输入的参考文本:"), prompt_text)
    text = text.strip("\n")
    # if (text[0] not in splits and len(get_first(text)) < 4): text = "。" + text if text_language != "en" else "." + text

    print(i18n("实际输入的目标文本:"), text)
    # 0.3 s of silence: appended to the reference audio and after every sentence.
    zero_wav = np.zeros(
        int(hps.data.sampling_rate * 0.3),
        dtype=np.float16 if is_half == True else np.float32,
    )
    if not ref_free:
        with torch.no_grad():
            wav16k, sr = librosa.load(ref_wav_path, sr=16000)
            if (wav16k.shape[0] > 160000 or wav16k.shape[0] < 48000):
                gr.Warning(i18n("参考音频在3~10秒范围外,请更换!"))
                raise OSError(i18n("参考音频在3~10秒范围外,请更换!"))
            wav16k = torch.from_numpy(wav16k)
            zero_wav_torch = torch.from_numpy(zero_wav)
            if is_half == True:
                wav16k = wav16k.half().to(device)
                zero_wav_torch = zero_wav_torch.half().to(device)
            else:
                wav16k = wav16k.to(device)
                zero_wav_torch = zero_wav_torch.to(device)
            wav16k = torch.cat([wav16k, zero_wav_torch])
            # SSL features of the reference audio -> semantic codes used as the prompt.
            ssl_content = ssl_model.model(wav16k.unsqueeze(0))[
                "last_hidden_state"
            ].transpose(
                1, 2
            )  # .float()
            codes = vq_model.extract_latent(ssl_content)
            prompt_semantic = codes[0, 0]
            prompt = prompt_semantic.unsqueeze(0).to(device)

    t1 = ttime()
    t.append(t1-t0)

    # Apply the selected cutting method; each returns newline-separated sentences.
    if (how_to_cut == i18n("凑四句一切")):
        text = cut1(text)
    elif (how_to_cut == i18n("凑50字一切")):
        text = cut2(text)
    elif (how_to_cut == i18n("按中文句号。切")):
        text = cut3(text)
    elif (how_to_cut == i18n("按英文句号.切")):
        text = cut4(text)
    elif (how_to_cut == i18n("按标点符号切")):
        text = cut5(text)
    while "\n\n" in text:
        text = text.replace("\n\n", "\n")
    print(i18n("实际输入的目标文本(切句后):"), text)
    texts = text.split("\n")
    texts = process_text(texts)
    texts = merge_short_text_in_array(texts, 5)
    audio_opt = []
    if not ref_free:
        phones1,bert1,norm_text1=get_phones_and_bert(prompt_text, prompt_language, version)

    for i_text,text in enumerate(texts):
        # Skip blank lines in the target text, which would otherwise cause errors.
        if (len(text.strip()) == 0):
            continue
        if (text[-1] not in splits): text += "。" if text_language != "en" else "."
        print(i18n("实际输入的目标文本(每句):"), text)
        phones2,bert2,norm_text2=get_phones_and_bert(text, text_language, version)
        print(i18n("前端处理后的文本(每句):"), norm_text2)
        if not ref_free:
            # Prepend the reference text's features and phonemes to the sentence's.
            bert = torch.cat([bert1, bert2], 1)
            all_phoneme_ids = torch.LongTensor(phones1+phones2).to(device).unsqueeze(0)
        else:
            bert = bert2
            all_phoneme_ids = torch.LongTensor(phones2).to(device).unsqueeze(0)

        bert = bert.to(device).unsqueeze(0)
        all_phoneme_len = torch.tensor([all_phoneme_ids.shape[-1]]).to(device)

        t2 = ttime()
        # cache_key="%s-%s-%s-%s-%s-%s-%s-%s"%(ref_wav_path,prompt_text,prompt_language,text,text_language,top_k,top_p,temperature)
        # print(cache.keys(),if_freeze)
        # NOTE(review): the cache key is the sentence index alone, so with if_freeze set a
        # cached entry is reused even if the text at that index has changed.
        if(i_text in cache and if_freeze==True):pred_semantic=cache[i_text]
        else:
            with torch.no_grad():
                pred_semantic, idx = t2s_model.model.infer_panel(
                    all_phoneme_ids,
                    all_phoneme_len,
                    None if ref_free else prompt,
                    bert,
                    # prompt_phone_len=ph_offset,
                    top_k=top_k,
                    top_p=top_p,
                    temperature=temperature,
                    early_stop_num=hz * max_sec,
                )
                # Keep only the last idx tokens of the prediction.
                pred_semantic = pred_semantic[:, -idx:].unsqueeze(0)
                cache[i_text]=pred_semantic
        t3 = ttime()
        refers=[]
        if(inp_refs):
            for path in inp_refs:
                try:
                    refer = get_spepc(hps, path.name).to(dtype).to(device)
                    refers.append(refer)
                except:
                    # Best effort: a reference file that fails to load is logged and skipped.
                    traceback.print_exc()
        # Fall back to the main reference audio when no extra reference was usable.
        if(len(refers)==0):refers = [get_spepc(hps, ref_wav_path).to(dtype).to(device)]
        audio = (vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers,speed=speed).detach().cpu().numpy()[0, 0])
        max_audio=np.abs(audio).max()  # simple guard against 16-bit clipping
        if max_audio>1:audio/=max_audio
        audio_opt.append(audio)
        audio_opt.append(zero_wav)
        t4 = ttime()
        t.extend([t2 - t1,t3 - t2, t4 - t3])
        t1 = ttime()
    # Totals: reference processing, text frontend, GPT inference, decoding.
    print("%.3f\t%.3f\t%.3f\t%.3f" %
          (t[0], sum(t[1::3]), sum(t[2::3]), sum(t[3::3]))
          )
    yield hps.data.sampling_rate, (np.concatenate(audio_opt, 0) * 32768).astype(
        np.int16
    )
535
+
536
+
537
def split(todo_text):
    """Split text into pieces that each end with a delimiter from ``splits``.

    "……" is first replaced by "。" and "——" by ",". If the text does not end
    with a delimiter, "。" is appended so that the final piece is terminated
    as well. Every returned piece includes its trailing delimiter.

    Args:
        todo_text: Text to split.

    Returns:
        List of pieces in order; an empty list for empty input (which
        previously raised IndexError).
    """
    todo_text = todo_text.replace("……", "。").replace("——", ",")
    if not todo_text:
        return []
    if todo_text[-1] not in splits:
        todo_text += "。"
    pieces = []
    piece_start = 0
    for position, char in enumerate(todo_text):
        if char in splits:
            pieces.append(todo_text[piece_start:position + 1])
            piece_start = position + 1
    # The text always ends with a delimiter, so nothing is left over here.
    return pieces
554
+
555
+
556
def cut1(inp):
    """Group the sentences of ``inp`` four at a time, one group per line.

    The text is split with ``split()``. Group boundaries are every fourth
    sentence, but the last boundary is replaced by "end of text", so the
    final group absorbs the remainder (5 to 8 sentences when there are more
    than four). With four sentences or fewer the input is returned as one
    line. Groups consisting only of punctuation are dropped.

    Args:
        inp: Text to cut.

    Returns:
        The groups joined with newlines; "" for empty input (which
        previously raised IndexError).
    """
    inp = inp.strip("\n")
    if not inp:
        return ""
    inps = split(inp)
    split_idx = list(range(0, len(inps), 4))
    split_idx[-1] = None
    if len(split_idx) > 1:
        opts = [
            "".join(inps[start:stop])
            for start, stop in zip(split_idx, split_idx[1:])
        ]
    else:
        opts = [inp]
    opts = [item for item in opts if not set(item).issubset(punctuation)]
    return "\n".join(opts)
569
+
570
+
571
def cut2(inp):
    """Cut ``inp`` into chunks of whole sentences of roughly 50 characters.

    Sentences from ``split()`` are accumulated until the running length
    exceeds 50, at which point the chunk is emitted. If the final chunk is
    shorter than 50 characters it is merged into the previous one. Chunks
    consisting only of punctuation are dropped. Input with fewer than two
    sentences is returned unchanged.

    Args:
        inp: Text to cut.

    Returns:
        The chunks joined with newlines; "" for empty input (which
        previously raised IndexError inside ``split``).
    """
    inp = inp.strip("\n")
    if not inp:
        return ""
    inps = split(inp)
    if len(inps) < 2:
        return inp
    opts = []
    summ = 0
    tmp_str = ""
    for sentence in inps:
        summ += len(sentence)
        tmp_str += sentence
        if summ > 50:
            summ = 0
            opts.append(tmp_str)
            tmp_str = ""
    if tmp_str != "":
        opts.append(tmp_str)
    # If the last chunk is too short, merge it into the previous one.
    if len(opts) > 1 and len(opts[-1]) < 50:
        opts[-2] = opts[-2] + opts[-1]
        opts = opts[:-1]
    opts = [item for item in opts if not set(item).issubset(punctuation)]
    return "\n".join(opts)
594
+
595
+
596
def cut3(inp):
    """Cut ``inp`` at every Chinese full stop ("。"), one sentence per line.

    Surrounding newlines and then surrounding "。" characters are stripped
    before splitting; the full stops themselves are not kept. Sentences made
    only of characters in ``punctuation`` (including empty ones) are dropped.
    """
    sentences = inp.strip("\n").strip("。").split("。")
    kept = [sentence for sentence in sentences if not set(sentence).issubset(punctuation)]
    return "\n".join(kept)
601
+
602
def cut4(inp):
    """Cut ``inp`` at every English full stop ("."), one sentence per line.

    Surrounding newlines and then surrounding "." characters are stripped
    before splitting; the full stops themselves are not kept. A "." that has
    a digit on both sides (a decimal point such as in "3.14") is not treated
    as a sentence end, matching ``cut5``. Sentences made only of characters
    in ``punctuation`` (including empty ones) are dropped.
    """
    inp = inp.strip("\n")
    # Match "." unless it is both preceded AND followed by a digit.
    opts = re.split(r"(?<!\d)\.|\.(?!\d)", inp.strip("."))
    opts = [item for item in opts if not set(item).issubset(punctuation)]
    return "\n".join(opts)
607
+
608
+
609
+ # contributed by https://github.com/AI-Hobbyist/GPT-SoVITS/blob/main/GPT_SoVITS/inference_webui.py
610
def cut5(inp):
    """Cut ``inp`` after every punctuation mark, one segment per line.

    Each segment keeps its trailing punctuation. A "." with a digit on both
    sides (a decimal point) does not end a segment. Segments consisting only
    of punctuation are dropped.

    Args:
        inp: Text to cut.

    Returns:
        The segments joined with newlines.
    """
    inp = inp.strip("\n")
    # ASCII and fullwidth CJK punctuation.
    # NOTE(review): the previous literal listed ',', '?', '!' and ';' twice,
    # which suggests the fullwidth forms were lost to character normalisation;
    # they are restored here — confirm against the upstream file.
    punds = {',', '.', ';', '?', '!', ':', '、', ',', '。', '?', '!', ';', ':', '…'}
    segments = []
    current = []
    last_index = len(inp) - 1

    for i, char in enumerate(inp):
        current.append(char)
        if char not in punds:
            continue
        is_decimal_point = (
            char == '.'
            and 0 < i < last_index
            and inp[i - 1].isdigit()
            and inp[i + 1].isdigit()
        )
        if is_decimal_point:
            continue
        segments.append("".join(current))
        current = []

    if current:
        segments.append("".join(current))

    opt = [item for item in segments if not set(item).issubset(punds)]
    return "\n".join(opt)
632
+
633
+
634
def custom_sort_key(s):
    """Return a natural-sort key for ``s``.

    The string is split into alternating non-digit and digit runs; digit
    runs become integers so that, for example, "e2" sorts before "e10".

    Args:
        s: String to build a key for.

    Returns:
        List alternating str and int parts, always starting with a str
        (possibly empty).
    """
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    parts = re.split(r'(\d+)', s)
    # With one capturing group, re.split puts the matched digit runs at the
    # odd indices. Using the position instead of str.isdigit() avoids calling
    # int() on characters such as "²", which isdigit() accepts but int() rejects.
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]
640
+
641
def process_text(texts):
    """Drop empty entries from a list of text lines.

    Args:
        texts: Lines of target text.

    Returns:
        New list without entries equal to None, " " or "".

    Raises:
        ValueError: If every entry is None, " ", "\\n" or "" (this includes
            an empty list).
    """
    if all(text in [None, " ", "\n",""] for text in texts):
        raise ValueError(i18n("请输入有效文本"))
    return [text for text in texts if text not in [None, " ", ""]]
651
+
652
+
653
def change_choices():
    """Rescan the weight folders and return dropdown updates.

    Returns:
        Two gradio update dicts with naturally sorted choices: the SoVITS
        model list first, then the GPT model list.
    """
    sovits_found, gpt_found = get_weights_names(GPT_weight_root, SoVITS_weight_root)
    sovits_update = {"choices": sorted(sovits_found, key=custom_sort_key), "__type__": "update"}
    gpt_update = {"choices": sorted(gpt_found, key=custom_sort_key), "__type__": "update"}
    return sovits_update, gpt_update
656
+
657
+
658
# Folders (relative to the working directory) scanned for SoVITS (.pth) and GPT (.ckpt)
# weights by get_weights_names().
SoVITS_weight_root=["SoVITS_weights_v2","SoVITS_weights"]
GPT_weight_root=["GPT_weights_v2","GPT_weights"]
# Create the folders up front so that os.listdir() in get_weights_names() cannot fail.
for path in SoVITS_weight_root+GPT_weight_root:
    os.makedirs(path,exist_ok=True)
662
+
663
+
664
def get_weights_names(GPT_weight_root, SoVITS_weight_root):
    """List the available SoVITS and GPT weight files.

    Each list starts with the module-level pretrained names, followed by
    every matching file found directly inside the given folders, written as
    "<folder>/<file>".

    Args:
        GPT_weight_root: Folders searched for ".ckpt" files.
        SoVITS_weight_root: Folders searched for ".pth" files.

    Returns:
        ``(SoVITS_names, GPT_names)``.
    """
    SoVITS_names = list(pretrained_sovits_name)
    SoVITS_names.extend(
        "%s/%s" % (folder, filename)
        for folder in SoVITS_weight_root
        for filename in os.listdir(folder)
        if filename.endswith(".pth")
    )
    GPT_names = list(pretrained_gpt_name)
    GPT_names.extend(
        "%s/%s" % (folder, filename)
        for folder in GPT_weight_root
        for filename in os.listdir(folder)
        if filename.endswith(".ckpt")
    )
    return SoVITS_names, GPT_names
674
+
675
+
676
+ SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
677
+
678
def _aligned_html(text, label, div_style):
    """Wrap ``text`` in a ``<label>`` element inside a styled ``<div>``."""
    return (
        f'<div style="{div_style}">\n'
        f'<{label} style="margin: 0; padding: 0;">{text}</{label}>\n'
        '</div>'
    )


def html_center(text, label='p'):
    """Return HTML that shows ``text`` centred inside a ``<label>`` element.

    NOTE(review): the div's "margin: 100; padding: 50;" carry no units —
    presumably browsers ignore them; confirm before changing.
    """
    return _aligned_html(text, label, "text-align: center; margin: 100; padding: 50;")


def html_left(text, label='p'):
    """Return HTML that shows ``text`` left-aligned inside a ``<label>`` element."""
    return _aligned_html(text, label, "text-align: left; margin: 0; padding: 0;")
687
+
688
+
689
+ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
690
+ gr.Markdown(
691
+ value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
692
+ )
693
+ with gr.Group():
694
+ gr.Markdown(html_center(i18n("模型切换"),'h3'))
695
+ with gr.Row():
696
+ GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(GPT_names, key=custom_sort_key), value=gpt_path, interactive=True, scale=14)
697
+ SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path, interactive=True, scale=14)
698
+ refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary", scale=14)
699
+ refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
700
+ gr.Markdown(html_center(i18n("*请上传并填写参考信息"),'h3'))
701
+ with gr.Row():
702
+ inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频,超过会报错!"), type="filepath", scale=13)
703
+ with gr.Column(scale=13):
704
+ ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True,scale=1)
705
+ gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开。<br>开启后无视填写的参考文本。")))
706
+ prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=5, max_lines=5,scale=1)
707
+ with gr.Column(scale=14):
708
+ prompt_language = gr.Dropdown(
709
+ label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"),
710
+ )
711
+ inp_refs = gr.File(label=i18n("可选项:通过拖拽多个文件上传多个参考音频(建议同性),平均融合他们的音色。如不填写此项,音色由左侧单个参考音频控制。如是微调模型,建议参考音频全部在微调训练集音色内,底模不用管。"),file_count="multiple")
712
+ gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"),'h3'))
713
+ with gr.Row():
714
+ with gr.Column(scale=13):
715
+ text = gr.Textbox(label=i18n("需要合成的文本"), value="", lines=26, max_lines=26)
716
+ with gr.Column(scale=7):
717
+ text_language = gr.Dropdown(
718
+ label=i18n("需要合成的语种")+i18n(".限制范围越小判别效果越好。"), choices=list(dict_language.keys()), value=i18n("中文"), scale=1
719
+ )
720
+ how_to_cut = gr.Dropdown(
721
+ label=i18n("怎么切"),
722
+ choices=[i18n("不切"), i18n("凑四句一切"), i18n("凑50字一切"), i18n("按中文句号。切"), i18n("按英文句号.切"), i18n("按标点符号切"), ],
723
+ value=i18n("凑四句一切"),
724
+ interactive=True, scale=1
725
+ )
726
+ gr.Markdown(value=html_center(i18n("语速调整,高为更快")))
727
+ if_freeze=gr.Checkbox(label=i18n("是否直接对上次合成结果调整语速和音色。防止随机性。"), value=False, interactive=True,show_label=True, scale=1)
728
+ speed = gr.Slider(minimum=0.6,maximum=1.65,step=0.05,label=i18n("语速"),value=1,interactive=True, scale=1)
729
+ gr.Markdown(html_center(i18n("GPT采样参数(无参考文本时不要太低。不懂就用默认):")))
730
+ top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=15,interactive=True, scale=1)
731
+ top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True, scale=1)
732
+ temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True, scale=1)
733
+ # with gr.Column():
734
+ # gr.Markdown(value=i18n("手工调整音素。当音素框不为空时使用手工音素输入推理,无视目标文本框。"))
735
+ # phoneme=gr.Textbox(label=i18n("音素框"), value="")
736
+ # get_phoneme_button = gr.Button(i18n("目标文本转音素"), variant="primary")
737
+ with gr.Row():
738
+ inference_button = gr.Button(i18n("合成语音"), variant="primary", size='lg', scale=25)
739
+ output = gr.Audio(label=i18n("输出的语音"), scale=14)
740
+
741
+ inference_button.click(
742
+ get_tts_wav,
743
+ [inp_ref, prompt_text, prompt_language, text, text_language, how_to_cut, top_k, top_p, temperature, ref_text_free,speed,if_freeze,inp_refs],
744
+ [output],
745
+ )
746
+ SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language])
747
+ GPT_dropdown.change(change_gpt_weights, [GPT_dropdown], [])
748
+
749
+ # gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
750
+ # with gr.Row():
751
+ # text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="")
752
+ # button1 = gr.Button(i18n("凑四句一切"), variant="primary")
753
+ # button2 = gr.Button(i18n("凑50字一切"), variant="primary")
754
+ # button3 = gr.Button(i18n("按中文句号。切"), variant="primary")
755
+ # button4 = gr.Button(i18n("按英文句号.切"), variant="primary")
756
+ # button5 = gr.Button(i18n("按标点符号切"), variant="primary")
757
+ # text_opt = gr.Textbox(label=i18n("切分后文本"), value="")
758
+ # button1.click(cut1, [text_inp], [text_opt])
759
+ # button2.click(cut2, [text_inp], [text_opt])
760
+ # button3.click(cut3, [text_inp], [text_opt])
761
+ # button4.click(cut4, [text_inp], [text_opt])
762
+ # button5.click(cut5, [text_inp], [text_opt])
763
+ # gr.Markdown(html_center(i18n("后续将支持转音素、手工修改音素、语音合成分步执行。")))
764
+
765
if __name__ == '__main__':
    # Launch the Gradio app with request queuing enabled, on the configured port, and open
    # it in the browser.
    # NOTE(review): server_name="0.0.0.0" listens on all network interfaces — confirm that
    # this exposure is intended.
    app.queue().launch(#concurrency_count=511, max_size=1022
        server_name="0.0.0.0",
        inbrowser=True,
        share=is_share,
        server_port=infer_ttswebui,
        quiet=True,
    )
GPT_SoVITS/inference_webui_fast.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ 按中英混合识别
3
+ 按日英混合识别
4
+ 多语种启动切分识别语种
5
+ 全部按中文识别
6
+ 全部按英文识别
7
+ 全部按日文识别
8
+ '''
9
+ import random
10
+ import os, re, logging
11
+ import sys
12
+ now_dir = os.getcwd()
13
+ sys.path.append(now_dir)
14
+ sys.path.append("%s/GPT_SoVITS" % (now_dir))
15
+
16
+ logging.getLogger("markdown_it").setLevel(logging.ERROR)
17
+ logging.getLogger("urllib3").setLevel(logging.ERROR)
18
+ logging.getLogger("httpcore").setLevel(logging.ERROR)
19
+ logging.getLogger("httpx").setLevel(logging.ERROR)
20
+ logging.getLogger("asyncio").setLevel(logging.ERROR)
21
+ logging.getLogger("charset_normalizer").setLevel(logging.ERROR)
22
+ logging.getLogger("torchaudio._extension").setLevel(logging.ERROR)
23
+ import pdb
24
+ import torch
25
+
26
+ try:
27
+ import gradio.analytics as analytics
28
+ analytics.version_check = lambda:None
29
+ except:...
30
+
31
+
32
# Runtime configuration, read from environment variables.
# Port of the inference web UI (default 9872).
infer_ttswebui = os.environ.get("infer_ttswebui", 9872)
infer_ttswebui = int(infer_ttswebui)
# NOTE(review): eval() on an environment variable executes arbitrary code if the value is
# not a plain literal such as "True"/"False" — confirm the launcher controls these values.
is_share = os.environ.get("is_share", "False")
is_share = eval(is_share)
# "_CUDA_VISIBLE_DEVICES", when set, is copied to CUDA_VISIBLE_DEVICES.
if "_CUDA_VISIBLE_DEVICES" in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]

# Half precision is only used when CUDA is available.
is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
# Optional overrides for the model paths; None leaves the values from the TTS config file.
gpt_path = os.environ.get("gpt_path", None)
sovits_path = os.environ.get("sovits_path", None)
cnhubert_base_path = os.environ.get("cnhubert_base_path", None)
bert_path = os.environ.get("bert_path", None)
version=os.environ.get("version","v2")
45
+
46
+ import gradio as gr
47
+ from TTS_infer_pack.TTS import TTS, TTS_Config
48
+ from TTS_infer_pack.text_segmentation_method import get_method
49
+ from tools.i18n.i18n import I18nAuto, scan_language_list
50
+
51
+ language=os.environ.get("language","Auto")
52
+ language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
53
+ i18n = I18nAuto(language=language)
54
+
55
+
56
+ # os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 确保直接启动推理UI时也能够设置。
57
+
58
# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
# elif torch.backends.mps.is_available():
#     device = "mps"
else:
    device = "cpu"

# Maps the translated language label shown in the UI to the internal language code.
# "all_*" codes treat the whole text as one language, bare codes are the
# "<language> + English" mixed modes, and "auto*" segments the text by detected language.
dict_language_v1 = {
    i18n("中文"): "all_zh",  # treat everything as Chinese
    i18n("英文"): "en",  # treat everything as English (unchanged)
    i18n("日文"): "all_ja",  # treat everything as Japanese
    i18n("中英混合"): "zh",  # mixed Chinese/English (unchanged)
    i18n("日英混合"): "ja",  # mixed Japanese/English (unchanged)
    i18n("多语种混合"): "auto",  # multilingual: segment the text and detect each language
}
dict_language_v2 = {
    i18n("中文"): "all_zh",  # treat everything as Chinese
    i18n("英文"): "en",  # treat everything as English (unchanged)
    i18n("日文"): "all_ja",  # treat everything as Japanese
    i18n("粤语"): "all_yue",  # treat everything as Cantonese
    i18n("韩文"): "all_ko",  # treat everything as Korean
    i18n("中英混合"): "zh",  # mixed Chinese/English (unchanged)
    i18n("日英混合"): "ja",  # mixed Japanese/English (unchanged)
    i18n("粤英混合"): "yue",  # mixed Cantonese/English (unchanged)
    i18n("韩英混合"): "ko",  # mixed Korean/English (unchanged)
    i18n("多语种混合"): "auto",  # multilingual: segment the text and detect each language
    i18n("多语种混合(粤语)"): "auto_yue",  # multilingual, Cantonese variant
}
# The v1 table is used only for version "v1"; any other version gets the v2 table.
dict_language = dict_language_v1 if version =='v1' else dict_language_v2

# Maps the translated cutting-method label shown in the UI to the method name that
# inference() passes on as "text_split_method".
cut_method = {
    i18n("不切"):"cut0",
    i18n("凑四句一切"): "cut1",
    i18n("凑50字一切"): "cut2",
    i18n("按中文句号。切"): "cut3",
    i18n("按英文句号.切"): "cut4",
    i18n("按标点符号切"): "cut5",
}
96
+
97
# Build the inference configuration from the YAML file, then override it with the device,
# precision, version and any model paths supplied through the environment.
tts_config = TTS_Config("GPT_SoVITS/configs/tts_infer.yaml")
tts_config.device = device
tts_config.is_half = is_half
tts_config.version = version
if gpt_path is not None:
    tts_config.t2s_weights_path = gpt_path
if sovits_path is not None:
    tts_config.vits_weights_path = sovits_path
if cnhubert_base_path is not None:
    # NOTE(review): the attribute is spelled "cnhuhbert_base_path"; presumably this matches
    # the name TTS_Config defines — verify before "correcting" the spelling.
    tts_config.cnhuhbert_base_path = cnhubert_base_path
if bert_path is not None:
    tts_config.bert_base_path = bert_path

print(tts_config)
tts_pipeline = TTS(tts_config)
# Re-read the paths and version from the config object so the module globals match it.
gpt_path = tts_config.t2s_weights_path
sovits_path = tts_config.vits_weights_path
version = tts_config.version
115
+
116
def inference(text, text_lang,
              ref_audio_path,
              aux_ref_audio_paths,
              prompt_text,
              prompt_lang, top_k,
              top_p, temperature,
              text_split_method, batch_size,
              speed_factor, ref_text_free,
              split_bucket,fragment_interval,
              seed, keep_random, parallel_infer,
              repetition_penalty
              ):
    """Run the TTS pipeline for one request coming from the UI.

    UI labels for the languages and the cutting method are translated into
    internal codes through ``dict_language`` and ``cut_method``. When
    ``keep_random`` is set, or ``seed`` is -1, "" or None, a fresh random
    seed below 2**32 is drawn. ``prompt_text`` is replaced by "" when
    ``ref_text_free`` is set.

    Yields:
        ``(item, actual_seed)`` for every item produced by
        ``tts_pipeline.run``.
    """
    if keep_random:
        seed = -1
    if seed in [-1, "", None]:
        actual_seed = random.randrange(1 << 32)
    else:
        actual_seed = seed
    inputs = {
        "text": text,
        "text_lang": dict_language[text_lang],
        "ref_audio_path": ref_audio_path,
        "aux_ref_audio_paths": [] if aux_ref_audio_paths is None else [item.name for item in aux_ref_audio_paths],
        "prompt_text": "" if ref_text_free else prompt_text,
        "prompt_lang": dict_language[prompt_lang],
        "top_k": top_k,
        "top_p": top_p,
        "temperature": temperature,
        "text_split_method": cut_method[text_split_method],
        "batch_size": int(batch_size),
        "speed_factor": float(speed_factor),
        "split_bucket": split_bucket,
        "return_fragment": False,
        "fragment_interval": fragment_interval,
        "seed": actual_seed,
        "parallel_infer": parallel_infer,
        "repetition_penalty": repetition_penalty,
    }
    for result in tts_pipeline.run(inputs):
        yield result, actual_seed
153
+
154
def custom_sort_key(s):
    """Return a natural-sort key for ``s``.

    The string is split into alternating non-digit and digit runs; digit
    runs become integers so that, for example, "e2" sorts before "e10".

    Args:
        s: String to build a key for.

    Returns:
        List alternating str and int parts, always starting with a str
        (possibly empty).
    """
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    parts = re.split(r'(\d+)', s)
    # With one capturing group, re.split puts the matched digit runs at the
    # odd indices. Using the position instead of str.isdigit() avoids calling
    # int() on characters such as "²", which isdigit() accepts but int() rejects.
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]
160
+
161
+
162
def change_choices():
    """Rescan the weight folders and return dropdown updates.

    Returns:
        Two gradio update dicts with naturally sorted choices: the SoVITS
        model list first, then the GPT model list.
    """
    sovits_found, gpt_found = get_weights_names(GPT_weight_root, SoVITS_weight_root)
    sovits_update = {"choices": sorted(sovits_found, key=custom_sort_key), "__type__": "update"}
    gpt_update = {"choices": sorted(gpt_found, key=custom_sort_key), "__type__": "update"}
    return sovits_update, gpt_update
165
+
166
+
167
# Candidate pretrained checkpoints; both lists have exactly two entries.
pretrained_sovits_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2G488k.pth"]
pretrained_gpt_name=["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt", "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"]
# Keep only the checkpoints that exist on disk: _[0] collects GPT paths, _[-1] SoVITS paths.
_ =[[],[]]
for i in range(2):
    if os.path.exists(pretrained_gpt_name[i]):
        _[0].append(pretrained_gpt_name[i])
    if os.path.exists(pretrained_sovits_name[i]):
        _[-1].append(pretrained_sovits_name[i])
pretrained_gpt_name,pretrained_sovits_name = _

# Folders (relative to the working directory) scanned for SoVITS (.pth) and GPT (.ckpt)
# weights by get_weights_names().
SoVITS_weight_root=["SoVITS_weights_v2","SoVITS_weights"]
GPT_weight_root=["GPT_weights_v2","GPT_weights"]
# Create the folders up front so that os.listdir() in get_weights_names() cannot fail.
for path in SoVITS_weight_root+GPT_weight_root:
    os.makedirs(path,exist_ok=True)
181
+
182
def get_weights_names(GPT_weight_root, SoVITS_weight_root):
    """Collect the selectable SoVITS and GPT checkpoint paths.

    Each list starts with the pretrained checkpoints found at import time and
    is followed by every matching file in the given weight folders.

    Args:
        GPT_weight_root: Folders to scan for ``.ckpt`` files.
        SoVITS_weight_root: Folders to scan for ``.pth`` files.

    Returns:
        ``(SoVITS_names, GPT_names)`` as two lists of ``"folder/file"`` paths.
    """
    sovits_found = list(pretrained_sovits_name)
    for folder in SoVITS_weight_root:
        sovits_found.extend(
            "%s/%s" % (folder, fname) for fname in os.listdir(folder) if fname.endswith(".pth")
        )
    gpt_found = list(pretrained_gpt_name)
    for folder in GPT_weight_root:
        gpt_found.extend(
            "%s/%s" % (folder, fname) for fname in os.listdir(folder) if fname.endswith(".ckpt")
        )
    return sovits_found, gpt_found
192
+
193
+
194
+ SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
195
+
196
+
197
+
198
def change_sovits_weights(sovits_path,prompt_language=None,text_language=None):
    """Load new SoVITS weights and refresh the language widgets.

    After the weights are loaded, the module-level ``dict_language`` is
    switched to the v1 or v2 table according to the pipeline's reported
    version.

    Args:
        sovits_path: Checkpoint path handed to ``tts_pipeline.init_vits_weights``.
        prompt_language: Currently selected reference-audio language, or None.
        text_language: Currently selected target-text language, or None.

    Returns:
        None when either language argument is None. Otherwise a 6-tuple of
        Gradio-style update dicts: choices for both language dropdowns, then
        (text, language) updates for the prompt and for the target text. A
        language missing from the new table clears its text box and resets
        the dropdown to the default language.
    """
    global version, dict_language
    tts_pipeline.init_vits_weights(sovits_path)
    if tts_pipeline.configs.version == 'v1':
        dict_language = dict_language_v1
    else:
        dict_language = dict_language_v2

    if prompt_language is None or text_language is None:
        return None

    if prompt_language in list(dict_language.keys()):
        prompt_text_update = {'__type__':'update'}
        prompt_language_update = {'__type__':'update', 'value':prompt_language}
    else:
        prompt_text_update = {'__type__':'update', 'value':''}
        prompt_language_update = {'__type__':'update', 'value':i18n("中文")}

    if text_language in list(dict_language.keys()):
        text_update = {'__type__':'update'}
        text_language_update = {'__type__':'update', 'value':text_language}
    else:
        text_update = {'__type__':'update', 'value':''}
        text_language_update = {'__type__':'update', 'value':i18n("中文")}

    prompt_choices = {'__type__':'update', 'choices':list(dict_language.keys())}
    text_choices = {'__type__':'update', 'choices':list(dict_language.keys())}
    return prompt_choices, text_choices, prompt_text_update, prompt_language_update, text_update, text_language_update
214
+
215
+
216
+
217
+ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
218
+ gr.Markdown(
219
+ value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
220
+ )
221
+
222
+ with gr.Column():
223
+ # with gr.Group():
224
+ gr.Markdown(value=i18n("模型切换"))
225
+ with gr.Row():
226
+ GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(GPT_names, key=custom_sort_key), value=gpt_path, interactive=True)
227
+ SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(SoVITS_names, key=custom_sort_key), value=sovits_path, interactive=True)
228
+ refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
229
+ refresh_button.click(fn=change_choices, inputs=[], outputs=[SoVITS_dropdown, GPT_dropdown])
230
+
231
+
232
+ with gr.Row():
233
+ with gr.Column():
234
+ gr.Markdown(value=i18n("*请上传并填写参考信息"))
235
+ with gr.Row():
236
+ inp_ref = gr.Audio(label=i18n("主参考音频(请上传3~10秒内参考音频,超过会报错!)"), type="filepath")
237
+ inp_refs = gr.File(label=i18n("辅参考音频(可选多个,或不选)"),file_count="multiple")
238
+ prompt_text = gr.Textbox(label=i18n("主参考音频的文本"), value="", lines=2)
239
+ with gr.Row():
240
+ prompt_language = gr.Dropdown(
241
+ label=i18n("主参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文")
242
+ )
243
+ with gr.Column():
244
+ ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
245
+ gr.Markdown(i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开,开启后无视填写的参考文本。"))
246
+
247
+ with gr.Column():
248
+ gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
249
+ text = gr.Textbox(label=i18n("需要合成的文本"), value="", lines=20, max_lines=20)
250
+ text_language = gr.Dropdown(
251
+ label=i18n("需要合成的文本的语种"), choices=list(dict_language.keys()), value=i18n("中文")
252
+ )
253
+
254
+
255
+ with gr.Group():
256
+ gr.Markdown(value=i18n("推理设置"))
257
+ with gr.Row():
258
+
259
+ with gr.Column():
260
+ batch_size = gr.Slider(minimum=1,maximum=200,step=1,label=i18n("batch_size"),value=20,interactive=True)
261
+ fragment_interval = gr.Slider(minimum=0.01,maximum=1,step=0.01,label=i18n("分段间隔(秒)"),value=0.3,interactive=True)
262
+ speed_factor = gr.Slider(minimum=0.6,maximum=1.65,step=0.05,label="speed_factor",value=1.0,interactive=True)
263
+ top_k = gr.Slider(minimum=1,maximum=100,step=1,label=i18n("top_k"),value=5,interactive=True)
264
+ top_p = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("top_p"),value=1,interactive=True)
265
+ temperature = gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("temperature"),value=1,interactive=True)
266
+ repetition_penalty = gr.Slider(minimum=0,maximum=2,step=0.05,label=i18n("重复惩罚"),value=1.35,interactive=True)
267
+ with gr.Column():
268
+ with gr.Row():
269
+ how_to_cut = gr.Dropdown(
270
+ label=i18n("怎么切"),
271
+ choices=[i18n("不切"), i18n("凑四句一切"), i18n("凑50字一切"), i18n("按中文句号。切"), i18n("按英文句号.切"), i18n("按标点符号切"), ],
272
+ value=i18n("凑四句一切"),
273
+ interactive=True, scale=1
274
+ )
275
+ parallel_infer = gr.Checkbox(label=i18n("并行推理"), value=True, interactive=True, show_label=True)
276
+ split_bucket = gr.Checkbox(label=i18n("数据分桶(并行推理时会降低一点计算量)"), value=True, interactive=True, show_label=True)
277
+
278
+ with gr.Row():
279
+ seed = gr.Number(label=i18n("随机种子"),value=-1)
280
+ keep_random = gr.Checkbox(label=i18n("保持随机"), value=True, interactive=True, show_label=True)
281
+
282
+ output = gr.Audio(label=i18n("输出的语音"))
283
+ with gr.Row():
284
+ inference_button = gr.Button(i18n("合成语音"), variant="primary")
285
+ stop_infer = gr.Button(i18n("终止合成"), variant="primary")
286
+
287
+
288
+ inference_button.click(
289
+ inference,
290
+ [
291
+ text,text_language, inp_ref, inp_refs,
292
+ prompt_text, prompt_language,
293
+ top_k, top_p, temperature,
294
+ how_to_cut, batch_size,
295
+ speed_factor, ref_text_free,
296
+ split_bucket,fragment_interval,
297
+ seed, keep_random, parallel_infer,
298
+ repetition_penalty
299
+ ],
300
+ [output, seed],
301
+ )
302
+ stop_infer.click(tts_pipeline.stop, [], [])
303
+ SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown,prompt_language,text_language], [prompt_language,text_language,prompt_text,prompt_language,text,text_language])
304
+ GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], [])
305
+
306
+ with gr.Group():
307
+ gr.Markdown(value=i18n("文本切分工具。太长的文本合成出来效果不一定好,所以太长建议先切。合成会根据文本的换行分开合成再拼起来。"))
308
+ with gr.Row():
309
+ text_inp = gr.Textbox(label=i18n("需要合成的切分前文本"), value="", lines=4)
310
+ with gr.Column():
311
+ _how_to_cut = gr.Radio(
312
+ label=i18n("怎么切"),
313
+ choices=[i18n("不切"), i18n("凑四句一切"), i18n("凑50字一切"), i18n("按中文句号。切"), i18n("按英文句号.切"), i18n("按标点符号切"), ],
314
+ value=i18n("凑四句一切"),
315
+ interactive=True,
316
+ )
317
+ cut_text= gr.Button(i18n("切分"), variant="primary")
318
+
319
def to_cut(text_inp, how_to_cut):
    """Split ``text_inp`` with the cutting method selected in the UI.

    Args:
        text_inp: Raw text from the textbox; may be empty (``""``, ``[]`` or
            ``None``).
        how_to_cut: Key into ``cut_method`` naming the splitting strategy.

    Returns:
        ``""`` for empty or whitespace-only input, otherwise whatever the
        selected splitting method returns for ``text_inp``.
    """
    # Check for an empty payload before touching str methods: the original
    # called .strip() first, so the "== []" guard could never be reached
    # without raising AttributeError.
    if not text_inp or not text_inp.strip():
        return ""
    method = get_method(cut_method[how_to_cut])
    return method(text_inp)
324
+
325
+ text_opt = gr.Textbox(label=i18n("切分后文本"), value="", lines=4)
326
+ cut_text.click(to_cut, [text_inp, _how_to_cut], [text_opt])
327
+ gr.Markdown(value=i18n("后续将支持转音素、手工修改音素、语音合成分步执行。"))
328
+
329
+ if __name__ == '__main__':
330
+ app.queue().launch(#concurrency_count=511, max_size=1022
331
+ server_name="0.0.0.0",
332
+ inbrowser=True,
333
+ share=is_share,
334
+ server_port=infer_ttswebui,
335
+ quiet=True,
336
+ )
GPT_SoVITS/onnx_export.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from module.models_onnx import SynthesizerTrn, symbols_v1, symbols_v2
2
+ from AR.models.t2s_lightning_module_onnx import Text2SemanticLightningModule
3
+ import torch
4
+ import torchaudio
5
+ from torch import nn
6
+ from feature_extractor import cnhubert
7
+
8
+ cnhubert_base_path = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
9
+ cnhubert.cnhubert_base_path = cnhubert_base_path
10
+ ssl_model = cnhubert.get_model()
11
+ from text import cleaned_text_to_sequence
12
+ import soundfile
13
+ from tools.my_utils import load_audio
14
+ import os
15
+ import json
16
+
17
def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
    """Compute a magnitude spectrogram of ``y`` with a Hann window.

    The signal is reflect-padded by ``(n_fft - hop_size) / 2`` samples on each
    side before the STFT.

    Args:
        y: Input waveform tensor (assumed ``(batch, samples)`` — TODO confirm).
        n_fft: FFT size.
        sampling_rate: Accepted for interface compatibility; not used here.
        hop_size: Hop length between frames.
        win_size: Window length.
        center: Forwarded to ``torch.stft``.

    Returns:
        ``sqrt(re^2 + im^2 + 1e-6)`` of the one-sided STFT.
    """
    window = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
    pad = int((n_fft - hop_size) / 2)
    # Reflect padding needs a channel dimension, hence unsqueeze/squeeze.
    padded = torch.nn.functional.pad(y.unsqueeze(1), (pad, pad), mode="reflect").squeeze(1)
    stft_parts = torch.stft(
        padded,
        n_fft,
        hop_length=hop_size,
        win_length=win_size,
        window=window,
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=False,
    )
    # The last dimension holds (real, imag); the epsilon keeps sqrt
    # differentiable at zero.
    return torch.sqrt(stft_parts.pow(2).sum(-1) + 1e-6)
41
+
42
+
43
class DictToAttrRecursive(dict):
    """A dict whose keys are also readable and writable as attributes.

    Nested plain dicts are wrapped recursively, so ``cfg.data.hop_length`` and
    ``cfg["data"]["hop_length"]`` refer to the same value.
    """

    def __init__(self, input_dict):
        super().__init__(input_dict)
        for key, value in input_dict.items():
            # __setattr__ wraps nested dicts and stores the value both as an
            # item and as an attribute, so one call per key is enough (the
            # original wrapped every nested dict twice).
            setattr(self, key, value)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails; fall back to items.
        try:
            return self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")

    def __setattr__(self, key, value):
        if isinstance(value, dict):
            value = DictToAttrRecursive(value)
        super(DictToAttrRecursive, self).__setitem__(key, value)
        super().__setattr__(key, value)

    def __delattr__(self, item):
        try:
            del self[item]
        except KeyError:
            raise AttributeError(f"Attribute {item} not found")
        # __setattr__ also stored the value in the instance __dict__; drop it
        # too, otherwise the "deleted" attribute would still resolve.
        self.__dict__.pop(item, None)
69
+
70
+
71
class T2SEncoder(nn.Module):
    """Encoder front-end of the text-to-semantic model.

    Combines the T2S model's ``onnx_encoder`` with the VITS latent extractor
    so that both can be traced as a single module.
    """

    def __init__(self, t2s, vits):
        super().__init__()
        self.encoder = t2s.onnx_encoder
        self.vits = vits

    def forward(self, ref_seq, text_seq, ref_bert, text_bert, ssl_content):
        """Encode reference + target text and extract the prompt codes.

        Returns:
            ``(encoder_output, prompt)`` where ``prompt`` is the ``[0, 0]``
            slice of the extracted latent codes with a leading dim added.
        """
        prompt = self.vits.extract_latent(ssl_content)[0, 0].unsqueeze(0)
        # Reference first, target second, for both phoneme ids and BERT
        # features; the BERT features are transposed before concatenation.
        phoneme_ids = torch.cat([ref_seq, text_seq], 1)
        bert_features = torch.cat(
            [ref_bert.transpose(0, 1), text_bert.transpose(0, 1)], 1
        ).unsqueeze(0)
        return self.encoder(phoneme_ids, bert_features), prompt
85
+
86
+
87
class T2SModel(nn.Module):
    """Text-to-semantic model wrapper for eager inference and ONNX export.

    Loads a checkpoint, prepares the underlying model for ONNX via
    ``init_onnx()``, and exposes its three stages: encoder, first-stage
    decoder and per-step stage decoder.
    """

    def __init__(self, t2s_path, vits_model):
        super().__init__()
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        dict_s1 = torch.load(t2s_path, map_location="cpu")
        self.config = dict_s1["config"]
        self.t2s_model = Text2SemanticLightningModule(self.config, "ojbk", is_train=False)
        self.t2s_model.load_state_dict(dict_s1["weight"])
        self.t2s_model.eval()
        self.vits_model = vits_model.vq_model
        self.hz = 50
        self.max_sec = self.config["data"]["max_sec"]
        self.t2s_model.model.top_k = torch.LongTensor([self.config["inference"]["top_k"]])
        # Early-stop threshold in decoded tokens: hz * max_sec.
        self.t2s_model.model.early_stop_num = torch.LongTensor([self.hz * self.max_sec])
        # From here on self.t2s_model is the inner model, not the Lightning wrapper.
        self.t2s_model = self.t2s_model.model
        self.t2s_model.init_onnx()
        self.onnx_encoder = T2SEncoder(self.t2s_model, self.vits_model)
        self.first_stage_decoder = self.t2s_model.first_stage_decoder
        self.stage_decoder = self.t2s_model.stage_decoder
        #self.t2s_model = torch.jit.script(self.t2s_model)

    def forward(self, ref_seq, text_seq, ref_bert, text_bert, ssl_content):
        """Decode semantic tokens step by step.

        Runs the encoder and first-stage decoder once, then calls the stage
        decoder for at most 1499 steps, stopping early on EOS or when the
        early-stop length is exceeded. Returns the last ``idx`` entries of
        ``y`` with an extra leading dimension.
        """
        early_stop_num = self.t2s_model.early_stop_num

        #[1,N] [1,N] [N, 1024] [N, 1024] [1, 768, N]
        x, prompts = self.onnx_encoder(ref_seq, text_seq, ref_bert, text_bert, ssl_content)

        prefix_len = prompts.shape[1]

        #[1,N,512] [1,N]
        y, k, v, y_emb, x_example = self.first_stage_decoder(x, prompts)

        stop = False
        for idx in range(1, 1500):
            #[1, N] [N_layer, N, 1, 512] [N_layer, N, 1, 512] [1, N, 512] [1] [1, N, 512] [1, N]
            enco = self.stage_decoder(y, k, v, y_emb, x_example)
            y, k, v, y_emb, logits, samples = enco
            # Stop once more tokens than early_stop_num were generated past the prompt.
            if early_stop_num != -1 and (y.shape[1] - prefix_len) > early_stop_num:
                stop = True
            # Stop when either the greedy choice or the sampled token is EOS.
            if torch.argmax(logits, dim=-1)[0] == self.t2s_model.EOS or samples[0, 0] == self.t2s_model.EOS:
                stop = True
            if stop:
                break
        # Overwrite the final position (the EOS token when decoding stopped on EOS).
        y[0, -1] = 0

        return y[:, -idx:].unsqueeze(0)

    def export(self, ref_seq, text_seq, ref_bert, text_bert, ssl_content, project_name, dynamo=False):
        """Export the three stages to ``onnx/{project_name}/``.

        With ``dynamo=True`` only the encoder is exported (through
        ``torch.onnx.dynamo_export``) and the method returns. Otherwise each
        stage is exported and then run eagerly to produce the example inputs
        for the next stage.
        """
        #self.onnx_encoder = torch.jit.script(self.onnx_encoder)
        if dynamo:
            export_options = torch.onnx.ExportOptions(dynamic_shapes=True)
            onnx_encoder_export_output = torch.onnx.dynamo_export(
                self.onnx_encoder,
                (ref_seq, text_seq, ref_bert, text_bert, ssl_content),
                export_options=export_options
            )
            onnx_encoder_export_output.save(f"onnx/{project_name}/{project_name}_t2s_encoder.onnx")
            return

        torch.onnx.export(
            self.onnx_encoder,
            (ref_seq, text_seq, ref_bert, text_bert, ssl_content),
            f"onnx/{project_name}/{project_name}_t2s_encoder.onnx",
            input_names=["ref_seq", "text_seq", "ref_bert", "text_bert", "ssl_content"],
            output_names=["x", "prompts"],
            dynamic_axes={
                "ref_seq": {1 : "ref_length"},
                "text_seq": {1 : "text_length"},
                "ref_bert": {0 : "ref_length"},
                "text_bert": {0 : "text_length"},
                "ssl_content": {2 : "ssl_length"},
            },
            opset_version=16
        )
        # Run the encoder eagerly to obtain example inputs for the next export.
        x, prompts = self.onnx_encoder(ref_seq, text_seq, ref_bert, text_bert, ssl_content)

        torch.onnx.export(
            self.first_stage_decoder,
            (x, prompts),
            f"onnx/{project_name}/{project_name}_t2s_fsdec.onnx",
            input_names=["x", "prompts"],
            output_names=["y", "k", "v", "y_emb", "x_example"],
            dynamic_axes={
                "x": {1 : "x_length"},
                "prompts": {1 : "prompts_length"},
            },
            verbose=False,
            opset_version=16
        )
        # Same idea: the first-stage outputs are the stage decoder's example inputs.
        y, k, v, y_emb, x_example = self.first_stage_decoder(x, prompts)

        torch.onnx.export(
            self.stage_decoder,
            (y, k, v, y_emb, x_example),
            f"onnx/{project_name}/{project_name}_t2s_sdec.onnx",
            input_names=["iy", "ik", "iv", "iy_emb", "ix_example"],
            output_names=["y", "k", "v", "y_emb", "logits", "samples"],
            dynamic_axes={
                "iy": {1 : "iy_length"},
                "ik": {1 : "ik_length"},
                "iv": {1 : "iv_length"},
                "iy_emb": {1 : "iy_emb_length"},
                "ix_example": {1 : "ix_example_length"},
            },
            verbose=False,
            opset_version=16
        )
193
+
194
+
195
class VitsModel(nn.Module):
    """SoVITS synthesizer loaded from a checkpoint, taking a raw reference waveform."""

    def __init__(self, vits_path):
        super().__init__()
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(vits_path,map_location="cpu")
        self.hps = checkpoint["config"]
        # The model version is inferred from the text-embedding row count:
        # 322 rows means v1, anything else is treated as v2.
        embedding_rows = checkpoint['weight']['enc_p.text_embedding.weight'].shape[0]
        self.hps["model"]["version"] = "v1" if embedding_rows == 322 else "v2"

        self.hps = DictToAttrRecursive(self.hps)
        self.hps.model.semantic_frame_rate = "25hz"
        n_freq_bins = self.hps.data.filter_length // 2 + 1
        frames_per_segment = self.hps.train.segment_size // self.hps.data.hop_length
        self.vq_model = SynthesizerTrn(
            n_freq_bins,
            frames_per_segment,
            n_speakers=self.hps.data.n_speakers,
            **self.hps.model
        )
        self.vq_model.eval()
        # strict=False: checkpoint keys need not match the module exactly.
        self.vq_model.load_state_dict(checkpoint["weight"], strict=False)

    def forward(self, text_seq, pred_semantic, ref_audio):
        """Synthesize audio; returns the ``[0, 0]`` slice of the model output."""
        data_cfg = self.hps.data
        reference_spec = spectrogram_torch(
            ref_audio,
            data_cfg.filter_length,
            data_cfg.sampling_rate,
            data_cfg.hop_length,
            data_cfg.win_length,
            center=False
        )
        return self.vq_model(pred_semantic, text_seq, reference_spec)[0, 0]
226
+
227
+
228
class GptSoVits(nn.Module):
    """End-to-end pipeline: text-to-semantic stage followed by the vocoder stage."""

    def __init__(self, vits, t2s):
        super().__init__()
        self.vits = vits
        self.t2s = t2s

    def forward(self, ref_seq, text_seq, ref_bert, text_bert, ref_audio, ssl_content, debug=False,
                debug_onnx_path="onnx/koharu/koharu_vits.onnx"):
        """Synthesize audio, optionally cross-checking against an exported ONNX model.

        Args:
            ref_seq, text_seq, ref_bert, text_bert, ssl_content: Inputs
                forwarded to the text-to-semantic stage.
            ref_audio: Reference waveform forwarded to the vocoder stage.
            debug: When true, also run the exported VITS ONNX model on the
                same inputs with onnxruntime.
            debug_onnx_path: ONNX file used for the debug comparison. The
                default keeps the path that used to be hard-coded.

        Returns:
            ``audio`` alone, or ``(audio, onnx_outputs)`` when ``debug`` is set.
        """
        pred_semantic = self.t2s(ref_seq, text_seq, ref_bert, text_bert, ssl_content)
        audio = self.vits(text_seq, pred_semantic, ref_audio)
        if debug:
            import onnxruntime
            # "CPUExecutionProvider" is the registered provider name; the
            # previous "CPU" is not a valid execution provider.
            sess = onnxruntime.InferenceSession(debug_onnx_path, providers=["CPUExecutionProvider"])
            audio1 = sess.run(None, {
                "text_seq" : text_seq.detach().cpu().numpy(),
                "pred_semantic" : pred_semantic.detach().cpu().numpy(),
                "ref_audio" : ref_audio.detach().cpu().numpy()
            })
            return audio, audio1
        return audio

    def export(self, ref_seq, text_seq, ref_bert, text_bert, ref_audio, ssl_content, project_name):
        """Export the T2S stages, then the vocoder, to ``onnx/{project_name}/``."""
        self.t2s.export(ref_seq, text_seq, ref_bert, text_bert, ssl_content, project_name)
        # A real semantic prediction is needed as the example input for tracing the vocoder.
        pred_semantic = self.t2s(ref_seq, text_seq, ref_bert, text_bert, ssl_content)
        torch.onnx.export(
            self.vits,
            (text_seq, pred_semantic, ref_audio),
            f"onnx/{project_name}/{project_name}_vits.onnx",
            input_names=["text_seq", "pred_semantic", "ref_audio"],
            output_names=["audio"],
            dynamic_axes={
                "text_seq": {1 : "text_length"},
                "pred_semantic": {2 : "pred_length"},
                "ref_audio": {1 : "audio_length"},
            },
            opset_version=17,
            verbose=False
        )
265
+
266
+
267
class SSLModel(nn.Module):
    """Thin module around the module-level ``ssl_model`` feature extractor."""

    def __init__(self):
        super().__init__()
        self.ssl = ssl_model

    def forward(self, ref_audio_16k):
        """Return the model's ``last_hidden_state`` with dims 1 and 2 swapped."""
        hidden = self.ssl.model(ref_audio_16k)["last_hidden_state"]
        return hidden.transpose(1, 2)
274
+
275
+
276
def export(vits_path, gpt_path, project_name, vits_model="v2"):
    """Build the full pipeline from two checkpoints, run it on dummy inputs and write a config.

    Args:
        vits_path: SoVITS checkpoint path.
        gpt_path: GPT (text-to-semantic) checkpoint path.
        project_name: Name used for the ``onnx/{project_name}`` folder and
            the ``onnx/{project_name}.json`` config file.
        vits_model: Symbol-set version (``"v1"`` or anything else for v2)
            used to convert the example phonemes.

    Side effects:
        Creates ``onnx/{project_name}``, writes ``out1.wav``/``out2.wav`` (or
        ``out.wav`` when ``debug`` is off) into the working directory, and
        writes the JSON config.
    """
    vits = VitsModel(vits_path)
    gpt = T2SModel(gpt_path, vits)
    gpt_sovits = GptSoVits(vits, gpt)
    ssl = SSLModel()
    ref_seq = torch.LongTensor([cleaned_text_to_sequence(["n", "i2", "h", "ao3", ",", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4"],version=vits_model)])
    text_seq = torch.LongTensor([cleaned_text_to_sequence(["w", "o3", "sh", "i4", "b", "ai2", "y", "e4", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4", "w", "o3", "sh", "i4", "b", "ai2", "y", "e4"],version=vits_model)])
    # Random stand-ins for the BERT features and a 5-second 48 kHz reference clip.
    ref_bert = torch.randn((ref_seq.shape[1], 1024)).float()
    text_bert = torch.randn((text_seq.shape[1], 1024)).float()
    ref_audio = torch.randn((1, 48000 * 5)).float()
    # ref_audio = torch.tensor([load_audio("rec.wav", 48000)]).float()
    ref_audio_16k = torchaudio.functional.resample(ref_audio,48000,16000).float()
    ref_audio_sr = torchaudio.functional.resample(ref_audio,48000,vits.hps.data.sampling_rate).float()

    # exist_ok replaces the former bare "except: pass" around os.mkdir, which
    # also hid real failures (permissions, missing parent folder). makedirs
    # additionally creates the parent "onnx" folder when export() is called
    # from other code.
    os.makedirs(f"onnx/{project_name}", exist_ok=True)

    ssl_content = ssl(ref_audio_16k).float()

    # NOTE(review): as committed, the actual ONNX export call below is
    # commented out and debug mode is forced on; debug mode expects an
    # already-exported VITS ONNX file to exist.
    # debug = False
    debug = True

    # gpt_sovits.export(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content, project_name)

    if debug:
        a, b = gpt_sovits(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content, debug=debug)
        soundfile.write("out1.wav", a.cpu().detach().numpy(), vits.hps.data.sampling_rate)
        soundfile.write("out2.wav", b[0], vits.hps.data.sampling_rate)
    else:
        a = gpt_sovits(ref_seq, text_seq, ref_bert, text_bert, ref_audio_sr, ssl_content).detach().cpu().numpy()
        soundfile.write("out.wav", a, vits.hps.data.sampling_rate)

    # Only referenced by the disabled "Symbol" entry below.
    if vits_model == "v1":
        symbols = symbols_v1
    else:
        symbols = symbols_v2

    MoeVSConf = {
        "Folder": f"{project_name}",
        "Name": f"{project_name}",
        "Type": "GPT-SoVits",
        "Rate": vits.hps.data.sampling_rate,
        "NumLayers": gpt.t2s_model.num_layers,
        "EmbeddingDim": gpt.t2s_model.embedding_dim,
        "Dict": "BasicDict",
        "BertPath": "chinese-roberta-wwm-ext-large",
        # "Symbol": symbols,
        "AddBlank": False,
    }

    with open(f"onnx/{project_name}.json", 'w') as MoeVsConfFile:
        json.dump(MoeVSConf, MoeVsConfFile, indent = 4)
331
+
332
+
333
if __name__ == "__main__":
    # exist_ok handles the "already there" case without a bare except that
    # would also swallow real errors such as missing permissions.
    os.makedirs("onnx", exist_ok=True)

    gpt_path = "GPT_weights/nahida-e25.ckpt"
    vits_path = "SoVITS_weights/nahida_e30_s3930.pth"
    exp_path = "nahida"
    export(vits_path, gpt_path, exp_path)
343
+
344
+ # soundfile.write("out.wav", a, vits.hps.data.sampling_rate)
GPT_SoVITS/prepare_data.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import torch
4
+ import torchaudio
5
+ from pathlib import Path
6
+
7
def prepare_data_stage2(data_dir="data8", exp_dir="logs/s2"):
    """Lay out a dataset for stage-2 training.

    Both directories are resolved against the parent of this file's folder.
    The function creates the experiment folders, converts
    ``<data_dir>/phoneme.txt`` (``path|text`` lines) into
    ``<exp_dir>/2-name2text.txt``, and writes every ``.wav`` under
    ``<data_dir>/wavs`` to ``<exp_dir>/5-wav32k`` at 32 kHz.

    Args:
        data_dir: Dataset folder, relative to the project root.
        exp_dir: Experiment output folder, relative to the project root.
    """
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    data_dir = os.path.join(project_root, data_dir)
    exp_dir = os.path.join(project_root, exp_dir)

    print(f"Data directory: {data_dir}")
    print(f"Experiment directory: {exp_dir}")

    wav32k_dir = os.path.join(exp_dir, "5-wav32k")
    for folder in (exp_dir, os.path.join(exp_dir, "4-cnhubert"), wav32k_dir):
        os.makedirs(folder, exist_ok=True)

    phoneme_path = os.path.join(data_dir, "phoneme.txt")
    name2text_path = os.path.join(exp_dir, "2-name2text.txt")

    print(f"Reading phoneme data from: {phoneme_path}")
    print(f"Writing text data to: {name2text_path}")

    with open(phoneme_path, "r", encoding="utf8") as source, \
         open(name2text_path, "w", encoding="utf8") as sink:
        for raw_line in source:
            fields = raw_line.strip().split("|")
            if len(fields) < 2:
                # Lines without a "|" separator are skipped.
                continue
            wav_name = os.path.basename(fields[0])
            text = fields[1]
            # Output columns: wav_name, text, speaker id (always 0), language.
            sink.write(f"{wav_name}\t{text}\t0\tHindi\n")

    wav_dir = os.path.join(data_dir, "wavs")

    print(f"Processing wav files from: {wav_dir}")
    print(f"Saving to: {wav32k_dir}")

    for wav_file in os.listdir(wav_dir):
        if not wav_file.endswith(".wav"):
            continue
        waveform, sr = torchaudio.load(os.path.join(wav_dir, wav_file))
        if sr != 32000:
            waveform = torchaudio.transforms.Resample(sr, 32000)(waveform)
        torchaudio.save(os.path.join(wav32k_dir, wav_file), waveform, 32000)

    print("Data preparation complete. Please run the Hubert feature extraction before training.")
64
+
65
+ if __name__ == "__main__":
66
+ prepare_data_stage2()
GPT_SoVITS/pretrained_models/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore
GPT_SoVITS/pretrained_models/README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ pipeline_tag: text-to-speech
4
+ ---
5
+ pretrained models used in https://github.com/RVC-Boss/GPT-SoVITS
GPT_SoVITS/process_ckpt.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ from collections import OrderedDict
3
+ from time import time as ttime
4
+ import shutil,os
5
+ import torch
6
+ from tools.i18n.i18n import I18nAuto
7
+
8
+ i18n = I18nAuto()
9
+
10
def my_save(fea,path):
    """Save ``fea`` with ``torch.save`` and move the result to ``path``.

    Works around ``torch.save`` failing on non-ASCII (e.g. Chinese) paths:
    the object is first written to a timestamp-named file in the current
    working directory, then moved to the requested location.

    Args:
        fea: Any object ``torch.save`` can serialize.
        path: Final destination of the saved file.
    """
    directory = os.path.dirname(path)
    name = os.path.basename(path)
    tmp_path = "%s.pth" % (ttime())
    torch.save(fea, tmp_path)
    # os.path.join keeps a bare filename relative; the former "%s/%s"
    # formatting turned an empty directory into "/name", i.e. the filesystem
    # root.
    shutil.move(tmp_path, os.path.join(directory, name))
16
+
17
def savee(ckpt, name, epoch, steps, hps):
    """Write a half-precision inference checkpoint.

    Every entry of ``ckpt`` whose key does not contain ``"enc_q"`` is stored
    as ``.half()`` under ``"weight"``, together with ``hps`` as ``"config"``
    and an epoch/iteration info string. The file goes to
    ``{hps.save_weight_dir}/{name}.pth``.

    Returns:
        ``"Success."`` on success, otherwise the formatted traceback of the
        failure (errors are reported through the return value, not raised).
    """
    try:
        opt = OrderedDict()
        opt["weight"] = {}
        for key in ckpt.keys():
            if "enc_q" in key:
                continue
            opt["weight"][key] = ckpt[key].half()
        opt["config"] = hps
        opt["info"] = "%sepoch_%siteration" % (epoch, steps)
        my_save(opt, "%s/%s.pth" % (hps.save_weight_dir, name))
        return "Success."
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate and can stop the training run.
        return traceback.format_exc()
GPT_SoVITS/s1_train.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/train_t2s.py
2
+ import os
3
+ import pdb
4
+ import logging
5
+ import argparse
6
+ from pathlib import Path
7
+ import torch, platform
8
+ from pytorch_lightning import seed_everything
9
+ from pytorch_lightning import Trainer
10
+ from pytorch_lightning.callbacks import ModelCheckpoint
11
+ from pytorch_lightning.strategies import DDPStrategy
12
+ from AR.data.data_module import Text2SemanticDataModule
13
+ from AR.models.t2s_lightning_module import Text2SemanticLightningModule
14
+ from AR.utils.io import load_yaml_config
15
+ from GPT_SoVITS.utils.wandb_logger import WandbLoggerWithConfig
16
+
17
+ logging.getLogger("numba").setLevel(logging.WARNING)
18
+ logging.getLogger("matplotlib").setLevel(logging.WARNING)
19
+ torch.set_float32_matmul_precision("high")
20
+
21
def my_model_ckpt(
    config,
    if_save_latest,
    if_save_every_weights,
    half_weights_save_dir,
    exp_name,
    **kwargs,
):
    """Build the ``ModelCheckpoint`` callback for stage-1 training.

    Args:
        config: Accepted for interface compatibility; not used here.
        if_save_latest: When truthy, ``save_last=True`` is passed on.
        if_save_every_weights: Accepted but not used here.
        half_weights_save_dir: Accepted but not used here.
        exp_name: Prefix of the checkpoint file name.
        **kwargs: Forwarded unchanged to ``ModelCheckpoint``.

    Returns:
        The configured ``ModelCheckpoint`` instance.
    """
    if if_save_latest:
        kwargs["save_last"] = True
    return ModelCheckpoint(
        **kwargs,
        filename=exp_name + "_{epoch}-{step}",
    )
39
+
40
def main(args):
    """Train the stage-1 (text-to-semantic) model described by a YAML config.

    Args:
        args: Parsed CLI namespace; only ``args.config_file`` is read.
    """
    # The original called wandb.finish() without ever importing wandb, which
    # raised NameError once training had finished. It is imported here, at
    # the start, so a missing installation fails before training begins.
    import wandb

    config = load_yaml_config(args.config_file)

    output_dir = Path(config["output_dir"])
    output_dir.mkdir(parents=True, exist_ok=True)

    ckpt_dir = output_dir / "ckpt"
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    seed_everything(config["train"]["seed"], workers=True)

    # Initialize wandb logger
    wandb_logger = WandbLoggerWithConfig(config=config)

    ckpt_callback = my_model_ckpt(
        config=config,
        if_save_latest=config["train"]["if_save_latest"],
        if_save_every_weights=config["train"]["if_save_every_weights"],
        half_weights_save_dir=config["train"]["half_weights_save_dir"],
        exp_name=config["train"]["exp_name"],
        save_top_k=-1,
        monitor="loss",
        mode="min",
        save_on_train_epoch_end=True,
        every_n_epochs=config["train"]["save_every_n_epoch"],
        dirpath=ckpt_dir,
    )

    # Create data module
    data_module = Text2SemanticDataModule(
        config=config,
        train_semantic_path=config.get("train_semantic_path", ""),
        train_phoneme_path=config.get("train_phoneme_path", "")
    )

    # Initialize model with correct parameters
    model = Text2SemanticLightningModule(
        config=config,
        output_dir=output_dir,
        is_train=True
    )

    # Watch the model in wandb
    wandb_logger.watch_model(model)

    cuda_available = torch.cuda.is_available()
    trainer = Trainer(
        max_epochs=config["train"]["epochs"],
        accelerator="gpu" if cuda_available else "cpu",
        devices=-1 if cuda_available else 1,
        benchmark=False,
        fast_dev_run=False,
        # DDP only on GPU; gloo replaces nccl on Windows.
        strategy=DDPStrategy(
            process_group_backend="nccl" if platform.system() != "Windows" else "gloo"
        ) if cuda_available else "auto",
        precision=config["train"]["precision"],
        logger=wandb_logger,
        callbacks=[ckpt_callback],
        use_distributed_sampler=False,
    )

    try:
        trainer.fit(model, data_module)
    finally:
        # Close the wandb run even when training aborts with an exception.
        wandb.finish()
102
+
103
+ if __name__ == "__main__":
104
+ parser = argparse.ArgumentParser()
105
+ parser.add_argument(
106
+ "-c",
107
+ "--config_file",
108
+ type=str,
109
+ default="configs/s1.yaml",
110
+ help="path of config file",
111
+ )
112
+ args = parser.parse_args()
113
+ main(args)
GPT_SoVITS/s2_train.py ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore")
3
+ import utils
4
+ from utils import get_hparams
5
+ import os
6
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
7
+ hps = get_hparams(stage=2)
8
+
9
+ # Set GPU device - use default if not specified in config
10
+ if hasattr(hps.train, 'gpu_numbers'):
11
+ os.environ["CUDA_VISIBLE_DEVICES"] = hps.train.gpu_numbers.replace("-", ",")
12
+ else:
13
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Default to first GPU
14
+
15
+ import torch
16
+ from torch.nn import functional as F
17
+ from torch.utils.data import DataLoader
18
+ from torch.utils.tensorboard import SummaryWriter
19
+ import torch.multiprocessing as mp
20
+ import torch.distributed as dist, traceback
21
+ from torch.nn.parallel import DistributedDataParallel as DDP
22
+ from torch.cuda.amp import autocast, GradScaler
23
+ from tqdm import tqdm
24
+ import logging, traceback
25
+
26
+ logging.getLogger("matplotlib").setLevel(logging.INFO)
27
+ logging.getLogger("h5py").setLevel(logging.INFO)
28
+ logging.getLogger("numba").setLevel(logging.INFO)
29
+ from random import randint
30
+ from module import commons
31
+
32
+ from module.data_utils import (
33
+ TextAudioSpeakerLoader,
34
+ TextAudioSpeakerCollate,
35
+ DistributedBucketSampler,
36
+ )
37
+ from module.models import (
38
+ SynthesizerTrn,
39
+ MultiPeriodDiscriminator,
40
+ )
41
+ from module.losses import generator_loss, discriminator_loss, feature_loss, kl_loss
42
+ from module.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
43
+ from process_ckpt import savee
44
+
45
+ torch.backends.cudnn.benchmark = False
46
+ torch.backends.cudnn.deterministic = False
47
+ ###反正A100fp32更快,那试试tf32吧
48
+ torch.backends.cuda.matmul.allow_tf32 = True
49
+ torch.backends.cudnn.allow_tf32 = True
50
+ torch.set_float32_matmul_precision("medium") # 最低精度但最快(也就快一丁点),对于结果造成不了影响
51
+ # from config import pretrained_s2G,pretrained_s2D
52
+ global_step = 0
53
+
54
+ device = "cpu" # cuda以外的设备,等mps优化后加入
55
+
56
+
57
def main():
    """Spawn one training process per visible GPU (a single process without CUDA).

    Sets ``MASTER_ADDR``/``MASTER_PORT`` for the ``env://`` rendezvous, using
    a random port between 20000 and 55555, then launches ``run`` through
    ``torch.multiprocessing.spawn``.
    """
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 1
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = str(randint(20000, 55555))

    # mp.spawn passes the process rank as the first argument of run().
    mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps))
74
+
75
+
76
def run(rank, n_gpus, hps):
    """Run stage-2 training inside one worker process.

    Args:
        rank: Index of this worker; also used as its CUDA device index.
        n_gpus: Number of worker processes (passed as the world size).
        hps: Hyper-parameter object; its ``data``, ``train``, ``model`` and
            ``s2_ckpt_dir`` attributes are read here.
    """
    global global_step
    use_cuda = torch.cuda.is_available()

    # Only rank 0 owns a logger and TensorBoard writers; other ranks pass None.
    logger = None
    writers = None
    if rank == 0:
        logger = utils.get_logger(hps.data.exp_dir)
        logger.info(hps)
        writer = SummaryWriter(log_dir=hps.s2_ckpt_dir)
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
        writers = [writer, writer_eval]

    dist.init_process_group(
        # gloo on Windows or when CUDA is unavailable, nccl otherwise.
        backend="gloo" if os.name == "nt" or not use_cuda else "nccl",
        init_method="env://",
        world_size=n_gpus,
        rank=rank,
    )
    torch.manual_seed(hps.train.seed)
    if use_cuda:
        torch.cuda.set_device(rank)

    train_dataset = TextAudioSpeakerLoader(hps.data)
    train_sampler = DistributedBucketSampler(
        train_dataset,
        hps.train.batch_size,
        # Bucket boundaries: 32, then 300..1900 in steps of 100.
        [32, *range(300, 2000, 100)],
        num_replicas=n_gpus,
        rank=rank,
        shuffle=True,
    )
    collate_fn = TextAudioSpeakerCollate()
    train_loader = DataLoader(
        train_dataset,
        num_workers=6,
        shuffle=False,
        pin_memory=True,
        collate_fn=collate_fn,
        batch_sampler=train_sampler,
        persistent_workers=True,
        prefetch_factor=4,
    )

    # Build each network once, then place it on the right device.
    net_g = SynthesizerTrn(
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model,
    )
    net_g = net_g.cuda(rank) if use_cuda else net_g.to(device)
    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
    net_d = net_d.cuda(rank) if use_cuda else net_d.to(device)

    for name, param in net_g.named_parameters():
        if not param.requires_grad:
            print(name, "not requires_grad")

    # Parameters of these three sub-modules get a scaled-down learning rate;
    # collect their ids in a set so the filter below is O(1) per parameter.
    low_lr_modules = (
        net_g.enc_p.text_embedding,
        net_g.enc_p.encoder_text,
        net_g.enc_p.mrte,
    )
    low_lr_ids = set()
    for module in low_lr_modules:
        low_lr_ids.update(map(id, module.parameters()))
    base_params = filter(
        lambda p: id(p) not in low_lr_ids and p.requires_grad,
        net_g.parameters(),
    )
    low_lr = hps.train.learning_rate * hps.train.text_low_lr_rate

    optim_g = torch.optim.AdamW(
        [
            {"params": base_params, "lr": hps.train.learning_rate},
            {"params": net_g.enc_p.text_embedding.parameters(), "lr": low_lr},
            {"params": net_g.enc_p.encoder_text.parameters(), "lr": low_lr},
            {"params": net_g.enc_p.mrte.parameters(), "lr": low_lr},
        ],
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps,
    )
    optim_d = torch.optim.AdamW(
        net_d.parameters(),
        hps.train.learning_rate,
        betas=hps.train.betas,
        eps=hps.train.eps,
    )
    if use_cuda:
        net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
        net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
    else:
        net_g = net_g.to(device)
        net_d = net_d.to(device)

    ckpt_dir = "%s/logs_s2" % hps.data.exp_dir
    try:
        # Resume automatically when both D and G checkpoints can be loaded.
        _, _, _, epoch_str = utils.load_checkpoint(
            utils.latest_checkpoint_path(ckpt_dir, "D_*.pth"),
            net_d,
            optim_d,
        )
        if rank == 0:
            logger.info("loaded D")
        _, _, _, epoch_str = utils.load_checkpoint(
            utils.latest_checkpoint_path(ckpt_dir, "G_*.pth"),
            net_g,
            optim_g,
        )
        global_step = (epoch_str - 1) * len(train_loader)
    except Exception:
        # Deliberate best-effort: any failure to resume (e.g. first run, no
        # checkpoint yet) starts from epoch 1 with the pretrained weights.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit still propagate.
        epoch_str = 1
        global_step = 0

        pretrained_g = hps.train.pretrained_s2G
        if (
            pretrained_g != ""
            and pretrained_g is not None
            and os.path.exists(pretrained_g)
        ):
            if rank == 0:
                logger.info("loaded pretrained %s" % pretrained_g)
            # Under CUDA the model is DDP-wrapped, so load into `.module`.
            target_g = net_g.module if use_cuda else net_g
            print(
                target_g.load_state_dict(
                    torch.load(pretrained_g, map_location="cpu")["weight"],
                    strict=False,
                )
            )  # optimizer state is intentionally not loaded

        pretrained_d = hps.train.pretrained_s2D
        if (
            pretrained_d != ""
            and pretrained_d is not None
            and os.path.exists(pretrained_d)
        ):
            if rank == 0:
                logger.info("loaded pretrained %s" % pretrained_d)
            target_d = net_d.module if use_cuda else net_d
            print(
                target_d.load_state_dict(
                    torch.load(pretrained_d, map_location="cpu")["weight"]
                )
            )

    scheduler_g = torch.optim.lr_scheduler.ExponentialLR(
        optim_g, gamma=hps.train.lr_decay, last_epoch=-1
    )
    scheduler_d = torch.optim.lr_scheduler.ExponentialLR(
        optim_d, gamma=hps.train.lr_decay, last_epoch=-1
    )
    # Fast-forward both schedulers by the starting epoch number.
    for _ in range(epoch_str):
        scheduler_g.step()
        scheduler_d.step()

    scaler = GradScaler(enabled=hps.train.fp16_run)

    for epoch in range(epoch_str, hps.train.epochs + 1):
        train_and_evaluate(
            rank,
            epoch,
            hps,
            [net_g, net_d],
            [optim_g, optim_d],
            [scheduler_g, scheduler_d],
            scaler,
            [train_loader, None],  # no evaluation loader is built
            logger,
            writers,
        )
        scheduler_g.step()
        scheduler_d.step()
292
+
293
+
294
def train_and_evaluate(
    rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers
):
    """Train for one epoch; on rank 0 also log and save checkpoints.

    Args:
        rank: Worker index; used as the CUDA device index and to gate
            logging and saving to rank 0.
        epoch: Current epoch number.
        hps: Hyper-parameter object (``train``, ``data`` and ``name`` are read).
        nets: ``[net_g, net_d]``.
        optims: ``[optim_g, optim_d]``.
        schedulers: Not used in this function.
        scaler: Gradient scaler used for both optimizers.
        loaders: ``[train_loader, eval_loader]``; only the first is iterated.
        logger: Logger used when ``rank == 0``.
        writers: ``[writer, writer_eval]`` or ``None``.
    """
    global global_step

    generator, discriminator = nets
    opt_g, opt_d = optims
    train_loader, eval_loader = loaders
    if writers is not None:
        writer, writer_eval = writers

    train_loader.batch_sampler.set_epoch(epoch)

    generator.train()
    discriminator.train()

    for batch_idx, batch in enumerate(tqdm(train_loader)):
        (
            ssl,
            ssl_lengths,
            spec,
            spec_lengths,
            y,
            y_lengths,
            text,
            text_lengths,
        ) = batch

        # Move the batch to this worker's device (ssl_lengths is left as is).
        if torch.cuda.is_available():
            spec = spec.cuda(rank, non_blocking=True)
            spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
            y = y.cuda(rank, non_blocking=True)
            y_lengths = y_lengths.cuda(rank, non_blocking=True)
            ssl = ssl.cuda(rank, non_blocking=True)
            ssl.requires_grad = False
            text = text.cuda(rank, non_blocking=True)
            text_lengths = text_lengths.cuda(rank, non_blocking=True)
        else:
            spec = spec.to(device)
            spec_lengths = spec_lengths.to(device)
            y = y.to(device)
            y_lengths = y_lengths.to(device)
            ssl = ssl.to(device)
            ssl.requires_grad = False
            text = text.to(device)
            text_lengths = text_lengths.to(device)

        with autocast(enabled=hps.train.fp16_run):
            (
                y_hat,
                kl_ssl,
                ids_slice,
                x_mask,
                z_mask,
                (z, z_p, m_p, logs_p, m_q, logs_q),
                stats_ssl,
            ) = generator(ssl, spec, spec_lengths, text, text_lengths)

            mel = spec_to_mel_torch(
                spec,
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.mel_fmin,
                hps.data.mel_fmax,
            )
            y_mel = commons.slice_segments(
                mel, ids_slice, hps.train.segment_size // hps.data.hop_length
            )
            y_hat_mel = mel_spectrogram_torch(
                y_hat.squeeze(1),
                hps.data.filter_length,
                hps.data.n_mel_channels,
                hps.data.sampling_rate,
                hps.data.hop_length,
                hps.data.win_length,
                hps.data.mel_fmin,
                hps.data.mel_fmax,
            )

            # Cut the waveform to the same slice the generator produced.
            y = commons.slice_segments(
                y, ids_slice * hps.data.hop_length, hps.train.segment_size
            )

            # Discriminator step: generator output is detached.
            y_d_hat_r, y_d_hat_g, _, _ = discriminator(y, y_hat.detach())
            with autocast(enabled=False):
                loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
                    y_d_hat_r, y_d_hat_g
                )
                loss_disc_all = loss_disc
        opt_d.zero_grad()
        scaler.scale(loss_disc_all).backward()
        scaler.unscale_(opt_d)
        grad_norm_d = commons.clip_grad_value_(discriminator.parameters(), None)
        scaler.step(opt_d)

        with autocast(enabled=hps.train.fp16_run):
            # Generator step.
            y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = discriminator(y, y_hat)
            with autocast(enabled=False):
                loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
                loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl

                loss_fm = feature_loss(fmap_r, fmap_g)
                loss_gen, losses_gen = generator_loss(y_d_hat_g)
                loss_gen_all = loss_gen + loss_fm + loss_mel + kl_ssl * 1 + loss_kl

        opt_g.zero_grad()
        scaler.scale(loss_gen_all).backward()
        scaler.unscale_(opt_g)
        grad_norm_g = commons.clip_grad_value_(generator.parameters(), None)
        scaler.step(opt_g)
        scaler.update()

        if rank == 0 and global_step % hps.train.log_interval == 0:
            lr = opt_g.param_groups[0]["lr"]
            losses = [loss_disc, loss_gen, loss_fm, loss_mel, kl_ssl, loss_kl]
            logger.info(
                "Train Epoch: {} [{:.0f}%]".format(
                    epoch, 100.0 * batch_idx / len(train_loader)
                )
            )
            logger.info([x.item() for x in losses] + [global_step, lr])

            scalar_dict = {
                "loss/g/total": loss_gen_all,
                "loss/d/total": loss_disc_all,
                "learning_rate": lr,
                "grad_norm_d": grad_norm_d,
                "grad_norm_g": grad_norm_g,
                "loss/g/fm": loss_fm,
                "loss/g/mel": loss_mel,
                "loss/g/kl_ssl": kl_ssl,
                "loss/g/kl": loss_kl,
            }
            image_sources = (
                ("slice/mel_org", y_mel),
                ("slice/mel_gen", y_hat_mel),
                ("all/mel", mel),
                ("all/stats_ssl", stats_ssl),
            )
            image_dict = {
                tag: utils.plot_spectrogram_to_numpy(tensor[0].data.cpu().numpy())
                for tag, tensor in image_sources
            }
            utils.summarize(
                writer=writer,
                global_step=global_step,
                images=image_dict,
                scalars=scalar_dict,
            )
        global_step += 1

    if epoch % hps.train.save_every_epoch == 0 and rank == 0:
        # Either keep one checkpoint per save (named by step) or keep
        # overwriting a single fixed-name "latest" checkpoint.
        if hps.train.if_save_latest == 0:
            ckpt_tag = global_step
        else:
            ckpt_tag = 233333333333
        for net, optim, name_fmt in (
            (generator, opt_g, "G_{}.pth"),
            (discriminator, opt_d, "D_{}.pth"),
        ):
            utils.save_checkpoint(
                net,
                optim,
                hps.train.learning_rate,
                epoch,
                os.path.join("%s/logs_s2" % hps.data.exp_dir, name_fmt.format(ckpt_tag)),
            )

        if rank == 0 and hps.train.if_save_every_weights == True:
            # Unwrap a wrapper exposing `.module` before exporting weights.
            if hasattr(generator, "module"):
                ckpt = generator.module.state_dict()
            else:
                ckpt = generator.state_dict()
            save_result = savee(
                ckpt,
                hps.name + "_e%s_s%s" % (epoch, global_step),
                epoch,
                global_step,
                hps,
            )
            logger.info("saving ckpt %s_e%s:%s" % (hps.name, epoch, save_result))

    if rank == 0:
        logger.info("====> Epoch: {}".format(epoch))
520
+
521
+
522
def evaluate(hps, generator, eval_loader, writer_eval):
    """Synthesize every evaluation batch and write mels/audio to TensorBoard.

    Args:
        hps: Hyper-parameter object; ``hps.data`` supplies the mel/STFT settings.
        generator: The generator network, optionally wrapped so that the real
            model is reachable as ``generator.module``.
        eval_loader: Iterable yielding 8-tuples
            ``(ssl, ssl_lengths, spec, spec_lengths, y, y_lengths, text, text_lengths)``.
        writer_eval: Summary writer handed to ``utils.summarize``.

    The generator is put in eval mode for the duration of the call and is
    always switched back to train mode, even if evaluation raises.
    """
    generator.eval()
    image_dict = {}
    audio_dict = {}
    print("Evaluating ...")
    use_cuda = torch.cuda.is_available()
    # Unwrap by attribute check (same approach as the checkpoint export in
    # train_and_evaluate) instead of assuming a wrapper whenever CUDA exists.
    infer_model = generator.module if hasattr(generator, "module") else generator
    try:
        with torch.no_grad():
            for batch_idx, (
                ssl,
                ssl_lengths,
                spec,
                spec_lengths,
                y,
                y_lengths,
                text,
                text_lengths,
            ) in enumerate(eval_loader):
                if use_cuda:
                    spec, spec_lengths = spec.cuda(), spec_lengths.cuda()
                    y, y_lengths = y.cuda(), y_lengths.cuda()
                    ssl = ssl.cuda()
                    text, text_lengths = text.cuda(), text_lengths.cuda()
                else:
                    spec, spec_lengths = spec.to(device), spec_lengths.to(device)
                    y, y_lengths = y.to(device), y_lengths.to(device)
                    ssl = ssl.to(device)
                    text, text_lengths = text.to(device), text_lengths.to(device)

                # The ground-truth mel does not depend on `test`: compute once.
                mel = spec_to_mel_torch(
                    spec,
                    hps.data.filter_length,
                    hps.data.n_mel_channels,
                    hps.data.sampling_rate,
                    hps.data.mel_fmin,
                    hps.data.mel_fmax,
                )

                for test in (0, 1):
                    y_hat, mask, *_ = infer_model.infer(
                        ssl, spec, spec_lengths, text, text_lengths, test=test
                    )
                    y_hat_lengths = mask.sum([1, 2]).long() * hps.data.hop_length

                    y_hat_mel = mel_spectrogram_torch(
                        y_hat.squeeze(1).float(),
                        hps.data.filter_length,
                        hps.data.n_mel_channels,
                        hps.data.sampling_rate,
                        hps.data.hop_length,
                        hps.data.win_length,
                        hps.data.mel_fmin,
                        hps.data.mel_fmax,
                    )
                    image_dict[f"gen/mel_{batch_idx}_{test}"] = (
                        utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy())
                    )
                    audio_dict[f"gen/audio_{batch_idx}_{test}"] = y_hat[
                        0, :, : y_hat_lengths[0]
                    ]

                image_dict[f"gt/mel_{batch_idx}"] = utils.plot_spectrogram_to_numpy(
                    mel[0].cpu().numpy()
                )
                audio_dict[f"gt/audio_{batch_idx}"] = y[0, :, : y_lengths[0]]

        utils.summarize(
            writer=writer_eval,
            global_step=global_step,
            images=image_dict,
            audios=audio_dict,
            audio_sampling_rate=hps.data.sampling_rate,
        )
    finally:
        generator.train()
607
+
608
+
609
+ if __name__ == "__main__":
610
+ main()
GPT_SoVITS/text/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ G2PWModel
2
+ __pycache__
3
+ *.zip
GPT_SoVITS/text/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import importlib
3
+
4
+ # Force reload the symbol modules to get updated symbols
5
+ from text import symbols as symbols_v1
6
+ from text import symbols2 as symbols_v2
7
+ importlib.reload(symbols_v1)
8
+ importlib.reload(symbols_v2)
9
+
10
+ _symbol_to_id_v1 = {s: i for i, s in enumerate(symbols_v1.symbols)}
11
+ _symbol_to_id_v2 = {s: i for i, s in enumerate(symbols_v2.symbols)}
12
+
13
def cleaned_text_to_sequence(cleaned_text, version=None):
    '''Convert a sequence of cleaned symbols to their integer IDs.
      Args:
        cleaned_text: iterable of symbols to look up
        version: "v1" selects the v1 symbol table; any other value selects v2.
          When None, the 'version' environment variable is used (default 'v2').
      Returns:
        List of integers corresponding to the symbols in cleaned_text
      Raises:
        KeyError: if a symbol is missing from the selected table
    '''
    if version is None:
        version = os.environ.get('version', 'v2')
    symbol_table = _symbol_to_id_v1 if version == "v1" else _symbol_to_id_v2
    return [symbol_table[symbol] for symbol in cleaned_text]
27
+
GPT_SoVITS/text/cantonese.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # reference: https://huggingface.co/spaces/Naozumi0512/Bert-VITS2-Cantonese-Yue/blob/main/text/chinese.py
2
+
3
+ import sys
4
+ import re
5
+ import cn2an
6
+
7
+ from pyjyutping import jyutping
8
+ from text.symbols import punctuation
9
+ from text.zh_normalization.text_normlization import TextNormalizer
10
+
11
+ normalizer = lambda x: cn2an.transform(x, "an2cn")
12
+
13
+ INITIALS = [
14
+ "aa",
15
+ "aai",
16
+ "aak",
17
+ "aap",
18
+ "aat",
19
+ "aau",
20
+ "ai",
21
+ "au",
22
+ "ap",
23
+ "at",
24
+ "ak",
25
+ "a",
26
+ "p",
27
+ "b",
28
+ "e",
29
+ "ts",
30
+ "t",
31
+ "dz",
32
+ "d",
33
+ "kw",
34
+ "k",
35
+ "gw",
36
+ "g",
37
+ "f",
38
+ "h",
39
+ "l",
40
+ "m",
41
+ "ng",
42
+ "n",
43
+ "s",
44
+ "y",
45
+ "w",
46
+ "c",
47
+ "z",
48
+ "j",
49
+ "ong",
50
+ "on",
51
+ "ou",
52
+ "oi",
53
+ "ok",
54
+ "o",
55
+ "uk",
56
+ "ung",
57
+ ]
58
+ INITIALS += ["sp", "spl", "spn", "sil"]
59
+
60
+
61
+ rep_map = {
62
+ ":": ",",
63
+ ";": ",",
64
+ ",": ",",
65
+ "。": ".",
66
+ "!": "!",
67
+ "?": "?",
68
+ "\n": ".",
69
+ "·": ",",
70
+ "、": ",",
71
+ "...": "…",
72
+ "$": ".",
73
+ "“": "'",
74
+ "”": "'",
75
+ '"': "'",
76
+ "‘": "'",
77
+ "’": "'",
78
+ "(": "'",
79
+ ")": "'",
80
+ "(": "'",
81
+ ")": "'",
82
+ "《": "'",
83
+ "》": "'",
84
+ "【": "'",
85
+ "】": "'",
86
+ "[": "'",
87
+ "]": "'",
88
+ "—": "-",
89
+ "~": "-",
90
+ "~": "-",
91
+ "「": "'",
92
+ "」": "'",
93
+ }
94
+
95
+
96
def replace_punctuation(text):
    """Normalize punctuation in *text* and drop every character not kept.

    Each ``rep_map`` key found in *text* is replaced by its mapped value;
    afterwards any run of characters that is neither in the range
    U+4E00..U+9FA5 nor a member of ``punctuation`` is removed.

    Args:
        text: Input string.

    Returns:
        The filtered string.
    """
    pattern = re.compile("|".join(re.escape(p) for p in rep_map))
    replaced_text = pattern.sub(lambda m: rep_map[m.group()], text)

    # Escape each punctuation mark before placing it in the character class,
    # so marks such as "-", "]" or "^" can never be read as class syntax.
    kept_marks = "".join(re.escape(p) for p in punctuation)
    return re.sub(r"[^\u4e00-\u9fa5" + kept_marks + r"]+", "", replaced_text)
107
+
108
+
109
def text_normalize(text):
    """Normalize *text* with TextNormalizer, then filter each sentence.

    Returns the concatenation of ``replace_punctuation`` applied to every
    sentence produced by ``TextNormalizer().normalize(text)``.
    """
    sentences = TextNormalizer().normalize(text)
    return "".join(replace_punctuation(sentence) for sentence in sentences)
116
+
117
+
118
+ punctuation_set=set(punctuation)
119
def jyuping_to_initials_finals_tones(jyuping_syllables):
    """Split jyutping syllables into "Y"-prefixed initial/final phone labels.

    Args:
        jyuping_syllables: Iterable of syllable strings; punctuation marks and
            "_" are passed through as single units.

    Returns:
        ``(phones, word2ph)``: the phone labels, and for every emitted input
        item the number of phones it produced (1 for punctuation/"_", 2 for a
        syllable). A syllable that starts with no entry of ``INITIALS`` emits
        nothing.
    """
    units = []       # initials, finals and pass-through marks, in order
    unit_tones = []  # parallel to `units`: -1 for an initial, 0 for no tone
    word2ph = []

    for syl in jyuping_syllables:
        if syl in punctuation or syl == "_":
            units.append(syl)
            unit_tones.append(0)
            word2ph.append(1)
            continue

        # A trailing digit is the tone; otherwise the syllable is toneless.
        try:
            tone = int(syl[-1])
        except ValueError:
            tone, stem = 0, syl
        else:
            stem = syl[:-1]

        # First entry of INITIALS (in list order) that prefixes the stem.
        matched = next((ini for ini in INITIALS if stem.startswith(ini)), None)
        if matched is None:
            continue

        if stem.startswith("nga"):
            head, tail = stem[:2], stem[2:] or stem[-1]
        else:
            # When nothing follows the initial, its last letter is the final.
            head, tail = matched, stem[len(matched) :] or matched[-1]
        units.extend([head, tail])
        unit_tones.extend([-1, tone])
        word2ph.append(2)

    assert len(units) == len(unit_tones)

    # Emit "initial" + "final with tone digit". Every non-punctuation label is
    # prefixed with "Y" so Cantonese symbols do not collide with Mandarin ones.
    phones = []
    for unit, tone in zip(units, unit_tones):
        label = unit if tone in (-1, 0) else "%s%s" % (unit, tone)
        if label not in punctuation_set:
            label = "Y%s" % label
        phones.append(label)

    return phones, word2ph
173
+
174
+
175
def get_jyutping(text):
    """Romanize *text* with ``jyutping.convert`` and split it into tokens.

    Every punctuation mark is surrounded by spaces first, so each mark
    becomes its own token in the returned list.
    """
    romanized = jyutping.convert(text)
    for mark in punctuation:
        romanized = romanized.replace(mark, f" {mark} ")
    return romanized.split()
182
+
183
+
184
def get_bert_feature(text, word2ph):
    """Delegate to ``text.chinese_bert.get_bert_feature`` (imported on call)."""
    from text.chinese_bert import get_bert_feature as _bert_feature

    return _bert_feature(text, word2ph)
188
+
189
+
190
def g2p(text):
    """Convert *text* to ``(phones, word2ph)`` via jyutping romanization."""
    return jyuping_to_initials_finals_tones(get_jyutping(text))
200
+
201
+
202
+ if __name__ == "__main__":
203
+ # text = "啊!但是《原神》是由,米哈\游自主, [研发]的一款全.新开放世界.冒险游戏"
204
+ text = "佢個鋤頭太短啦。"
205
+ text = text_normalize(text)
206
+ # phones, tones, word2ph = g2p(text)
207
+ phones, word2ph = g2p(text)
208
+ # print(phones, tones, word2ph)
209
+ print(phones, word2ph)
GPT_SoVITS/text/chinese.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pdb
3
+ import re
4
+
5
+ import cn2an
6
+ from pypinyin import lazy_pinyin, Style
7
+
8
+ from text.symbols import punctuation
9
+ from text.tone_sandhi import ToneSandhi
10
+ from text.zh_normalization.text_normlization import TextNormalizer
11
+
12
+ normalizer = lambda x: cn2an.transform(x, "an2cn")
13
+
14
+ current_file_path = os.path.dirname(__file__)
15
# Lookup table read from the tab-separated opencpop-strict.txt next to this
# module: first column -> second column. The file is opened in a `with` block
# so the handle is closed, and decoded as UTF-8 regardless of the platform's
# default encoding.
with open(
    os.path.join(current_file_path, "opencpop-strict.txt"), encoding="utf-8"
) as _symbol_table_file:
    pinyin_to_symbol_map = {
        line.split("\t")[0]: line.strip().split("\t")[1]
        for line in _symbol_table_file
    }
19
+
20
+ import jieba_fast.posseg as psg
21
+
22
+
23
+ rep_map = {
24
+ ":": ",",
25
+ ";": ",",
26
+ ",": ",",
27
+ "。": ".",
28
+ "!": "!",
29
+ "?": "?",
30
+ "\n": ".",
31
+ "·": ",",
32
+ "、": ",",
33
+ "...": "…",
34
+ "$": ".",
35
+ "/": ",",
36
+ "—": "-",
37
+ "~": "…",
38
+ "~":"…",
39
+ }
40
+
41
+ tone_modifier = ToneSandhi()
42
+
43
+
44
def replace_punctuation(text):
    """Normalize punctuation in *text* and drop every character not kept.

    Two characters are substituted first ("嗯" -> "恩", "呣" -> "母"), then
    each ``rep_map`` key is replaced by its mapped value, and finally any run
    of characters that is neither in the range U+4E00..U+9FA5 nor a member of
    ``punctuation`` is removed.

    Args:
        text: Input string.

    Returns:
        The filtered string.
    """
    text = text.replace("嗯", "恩").replace("呣", "母")
    pattern = re.compile("|".join(re.escape(p) for p in rep_map))
    replaced_text = pattern.sub(lambda m: rep_map[m.group()], text)

    # Escape each punctuation mark before placing it in the character class,
    # so marks such as "-", "]" or "^" can never be read as class syntax.
    kept_marks = "".join(re.escape(p) for p in punctuation)
    return re.sub(r"[^\u4e00-\u9fa5" + kept_marks + r"]+", "", replaced_text)
55
+
56
+
57
def replace_punctuation_with_en(text):
    """Like ``replace_punctuation`` but ASCII letters are kept as well.

    Two characters are substituted first ("嗯" -> "恩", "呣" -> "母"), then
    each ``rep_map`` key is replaced by its mapped value, and finally any run
    of characters that is not in U+4E00..U+9FA5, not in A-Z/a-z and not a
    member of ``punctuation`` is removed.

    Args:
        text: Input string.

    Returns:
        The filtered string.
    """
    text = text.replace("嗯", "恩").replace("呣", "母")
    pattern = re.compile("|".join(re.escape(p) for p in rep_map))
    replaced_text = pattern.sub(lambda m: rep_map[m.group()], text)

    # Escape each punctuation mark before placing it in the character class,
    # so marks such as "-", "]" or "^" can never be read as class syntax.
    kept_marks = "".join(re.escape(p) for p in punctuation)
    return re.sub(r"[^\u4e00-\u9fa5A-Za-z" + kept_marks + r"]+", "", replaced_text)
68
+
69
+
70
def replace_consecutive_punctuation(text):
    """Collapse each run of two or more punctuation marks to its first mark."""
    mark_class = "[" + "".join(map(re.escape, punctuation)) + "]"
    run_pattern = "(" + mark_class + ")(" + mark_class + ")+"
    return re.sub(run_pattern, r"\1", text)
75
+
76
+
77
def g2p(text):
    """Split *text* after each punctuation mark and convert it to phones.

    Returns:
        The ``(phones, word2ph)`` pair produced by ``_g2p`` for the
        non-blank pieces of *text*.
    """
    # Split point: optional whitespace that directly follows a punctuation mark.
    boundary = r"(?<=[" + "".join(punctuation) + r"])\s*"
    pieces = []
    for piece in re.split(boundary, text):
        if piece.strip():
            pieces.append(piece)
    return _g2p(pieces)
82
+
83
+
84
def _get_initials_finals(word):
    """Return parallel lists of pinyin initials and tone-numbered finals.

    Both lists come from ``lazy_pinyin`` (INITIALS and FINALS_TONE3 styles,
    neutral tone written as 5) and are truncated to the shorter of the two.
    """
    raw_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
    raw_finals = lazy_pinyin(
        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
    )
    paired = list(zip(raw_initials, raw_finals))
    return [ini for ini, _ in paired], [fin for _, fin in paired]
95
+
96
+
97
def _g2p(segments):
    """Convert text segments to a flat phone list.

    Args:
        segments: Iterable of strings; ASCII letters are stripped from each
            one before word segmentation.

    Returns:
        ``(phones_list, word2ph)``: the phones, and for every processed
        character the number of phones it produced (1 for punctuation, 2 for
        a syllable).

    Raises:
        AssertionError: if a toneless unit is not punctuation, a tone is not
            one of 1-5, or a syllable is missing from ``pinyin_to_symbol_map``.
    """
    # Constant rewrite tables, built once per call rather than per syllable.
    # Finals to rewrite when the syllable has an initial.
    v_rep_map = {
        "uei": "ui",
        "iou": "iu",
        "uen": "un",
    }
    # Whole-syllable rewrites when there is no initial.
    pinyin_rep_map = {
        "ing": "ying",
        "i": "yi",
        "in": "yin",
        "u": "wu",
    }
    # First-letter rewrites for the remaining no-initial syllables.
    single_rep_map = {
        "v": "yu",
        "e": "e",
        "i": "y",
        "u": "w",
    }

    phones_list = []
    word2ph = []
    for seg in segments:
        # Remove all English words from the sentence.
        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = psg.lcut(seg)
        seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)

        per_word_initials = []
        per_word_finals = []
        for word, pos in seg_cut:
            if pos == "eng":
                continue
            sub_initials, sub_finals = _get_initials_finals(word)
            sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
            per_word_initials.append(sub_initials)
            per_word_finals.append(sub_finals)

        # Flatten in linear time (sum(list_of_lists, []) is quadratic).
        initials = [c for sub in per_word_initials for c in sub]
        finals = [v for sub in per_word_finals for v in sub]

        for c, v in zip(initials, finals):
            raw_pinyin = c + v
            # Post-process pypinyin output: identical initial and final means
            # the unit is a punctuation mark that was passed through.
            if c == v:
                assert c in punctuation
                phone = [c]
                word2ph.append(1)
            else:
                v_without_tone = v[:-1]
                tone = v[-1]

                pinyin = c + v_without_tone
                assert tone in "12345"

                if c:
                    if v_without_tone in v_rep_map:
                        pinyin = c + v_rep_map[v_without_tone]
                elif pinyin in pinyin_rep_map:
                    pinyin = pinyin_rep_map[pinyin]
                elif pinyin[0] in single_rep_map:
                    pinyin = single_rep_map[pinyin[0]] + pinyin[1:]

                assert pinyin in pinyin_to_symbol_map, (pinyin, seg, raw_pinyin)
                new_c, new_v = pinyin_to_symbol_map[pinyin].split(" ")
                new_v = new_v + tone
                phone = [new_c, new_v]
                word2ph.append(len(phone))

            phones_list += phone
    return phones_list, word2ph
172
+
173
+
174
def text_normalize(text):
    """Normalize *text*, filter each sentence, and collapse punctuation runs.

    Sentences come from ``TextNormalizer().normalize`` (see
    https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization);
    each one is passed through ``replace_punctuation`` and the results are
    concatenated.
    """
    sentences = TextNormalizer().normalize(text)
    filtered = "".join(replace_punctuation(sentence) for sentence in sentences)
    # Collapse repeated punctuation (original note: avoids reference leakage).
    return replace_consecutive_punctuation(filtered)
185
+
186
+
187
+ # Text normalization that does not strip English letters
188
def mix_text_normalize(text):
    """Variant of ``text_normalize`` that keeps ASCII letters.

    Sentences come from ``TextNormalizer().normalize`` (see
    https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization);
    each one is passed through ``replace_punctuation_with_en`` and the results
    are concatenated.
    """
    sentences = TextNormalizer().normalize(text)
    filtered = "".join(
        replace_punctuation_with_en(sentence) for sentence in sentences
    )
    # Collapse repeated punctuation (original note: avoids reference leakage).
    return replace_consecutive_punctuation(filtered)
199
+
200
+
201
+ if __name__ == "__main__":
202
+ text = "啊——但是《原神》是由,米哈\游自主,研发的一款全.新开放世界.冒险游戏"
203
+ text = "呣呣呣~就是…大人的鼹鼠党吧?"
204
+ text = "你好"
205
+ text = text_normalize(text)
206
+ print(g2p(text))
207
+
208
+
209
+ # # 示例用法
210
+ # text = "这是一个示例文本:,你好!这是一个测试..."
211
+ # print(g2p_paddle(text)) # 输出: 这是一个示例文本你好这是一个测试
GPT_SoVITS/text/chinese2.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pdb
3
+ import re
4
+
5
+ import cn2an
6
+ from pypinyin import lazy_pinyin, Style
7
+ from pypinyin.contrib.tone_convert import to_normal, to_finals_tone3, to_initials, to_finals
8
+
9
+ from text.symbols import punctuation
10
+ from text.tone_sandhi import ToneSandhi
11
+ from text.zh_normalization.text_normlization import TextNormalizer
12
+
13
+ normalizer = lambda x: cn2an.transform(x, "an2cn")
14
+
15
current_file_path = os.path.dirname(__file__)

# Map every pinyin syllable (first tab-separated column of opencpop-strict.txt)
# to its symbol string (second column).  The table is read inside a ``with``
# block so the file handle is closed as soon as the dict is built instead of
# being left for the garbage collector.
with open(os.path.join(current_file_path, "opencpop-strict.txt")) as _symbol_table:
    pinyin_to_symbol_map = {
        line.split("\t")[0]: line.strip().split("\t")[1]
        for line in _symbol_table
    }
20
+
21
+ import jieba_fast.posseg as psg
22
+
23
+ # is_g2pw_str = os.environ.get("is_g2pw", "True")##默认开启
24
+ # is_g2pw = False#True if is_g2pw_str.lower() == 'true' else False
25
+ is_g2pw = True#True if is_g2pw_str.lower() == 'true' else False
26
+ if is_g2pw:
27
+ print("当前使用g2pw进行拼音推理")
28
+ from text.g2pw import G2PWPinyin, correct_pronunciation
29
+ parent_directory = os.path.dirname(current_file_path)
30
+ g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source=os.environ.get("bert_path","GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"),v_to_u=False, neutral_tone_with_five=True)
31
+
32
+ rep_map = {
33
+ ":": ",",
34
+ ";": ",",
35
+ ",": ",",
36
+ "。": ".",
37
+ "!": "!",
38
+ "?": "?",
39
+ "\n": ".",
40
+ "·": ",",
41
+ "、": ",",
42
+ "...": "…",
43
+ "$": ".",
44
+ "/": ",",
45
+ "—": "-",
46
+ "~": "…",
47
+ "~":"…",
48
+ }
49
+
50
+ tone_modifier = ToneSandhi()
51
+
52
+
53
def replace_punctuation(text):
    """Map punctuation through ``rep_map`` and drop unsupported characters.

    Two interjection characters are first swapped for stand-ins, every key of
    ``rep_map`` found in the text is replaced by its mapped value, and finally
    anything that is neither a CJK ideograph in U+4E00..U+9FA5 nor a member of
    ``punctuation`` is removed.
    """
    swapped = text.replace("嗯", "恩").replace("呣", "母")

    alternation = "|".join(map(re.escape, rep_map))
    mapped = re.sub(alternation, lambda match: rep_map[match.group()], swapped)

    disallowed = r"[^\u4e00-\u9fa5" + "".join(punctuation) + r"]+"
    return re.sub(disallowed, "", mapped)
64
+
65
+
66
def g2p(text):
    """Convert normalized text to phones.

    The text is split after every punctuation mark (swallowing following
    whitespace), blank pieces are discarded and the remaining segments are
    handed to ``_g2p``.

    Returns:
        ``(phones, word2ph)`` exactly as produced by ``_g2p``.
    """
    boundary = r"(?<=[{0}])\s*".format("".join(punctuation))
    segments = []
    for piece in re.split(boundary, text):
        if piece.strip() != "":
            segments.append(piece)
    phones, word2ph = _g2p(segments)
    return phones, word2ph
71
+
72
+
73
def _get_initials_finals(word):
    """Return parallel lists of pypinyin initials and tone3 finals for *word*.

    Neutral tones are written with the digit 5. The two lists are paired with
    ``zip`` so they always have equal length.
    """
    raw_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
    raw_finals = lazy_pinyin(
        word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
    )

    pairs = list(zip(raw_initials, raw_finals))
    initials = [initial for initial, _ in pairs]
    finals = [final for _, final in pairs]
    return initials, finals
86
+
87
+
88
+ must_erhua = {
89
+ "小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
90
+ }
91
+ not_erhua = {
92
+ "虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
93
+ "拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
94
+ "流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
95
+ "孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
96
+ "狗儿", "少儿"
97
+ }
98
+ def _merge_erhua(initials: list[str],
99
+ finals: list[str],
100
+ word: str,
101
+ pos: str) -> list[list[str]]:
102
+ """
103
+ Do erhub.
104
+ """
105
+ # fix er1
106
+ for i, phn in enumerate(finals):
107
+ if i == len(finals) - 1 and word[i] == "儿" and phn == 'er1':
108
+ finals[i] = 'er2'
109
+
110
+ # 发音
111
+ if word not in must_erhua and (word in not_erhua or
112
+ pos in {"a", "j", "nr"}):
113
+ return initials, finals
114
+
115
+ # "……" 等情况直接返回
116
+ if len(finals) != len(word):
117
+ return initials, finals
118
+
119
+ assert len(finals) == len(word)
120
+
121
+ # 与前一个字发同音
122
+ new_initials = []
123
+ new_finals = []
124
+ for i, phn in enumerate(finals):
125
+ if i == len(finals) - 1 and word[i] == "儿" and phn in {
126
+ "er2", "er5"
127
+ } and word[-2:] not in not_erhua and new_finals:
128
+ phn = "er" + new_finals[-1][-1]
129
+
130
+ new_initials.append(initials[i])
131
+ new_finals.append(phn)
132
+
133
+ return new_initials, new_finals
134
+
135
+
136
def _g2p(segments):
    """Convert text segments into a flat phone list plus per-character counts.

    For every segment, Latin letters are stripped, the text is cut with jieba
    and merged for tone sandhi, and initials/finals are obtained either from
    g2pw (whole-sentence inference, when ``is_g2pw`` is true) or from pypinyin
    (word by word). Tone sandhi and erhua merging are applied per word. Each
    initial/final pair is then rewritten to a key of ``pinyin_to_symbol_map``.

    Returns:
        ``(phones_list, word2ph)`` where ``word2ph`` holds 1 for a punctuation
        entry and 2 for a syllable.
    """
    # Finals rewritten when the syllable has an initial.
    finals_after_initial = {
        "uei": "ui",
        "iou": "iu",
        "uen": "un",
    }
    # Whole-syllable rewrites for syllables without an initial.
    zero_initial_whole = {
        "ing": "ying",
        "i": "yi",
        "in": "yin",
        "u": "wu",
    }
    # First-letter rewrites for the remaining zero-initial syllables.
    zero_initial_lead = {
        "v": "yu",
        "e": "e",
        "i": "y",
        "u": "w",
    }

    phones_list = []
    word2ph = []
    for seg in segments:
        # Replace all English words in the sentence
        seg = re.sub("[a-zA-Z]+", "", seg)
        seg_cut = tone_modifier.pre_merge_for_modify(psg.lcut(seg))
        grouped_initials = []
        grouped_finals = []

        if is_g2pw:
            # g2pw infers pinyin for the whole sentence at once.
            sentence_pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3)

            offset = 0
            for word, pos in seg_cut:
                word_start, offset = offset, offset + len(word)
                if pos == 'eng':
                    continue

                # Polyphone disambiguation via the override dictionary.
                word_pinyins = correct_pronunciation(word, sentence_pinyins[word_start:offset])

                sub_initials = []
                sub_finals = []
                for syllable in word_pinyins:
                    if syllable[0].isalpha():
                        sub_initials.append(to_initials(syllable))
                        sub_finals.append(to_finals_tone3(syllable, neutral_tone_with_five=True))
                    else:
                        # Non-alphabetic entries are kept as both parts.
                        sub_initials.append(syllable)
                        sub_finals.append(syllable)

                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
                # Erhua merging.
                sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
                grouped_initials.append(sub_initials)
                grouped_finals.append(sub_finals)

            initials = sum(grouped_initials, [])
            finals = sum(grouped_finals, [])
        else:
            for word, pos in seg_cut:
                if pos == "eng":
                    continue
                sub_initials, sub_finals = _get_initials_finals(word)
                sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
                # Erhua merging.
                sub_initials, sub_finals = _merge_erhua(sub_initials, sub_finals, word, pos)
                grouped_initials.append(sub_initials)
                grouped_finals.append(sub_finals)
            initials = sum(grouped_initials, [])
            finals = sum(grouped_finals, [])
            print("pypinyin结果", initials, finals)

        for c, v in zip(initials, finals):
            raw_pinyin = c + v
            # NOTE: post process for pypinyin outputs
            # we discriminate i, ii and iii
            if c == v:
                # Identical initial and final means a punctuation mark.
                assert c in punctuation
                phones_list += [c]
                word2ph.append(1)
                continue

            v_without_tone = v[:-1]
            tone = v[-1]
            pinyin = c + v_without_tone
            assert tone in "12345"

            if c:
                # Syllable with an initial.
                if v_without_tone in finals_after_initial:
                    pinyin = c + finals_after_initial[v_without_tone]
            elif pinyin in zero_initial_whole:
                # Syllable without an initial, rewritten as a whole.
                pinyin = zero_initial_whole[pinyin]
            elif pinyin[0] in zero_initial_lead:
                pinyin = zero_initial_lead[pinyin[0]] + pinyin[1:]

            assert pinyin in pinyin_to_symbol_map.keys(), (pinyin, seg, raw_pinyin)
            new_c, new_v = pinyin_to_symbol_map[pinyin].split(" ")
            phone = [new_c, new_v + tone]
            word2ph.append(len(phone))
            phones_list += phone
    return phones_list, word2ph
252
+
253
+
254
def replace_punctuation_with_en(text):
    """Map punctuation through ``rep_map`` while keeping ASCII letters.

    Same as ``replace_punctuation`` except that ``A-Za-z`` survives the final
    filtering step alongside CJK ideographs (U+4E00..U+9FA5) and the members
    of ``punctuation``.
    """
    swapped = text.replace("嗯", "恩").replace("呣", "母")

    alternation = "|".join(map(re.escape, rep_map))
    mapped = re.sub(alternation, lambda match: rep_map[match.group()], swapped)

    disallowed = r"[^\u4e00-\u9fa5A-Za-z" + "".join(punctuation) + r"]+"
    return re.sub(disallowed, "", mapped)
265
+
266
def replace_consecutive_punctuation(text):
    """Collapse every run of two or more punctuation marks to its first mark."""
    escaped = ''.join(map(re.escape, punctuation))
    run_of_marks = f'([{escaped}])([{escaped}])+'
    return re.sub(run_of_marks, r'\1', text)
271
+
272
def text_normalize(text):
    """Normalize Chinese text before grapheme-to-phoneme conversion.

    ``TextNormalizer`` (ported from
    https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization)
    splits and normalizes the text; every sentence then goes through
    ``replace_punctuation`` and the results are concatenated.

    Returns:
        The normalized string with runs of punctuation collapsed.
    """
    sentence_splitter = TextNormalizer()
    cleaned_parts = []
    for sentence in sentence_splitter.normalize(text):
        cleaned_parts.append(replace_punctuation(sentence))
    joined = "".join(cleaned_parts)

    # Collapse repeated punctuation to avoid leaking the reference audio.
    return replace_consecutive_punctuation(joined)
283
+
284
+ # 不排除英文的文本格式化
285
def mix_text_normalize(text):
    """Normalize mixed Chinese/English text without dropping English letters.

    Identical to ``text_normalize`` except that each sentence is cleaned with
    ``replace_punctuation_with_en``. The normalizer is ported from
    https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization

    Returns:
        The normalized string with runs of punctuation collapsed.
    """
    sentence_splitter = TextNormalizer()
    cleaned_parts = []
    for sentence in sentence_splitter.normalize(text):
        cleaned_parts.append(replace_punctuation_with_en(sentence))
    joined = "".join(cleaned_parts)

    # Collapse repeated punctuation to avoid leaking the reference audio.
    return replace_consecutive_punctuation(joined)
296
+
297
+
298
+ if __name__ == "__main__":
299
+ text = "啊——但是《原神》是由,米哈\游自主,研发的一款全.新开放世界.冒险游戏"
300
+ text = "呣呣呣~就是…大人的鼹鼠党吧?"
301
+ text = "你好"
302
+ text = text_normalize(text)
303
+ print(g2p(text))
304
+
305
+
306
+ # # 示例用法
307
+ # text = "这是一个示例文本:,你好!这是一个测试..."
308
+ # print(g2p_paddle(text)) # 输出: 这是一个示例文本你好这是一个测试
GPT_SoVITS/text/cleaner.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from text import cleaned_text_to_sequence
2
+ import os
3
+ # if os.environ.get("version","v1")=="v1":
4
+ # from text import chinese
5
+ # from text.symbols import symbols
6
+ # else:
7
+ # from text import chinese2 as chinese
8
+ # from text.symbols2 import symbols
9
+
10
+ from text import symbols as symbols_v1
11
+ from text import symbols2 as symbols_v2
12
+
13
+ special = [
14
+ # ("%", "zh", "SP"),
15
+ ("¥", "zh", "SP2"),
16
+ ("^", "zh", "SP3"),
17
+ # ('@', 'zh', "SP4")#不搞鬼畜了,和第二版保持一致吧
18
+ ]
19
+
20
+
21
def clean_text(text, language, version=None):
    """Normalize *text* and convert it to phones for *language*.

    Args:
        text: Raw input text.
        language: Language code; codes missing from the version's module map
            fall back to ``"en"`` with the text replaced by a single space.
        version: ``"v1"`` or anything else for the v2 tables; defaults to the
            ``version`` environment variable, then ``"v2"``.

    Returns:
        ``(phones, word2ph, norm_text)``. ``word2ph`` is ``None`` except for
        ``"zh"`` and ``"yue"``. Phones missing from the symbol table become
        ``'UNK'``.
    """
    if version is None:
        version = os.environ.get('version', 'v2')

    if version == "v1":
        symbols = symbols_v1.symbols
        language_module_map = {"zh": "chinese", "ja": "japanese", "en": "english"}
    else:
        symbols = symbols_v2.symbols
        language_module_map = {"zh": "chinese2", "ja": "japanese", "en": "english", "ko": "korean","yue":"cantonese"}

    if language not in language_module_map:
        language = "en"
        text = " "

    # Texts containing a special silence marker take a dedicated path.
    for special_s, special_l, target_symbol in special:
        if language == special_l and special_s in text:
            return clean_special(text, language, special_s, target_symbol, version)

    module_name = language_module_map[language]
    language_module = __import__("text." + module_name, fromlist=[module_name])

    norm_text = text
    if hasattr(language_module, "text_normalize"):
        norm_text = language_module.text_normalize(text)

    if language in ("zh", "yue"):
        phones, word2ph = language_module.g2p(norm_text)
        assert len(phones) == sum(word2ph)
        assert len(norm_text) == len(word2ph)
    else:
        phones = language_module.g2p(norm_text)
        word2ph = None
        # Very short English outputs get a leading comma phone.
        if language == "en" and len(phones) < 4:
            phones = [','] + phones

    phones = [ph if ph in symbols else 'UNK' for ph in phones]
    return phones, word2ph, norm_text
55
+
56
+
57
def clean_special(text, language, special_s, target_symbol, version=None):
    """Handle the special silence-segment (SP) marker symbols.

    Every occurrence of *special_s* is replaced by a comma before
    normalization and g2p; afterwards every ``","`` phone is replaced by
    *target_symbol*.

    Returns:
        ``(phones, word2ph, norm_text)`` where ``word2ph`` is the second
        element returned by the language module's ``g2p``.
    """
    if version is None:
        version = os.environ.get('version', 'v2')

    if version == "v1":
        symbols = symbols_v1.symbols
        language_module_map = {"zh": "chinese", "ja": "japanese", "en": "english"}
    else:
        symbols = symbols_v2.symbols
        language_module_map = {"zh": "chinese2", "ja": "japanese", "en": "english", "ko": "korean","yue":"cantonese"}

    text = text.replace(special_s, ",")
    module_name = language_module_map[language]
    language_module = __import__("text." + module_name, fromlist=[module_name])
    norm_text = language_module.text_normalize(text)
    g2p_result = language_module.g2p(norm_text)

    new_ph = []
    for ph in g2p_result[0]:
        assert ph in symbols
        new_ph.append(target_symbol if ph == "," else ph)
    return new_ph, g2p_result[1], norm_text
81
+
82
+
83
def text_to_sequence(text, language, version=None):
    """Clean *text* and convert the resulting phones to a symbol-id sequence.

    Args:
        text: Raw input text.
        language: Language code forwarded to ``clean_text``.
        version: Symbol-table version; the ``version`` environment variable
            takes precedence when set, and ``'v2'`` is the final fallback.

    Returns:
        Whatever ``cleaned_text_to_sequence`` returns for the cleaned phones.
    """
    version = os.environ.get('version', version)
    if version is None:
        version = 'v2'
    # clean_text requires the language and returns (phones, word2ph, norm_text);
    # only the phones are converted to ids.
    phones, _word2ph, _norm_text = clean_text(text, language, version)
    return cleaned_text_to_sequence(phones, version)
88
+
89
+
90
+ if __name__ == "__main__":
91
+ print(clean_text("你好%啊啊啊额、还是到付红四方。", "zh"))
GPT_SoVITS/text/cmudict-fast.rep ADDED
The diff for this file is too large to render. See raw diff
 
GPT_SoVITS/text/cmudict.rep ADDED
The diff for this file is too large to render. See raw diff
 
GPT_SoVITS/text/engdict-hot.rep ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ CHATGPT CH AE1 T JH IY1 P IY1 T IY1
2
+ JSON JH EY1 S AH0 N
3
+ CONDA K AA1 N D AH0
GPT_SoVITS/text/english.py ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import os
3
+ import re
4
+ import wordsegment
5
+ from g2p_en import G2p
6
+
7
+ from text.symbols import punctuation
8
+
9
+ from text.symbols2 import symbols
10
+
11
+ import unicodedata
12
+ from builtins import str as unicode
13
+ from g2p_en.expand import normalize_numbers
14
+ from nltk.tokenize import TweetTokenizer
15
+ word_tokenize = TweetTokenizer().tokenize
16
+ from nltk import pos_tag
17
+
18
+ current_file_path = os.path.dirname(__file__)
19
+ CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
20
+ CMU_DICT_FAST_PATH = os.path.join(current_file_path, "cmudict-fast.rep")
21
+ CMU_DICT_HOT_PATH = os.path.join(current_file_path, "engdict-hot.rep")
22
+ CACHE_PATH = os.path.join(current_file_path, "engdict_cache.pickle")
23
+ NAMECACHE_PATH = os.path.join(current_file_path, "namedict_cache.pickle")
24
+
25
+ arpa = {
26
+ "AH0",
27
+ "S",
28
+ "AH1",
29
+ "EY2",
30
+ "AE2",
31
+ "EH0",
32
+ "OW2",
33
+ "UH0",
34
+ "NG",
35
+ "B",
36
+ "G",
37
+ "AY0",
38
+ "M",
39
+ "AA0",
40
+ "F",
41
+ "AO0",
42
+ "ER2",
43
+ "UH1",
44
+ "IY1",
45
+ "AH2",
46
+ "DH",
47
+ "IY0",
48
+ "EY1",
49
+ "IH0",
50
+ "K",
51
+ "N",
52
+ "W",
53
+ "IY2",
54
+ "T",
55
+ "AA1",
56
+ "ER1",
57
+ "EH2",
58
+ "OY0",
59
+ "UH2",
60
+ "UW1",
61
+ "Z",
62
+ "AW2",
63
+ "AW1",
64
+ "V",
65
+ "UW2",
66
+ "AA2",
67
+ "ER",
68
+ "AW0",
69
+ "UW0",
70
+ "R",
71
+ "OW1",
72
+ "EH1",
73
+ "ZH",
74
+ "AE0",
75
+ "IH2",
76
+ "IH",
77
+ "Y",
78
+ "JH",
79
+ "P",
80
+ "AY1",
81
+ "EY0",
82
+ "OY2",
83
+ "TH",
84
+ "HH",
85
+ "D",
86
+ "ER0",
87
+ "CH",
88
+ "AO1",
89
+ "AE1",
90
+ "AO2",
91
+ "OY1",
92
+ "AY2",
93
+ "IH1",
94
+ "OW0",
95
+ "L",
96
+ "SH",
97
+ }
98
+
99
+
100
def replace_phs(phs):
    """Keep phones known to ``symbols``, mapping an apostrophe to a hyphen.

    Phones that are neither in ``symbols`` nor in the replacement map are
    reported on stdout and dropped.
    """
    rep_map = {"'": "-"}
    kept = []
    for ph in phs:
        if ph in symbols:
            kept.append(ph)
            continue
        if ph in rep_map:
            kept.append(rep_map[ph])
            continue
        print("ph not in symbols: ", ph)
    return kept
111
+
112
+
113
def replace_consecutive_punctuation(text):
    """Collapse every run of two or more punctuation marks to its first mark."""
    char_class = ''.join([re.escape(mark) for mark in punctuation])
    run_of_marks = f'([{char_class}])([{char_class}])+'
    collapsed = re.sub(run_of_marks, r'\1', text)
    return collapsed
118
+
119
+
120
def read_dict():
    """Read the CMU dictionary into ``{word: [[phone, ...], ...]}``.

    Lines before line 49 (1-based) are skipped. Each entry has the form
    ``WORD<two spaces>PH PH - PH PH``: the word and its pronunciation are
    separated by two spaces, syllables by ``" - "`` and phones by one space.
    Lines without a pronunciation field are ignored.
    """
    g2p_dict = {}
    start_line = 49
    with open(CMU_DICT_PATH) as f:
        for line_index, line in enumerate(f, start=1):
            if line_index < start_line:
                continue
            # Splitting on a single space here would leave only the first
            # phone in field 1, so the word separator must be two spaces.
            word_split = line.strip().split("  ")
            if len(word_split) < 2:
                continue
            word = word_split[0].lower()
            g2p_dict[word] = [
                syllable.split(" ") for syllable in word_split[1].split(" - ")
            ]

    return g2p_dict
142
+
143
+
144
def read_dict_new():
    """Build the pronunciation dict from cmudict.rep plus cmudict-fast.rep.

    The CMU file is read from line 57 (1-based); its entries separate the word
    from the pronunciation with two spaces and the phones with one space. The
    "fast" file is fully space-separated and only contributes words that are
    not already present. Blank or malformed lines are skipped.

    Returns:
        ``{word: [[phone, ...]]}`` with lower-cased words.
    """
    g2p_dict = {}
    with open(CMU_DICT_PATH) as f:
        for line_index, line in enumerate(f, start=1):
            if line_index < 57:
                continue
            # Two-space separator: a single-space split would keep only the
            # first phone in field 1.
            word_split = line.strip().split("  ")
            if len(word_split) < 2:
                continue
            g2p_dict[word_split[0].lower()] = [word_split[1].split(" ")]

    with open(CMU_DICT_FAST_PATH) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            word_split = line.split(" ")
            word = word_split[0].lower()
            if word not in g2p_dict:
                g2p_dict[word] = [word_split[1:]]

    return g2p_dict
174
+
175
def hot_reload_hot(g2p_dict):
    """Overlay the user "hot" dictionary on *g2p_dict* and return it.

    Each line of the hot file is ``WORD PH PH ...`` separated by single
    spaces; its pronunciation replaces any existing entry for the lower-cased
    word. The dict is modified in place.
    """
    with open(CMU_DICT_HOT_PATH) as hot_file:
        for raw_line in hot_file:
            fields = raw_line.strip().split(" ")
            # Custom pronunciations override the dictionary outright.
            g2p_dict[fields[0].lower()] = [fields[1:]]

    return g2p_dict
191
+
192
+
193
def cache_dict(g2p_dict, file_path):
    """Pickle *g2p_dict* to *file_path*, overwriting any existing file."""
    with open(file_path, "wb") as sink:
        pickle.dump(g2p_dict, sink)
196
+
197
+
198
def get_dict():
    """Return the pronunciation dict, using the pickle cache when present.

    When the cache file is missing the dict is rebuilt with ``read_dict_new``
    and cached. The hot dictionary is applied on every call, after loading.
    """
    if not os.path.exists(CACHE_PATH):
        g2p_dict = read_dict_new()
        cache_dict(g2p_dict, CACHE_PATH)
    else:
        # NOTE(review): pickle.load trusts the cache file's contents.
        with open(CACHE_PATH, "rb") as cached:
            g2p_dict = pickle.load(cached)

    return hot_reload_hot(g2p_dict)
209
+
210
+
211
def get_namedict():
    """Return the pickled name dictionary, or an empty dict if it is absent."""
    if not os.path.exists(NAMECACHE_PATH):
        return {}
    # NOTE(review): pickle.load trusts the cache file's contents.
    with open(NAMECACHE_PATH, "rb") as cached:
        return pickle.load(cached)
219
+
220
+
221
def text_normalize(text):
    """Normalize English text for ``g2p``.

    Full-width / CJK punctuation is mapped to ASCII, numbers are spelled out
    with g2p_en's ``normalize_numbers``, accents are stripped, every character
    outside ``[ A-Za-z'.,?!-]`` is removed, "i.e." / "e.g." are expanded and
    runs of punctuation are collapsed.
    """
    # todo: eng text normalize
    # Adapt Chinese punctuation to what g2p_en understands.  The keys are
    # regular expressions, written with escapes so the full-width characters
    # survive re-encoding; a bare ASCII "?" key is not a valid pattern.
    rep_map = {
        "[;:\uff1a\uff0c\uff1b]": ",",  # ; : and full-width colon/comma/semicolon
        '["\u2019]': "'",               # double quote and right single quote
        "\u3002": ".",                  # ideographic full stop
        "\uff01": "!",                  # full-width exclamation mark
        "\uff1f": "?",                  # full-width question mark
    }
    for p, r in rep_map.items():
        text = re.sub(p, r, text)

    # Text formatting taken from g2p_en, extended to keep upper case.
    text = unicode(text)
    text = normalize_numbers(text)
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')  # Strip accents
    text = re.sub(r"[^ A-Za-z'.,?!\-]", "", text)
    text = re.sub(r"(?i)i\.e\.", "that is", text)
    text = re.sub(r"(?i)e\.g\.", "for example", text)

    # Collapse repeated punctuation to avoid leaking the reference audio.
    text = replace_consecutive_punctuation(text)

    return text
248
+
249
+
250
class en_G2p(G2p):
    """g2p_en ``G2p`` with an extended dictionary and fallbacks for unknown words."""

    def __init__(self):
        super().__init__()
        # Initialise the word segmenter.
        wordsegment.load()

        # Replace the outdated dictionary and add the name dictionary.
        self.cmu = get_dict()
        self.namedict = get_namedict()

        # Drop a few abbreviations whose dictionary reading is wrong.  pop()
        # with a default is used because a cached or customised dictionary may
        # not contain every one of them, and ``del`` would raise KeyError.
        for word in ["AE", "AI", "AR", "IOS", "HUD", "OS"]:
            self.cmu.pop(word.lower(), None)

        # Corrected homographs.
        self.homograph2features["read"] = (['R', 'IY1', 'D'], ['R', 'EH1', 'D'], 'VBP')
        self.homograph2features["complex"] = (['K', 'AH0', 'M', 'P', 'L', 'EH1', 'K', 'S'], ['K', 'AA1', 'M', 'P', 'L', 'EH0', 'K', 'S'], 'JJ')

    def __call__(self, text):
        """Return the phone list for *text*, with ``" "`` between words."""
        # tokenization
        words = word_tokenize(text)
        tokens = pos_tag(words)  # tuples of (word, tag)

        # steps
        prons = []
        for o_word, pos in tokens:
            # Reproduce g2p_en's lower-casing.
            word = o_word.lower()

            if re.search("[a-z]", word) is None:
                pron = [word]
            # Handle single letters first.
            elif len(word) == 1:
                # A standalone capital "A" is read as the letter name; the
                # original casing in o_word is needed to detect it.
                if o_word == "A":
                    pron = ['EY1']
                else:
                    pron = self.cmu[word][0]
            # Homograph handling from the original g2p_en.
            elif word in self.homograph2features:  # Check homograph
                pron1, pron2, pos1 = self.homograph2features[word]
                if pos.startswith(pos1):
                    pron = pron1
                # pos1 being longer than pos only occurs for "read".
                elif len(pos) < len(pos1) and pos == pos1[:len(pos)]:
                    pron = pron1
                else:
                    pron = pron2
            else:
                # Recursive lookup / prediction.
                pron = self.qryword(o_word)

            prons.extend(pron)
            prons.extend([" "])

        return prons[:-1]

    def qryword(self, o_word):
        """Return the phones for one word via dictionary, spelling, splitting or prediction."""
        word = o_word.lower()

        # Dictionary lookup, except for single letters.
        if len(word) > 1 and word in self.cmu:  # lookup CMU dict
            return self.cmu[word][0]

        # Title-cased words are looked up in the name dictionary.
        if o_word.istitle() and word in self.namedict:
            return self.namedict[word][0]

        # Out-of-vocabulary words of length <= 3 are spelled letter by letter.
        if len(word) <= 3:
            phones = []
            for w in word:
                # Letter-name reading of "a"; no upper case can occur here.
                if w == "a":
                    phones.extend(['EY1'])
                elif not w.isalpha():
                    phones.extend([w])
                else:
                    phones.extend(self.cmu[w][0])
            return phones

        # Try to split off a possessive 's.
        if re.match(r"^([a-z]+)('s)$", word):
            phones = self.qryword(word[:-2])[:]
            # After voiceless P T K F TH HH, 's is ['S'].
            if phones[-1] in ['P', 'T', 'K', 'F', 'TH', 'HH']:
                phones.extend(['S'])
            # After sibilants S Z SH ZH CH JH, 's is ['AH0', 'Z'].
            elif phones[-1] in ['S', 'Z', 'SH', 'ZH', 'CH', 'JH']:
                phones.extend(['AH0', 'Z'])
            # After every other phone (voiced consonants and vowels), 's is ['Z'].
            else:
                phones.extend(['Z'])
            return phones

        # Try word segmentation to handle compounds.
        comps = wordsegment.segment(word.lower())

        # Words that cannot be segmented go to the neural predictor.
        if len(comps) == 1:
            return self.predict(word)

        # Segmentable words are resolved recursively, piece by piece.
        return [phone for comp in comps for phone in self.qryword(comp)]
358
+
359
+
360
+ _g2p = en_G2p()
361
+
362
+
363
def g2p(text):
    """Run the English g2p on *text* and return phones known to ``symbols``.

    Word separators and the listed special tokens are dropped, ``"<unk>"`` is
    renamed to ``"UNK"``, and the result is filtered by ``replace_phs``.
    """
    dropped = (" ", "<pad>", "UW", "</s>", "<s>")
    phones = []
    for ph in _g2p(text):
        if ph in dropped:
            continue
        phones.append("UNK" if ph == "<unk>" else ph)

    return replace_phs(phones)
369
+
370
+
371
+ if __name__ == "__main__":
372
+ print(g2p("hello"))
373
+ print(g2p(text_normalize("e.g. I used openai's AI tool to draw a picture.")))
374
+ print(g2p(text_normalize("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder.")))
GPT_SoVITS/text/g2pw/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from text.g2pw.g2pw import *
GPT_SoVITS/text/g2pw/dataset.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Credits
16
+ This code is modified from https://github.com/GitYCC/g2pW
17
+ """
18
+ from typing import Dict
19
+ from typing import List
20
+ from typing import Tuple
21
+
22
+ import numpy as np
23
+
24
+ from .utils import tokenize_and_map
25
+
26
+ ANCHOR_CHAR = '▁'
27
+
28
+
29
def prepare_onnx_input(tokenizer,
                       labels: List[str],
                       char2phonemes: Dict[str, List[int]],
                       chars: List[str],
                       texts: List[str],
                       query_ids: List[int],
                       use_mask: bool=False,
                       window_size: int=None,
                       max_len: int=512) -> Dict[str, np.array]:
    """Build the numpy input dict for the g2pW ONNX model.

    Each ``texts[i]`` / ``query_ids[i]`` pair is optionally windowed around
    the query character, lower-cased, tokenized, truncated to ``max_len``
    tokens (including ``[CLS]`` / ``[SEP]``) and converted to ids.

    Returns:
        A dict with the keys ``input_ids``, ``token_type_ids``,
        ``attention_masks``, ``phoneme_masks``, ``char_ids`` and
        ``position_ids``; an empty dict if any text fails to tokenize.
    """
    if window_size is not None:
        truncated_texts, truncated_query_ids = _truncate_texts(
            window_size=window_size, texts=texts, query_ids=query_ids)

    # A falsy window size (None or 0) means the untruncated inputs are used.
    if window_size:
        source_texts, source_query_ids = truncated_texts, truncated_query_ids
    else:
        source_texts, source_query_ids = texts, query_ids

    input_ids = []
    token_type_ids = []
    attention_masks = []
    phoneme_masks = []
    char_ids = []
    position_ids = []

    label_count = len(labels)
    for idx in range(len(texts)):
        text = source_texts[idx].lower()
        query_id = source_query_ids[idx]

        try:
            tokens, text2token, token2text = tokenize_and_map(
                tokenizer=tokenizer, text=text)
        except Exception:
            print(f'warning: text "{text}" is invalid')
            return {}

        text, query_id, tokens, text2token, token2text = _truncate(
            max_len=max_len,
            text=text,
            query_id=query_id,
            tokens=tokens,
            text2token=text2token,
            token2text=token2text)

        processed_tokens = ['[CLS]'] + tokens + ['[SEP]']
        token_count = len(processed_tokens)

        query_char = text[query_id]
        if use_mask:
            # Only the phonemes allowed for the query character are enabled.
            phoneme_mask = [
                1 if i in char2phonemes[query_char] else 0
                for i in range(label_count)
            ]
        else:
            phoneme_mask = [1] * label_count

        input_ids.append(
            list(np.array(tokenizer.convert_tokens_to_ids(processed_tokens))))
        token_type_ids.append(list(np.zeros((token_count, ), dtype=int)))
        attention_masks.append(list(np.ones((token_count, ), dtype=int)))
        phoneme_masks.append(phoneme_mask)
        char_ids.append(chars.index(query_char))
        # +1 because the [CLS] token occupies the first position.
        position_ids.append(text2token[query_id] + 1)

    return {
        'input_ids': np.array(input_ids).astype(np.int64),
        'token_type_ids': np.array(token_type_ids).astype(np.int64),
        'attention_masks': np.array(attention_masks).astype(np.int64),
        'phoneme_masks': np.array(phoneme_masks).astype(np.float32),
        'char_ids': np.array(char_ids).astype(np.int64),
        'position_ids': np.array(position_ids).astype(np.int64),
    }
97
+
98
+
99
+ def _truncate_texts(window_size: int, texts: List[str],
100
+ query_ids: List[int]) -> Tuple[List[str], List[int]]:
101
+ truncated_texts = []
102
+ truncated_query_ids = []
103
+ for text, query_id in zip(texts, query_ids):
104
+ start = max(0, query_id - window_size // 2)
105
+ end = min(len(text), query_id + window_size // 2)
106
+ truncated_text = text[start:end]
107
+ truncated_texts.append(truncated_text)
108
+
109
+ truncated_query_id = query_id - start
110
+ truncated_query_ids.append(truncated_query_id)
111
+ return truncated_texts, truncated_query_ids
112
+
113
+
114
+ def _truncate(max_len: int,
115
+ text: str,
116
+ query_id: int,
117
+ tokens: List[str],
118
+ text2token: List[int],
119
+ token2text: List[Tuple[int]]):
120
+ truncate_len = max_len - 2
121
+ if len(tokens) <= truncate_len:
122
+ return (text, query_id, tokens, text2token, token2text)
123
+
124
+ token_position = text2token[query_id]
125
+
126
+ token_start = token_position - truncate_len // 2
127
+ token_end = token_start + truncate_len
128
+ font_exceed_dist = -token_start
129
+ back_exceed_dist = token_end - len(tokens)
130
+ if font_exceed_dist > 0:
131
+ token_start += font_exceed_dist
132
+ token_end += font_exceed_dist
133
+ elif back_exceed_dist > 0:
134
+ token_start -= back_exceed_dist
135
+ token_end -= back_exceed_dist
136
+
137
+ start = token2text[token_start][0]
138
+ end = token2text[token_end - 1][1]
139
+
140
+ return (text[start:end], query_id - start, tokens[token_start:token_end], [
141
+ i - token_start if i is not None else None
142
+ for i in text2token[start:end]
143
+ ], [(s - start, e - start) for s, e in token2text[token_start:token_end]])
144
+
145
+
146
def get_phoneme_labels(polyphonic_chars: List[List[str]]
                       ) -> Tuple[List[str], Dict[str, List[int]]]:
    """Build the phoneme label list and the per-character label indices.

    Args:
        polyphonic_chars: ``[char, phoneme]`` pairs.

    Returns:
        ``(labels, char2phonemes)`` where ``labels`` is the sorted list of
        distinct phonemes and ``char2phonemes[char]`` lists, in input order,
        the index in ``labels`` of each phoneme paired with ``char``.
    """
    labels = sorted({phoneme for _, phoneme in polyphonic_chars})
    # One-off index table instead of a linear labels.index() per pair.
    label_index = {label: position for position, label in enumerate(labels)}
    char2phonemes = {}
    for char, phoneme in polyphonic_chars:
        char2phonemes.setdefault(char, []).append(label_index[phoneme])
    return labels, char2phonemes
155
+
156
+
157
def get_char_phoneme_labels(polyphonic_chars: List[List[str]]
                            ) -> Tuple[List[str], Dict[str, List[int]]]:
    """Build ``"char phoneme"`` labels and the per-character label indices.

    Args:
        polyphonic_chars: ``[char, phoneme]`` pairs.

    Returns:
        ``(labels, char2phonemes)`` where ``labels`` is the sorted list of
        distinct ``f'{char} {phoneme}'`` strings and ``char2phonemes[char]``
        lists, in input order, the label index of each of its pairs.
    """
    labels = sorted(
        {f'{char} {phoneme}' for char, phoneme in polyphonic_chars})
    # One-off index table instead of a linear labels.index() per pair.
    label_index = {label: position for position, label in enumerate(labels)}
    char2phonemes = {}
    for char, phoneme in polyphonic_chars:
        char2phonemes.setdefault(char, []).append(
            label_index[f'{char} {phoneme}'])
    return labels, char2phonemes
GPT_SoVITS/text/g2pw/g2pw.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This code is modified from https://github.com/mozillazg/pypinyin-g2pW
2
+
3
+ import pickle
4
+ import os
5
+
6
+ from pypinyin.constants import RE_HANS
7
+ from pypinyin.core import Pinyin, Style
8
+ from pypinyin.seg.simpleseg import simple_seg
9
+ from pypinyin.converter import UltimateConverter
10
+ from pypinyin.contrib.tone_convert import to_tone
11
+ from .onnx_api import G2PWOnnxConverter
12
+
13
+ current_file_path = os.path.dirname(__file__)
14
+ CACHE_PATH = os.path.join(current_file_path, "polyphonic.pickle")
15
+ PP_DICT_PATH = os.path.join(current_file_path, "polyphonic.rep")
16
+ PP_FIX_DICT_PATH = os.path.join(current_file_path, "polyphonic-fix.rep")
17
+
18
+
19
+ class G2PWPinyin(Pinyin):
20
+ def __init__(self, model_dir='G2PWModel/', model_source=None,
21
+ enable_non_tradional_chinese=True,
22
+ v_to_u=False, neutral_tone_with_five=False, tone_sandhi=False, **kwargs):
23
+ self._g2pw = G2PWOnnxConverter(
24
+ model_dir=model_dir,
25
+ style='pinyin',
26
+ model_source=model_source,
27
+ enable_non_tradional_chinese=enable_non_tradional_chinese,
28
+ )
29
+ self._converter = Converter(
30
+ self._g2pw, v_to_u=v_to_u,
31
+ neutral_tone_with_five=neutral_tone_with_five,
32
+ tone_sandhi=tone_sandhi,
33
+ )
34
+
35
+ def get_seg(self, **kwargs):
36
+ return simple_seg
37
+
38
+
39
+ class Converter(UltimateConverter):
40
+ def __init__(self, g2pw_instance, v_to_u=False,
41
+ neutral_tone_with_five=False,
42
+ tone_sandhi=False, **kwargs):
43
+ super(Converter, self).__init__(
44
+ v_to_u=v_to_u,
45
+ neutral_tone_with_five=neutral_tone_with_five,
46
+ tone_sandhi=tone_sandhi, **kwargs)
47
+
48
+ self._g2pw = g2pw_instance
49
+
50
+ def convert(self, words, style, heteronym, errors, strict, **kwargs):
51
+ pys = []
52
+ if RE_HANS.match(words):
53
+ pys = self._to_pinyin(words, style=style, heteronym=heteronym,
54
+ errors=errors, strict=strict)
55
+ post_data = self.post_pinyin(words, heteronym, pys)
56
+ if post_data is not None:
57
+ pys = post_data
58
+
59
+ pys = self.convert_styles(
60
+ pys, words, style, heteronym, errors, strict)
61
+
62
+ else:
63
+ py = self.handle_nopinyin(words, style=style, errors=errors,
64
+ heteronym=heteronym, strict=strict)
65
+ if py:
66
+ pys.extend(py)
67
+
68
+ return _remove_dup_and_empty(pys)
69
+
70
+ def _to_pinyin(self, han, style, heteronym, errors, strict, **kwargs):
71
+ pinyins = []
72
+
73
+ g2pw_pinyin = self._g2pw(han)
74
+
75
+ if not g2pw_pinyin: # g2pw 不支持的汉字改为使用 pypinyin 原有逻辑
76
+ return super(Converter, self).convert(
77
+ han, Style.TONE, heteronym, errors, strict, **kwargs)
78
+
79
+ for i, item in enumerate(g2pw_pinyin[0]):
80
+ if item is None: # g2pw 不支持的汉字改为使用 pypinyin 原有逻辑
81
+ py = super(Converter, self).convert(
82
+ han[i], Style.TONE, heteronym, errors, strict, **kwargs)
83
+ pinyins.extend(py)
84
+ else:
85
+ pinyins.append([to_tone(item)])
86
+
87
+ return pinyins
88
+
89
+
90
+ def _remove_dup_items(lst, remove_empty=False):
91
+ new_lst = []
92
+ for item in lst:
93
+ if remove_empty and not item:
94
+ continue
95
+ if item not in new_lst:
96
+ new_lst.append(item)
97
+ return new_lst
98
+
99
+
100
+ def _remove_dup_and_empty(lst_list):
101
+ new_lst_list = []
102
+ for lst in lst_list:
103
+ lst = _remove_dup_items(lst, remove_empty=True)
104
+ if lst:
105
+ new_lst_list.append(lst)
106
+ else:
107
+ new_lst_list.append([''])
108
+
109
+ return new_lst_list
110
+
111
+
112
+ def cache_dict(polyphonic_dict, file_path):
113
+ with open(file_path, "wb") as pickle_file:
114
+ pickle.dump(polyphonic_dict, pickle_file)
115
+
116
+
117
+ def get_dict():
118
+ if os.path.exists(CACHE_PATH):
119
+ with open(CACHE_PATH, "rb") as pickle_file:
120
+ polyphonic_dict = pickle.load(pickle_file)
121
+ else:
122
+ polyphonic_dict = read_dict()
123
+ cache_dict(polyphonic_dict, CACHE_PATH)
124
+
125
+ return polyphonic_dict
126
+
127
+
128
+ def read_dict():
129
+ polyphonic_dict = {}
130
+ with open(PP_DICT_PATH,encoding="utf-8") as f:
131
+ line = f.readline()
132
+ while line:
133
+ key, value_str = line.split(':')
134
+ value = eval(value_str.strip())
135
+ polyphonic_dict[key.strip()] = value
136
+ line = f.readline()
137
+ with open(PP_FIX_DICT_PATH,encoding="utf-8") as f:
138
+ line = f.readline()
139
+ while line:
140
+ key, value_str = line.split(':')
141
+ value = eval(value_str.strip())
142
+ polyphonic_dict[key.strip()] = value
143
+ line = f.readline()
144
+ return polyphonic_dict
145
+
146
+
147
+ def correct_pronunciation(word,word_pinyins):
148
+ if word in pp_dict:
149
+ word_pinyins = pp_dict[word]
150
+
151
+ return word_pinyins
152
+
153
+
154
+ pp_dict = get_dict()
GPT_SoVITS/text/g2pw/onnx_api.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This code is modified from https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/g2pw
2
+ # This code is modified from https://github.com/GitYCC/g2pW
3
+
4
+ import warnings
5
+ warnings.filterwarnings("ignore")
6
+ import json
7
+ import os
8
+ import zipfile,requests
9
+ from typing import Any
10
+ from typing import Dict
11
+ from typing import List
12
+ from typing import Tuple
13
+
14
+ import numpy as np
15
+ import onnxruntime
16
+ onnxruntime.set_default_logger_severity(3)
17
+ from opencc import OpenCC
18
+ from transformers import AutoTokenizer
19
+ from pypinyin import pinyin
20
+ from pypinyin import Style
21
+
22
+ from .dataset import get_char_phoneme_labels
23
+ from .dataset import get_phoneme_labels
24
+ from .dataset import prepare_onnx_input
25
+ from .utils import load_config
26
+ from ..zh_normalization.char_convert import tranditional_to_simplified
27
+
28
# Version tag for the g2pW model.
# NOTE(review): not referenced by the code visible in this file;
# download_and_decompress() hard-codes "1.1" in its paths and URL instead.
+ model_version = '1.1'
29
+
30
+
31
def predict(session, onnx_input: Dict[str, Any],
            labels: List[str]) -> Tuple[List[str], List[float]]:
    """Run the g2pW session once and decode the top-scoring label for each row.

    Args:
        session: object exposing ``run(output_names, feed)``; the first output
            is used as the score matrix.
        onnx_input: feed arrays keyed by ``input_ids``, ``token_type_ids``,
            ``attention_masks``, ``phoneme_masks``, ``char_ids`` and
            ``position_ids``.
        labels: label strings, indexed by column of the score matrix.

    Returns:
        A pair ``(predictions, confidences)``: for every row, the label at the
        arg-max column and the score found in that column.
    """
    feed = {
        "input_ids": onnx_input['input_ids'],
        "token_type_ids": onnx_input['token_type_ids'],
        # The dict keys are plural while the model's input names are singular.
        "attention_mask": onnx_input['attention_masks'],
        "phoneme_mask": onnx_input['phoneme_masks'],
        "char_ids": onnx_input['char_ids'],
        "position_ids": onnx_input['position_ids']
    }
    probs = session.run([], feed)[0]

    best_columns = np.argmax(probs, axis=1).tolist()
    score_rows = probs.tolist()
    confidences = [row[col] for col, row in zip(best_columns, score_rows)]
    predictions = [labels[col] for col in best_columns]

    return predictions, confidences
52
+
53
+
54
def download_and_decompress(model_dir: str='G2PWModel/'):
    """Make sure the g2pW model directory exists, downloading it if necessary.

    If *model_dir* already exists nothing is done.  Otherwise the
    ``G2PWModel_1.1.zip`` archive is downloaded next to it, extracted into the
    parent directory, and the extracted ``G2PWModel_1.1`` folder is renamed to
    *model_dir*.

    Args:
        model_dir: target directory for the model; a trailing path separator
            is accepted.

    Returns:
        str: *model_dir*, exactly as passed in.

    Raises:
        requests.HTTPError: if the download responds with an error status.
        OSError: if the archive cannot be written, extracted or renamed.
    """
    # Drop any trailing separator first: os.path.dirname('G2PWModel/') is
    # 'G2PWModel', which would otherwise be mistaken for the parent directory.
    target_dir = os.path.normpath(model_dir)
    if not os.path.exists(target_dir):
        parent_directory = os.path.dirname(target_dir)
        if parent_directory:
            os.makedirs(parent_directory, exist_ok=True)
        zip_dir = os.path.join(parent_directory, "G2PWModel_1.1.zip")
        # The rename below assumes the archive unpacks into this folder.
        extract_dir = os.path.join(parent_directory, "G2PWModel_1.1")
        print("Downloading g2pw model...")
        modelscope_url = "https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip"
        # (connect, read) timeouts: without them a stalled connection would
        # block the import of this package forever.
        with requests.get(modelscope_url, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()
            with open(zip_dir, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        print("Extracting g2pw model...")
        with zipfile.ZipFile(zip_dir, "r") as zip_ref:
            zip_ref.extractall(parent_directory or os.curdir)

        # Rename to the directory the caller asked for (and will be handed
        # back), not to a hard-coded "G2PWModel".
        if os.path.normpath(extract_dir) != target_dir:
            os.rename(extract_dir, target_dir)

    return model_dir
76
+
77
class G2PWOnnxConverter:
    """Per-character pronunciation predictor backed by the g2pW ONNX model.

    Calling an instance with one sentence or a list of sentences returns, for
    every sentence, a list with one pronunciation per character.  Characters
    listed as polyphonic are resolved by the ONNX model; all others are filled
    in from the monophonic table or from pypinyin.
    """

    def __init__(self,
                 model_dir: str='G2PWModel/',
                 style: str='bopomofo',
                 model_source: str=None,
                 enable_non_tradional_chinese: bool=False):
        """Load the ONNX session, tokenizer and character tables.

        Args:
            model_dir: directory holding the g2pW model files; fetched through
                ``download_and_decompress`` when missing.
            style: ``'bopomofo'`` to keep model output as is, or ``'pinyin'``
                to convert it with the bundled bopomofo-to-pinyin table.
            model_source: tokenizer name or path; falls back to the value in
                the model's config when empty.
            enable_non_tradional_chinese: when true, sentences are converted
                with OpenCC ``s2tw`` before prediction.

        Raises:
            KeyError: if *style* is neither ``'bopomofo'`` nor ``'pinyin'``.
        """
        uncompress_path = download_and_decompress(model_dir)

        sess_options = onnxruntime.SessionOptions()
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
        sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
        sess_options.intra_op_num_threads = 2
        model_path = os.path.join(uncompress_path, 'g2pW.onnx')
        try:
            self.session_g2pW = onnxruntime.InferenceSession(model_path, sess_options=sess_options, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        except Exception:
            # Creating the session with CUDA listed failed: retry on CPU only.
            # ``Exception`` (not a bare except) so Ctrl-C still interrupts.
            self.session_g2pW = onnxruntime.InferenceSession(model_path, sess_options=sess_options, providers=['CPUExecutionProvider'])
        self.config = load_config(
            config_path=os.path.join(uncompress_path, 'config.py'),
            use_default=True)

        self.model_source = model_source if model_source else self.config.model_source
        self.enable_opencc = enable_non_tradional_chinese

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_source)

        polyphonic_chars_path = os.path.join(uncompress_path,
                                             'POLYPHONIC_CHARS.txt')
        monophonic_chars_path = os.path.join(uncompress_path,
                                             'MONOPHONIC_CHARS.txt')
        self.polyphonic_chars = self._read_tsv_rows(polyphonic_chars_path)
        # Characters listed as polyphonic that are deliberately NOT sent to
        # the model (see the removal from polyphonic_chars_new below).
        self.non_polyphonic = {
            '一', '不', '和', '咋', '嗲', '剖', '差', '攢', '倒', '難', '奔', '勁', '拗',
            '肖', '瘙', '誒', '泊', '听', '噢'
        }
        # Characters listed as monophonic whose table entry is ignored.
        self.non_monophonic = {'似', '攢'}
        self.monophonic_chars = self._read_tsv_rows(monophonic_chars_path)
        self.labels, self.char2phonemes = get_char_phoneme_labels(
            polyphonic_chars=self.polyphonic_chars
        ) if self.config.use_char_phoneme else get_phoneme_labels(
            polyphonic_chars=self.polyphonic_chars)

        self.chars = sorted(list(self.char2phonemes.keys()))

        self.polyphonic_chars_new = set(self.chars)
        for char in self.non_polyphonic:
            self.polyphonic_chars_new.discard(char)

        self.monophonic_chars_dict = {
            char: phoneme
            for char, phoneme in self.monophonic_chars
        }
        for char in self.non_monophonic:
            self.monophonic_chars_dict.pop(char, None)

        self.pos_tags = [
            'UNK', 'A', 'C', 'D', 'I', 'N', 'P', 'T', 'V', 'DE', 'SHI'
        ]

        with open(
                os.path.join(uncompress_path,
                             'bopomofo_to_pinyin_wo_tune_dict.json'),
                'r',
                encoding='utf-8') as fr:
            self.bopomofo_convert_dict = json.load(fr)
        self.style_convert_func = {
            'bopomofo': lambda x: x,
            'pinyin': self._convert_bopomofo_to_pinyin,
        }[style]

        with open(
                os.path.join(uncompress_path, 'char_bopomofo_dict.json'),
                'r',
                encoding='utf-8') as fr:
            self.char_bopomofo_dict = json.load(fr)

        if self.enable_opencc:
            self.cc = OpenCC('s2tw')

    @staticmethod
    def _read_tsv_rows(path: str) -> List[List[str]]:
        """Read a UTF-8 text file and split every line on tabs, closing the file."""
        with open(path, encoding='utf-8') as f:
            return [line.split('\t') for line in f.read().strip().split('\n')]

    def _convert_bopomofo_to_pinyin(self, bopomofo: str) -> str:
        """Convert ``<bopomofo><tone digit>`` to ``<pinyin><tone digit>``.

        Returns ``None`` (after printing a warning) when the bopomofo part is
        missing from the conversion table.
        """
        tone = bopomofo[-1]
        assert tone in '12345'
        component = self.bopomofo_convert_dict.get(bopomofo[:-1])
        if component:
            return component + tone
        else:
            print(f'Warning: "{bopomofo}" cannot convert to pinyin')
            return None

    def __call__(self, sentences: List[str]) -> List[List[str]]:
        """Predict one pronunciation per character for each sentence.

        Args:
            sentences: a list of sentences, or a single sentence string.

        Returns:
            One list per sentence, holding one entry per character.
        """
        if isinstance(sentences, str):
            sentences = [sentences]

        if self.enable_opencc:
            translated_sentences = []
            for sent in sentences:
                translated_sent = self.cc.convert(sent)
                # Results are indexed by character position, so the converted
                # sentence must keep the original length.
                assert len(translated_sent) == len(sent)
                translated_sentences.append(translated_sent)
            sentences = translated_sentences

        texts, query_ids, sent_ids, partial_results = self._prepare_data(
            sentences=sentences)
        if len(texts) == 0:
            # sentences no polyphonic words
            return partial_results

        onnx_input = prepare_onnx_input(
            tokenizer=self.tokenizer,
            labels=self.labels,
            char2phonemes=self.char2phonemes,
            chars=self.chars,
            texts=texts,
            query_ids=query_ids,
            use_mask=self.config.use_mask,
            window_size=None)

        preds, confidences = predict(
            session=self.session_g2pW,
            onnx_input=onnx_input,
            labels=self.labels)
        if self.config.use_char_phoneme:
            # Labels have the form "<char> <phoneme>"; keep the phoneme part.
            preds = [pred.split(' ')[1] for pred in preds]

        results = partial_results
        for sent_id, query_id, pred in zip(sent_ids, query_ids, preds):
            results[sent_id][query_id] = self.style_convert_func(pred)

        return results

    def _prepare_data(
            self, sentences: List[str]
    ) -> Tuple[List[str], List[int], List[int], List[List[str]]]:
        """Split characters into model queries and directly resolved entries.

        Returns:
            ``(texts, query_ids, sent_ids, partial_results)``.  The first three
            lists are parallel and hold one entry per polyphonic character:
            the sentence text, the character index and the sentence index.
            ``partial_results`` has one list per sentence with every
            non-polyphonic position already filled in and ``None`` at the
            positions left for the model.
        """
        texts, query_ids, sent_ids, partial_results = [], [], [], []
        for sent_id, sent in enumerate(sentences):
            # pypinyin works better on Simplified Chinese than on Traditional Chinese
            sent_s = tranditional_to_simplified(sent)
            # NOTE(review): the result is indexed by character position below,
            # which presumes pinyin() yields exactly one item per character of
            # sent; confirm for input containing non-Chinese runs.
            pypinyin_result = pinyin(
                sent_s, neutral_tone_with_five=True, style=Style.TONE3)
            partial_result = [None] * len(sent)
            for i, char in enumerate(sent):
                if char in self.polyphonic_chars_new:
                    texts.append(sent)
                    query_ids.append(i)
                    sent_ids.append(sent_id)
                elif char in self.monophonic_chars_dict:
                    partial_result[i] = self.style_convert_func(
                        self.monophonic_chars_dict[char])
                else:
                    # Everything else (whether or not the character appears in
                    # char_bopomofo_dict) takes pypinyin's reading.
                    partial_result[i] = pypinyin_result[i][0]

            partial_results.append(partial_result)
        return texts, query_ids, sent_ids, partial_results
GPT_SoVITS/text/g2pw/polyphonic-fix.rep ADDED
The diff for this file is too large to render. See raw diff
 
GPT_SoVITS/text/g2pw/polyphonic.rep ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 湖泊: ['hu2','po1']
2
+ 地壳: ['di4','qiao4']
3
+ 柏树: ['bai3','shu4']
4
+ 曝光: ['bao4','guang1']
5
+ 弹力: ['tan2','li4']
6
+ 字帖: ['zi4','tie4']
7
+ 口吃: ['kou3','chi1']
8
+ 包扎: ['bao1','za1']
9
+ 哪吒: ['ne2','zha1']
10
+ 说服: ['shuo1','fu2']
11
+ 识字: ['shi2','zi4']
12
+ 骨头: ['gu3','tou5']
13
+ 对称: ['dui4','chen4']
14
+ 口供: ['kou3','gong4']
15
+ 抹布: ['ma1','bu4']
16
+ 露背: ['lu4','bei4']
17
+ 圈养: ['juan4', 'yang3']
18
+ 眼眶: ['yan3', 'kuang4']
19
+ 品行: ['pin3','xing2']
20
+ 颤抖: ['chan4','dou3']
21
+ 差不多: ['cha4','bu5','duo1']
22
+ 鸭绿江: ['ya1','lu4','jiang1']
23
+ 撒切尔: ['sa4','qie4','er3']
24
+ 比比皆是: ['bi3','bi3','jie1','shi4']
25
+ 身无长物: ['shen1','wu2','chang2','wu4']
26
+ 手里: ['shou2','li3']
27
+ 关卡: ['guan1','qia3']
28
+ 怀揣: ['huai2','chuai1']
29
+ 挑剔: ['tiao1','ti4']
30
+ 供称: ['gong4','cheng1']
31
+ 作坊: ['zuo1', 'fang5']
32
+ 中医: ['zhong1','yi1']
33
+ 嚷嚷: ['rang1','rang5']
34
+ 商厦: ['shang1','sha4']
35
+ 大厦: ['da4','sha4']
36
+ 刹车: ['sha1','che1']
37
+ 嘚瑟: ['de4','se5']
38
+ 朝鲜: ['chao2','xian3']
39
+ 阿房宫: ['e1','pang2','gong1']
40
+ 阿胶: ['e1','jiao1']
41
+ 咖喱: ['ga1','li5']
42
+ 时分: ['shi2','fen1']
43
+ 蚌埠: ['beng4','bu4']
44
+ 驯服: ['xun4','fu2']
45
+ 幸免于难: ['xing4','mian3','yu2','nan4']
46
+ 恶行: ['e4','xing2']
47
+ 唉: ['ai4']
48
+ 扎实: ['zha1','shi2']
49
+ 干将: ['gan4','jiang4']
50
+ 陈威行: ['chen2', 'wei1', 'hang2']
51
+ 郭晟: ['guo1', 'sheng4']
52
+ 中标: ['zhong4', 'biao1']
53
+ 抗住: ['kang2', 'zhu4']
GPT_SoVITS/text/g2pw/utils.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Credits
16
+ This code is modified from https://github.com/GitYCC/g2pW
17
+ """
18
+ import os
19
+ import re
20
+
21
+
22
def wordize_and_map(text: str):
    """Split *text* into words and build index maps between text and words.

    A "word" is either a maximal run of ASCII letters/digits or any single
    other character.  Runs of spaces separate words and belong to no word.

    Returns:
        A triple ``(words, text_to_word, word_to_text)`` where
        ``text_to_word[i]`` is the index of the word containing character
        ``i`` (``None`` for spaces) and ``word_to_text[k]`` is the
        ``(start, end)`` character span of word ``k``.
    """
    words = []
    index_map_from_text_to_word = []
    index_map_from_word_to_text = []

    # One left-to-right scan; the alternatives are tried in the same order as
    # the original checks (spaces, ASCII alphanumerics, any single character).
    # This avoids re-slicing the remaining text after every word.
    scanner = re.compile(
        r'(?P<space> +)|(?P<alnum>[a-zA-Z0-9]+)|(?P<single>.)', re.DOTALL)
    for match in scanner.finditer(text):
        piece = match.group(0)
        if match.lastgroup == 'space':
            index_map_from_text_to_word += [None] * len(piece)
            continue

        index_map_from_word_to_text.append((match.start(), match.end()))
        index_map_from_text_to_word += [len(words)] * len(piece)
        words.append(piece)

    return words, index_map_from_text_to_word, index_map_from_word_to_text
56
+
57
+
58
def tokenize_and_map(tokenizer, text: str):
    """Tokenize *text* word by word and map tokens to character spans.

    Args:
        tokenizer: object with a ``tokenize(str) -> list[str]`` method.
        text: input text.

    Returns:
        A triple ``(tokens, text_to_token, token_to_text)`` where
        ``text_to_token[i]`` is the index of the token covering character
        ``i`` (``None`` for spaces) and ``token_to_text[k]`` is the
        ``(start, end)`` character span of token ``k``.
    """
    words, text2word, word2text = wordize_and_map(text=text)

    tokens = []
    index_map_from_token_to_text = []
    for word, (word_start, word_end) in zip(words, word2text):
        pieces = tokenizer.tokenize(word)

        if len(pieces) == 0 or pieces == ['[UNK]']:
            # An unknown word becomes one [UNK] token spanning the whole word.
            tokens.append('[UNK]')
            index_map_from_token_to_text.append((word_start, word_end))
            continue

        cursor = word_start
        for piece in pieces:
            # A leading "##" marks a continuation piece and is not part of
            # the text, so it is excluded from the span length.
            visible = piece[2:] if piece.startswith('##') else piece
            piece_end = cursor + len(visible)
            index_map_from_token_to_text.append((cursor, piece_end))
            tokens.append(piece)
            cursor = piece_end

    # The word-level map is reused and overwritten in place with token indices.
    index_map_from_text_to_token = text2word
    for token_index, (span_start, span_end) in enumerate(
            index_map_from_token_to_text):
        for char_pos in range(span_start, span_end):
            index_map_from_text_to_token[char_pos] = token_index

    return tokens, index_map_from_text_to_token, index_map_from_token_to_text
84
+
85
+
86
+ def _load_config(config_path: os.PathLike):
87
+ import importlib.util
88
+ spec = importlib.util.spec_from_file_location('__init__', config_path)
89
+ config = importlib.util.module_from_spec(spec)
90
+ spec.loader.exec_module(config)
91
+ return config
92
+
93
+
94
# Fallback values applied by load_config(..., use_default=True) for every
# setting that a model's config file does not define itself.
default_config_dict = {
    'manual_seed': 1313,
    'model_source': 'bert-base-chinese',
    'window_size': 32,
    'num_workers': 2,
    'use_mask': True,
    'use_char_phoneme': False,
    'use_conditional': True,
    'param_conditional': {
        'affect_location': 'softmax',
        'bias': True,
        'char-linear': True,
        'pos-linear': False,
        'char+pos-second': True,
        'char+pos-second_lowrank': False,
        'lowrank_size': 0,
        'char+pos-second_fm': False,
        'fm_size': 0,
        'fix_mode': None,
        'count_json': 'train.count.json'
    },
    'lr': 5e-5,
    'val_interval': 200,
    'num_iter': 10000,
    'use_focal': False,
    'param_focal': {
        'alpha': 0.0,
        'gamma': 0.7
    },
    'use_pos': True,
    # Key fixed from 'param_pos ' (trailing space), which could never match a
    # config attribute and was installed under an unusable attribute name.
    'param_pos': {
        'weight': 0.1,
        'pos_joint_training': True,
        'train_pos_path': 'train.pos',
        'valid_pos_path': 'dev.pos',
        'test_pos_path': 'test.pos'
    }
}
132
+
133
+
134
def load_config(config_path: os.PathLike, use_default: bool=False):
    """Load a config module, optionally filling gaps from the defaults.

    Args:
        config_path: path of the Python config file to execute.
        use_default: when true, every key of ``default_config_dict`` missing
            from the config is added; for dict-valued defaults, missing
            sub-keys are added to the config's own dict.

    Returns:
        The loaded config module.
    """
    import copy

    config = _load_config(config_path)
    if not use_default:
        return config

    for attr, val in default_config_dict.items():
        if not hasattr(config, attr):
            # Copy, so that later edits to the config cannot leak back into
            # the shared module-level defaults.
            setattr(config, attr, copy.deepcopy(val))
        elif isinstance(val, dict):
            d = getattr(config, attr)
            for dict_k, dict_v in val.items():
                if dict_k not in d:
                    d[dict_k] = copy.deepcopy(dict_v)
    return config
GPT_SoVITS/text/hindi.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import List, Set
3
+
4
+ # Hindi text normalization rules
5
# Devanagari digits and the ASCII digits they correspond to, index for index.
+ _hindi_numbers = "०१२३४५६७८९"
6
+ _english_numbers = "0123456789"
7
# Table for str.translate(): maps each Devanagari digit to its ASCII digit.
+ _number_map = str.maketrans(_hindi_numbers, _english_numbers)
8
+
9
+ # Common abbreviations and their expansions
10
+ _abbreviations = {
11
+ # Titles and honorifics
12
+ "डॉ": "डॉक्टर",
13
+ "श्री": "श्रीमान",
14
+ "श्रीमती": "श्रीमती",
15
+ "कु": "कुमारी",
16
+ "प्रो": "प्रोफेसर",
17
+ "चौ": "चौधरी",
18
+ "स्व": "स्वर्गीय",
19
+
20
+ # Common organizations
21
+ "भा": "भारत",
22
+ "सं": "संघ",
23
+ "वि": "विश्वविद्यालय",
24
+ "म": "महा",
25
+
26
+ # Common words
27
+ "क्र": "क्रमांक",
28
+ "रु": "रुपये",
29
+ "ज़ि": "ज़िला",
30
+ "उ": "उत्तर",
31
+ "द": "दक्षिण",
32
+ "पू": "पूर्व",
33
+ "प": "पश्चिम"
34
+ }
35
+
36
+ # Common conjunct consonants (consonant clusters)
37
+ _common_conjuncts = {
38
+ # क-based conjuncts
39
+ "क्क", "क्त", "क्र", "क्ल", "क्व", "क्ष", "क्स",
40
+ # ग-based conjuncts
41
+ "ग्र", "ग्ल", "ग्व", "ग्न", "ग्म",
42
+ # च-based conjuncts
43
+ "च्च", "च्छ", "च्य", "च्र",
44
+ # ज-based conjuncts
45
+ "ज्ज", "ज्ञ", "ज्य", "ज्र", "ज्व",
46
+ # त-based conjuncts
47
+ "त्त", "त्र", "त्य", "त्व", "त्न", "त्म",
48
+ # द-based conjuncts
49
+ "द्द", "द्य", "द्व", "द्र", "द्म", "द्ध",
50
+ # न-based conjuncts
51
+ "न्न", "न्त", "न्द", "न्य", "न्र", "न्व",
52
+ # प-based conjuncts
53
+ "प्प", "प्त", "प्र", "प्ल", "प्स",
54
+ # ब-based conjuncts
55
+ "ब्र", "ब्ल", "ब्ज",
56
+ # म-based conjuncts
57
+ "म्प", "म्ब", "म्म", "म्ल", "म्र",
58
+ # य-based conjuncts
59
+ "य्य", "य्र",
60
+ # र-based conjuncts (reph forms)
61
+ "र्क", "र्ग", "र्च", "र्ज", "र्त", "र्द", "र्प", "र्ब", "र्म", "र्य", "र्ल", "र्व", "र्श", "र्स", "र्ह",
62
+ # ल-based conjuncts
63
+ "ल्क", "ल्ग", "ल्ट", "ल्ड", "ल्प", "ल्म", "ल्ल", "ल्व",
64
+ # श-based conjuncts
65
+ "श्च", "श्न", "श्प", "श्म", "श्य", "श्र", "श्ल", "श्व",
66
+ # स-based conjuncts
67
+ "स्क", "स्ट", "स्त", "स्थ", "स्न", "स्प", "स्फ", "स्म", "स्य", "स्र", "स्व", "स्स",
68
+ # ह-based conjuncts
69
+ "ह्य", "ह्र", "ह्व", "ह्ल", "ह्न", "ह्म"
70
+ }
71
+
72
+ def _is_final_position(text: str, pos: int) -> bool:
73
+ """Check if the position is at the end of a word."""
74
+ return pos == len(text) - 1 or text[pos + 1] in {' ', ',', '.', '!', '?', '-'}
75
+
76
+ def _is_light_syllable(text: str, pos: int) -> bool:
77
+ """Check if the syllable at given position is light (no long vowels or conjuncts)."""
78
+ if pos >= len(text) - 1:
79
+ return True
80
+ next_char = text[pos + 1]
81
+ return not (('\u093E' <= next_char <= '\u094C') or next_char == '\u094D')
82
+
83
# Collects the character offsets in `text` at which hindi_to_phonemes() should
# not emit the implicit vowel. The text is split on whitespace and each word is
# scanned character by character.
# NOTE(review): indentation was lost in this rendering, so the nesting of
# rules 2 and 3 relative to the "basic conditions" test cannot be confirmed
# from here; the notes below only state what holds for any nesting.
+ def get_schwa_deletion_positions(text: str) -> Set[int]:
84
+ """Determine positions where schwa should be deleted in Hindi words.
85
+ Enhanced with more accurate rules."""
86
+ positions = set()
87
+ words = text.split()
88
+
89
+ for word in words:
90
# NOTE(review): str.find() returns the FIRST occurrence of `word`, so a word
# that repeats (or also occurs inside an earlier word) is given the offset of
# that earlier occurrence rather than its own.
+ word_start = text.find(word)
91
+ length = len(word)
92
+
93
+ for i in range(length):
94
+ pos = word_start + i
95
+
96
+ # Basic conditions for schwa deletion
97
+ if ('\u0915' <= word[i] <= '\u0939' and # Current char is consonant
98
+ i < length - 1 and
99
+ not '\u093E' <= word[i + 1] <= '\u094D'): # Next char is not a vowel mark
100
+
101
+ # Rule 1: Delete schwa in final position of word
102
# NOTE(review): this test directly follows a condition requiring
# i < length - 1, so i == length - 1 cannot be true here; the word-final
# rule never fires as written.
+ if i == length - 1:
103
+ positions.add(pos)
104
+ continue
105
+
106
+ # Rule 2: Delete schwa between consonants in non-final light syllables
107
+ if (i < length - 2 and
108
+ '\u0915' <= word[i + 1] <= '\u0939' and # Next char is consonant
109
+ _is_light_syllable(word, i)):
110
+ positions.add(pos)
111
+ continue
112
+
113
+ # Rule 3: Delete schwa in compound words at morpheme boundaries
114
+ if (i < length - 2 and
115
+ word[i + 1] == '\u094D' and # Virama
116
+ '\u0915' <= word[i + 2] <= '\u0939'): # Followed by consonant
117
+ positions.add(pos)
118
+ continue
119
+
120
+ return positions
121
+
122
def normalize_hindi_text(text: str) -> str:
    """Normalize Hindi text before phoneme conversion.

    Steps, in order: Devanagari digits are mapped to ASCII digits, stand-alone
    abbreviations from ``_abbreviations`` are expanded, whitespace runs are
    collapsed to one space, chandrabindu becomes anusvara, nukta consonants
    are reduced to their base consonant, and every character other than
    Devanagari, whitespace and ``. , ! ? -`` is removed.

    Args:
        text: raw input text.

    Returns:
        The normalized text with leading/trailing whitespace stripped.
    """
    # Convert Hindi numbers to English numbers
    # NOTE(review): the final filter below removes ASCII digits again, so
    # numbers do not survive normalization; confirm whether that is intended.
    text = text.translate(_number_map)

    # Replace abbreviations with their full forms.
    # ``\b`` cannot be used here: Python's ``\w`` does not match Devanagari
    # vowel signs or the virama, so ``\b`` sees a boundary inside ordinary
    # words and an abbreviation would be expanded in the middle of them.
    # Instead an abbreviation must not touch any Devanagari letter or sign
    # (danda and digits do not count).  Longest keys are tried first and the
    # text is rewritten in a single pass so an expansion is never re-expanded.
    if _abbreviations:
        alternatives = '|'.join(
            re.escape(abbr)
            for abbr in sorted(_abbreviations, key=len, reverse=True))
        devanagari = r'[\u0900-\u0963\u0971-\u097F]'
        abbr_pattern = re.compile(
            f'(?<!{devanagari})(?:{alternatives})(?!{devanagari})')
        text = abbr_pattern.sub(lambda m: _abbreviations[m.group(0)], text)

    # Remove extra spaces
    text = re.sub(r'\s+', ' ', text)

    # Normalize chandrabindu to anusvara
    text = text.replace('\u0901', '\u0902')

    # Normalize nukta variations of क ख ग ज ड ढ फ to the base consonant, both
    # as precomposed code points (U+0958..U+095E) and as base + U+093C.
    precomposed_nukta = str.maketrans({
        '\u0958': '\u0915', '\u0959': '\u0916', '\u095A': '\u0917',
        '\u095B': '\u091C', '\u095C': '\u0921', '\u095D': '\u0922',
        '\u095E': '\u092B',
    })
    text = text.translate(precomposed_nukta)
    text = re.sub(
        '([\u0915\u0916\u0917\u091C\u0921\u0922\u092B])\u093C', r'\1', text)

    # Remove any non-Devanagari characters except basic punctuation
    text = re.sub(r'[^\u0900-\u097F\s.,!?-]', '', text)

    return text.strip()
149
+
150
# Normalizes `text`, then walks it character by character and returns the
# collected symbols joined by single spaces: 'SP' for a space, punctuation
# as is, consonants, vowel signs, an implicit 'अ' after a consonant that has no
# vowel sign, independent vowels, anusvara and visarga. Any other character is
# skipped.
# NOTE(review): indentation was lost in this rendering; in particular it cannot
# be confirmed whether the "Check for vowel marks" step runs after every
# consonant or only in the "Single consonant" branch, which changes what is
# emitted after a virama. Resolve against the real file before refactoring.
+ def hindi_to_phonemes(text: str) -> str:
151
+ """Convert Hindi text to phonemes."""
152
+ text = normalize_hindi_text(text)
153
+ phonemes = []
154
+
155
+ i = 0
156
+ while i < len(text):
157
+ char = text[i]
158
+
159
+ # Skip spaces and punctuation
160
+ if char in ' .,!?-':
161
+ if char == ' ':
162
+ phonemes.append('SP')
163
+ else:
164
+ phonemes.append(char)
165
+ i += 1
166
+ continue
167
+
168
+ # Handle consonants
169
+ if '\u0915' <= char <= '\u0939':
170
+ # Check for conjuncts
171
+ if i + 2 < len(text) and text[i + 1] == '\u094D':
172
+ # Look ahead for multi-consonant conjuncts
173
+ j = i + 2
174
# At this point `conjunct` is the three characters text[i], text[i+1], text[i+2].
+ conjunct = text[i:j + 1]
175
+ while (j < len(text) and text[j] != ' ' and
176
+ conjunct in _common_conjuncts):
177
+ j += 1
178
+ if j < len(text) and text[j-1] == '\u094D':
179
+ conjunct = text[i:j + 1]
180
+ else:
181
+ break
182
+
183
# NOTE(review): the lookup drops the last character of `conjunct` before
# testing membership, while the loop above tests the full string. The entries
# of _common_conjuncts appear to be consonant + virama + consonant, so confirm
# that this shortened lookup can ever succeed.
+ if conjunct[:-1] in _common_conjuncts:
184
+ phonemes.append(conjunct[:-1])
185
+ i = j
186
+ else:
187
+ # Handle as single consonant if not a known conjunct
188
+ phonemes.append(char)
189
+ i += 1
190
+ else:
191
+ # Single consonant
192
+ phonemes.append(char)
193
+
194
+ # Check for vowel marks
195
+ if i + 1 < len(text) and '\u093E' <= text[i + 1] <= '\u094C':
196
+ phonemes.append(text[i + 1])
197
+ i += 2
198
+ else:
199
+ # Add implicit 'अ' if no vowel mark and not a schwa deletion position
200
# NOTE(review): get_schwa_deletion_positions(text) is called again with the
# same `text` every time this line is reached; `text` is not reassigned inside
# the loop, so the call looks hoistable to before the loop.
+ if i not in get_schwa_deletion_positions(text):
201
+ phonemes.append('अ')
202
+ i += 1
203
+
204
+ # Handle independent vowels
205
+ elif '\u0904' <= char <= '\u0914':
206
+ phonemes.append(char)
207
+ i += 1
208
+
209
+ # Handle anusvara and visarga
210
+ elif char in ['\u0902', '\u0903']:
211
+ phonemes.append(char)
212
+ i += 1
213
+
214
+ else:
215
+ i += 1
216
+
217
+ return ' '.join(phonemes)
218
+
219
def get_phoneme_sequence(text: str) -> List[str]:
    """Return the phonemes of *text* as a list.

    The string produced by ``hindi_to_phonemes`` is split on whitespace.
    """
    return hindi_to_phonemes(text).split()
GPT_SoVITS/text/japanese.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modified from https://github.com/CjangCjengh/vits/blob/main/text/japanese.py
2
+ import re
3
+ import os
4
+ import hashlib
5
import pyopenjtalk

current_file_path = os.path.dirname(__file__)


def get_hash(fp: str) -> str:
    """Return the hex MD5 digest of the file at *fp*, read in 4 KiB chunks."""
    hash_md5 = hashlib.md5()
    with open(fp, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


USERDIC_CSV_PATH = os.path.join(current_file_path, "ja_userdic", "userdict.csv")
USERDIC_BIN_PATH = os.path.join(current_file_path, "ja_userdic", "user.dict")
USERDIC_HASH_PATH = os.path.join(current_file_path, "ja_userdic", "userdict.md5")


def _read_stored_userdic_hash():
    """Return the hash recorded for the compiled user dictionary, or None if absent."""
    if not os.path.exists(USERDIC_HASH_PATH):
        return None
    with open(USERDIC_HASH_PATH, "r", encoding='utf-8') as f:
        return f.read()


try:
    # Compile the user dictionary when it has never been compiled, and
    # recompile it when the CSV's MD5 differs from the recorded one.  A
    # missing hash file counts as "differs" instead of raising.
    if os.path.exists(USERDIC_CSV_PATH):
        csv_hash = get_hash(USERDIC_CSV_PATH)
        if not os.path.exists(USERDIC_BIN_PATH) or csv_hash != _read_stored_userdic_hash():
            pyopenjtalk.mecab_dict_index(USERDIC_CSV_PATH, USERDIC_BIN_PATH)
            with open(USERDIC_HASH_PATH, "w", encoding='utf-8') as f:
                f.write(csv_hash)

    if os.path.exists(USERDIC_BIN_PATH):
        pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)
except Exception:
    # failed to load user dictionary, ignore (the user dictionary is optional).
    pass
32
+
33
+
34
+ from text.symbols import punctuation
35
+ # Regular expression matching Japanese without punctuation marks:
36
+ _japanese_characters = re.compile(
37
+ r"[A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]"
38
+ )
39
+
40
+ # Regular expression matching non-Japanese characters or punctuation marks:
41
+ _japanese_marks = re.compile(
42
+ r"[^A-Za-z\d\u3005\u3040-\u30ff\u4e00-\u9fff\uff11-\uff19\uff21-\uff3a\uff41-\uff5a\uff66-\uff9d]"
43
+ )
44
+
45
+ # List of (symbol, Japanese) pairs for marks:
46
+ _symbols_to_japanese = [(re.compile("%s" % x[0]), x[1]) for x in [("%", "パーセント")]]
47
+
48
+
49
+ # List of (consonant, sokuon) pairs:
50
+ _real_sokuon = [
51
+ (re.compile("%s" % x[0]), x[1])
52
+ for x in [
53
+ (r"Q([↑↓]*[kg])", r"k#\1"),
54
+ (r"Q([↑↓]*[tdjʧ])", r"t#\1"),
55
+ (r"Q([↑↓]*[sʃ])", r"s\1"),
56
+ (r"Q([↑↓]*[pb])", r"p#\1"),
57
+ ]
58
+ ]
59
+
60
+ # List of (consonant, hatsuon) pairs:
61
+ _real_hatsuon = [
62
+ (re.compile("%s" % x[0]), x[1])
63
+ for x in [
64
+ (r"N([↑↓]*[pbm])", r"m\1"),
65
+ (r"N([↑↓]*[ʧʥj])", r"n^\1"),
66
+ (r"N([↑↓]*[tdn])", r"n\1"),
67
+ (r"N([↑↓]*[kg])", r"ŋ\1"),
68
+ ]
69
+ ]
70
+
71
+
72
def post_replace_ph(ph):
    """Map a punctuation symbol to its canonical form; return other input unchanged."""
    canonical = {
        ":": ",",
        ";": ",",
        ",": ",",
        "。": ".",
        "!": "!",
        "?": "?",
        "\n": ".",
        "·": ",",
        "、": ",",
        "...": "…",
    }
    return canonical.get(ph, ph)
89
+
90
+
91
def replace_consecutive_punctuation(text):
    """Collapse each run of two or more punctuation marks to its first mark.

    The marks considered are the entries of ``punctuation``.
    """
    mark_class = '[' + ''.join(map(re.escape, punctuation)) + ']'
    return re.sub(f'({mark_class})({mark_class})+', r'\1', text)
96
+
97
+
98
def symbols_to_japanese(text):
    """Apply every (pattern, replacement) pair of ``_symbols_to_japanese`` to *text*."""
    for pattern, spoken_form in _symbols_to_japanese:
        text = pattern.sub(spoken_form, text)
    return text
102
+
103
+
104
def preprocess_jap(text, with_prosody=False):
    """Convert Japanese text to a flat list of phoneme and punctuation symbols.

    Symbols such as ``%`` are spelled out, the text is lower-cased and split at
    every character matched by ``_japanese_marks``.  Each segment that starts
    with a Japanese/alphanumeric character is sent through pyopenjtalk, and
    the separating marks are re-inserted between segments (spaces excepted).

    Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html
    """
    # English words to lower case, should have no influence on japanese words.
    text = symbols_to_japanese(text).lower()
    segments = re.split(_japanese_marks, text)
    marks = re.findall(_japanese_marks, text)

    phones = []
    for idx, segment in enumerate(segments):
        if re.match(_japanese_characters, segment):
            if with_prosody:
                # Drop the first and last symbol of the prosody sequence.
                phones += pyopenjtalk_g2p_prosody(segment)[1:-1]
            else:
                phones += pyopenjtalk.g2p(segment).split(" ")

        # A bare space is dropped to avoid an unexpected UNK symbol.
        if idx < len(marks) and marks[idx] != " ":
            phones += [marks[idx].replace(" ", "")]
    return phones
125
+
126
+
127
def text_normalize(text):
    """Normalize Japanese text; currently only collapses repeated punctuation."""
    # todo: jap text normalize

    # Collapse repeated punctuation so it cannot cause reference leakage.
    return replace_consecutive_punctuation(text)
133
+
134
+ # Copied from espnet https://github.com/espnet/espnet/blob/master/espnet2/text/phoneme_tokenizer.py
135
def pyopenjtalk_g2p_prosody(text, drop_unvoiced_vowels=True):
    """Extract a phoneme + prosody symbol sequence from full-context labels.

    The algorithm is based on `Prosodic features control by symbols as input of
    sequence-to-sequence acoustic modeling for neural TTS`_ with some r9y9's tweaks.

    Args:
        text (str): Input text.
        drop_unvoiced_vowels (bool): whether to lower-case the upper-case
            vowels ``A E I O U`` found in the labels.

    Returns:
        List[str]: List of phoneme + prosody symbols.

    Examples:
        >>> from espnet2.text.phoneme_tokenizer import pyopenjtalk_g2p_prosody
        >>> pyopenjtalk_g2p_prosody("こんにちは。")
        ['^', 'k', 'o', '[', 'N', 'n', 'i', 'ch', 'i', 'w', 'a', '$']

    .. _`Prosodic features control by symbols as input of sequence-to-sequence acoustic
        modeling for neural TTS`: https://doi.org/10.1587/transinf.2020EDP7104

    """
    labels = pyopenjtalk.make_label(pyopenjtalk.run_frontend(text))
    last_index = len(labels) - 1

    phones = []
    for n, lab_curr in enumerate(labels):
        # current phoneme: the text between '-' and '+' in the label
        p3 = re.search(r"\-(.*?)\+", lab_curr).group(1)
        # deal unvoiced vowels as normal vowels
        if drop_unvoiced_vowels and p3 in "AEIOU":
            p3 = p3.lower()

        # deal with sil at the beginning and the end of text
        if p3 == "sil":
            assert n == 0 or n == last_index
            if n == 0:
                phones.append("^")
            elif n == last_index:
                # check question form or not
                e3 = _numeric_feature_by_regex(r"!(\d+)_", lab_curr)
                if e3 == 0:
                    phones.append("$")
                elif e3 == 1:
                    phones.append("?")
            continue

        if p3 == "pau":
            phones.append("_")
            continue

        phones.append(p3)

        # accent type and position info (forward or backward)
        a1 = _numeric_feature_by_regex(r"/A:([0-9\-]+)\+", lab_curr)
        a2 = _numeric_feature_by_regex(r"\+(\d+)\+", lab_curr)
        a3 = _numeric_feature_by_regex(r"\+(\d+)/", lab_curr)

        # number of mora in accent phrase
        f1 = _numeric_feature_by_regex(r"/F:(\d+)_", lab_curr)

        a2_next = _numeric_feature_by_regex(r"\+(\d+)\+", labels[n + 1])

        if a3 == 1 and a2_next == 1 and p3 in "aeiouAEIOUNcl":
            # accent phrase border
            phones.append("#")
        elif a1 == 0 and a2_next == a2 + 1 and a2 != f1:
            # pitch falling
            phones.append("]")
        elif a2 == 1 and a2_next == 2:
            # pitch rising
            phones.append("[")

    return phones
209
+
210
+ # Copied from espnet https://github.com/espnet/espnet/blob/master/espnet2/text/phoneme_tokenizer.py
211
+ def _numeric_feature_by_regex(regex, s):
212
+ match = re.search(regex, s)
213
+ if match is None:
214
+ return -50
215
+ return int(match.group(1))
216
+
217
def g2p(norm_text, with_prosody=True):
    """Convert normalized Japanese text to a list of phoneme symbols.

    The output of ``preprocess_jap`` is passed symbol by symbol through
    ``post_replace_ph``.
    """
    # todo: implement tones and word2ph
    return [post_replace_ph(symbol)
            for symbol in preprocess_jap(norm_text, with_prosody)]
222
+
223
+
224
# Manual smoke test: when the module is run as a script, print the phonemes
# produced by g2p() for a sentence mixing Latin and Japanese text.
+ if __name__ == "__main__":
225
+ phones = g2p("Hello.こんにちは!今日もNiCe天気ですね!tokyotowerに行きましょう!")
226
+ print(phones)
GPT_SoVITS/text/korean.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # reference: https://github.com/ORI-Muchim/MB-iSTFT-VITS-Korean/blob/main/text/korean.py
2
+
3
+ import re
4
+ from jamo import h2j, j2hcj
5
+ import ko_pron
6
+ from g2pk2 import G2p
7
+
8
+ from text.symbols2 import symbols
9
+
10
+ # This is a list of Korean classifiers preceded by pure Korean numerals.
11
+ _korean_classifiers = '군데 권 개 그루 닢 대 두 마리 모 모금 뭇 발 발짝 방 번 벌 보루 살 수 술 시 쌈 움큼 정 짝 채 척 첩 축 켤레 톨 통'
12
+
13
+ # List of (hangul, hangul divided) pairs:
14
+ _hangul_divided = [(re.compile('%s' % x[0]), x[1]) for x in [
15
+ # ('ㄳ', 'ㄱㅅ'), # g2pk2, A Syllable-ending Rule
16
+ # ('ㄵ', 'ㄴㅈ'),
17
+ # ('ㄶ', 'ㄴㅎ'),
18
+ # ('ㄺ', 'ㄹㄱ'),
19
+ # ('ㄻ', 'ㄹㅁ'),
20
+ # ('ㄼ', 'ㄹㅂ'),
21
+ # ('ㄽ', 'ㄹㅅ'),
22
+ # ('ㄾ', 'ㄹㅌ'),
23
+ # ('ㄿ', 'ㄹㅍ'),
24
+ # ('ㅀ', 'ㄹㅎ'),
25
+ # ('ㅄ', 'ㅂㅅ'),
26
+ ('ㅘ', 'ㅗㅏ'),
27
+ ('ㅙ', 'ㅗㅐ'),
28
+ ('ㅚ', 'ㅗㅣ'),
29
+ ('ㅝ', 'ㅜㅓ'),
30
+ ('ㅞ', 'ㅜㅔ'),
31
+ ('ㅟ', 'ㅜㅣ'),
32
+ ('ㅢ', 'ㅡㅣ'),
33
+ ('ㅑ', 'ㅣㅏ'),
34
+ ('ㅒ', 'ㅣㅐ'),
35
+ ('ㅕ', 'ㅣㅓ'),
36
+ ('ㅖ', 'ㅣㅔ'),
37
+ ('ㅛ', 'ㅣㅗ'),
38
+ ('ㅠ', 'ㅣㅜ')
39
+ ]]
40
+
41
+ # List of (Latin alphabet, hangul) pairs:
42
+ _latin_to_hangul = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
43
+ ('a', '에이'),
44
+ ('b', '비'),
45
+ ('c', '시'),
46
+ ('d', '디'),
47
+ ('e', '이'),
48
+ ('f', '에프'),
49
+ ('g', '지'),
50
+ ('h', '에이치'),
51
+ ('i', '아이'),
52
+ ('j', '제이'),
53
+ ('k', '케이'),
54
+ ('l', '엘'),
55
+ ('m', '엠'),
56
+ ('n', '엔'),
57
+ ('o', '오'),
58
+ ('p', '피'),
59
+ ('q', '큐'),
60
+ ('r', '아르'),
61
+ ('s', '에스'),
62
+ ('t', '티'),
63
+ ('u', '유'),
64
+ ('v', '브이'),
65
+ ('w', '더블유'),
66
+ ('x', '엑스'),
67
+ ('y', '와이'),
68
+ ('z', '제트')
69
+ ]]
70
+
71
+ # List of (ipa, lazy ipa) pairs:
72
+ _ipa_to_lazy_ipa = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
73
+ ('t͡ɕ','ʧ'),
74
+ ('d͡ʑ','ʥ'),
75
+ ('ɲ','n^'),
76
+ ('ɕ','ʃ'),
77
+ ('ʷ','w'),
78
+ ('ɭ','l`'),
79
+ ('ʎ','ɾ'),
80
+ ('ɣ','ŋ'),
81
+ ('ɰ','ɯ'),
82
+ ('ʝ','j'),
83
+ ('ʌ','ə'),
84
+ ('ɡ','g'),
85
+ ('\u031a','#'),
86
+ ('\u0348','='),
87
+ ('\u031e',''),
88
+ ('\u0320',''),
89
+ ('\u0339','')
90
+ ]]
91
+
92
+
93
def fix_g2pk2_error(text):
    """Replace the 'ㄹ' that follows 'ㅇㅡㄹ ' or 'ㄹㅡㄹ ' with 'ㄴ'.

    Scanning left to right, every non-overlapping occurrence of
    'ㅇㅡㄹ ㄹ' or 'ㄹㅡㄹ ㄹ' has its trailing 'ㄹ' rewritten to 'ㄴ'.
    Everything else in ``text`` is returned unchanged.
    """
    return re.sub(r'(ㅇㅡㄹ|ㄹㅡㄹ) ㄹ', r'\1 ㄴ', text)
106
+
107
+
108
def latin_to_hangul(text):
    """Replace each Latin letter in ``text`` using the ``_latin_to_hangul`` table.

    The substitutions are applied one table entry after another, in table
    order.
    """
    for pattern, reading in _latin_to_hangul:
        text = pattern.sub(reading, text)
    return text
112
+
113
+
114
def divide_hangul(text):
    """Break Hangul text into jamo and split compound vowels.

    ``text`` is first converted with ``j2hcj(h2j(text))`` and then every
    substitution in ``_hangul_divided`` is applied in table order.
    """
    jamo_text = j2hcj(h2j(text))
    for pattern, parts in _hangul_divided:
        jamo_text = pattern.sub(parts, jamo_text)
    return jamo_text
119
+
120
+
121
def hangul_number(num, sino=True):
    '''Spell out a string of decimal digits in Hangul.

    Reference https://github.com/Kyubyong/g2pK

    Args:
        num: Digit string. Thousands separators (",") are ignored.
        sino: When true, every place uses Sino-Korean numerals
            (일, 이, 삼, ...). When false, the ones and tens places use the
            native-Korean forms (한, 두, ... / 열, 스물, ...); higher places
            are still spelled with Sino-Korean numerals.

    Returns:
        The spelled-out number. A number whose significant part (leading
        zeros dropped) is longer than 16 digits exceeds the units known
        here and is read digit by digit instead of raising.
    '''
    num = num.replace(',', '')

    if num == '0':
        return '영'
    if not sino and num == '20':
        return '스무'

    digits = '123456789'
    digit2name = dict(zip(digits, '일이삼사오육칠팔구'))
    digit2mod = dict(zip(digits, '한 두 세 네 다섯 여섯 일곱 여덟 아홉'.split()))
    digit2dec = dict(zip(digits, '열 스물 서른 마흔 쉰 예순 일흔 여든 아흔'.split()))

    # Unit suffix for each place, indexed by distance from the last digit.
    units = (
        '', '십', '백', '천',
        '만', '십', '백', '천',
        '억', '십', '백', '천',
        '조', '십', '백', '천',
    )

    if len(num.lstrip('0')) > len(units):
        # No unit is defined for these places; previously this path failed
        # with UnboundLocalError. Fall back to reading the digits one by one.
        zero_to_nine = dict(digit2name)
        zero_to_nine['0'] = '영'
        return ''.join(zero_to_nine.get(digit, digit) for digit in num)

    spelledout = []
    for offset, digit in enumerate(num):
        place = len(num) - offset - 1

        if digit == '0':
            # A zero is silent, except that a 만/억/조 boundary still needs
            # its unit when any of the (up to three) preceding places was
            # spoken.
            if place in (4, 8, 12) and any(spelledout[-3:]):
                spelledout.append(units[place])
            else:
                spelledout.append('')
            continue

        if place == 0:
            name = (digit2name if sino else digit2mod).get(digit, '')
        elif place == 1 and not sino:
            name = digit2dec.get(digit, '')
        elif digit == '1' and place <= 7:
            # Up to the 천만 place a leading "일" is dropped: 십, 백, 천, 만.
            name = units[place]
        else:
            name = digit2name.get(digit, '') + units[place]
        spelledout.append(name)
    return ''.join(spelledout)
198
+
199
+
200
def number_to_hangul(text):
    '''Spell out the numbers in ``text`` in Hangul.

    Reference https://github.com/Kyubyong/g2pK

    A number immediately followed by Hangul is spelled with
    ``hangul_number``. Native-Korean numerals are used when the following
    word starts with one of ``_korean_classifiers`` (compared as whole
    classifiers, using the word's first one or two syllables); Sino-Korean
    numerals are used otherwise. Any digit left over afterwards is read
    digit by digit.

    Each number is replaced at its own position, so a short token such as
    "1개" can no longer rewrite the tail of a longer one such as "11개".
    '''
    classifiers = set(_korean_classifiers.split())

    def _spell(match):
        num, word = match.group(1), match.group(2)
        native = word[:2] in classifiers or word[0] in classifiers
        return hangul_number(num, sino=not native) + word

    text = re.sub(r'(\d[\d,]*)([\uac00-\ud71f]+)', _spell, text)
    # digit by digit for remaining digits
    return text.translate(str.maketrans('0123456789', '영일이삼사오육칠팔구'))
216
+
217
+
218
def korean_to_lazy_ipa(text):
    """Convert Korean text to the simplified ("lazy") IPA alphabet.

    Latin letters and numbers are first spelled out in Hangul, each run of
    Hangul syllables is romanised to IPA with ``ko_pron``, and the
    ``_ipa_to_lazy_ipa`` substitutions are applied in table order.
    """
    def _to_ipa(match):
        # Keep only the part before the first '] ~ [' separator.
        return ko_pron.romanise(match.group(0), 'ipa').split('] ~ [')[0]

    text = number_to_hangul(latin_to_hangul(text))
    text = re.sub('[\uac00-\ud7af]+', _to_ipa, text)
    for pattern, lazy in _ipa_to_lazy_ipa:
        text = pattern.sub(lazy, text)
    return text
225
+
226
# Shared g2pk2 converter instance, created once at import time.
_g2p = G2p()


def korean_to_ipa(text):
    """Convert Korean text to IPA.

    The text is spelled out in Hangul, passed through g2pk2 and
    ``fix_g2pk2_error``, converted with ``korean_to_lazy_ipa``, and finally
    the affricates 'ʧ' and 'ʥ' are expanded to 'tʃ' and 'dʑ'.
    """
    hangul = number_to_hangul(latin_to_hangul(text))
    pronounced = fix_g2pk2_error(_g2p(hangul))
    lazy_ipa = korean_to_lazy_ipa(pronounced)
    return lazy_ipa.replace('ʧ', 'tʃ').replace('ʥ', 'dʑ')
234
+
235
def post_replace_ph(ph):
    """Normalize one phone/punctuation token to a member of ``symbols``.

    The token is first mapped through a small punctuation table. The result
    is returned when it is in ``symbols``; otherwise "停" is returned.
    """
    # NOTE(review): several entries below map a character to itself as the
    # text appears here; possibly the keys were full-width punctuation
    # originally. Confirm against the upstream file.
    rep_map = {
        ":": ",",
        ";": ",",
        ",": ",",
        "。": ".",
        "!": "!",
        "?": "?",
        "\n": ".",
        "·": ",",
        "、": ",",
        "...": "…",
        " ": "空",
    }
    ph = rep_map.get(ph, ph)
    return ph if ph in symbols else "停"
256
+
257
def g2p(text):
    """Convert Korean text into a list of per-character phone symbols.

    Latin letters are spelled out, g2pk2 is applied, and the result is broken
    into jamo and corrected with ``fix_g2pk2_error``. If the text then ends
    with a jamo (U+3131-U+3163), a final "." is appended. Every character is
    finally mapped through ``post_replace_ph``.
    """
    pronounced = _g2p(latin_to_hangul(text))
    jamo_text = fix_g2pk2_error(divide_hangul(pronounced))
    jamo_text = re.sub(r'([\u3131-\u3163])$', r'\1.', jamo_text)
    return [post_replace_ph(char) for char in jamo_text]
GPT_SoVITS/text/opencpop-strict.txt ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a AA a
2
+ ai AA ai
3
+ an AA an
4
+ ang AA ang
5
+ ao AA ao
6
+ ba b a
7
+ bai b ai
8
+ ban b an
9
+ bang b ang
10
+ bao b ao
11
+ bei b ei
12
+ ben b en
13
+ beng b eng
14
+ bi b i
15
+ bian b ian
16
+ biao b iao
17
+ bie b ie
18
+ bin b in
19
+ bing b ing
20
+ bo b o
21
+ bu b u
22
+ ca c a
23
+ cai c ai
24
+ can c an
25
+ cang c ang
26
+ cao c ao
27
+ ce c e
28
+ cei c ei
29
+ cen c en
30
+ ceng c eng
31
+ cha ch a
32
+ chai ch ai
33
+ chan ch an
34
+ chang ch ang
35
+ chao ch ao
36
+ che ch e
37
+ chen ch en
38
+ cheng ch eng
39
+ chi ch ir
40
+ chong ch ong
41
+ chou ch ou
42
+ chu ch u
43
+ chua ch ua
44
+ chuai ch uai
45
+ chuan ch uan
46
+ chuang ch uang
47
+ chui ch ui
48
+ chun ch un
49
+ chuo ch uo
50
+ ci c i0
51
+ cong c ong
52
+ cou c ou
53
+ cu c u
54
+ cuan c uan
55
+ cui c ui
56
+ cun c un
57
+ cuo c uo
58
+ da d a
59
+ dai d ai
60
+ dan d an
61
+ dang d ang
62
+ dao d ao
63
+ de d e
64
+ dei d ei
65
+ den d en
66
+ deng d eng
67
+ di d i
68
+ dia d ia
69
+ dian d ian
70
+ diao d iao
71
+ die d ie
72
+ ding d ing
73
+ diu d iu
74
+ dong d ong
75
+ dou d ou
76
+ du d u
77
+ duan d uan
78
+ dui d ui
79
+ dun d un
80
+ duo d uo
81
+ e EE e
82
+ ei EE ei
83
+ en EE en
84
+ eng EE eng
85
+ er EE er
86
+ fa f a
87
+ fan f an
88
+ fang f ang
89
+ fei f ei
90
+ fen f en
91
+ feng f eng
92
+ fo f o
93
+ fou f ou
94
+ fu f u
95
+ ga g a
96
+ gai g ai
97
+ gan g an
98
+ gang g ang
99
+ gao g ao
100
+ ge g e
101
+ gei g ei
102
+ gen g en
103
+ geng g eng
104
+ gong g ong
105
+ gou g ou
106
+ gu g u
107
+ gua g ua
108
+ guai g uai
109
+ guan g uan
110
+ guang g uang
111
+ gui g ui
112
+ gun g un
113
+ guo g uo
114
+ ha h a
115
+ hai h ai
116
+ han h an
117
+ hang h ang
118
+ hao h ao
119
+ he h e
120
+ hei h ei
121
+ hen h en
122
+ heng h eng
123
+ hong h ong
124
+ hou h ou
125
+ hu h u
126
+ hua h ua
127
+ huai h uai
128
+ huan h uan
129
+ huang h uang
130
+ hui h ui
131
+ hun h un
132
+ huo h uo
133
+ ji j i
134
+ jia j ia
135
+ jian j ian
136
+ jiang j iang
137
+ jiao j iao
138
+ jie j ie
139
+ jin j in
140
+ jing j ing
141
+ jiong j iong
142
+ jiu j iu
143
+ ju j v
144
+ jv j v
145
+ juan j van
146
+ jvan j van
147
+ jue j ve
148
+ jve j ve
149
+ jun j vn
150
+ jvn j vn
151
+ ka k a
152
+ kai k ai
153
+ kan k an
154
+ kang k ang
155
+ kao k ao
156
+ ke k e
157
+ kei k ei
158
+ ken k en
159
+ keng k eng
160
+ kong k ong
161
+ kou k ou
162
+ ku k u
163
+ kua k ua
164
+ kuai k uai
165
+ kuan k uan
166
+ kuang k uang
167
+ kui k ui
168
+ kun k un
169
+ kuo k uo
170
+ la l a
171
+ lai l ai
172
+ lan l an
173
+ lang l ang
174
+ lao l ao
175
+ le l e
176
+ lei l ei
177
+ leng l eng
178
+ li l i
179
+ lia l ia
180
+ lian l ian
181
+ liang l iang
182
+ liao l iao
183
+ lie l ie
184
+ lin l in
185
+ ling l ing
186
+ liu l iu
187
+ lo l o
188
+ long l ong
189
+ lou l ou
190
+ lu l u
191
+ luan l uan
192
+ lun l un
193
+ luo l uo
194
+ lv l v
195
+ lve l ve
196
+ ma m a
197
+ mai m ai
198
+ man m an
199
+ mang m ang
200
+ mao m ao
201
+ me m e
202
+ mei m ei
203
+ men m en
204
+ meng m eng
205
+ mi m i
206
+ mian m ian
207
+ miao m iao
208
+ mie m ie
209
+ min m in
210
+ ming m ing
211
+ miu m iu
212
+ mo m o
213
+ mou m ou
214
+ mu m u
215
+ na n a
216
+ nai n ai
217
+ nan n an
218
+ nang n ang
219
+ nao n ao
220
+ ne n e
221
+ nei n ei
222
+ nen n en
223
+ neng n eng
224
+ ni n i
225
+ nian n ian
226
+ niang n iang
227
+ niao n iao
228
+ nie n ie
229
+ nin n in
230
+ ning n ing
231
+ niu n iu
232
+ nong n ong
233
+ nou n ou
234
+ nu n u
235
+ nuan n uan
236
+ nun n un
237
+ nuo n uo
238
+ nv n v
239
+ nve n ve
240
+ o OO o
241
+ ou OO ou
242
+ pa p a
243
+ pai p ai
244
+ pan p an
245
+ pang p ang
246
+ pao p ao
247
+ pei p ei
248
+ pen p en
249
+ peng p eng
250
+ pi p i
251
+ pian p ian
252
+ piao p iao
253
+ pie p ie
254
+ pin p in
255
+ ping p ing
256
+ po p o
257
+ pou p ou
258
+ pu p u
259
+ qi q i
260
+ qia q ia
261
+ qian q ian
262
+ qiang q iang
263
+ qiao q iao
264
+ qie q ie
265
+ qin q in
266
+ qing q ing
267
+ qiong q iong
268
+ qiu q iu
269
+ qu q v
270
+ qv q v
271
+ quan q van
272
+ qvan q van
273
+ que q ve
274
+ qve q ve
275
+ qun q vn
276
+ qvn q vn
277
+ ran r an
278
+ rang r ang
279
+ rao r ao
280
+ re r e
281
+ ren r en
282
+ reng r eng
283
+ ri r ir
284
+ rong r ong
285
+ rou r ou
286
+ ru r u
287
+ rua r ua
288
+ ruan r uan
289
+ rui r ui
290
+ run r un
291
+ ruo r uo
292
+ sa s a
293
+ sai s ai
294
+ san s an
295
+ sang s ang
296
+ sao s ao
297
+ se s e
298
+ sen s en
299
+ seng s eng
300
+ sha sh a
301
+ shai sh ai
302
+ shan sh an
303
+ shang sh ang
304
+ shao sh ao
305
+ she sh e
306
+ shei sh ei
307
+ shen sh en
308
+ sheng sh eng
309
+ shi sh ir
310
+ shou sh ou
311
+ shu sh u
312
+ shua sh ua
313
+ shuai sh uai
314
+ shuan sh uan
315
+ shuang sh uang
316
+ shui sh ui
317
+ shun sh un
318
+ shuo sh uo
319
+ si s i0
320
+ song s ong
321
+ sou s ou
322
+ su s u
323
+ suan s uan
324
+ sui s ui
325
+ sun s un
326
+ suo s uo
327
+ ta t a
328
+ tai t ai
329
+ tan t an
330
+ tang t ang
331
+ tao t ao
332
+ te t e
333
+ tei t ei
334
+ teng t eng
335
+ ti t i
336
+ tian t ian
337
+ tiao t iao
338
+ tie t ie
339
+ ting t ing
340
+ tong t ong
341
+ tou t ou
342
+ tu t u
343
+ tuan t uan
344
+ tui t ui
345
+ tun t un
346
+ tuo t uo
347
+ wa w a
348
+ wai w ai
349
+ wan w an
350
+ wang w ang
351
+ wei w ei
352
+ wen w en
353
+ weng w eng
354
+ wo w o
355
+ wu w u
356
+ xi x i
357
+ xia x ia
358
+ xian x ian
359
+ xiang x iang
360
+ xiao x iao
361
+ xie x ie
362
+ xin x in
363
+ xing x ing
364
+ xiong x iong
365
+ xiu x iu
366
+ xu x v
367
+ xv x v
368
+ xuan x van
369
+ xvan x van
370
+ xue x ve
371
+ xve x ve
372
+ xun x vn
373
+ xvn x vn
374
+ ya y a
375
+ yan y En
376
+ yang y ang
377
+ yao y ao
378
+ ye y E
379
+ yi y i
380
+ yin y in
381
+ ying y ing
382
+ yo y o
383
+ yong y ong
384
+ you y ou
385
+ yu y v
386
+ yv y v
387
+ yuan y van
388
+ yvan y van
389
+ yue y ve
390
+ yve y ve
391
+ yun y vn
392
+ yvn y vn
393
+ za z a
394
+ zai z ai
395
+ zan z an
396
+ zang z ang
397
+ zao z ao
398
+ ze z e
399
+ zei z ei
400
+ zen z en
401
+ zeng z eng
402
+ zha zh a
403
+ zhai zh ai
404
+ zhan zh an
405
+ zhang zh ang
406
+ zhao zh ao
407
+ zhe zh e
408
+ zhei zh ei
409
+ zhen zh en
410
+ zheng zh eng
411
+ zhi zh ir
412
+ zhong zh ong
413
+ zhou zh ou
414
+ zhu zh u
415
+ zhua zh ua
416
+ zhuai zh uai
417
+ zhuan zh uan
418
+ zhuang zh uang
419
+ zhui zh ui
420
+ zhun zh un
421
+ zhuo zh uo
422
+ zi z i0
423
+ zong z ong
424
+ zou z ou
425
+ zu z u
426
+ zuan z uan
427
+ zui z ui
428
+ zun z un
429
+ zuo z uo
GPT_SoVITS/text/symbols.py ADDED
@@ -0,0 +1,427 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
# Punctuation tokens. ("@" was once considered as the SP pause marker but is
# not part of the list.)
punctuation = ["!", "?", "…", ",", ".", "-"]
# Punctuation plus the special pause / unknown tokens.
pu_symbols = [*punctuation, "SP", "SP2", "SP3", "UNK"]
pad = "_"

# Initials (consonant-side symbols).
c = "AA EE OO b c ch d f g h j k l m n p q r s sh t w x y z zh".split()

# Finals without a tone digit.
v_without_tone = (
    "E En a ai an ang ao e ei en eng er i i0 ia ian iang iao ie in ing iong "
    "ir iu o ong ou u ua uai uan uang ui un uo v van ve vn"
).split()

# Every final combined with tone digits 1-5, grouped by tone.
v = [f"{final}{tone}" for tone in range(1, 6) for final in v_without_tone]

# japanese
# (The pitch/boundary markers "[", "]", "$" and "^" are not included here.)
ja_symbols = (
    "I N U a b by ch cl d dy e f g gy h hy i j k ky m my n ny o p py r ry "
    "s sh t ts u v w y z"
).split()

# ARPAbet phones.
arpa = set(
    (
        "AH0 S AH1 EY2 AE2 EH0 OW2 UH0 NG B G AY0 M AA0 F AO0 ER2 UH1 IY1 AH2 "
        "DH IY0 EY1 IH0 K N W IY2 T AA1 ER1 EH2 OY0 UH2 UW1 Z AW2 AW1 V UW2 "
        "AA2 ER AW0 UW0 R OW1 EH1 ZH AE0 IH2 IH Y JH P AY1 EY0 OY2 TH HH D "
        "ER0 CH AO1 AE1 AO2 OY1 AY2 IH1 OW0 L SH"
    ).split()
)

# Hindi phonemes
hi_consonants = [
    "क", "ख", "ग", "घ", "ङ",
    "च", "छ", "ज", "झ", "ञ",
    "ट", "ठ", "ड", "ढ", "ण",
    "त", "थ", "द", "ध", "न",
    "प", "फ", "ब", "भ", "म",
    "य", "र", "ल", "व",
    "श", "ष", "स", "ह",
    "क्ष", "त्र", "ज्ञ",
]

hi_vowels = [
    "अ", "आ", "इ", "ई", "उ", "ऊ",
    "ए", "ऐ", "ओ", "औ",
    "ऋ", "ॠ",
    "ं", "ः", "ँ",  # anusvara, visarga, chandrabindu
    "्",  # virama
    "ा", "ि", "ी", "ु", "ू",
    "े", "ै", "ो", "ौ",
    "ृ", "ॄ",
    "ॉ", "ऑ",  # candra o and o with candra
]

hi_numbers = ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"]

# Full symbol inventory: de-duplicated and sorted.
symbols = sorted(
    {
        pad,
        *c,
        *v,
        *ja_symbols,
        *pu_symbols,
        *arpa,
        *hi_consonants,
        *hi_vowels,
        *hi_numbers,
    }
)
if __name__ == "__main__":
    print(len(symbols))
GPT_SoVITS/text/symbols2.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
# Punctuation tokens. ("@" was once considered as the SP pause marker but is
# not part of the list.)
punctuation = ["!", "?", "…", ",", ".", "-"]
# Punctuation plus the special pause / unknown tokens.
pu_symbols = [*punctuation, "SP", "SP2", "SP3", "UNK"]
pad = "_"

# Initials (consonant-side symbols).
c = "AA EE OO b c ch d f g h j k l m n p q r s sh t w x y z zh".split()

# Finals without a tone digit.
v_without_tone = (
    "E En a ai an ang ao e ei en eng er i i0 ia ian iang iao ie in ing iong "
    "ir iu o ong ou u ua uai uan uang ui un uo v van ve vn"
).split()

# Every final combined with tone digits 1-5, grouped by tone.
v = [f"{final}{tone}" for tone in range(1, 6) for final in v_without_tone]

# japanese
# (The pitch/boundary markers "[", "]", "$" and "^" are not included here;
# the original note says they are left to be added later.)
ja_symbols = (
    "I N U a b by ch cl d dy e f g gy h hy i j k ky m my n ny o p py r ry "
    "s sh t ts u v w y z"
).split()

# ARPAbet phones.
arpa = set(
    (
        "AH0 S AH1 EY2 AE2 EH0 OW2 UH0 NG B G AY0 M AA0 F AO0 ER2 UH1 IY1 AH2 "
        "DH IY0 EY1 IH0 K N W IY2 T AA1 ER1 EH2 OY0 UH2 UW1 Z AW2 AW1 V UW2 "
        "AA2 ER AW0 UW0 R OW1 EH1 ZH AE0 IH2 IH Y JH P AY1 EY0 OY2 TH HH D "
        "ER0 CH AO1 AE1 AO2 OY1 AY2 IH1 OW0 L SH"
    ).split()
)

# Korean jamo plus the two placeholder symbols "空" and "停".
ko_symbols = 'ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㄲㄸㅃㅆㅉㅏㅓㅗㅜㅡㅣㅐㅔ空停'

# Cantonese symbols, each prefixed with "Y".
yue_symbols = {
    'Yeot3', 'Yip1', 'Yyu3', 'Yeng4', 'Yut5', 'Yaan5', 'Ym5', 'Yaan6', 'Yang1', 'Yun4', 'Yon2', 'Yui5', 'Yun2', 'Yat3', 'Ye', 'Yeot1',
    'Yoeng5', 'Yoek2', 'Yam2', 'Yeon6', 'Yu6', 'Yiu3', 'Yaang6', 'Yp5', 'Yai4', 'Yoek4', 'Yit6', 'Yam5', 'Yoeng6', 'Yg1', 'Yk3', 'Yoe4',
    'Yam3', 'Yc', 'Yyu4', 'Yyut1', 'Yiu4', 'Ying3', 'Yip3', 'Yaap3', 'Yau3', 'Yan4', 'Yau1', 'Yap4', 'Yk6', 'Yok3', 'Yai1', 'Yeot6',
    'Yan2', 'Yoek6', 'Yt1', 'Yoi1', 'Yit5', 'Yn4', 'Yaau3', 'Yau4', 'Yuk6', 'Ys', 'Yuk', 'Yin6', 'Yung6', 'Ya', 'You', 'Yaai5',
    'Yau5', 'Yoi3', 'Yaak3', 'Yaat3', 'Ying2', 'Yok5', 'Yeng2', 'Yyut3', 'Yam1', 'Yip5', 'You1', 'Yam6', 'Yaa5', 'Yi6', 'Yek4', 'Yyu2',
    'Yuk5', 'Yaam1', 'Yang2', 'Yai', 'Yiu6', 'Yin4', 'Yok4', 'Yot3', 'Yui2', 'Yeoi5', 'Yyun6', 'Yyu5', 'Yoi5', 'Yeot2', 'Yim4', 'Yeoi2',
    'Yaan1', 'Yang6', 'Yong1', 'Yaang4', 'Yung5', 'Yeon1', 'Yin2', 'Ya3', 'Yaang3', 'Yg', 'Yk2', 'Yaau5', 'Yut1', 'Yt5', 'Yip4', 'Yung4',
    'Yj', 'Yong3', 'Ya1', 'Yg6', 'Yaau6', 'Yit3', 'Yun3', 'Ying1', 'Yn2', 'Yg4', 'Yl', 'Yp3', 'Yn3', 'Yak1', 'Yang5', 'Yoe6',
    'You2', 'Yap2', 'Yak2', 'Yt3', 'Yot5', 'Yim2', 'Yi1', 'Yn6', 'Yaat5', 'Yaam3', 'Yoek5', 'Ye3', 'Yeon4', 'Yaa2', 'Yu3', 'Yim6',
    'Ym', 'Yoe3', 'Yaai2', 'Ym2', 'Ya6', 'Yeng6', 'Yik4', 'Yot4', 'Yaai4', 'Yyun3', 'Yu1', 'Yoeng1', 'Yaap2', 'Yuk3', 'Yoek3', 'Yeng5',
    'Yeoi1', 'Yiu2', 'Yok1', 'Yo1', 'Yoek1', 'Yoeng2', 'Yeon5', 'Yiu1', 'Yoeng4', 'Yuk2', 'Yat4', 'Yg5', 'Yut4', 'Yan6', 'Yin3', 'Yaa6',
    'Yap1', 'Yg2', 'Yoe5', 'Yt4', 'Ya5', 'Yo4', 'Yyu1', 'Yak3', 'Yeon2', 'Yong4', 'Ym1', 'Ye2', 'Yaang5', 'Yoi2', 'Yeng3', 'Yn',
    'Yyut4', 'Yau', 'Yaak2', 'Yaan4', 'Yek2', 'Yin1', 'Yi5', 'Yoe2', 'Yei5', 'Yaat6', 'Yak5', 'Yp6', 'Yok6', 'Yei2', 'Yaap1', 'Yyut5',
    'Yi4', 'Yim1', 'Yk5', 'Ye4', 'Yok2', 'Yaam6', 'Yat2', 'Yon6', 'Yei3', 'Yyu6', 'Yeot5', 'Yk4', 'Yai6', 'Yd', 'Yg3', 'Yei6',
    'Yau2', 'Yok', 'Yau6', 'Yung3', 'Yim5', 'Yut6', 'Yit1', 'Yon3', 'Yat1', 'Yaam2', 'Yyut2', 'Yui6', 'Yt2', 'Yek6', 'Yt', 'Ye6',
    'Yang3', 'Ying6', 'Yaau1', 'Yeon3', 'Yng', 'Yh',
    'Yang4', 'Ying5', 'Yaap6', 'Yoeng3', 'Yyun4', 'You3', 'Yan5', 'Yat5', 'Yot1', 'Yun1', 'Yi3', 'Yaa1', 'Yaap4', 'You6', 'Yaang2', 'Yaap5',
    'Yaa3', 'Yaak6', 'Yeng1', 'Yaak1', 'Yo5', 'Yoi4', 'Yam4', 'Yik1', 'Ye1', 'Yai5', 'Yung1', 'Yp2', 'Yui4', 'Yaak4', 'Yung2', 'Yak4',
    'Yaat4', 'Yeoi4', 'Yut2', 'Yin5', 'Yaau4', 'Yap6', 'Yb', 'Yaam4', 'Yw', 'Yut3', 'Yong2', 'Yt6', 'Yaai6', 'Yap5', 'Yik5', 'Yun6',
    'Yaam5', 'Yun5', 'Yik3', 'Ya2', 'Yyut6', 'Yon4', 'Yk1', 'Yit4', 'Yak6', 'Yaan2', 'Yuk1', 'Yai2', 'Yik2', 'Yaat2', 'Yo3', 'Ykw',
    'Yn5', 'Yaa', 'Ye5', 'Yu4', 'Yei1', 'Yai3', 'Yyun5', 'Yip2', 'Yaau2', 'Yiu5', 'Ym4', 'Yeoi6', 'Yk', 'Ym6', 'Yoe1', 'Yeoi3',
    'Yon', 'Yuk4', 'Yaai3', 'Yaa4', 'Yot6', 'Yaang1', 'Yei4', 'Yek1', 'Yo', 'Yp', 'Yo6', 'Yp4', 'Yan3', 'Yoi', 'Yap3', 'Yek3',
    'Yim3', 'Yz', 'Yot2', 'Yoi6', 'Yit2', 'Yu5', 'Yaan3', 'Yan1', 'Yon5', 'Yp1', 'Yong5', 'Ygw', 'Yak', 'Yat6', 'Ying4', 'Yu2',
    'Yf', 'Ya4', 'Yon1', 'You4', 'Yik6', 'Yui1', 'Yaat1', 'Yeot4', 'Yi2', 'Yaai1', 'Yek5', 'Ym3', 'Yong6', 'You5', 'Yyun1', 'Yn1',
    'Yo2', 'Yip6', 'Yui3', 'Yaak5', 'Yyun2',
}

# Hindi consonants
hi_consonants = [
    "क", "ख", "ग", "घ", "ङ",
    "च", "छ", "ज", "झ", "ञ",
    "ट", "ठ", "ड", "ढ", "ण",
    "त", "थ", "द", "ध", "न",
    "प", "फ", "ब", "भ", "म",
    "य", "र", "ल", "व",
    "श", "ष", "स", "ह",
    "क्ष", "त्र", "ज्ञ",
]

# Hindi vowels and modifiers
hi_vowels = [
    "अ", "आ", "इ", "ई", "उ", "ऊ",
    "ए", "ऐ", "ओ", "औ",
    "ऋ", "ॠ",
    "ं", "ः", "ँ",  # anusvara, visarga, chandrabindu
    "्",  # virama
    "ा", "ि", "ी", "ु", "ू",
    "े", "ै", "ो", "ौ",
    "ृ", "ॄ",
    "ॉ", "ऑ",  # candra o and o with candra
]

# Hindi numbers
hi_numbers = ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"]

# Full symbol inventory: de-duplicated and sorted.
# NOTE(review): ko_symbols and yue_symbols are defined above but are not
# merged into `symbols` here, so no Korean or Cantonese symbol (including
# "空" and "停") is a member of `symbols`. Adding them changes the vocabulary
# size and symbol ids, so confirm the intended inventory before changing it.
symbols = sorted(
    {
        pad,
        *c,
        *v,
        *ja_symbols,
        *pu_symbols,
        *arpa,
        *hi_consonants,
        *hi_vowels,
        *hi_numbers,
    }
)

if __name__ == "__main__":
    print(len(symbols))
# Size notes kept from the original file:
#   Cantonese:          732-353=379
#   Korean + Cantonese: 732-322=410
GPT_SoVITS/text/text_processing.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import List, Optional
3
+ from .symbols import symbols
4
+ from .hindi import get_phoneme_sequence as hindi_to_phonemes
5
+
6
def is_hindi_text(text: str) -> bool:
    """Return True when *text* contains at least one Devanagari character.

    The check covers the Unicode Devanagari block (U+0900 to U+097F), so any
    text written in that script is reported as Hindi.
    """
    devanagari_match = re.search(r'[\u0900-\u097F]', text)
    return devanagari_match is not None
9
+
10
def text_to_sequence(text: str, language: Optional[str] = None) -> List[str]:
    """Convert *text* into a list of phoneme symbols known to the model.

    Args:
        text: Input text.
        language: Language code. ``'hi'`` selects the Hindi phonemizer; any
            other value takes the placeholder path. When ``None`` the
            language is auto-detected: ``'hi'`` if the text contains
            Devanagari characters, otherwise ``'en'``.

    Returns:
        The phonemes that are present in ``symbols``, in their original
        order. Phonemes missing from ``symbols`` are reported with a printed
        warning and dropped.
    """
    if language is None:
        # Auto-detect language; extend here when more languages are supported.
        language = 'hi' if is_hindi_text(text) else 'en'

    if language == 'hi':
        phonemes = hindi_to_phonemes(text)
    else:
        # NOTE(review): placeholder for non-Hindi input. 'UNK' is filtered out
        # below unless it is part of `symbols`, so this path normally returns
        # an empty list.
        phonemes = ['UNK']

    # Build the lookup set once: `symbols` is a list, so testing membership
    # against it directly costs O(len(symbols)) per phoneme.
    known_symbols = set(symbols)

    # Single pass: warn about and drop unknown phonemes. Walking the phonemes
    # only once also keeps the result correct if the phonemizer returns a
    # one-shot iterator.
    sequence = []
    for p in phonemes:
        if p in known_symbols:
            sequence.append(p)
        else:
            print(f"Warning: phoneme '{p}' not in symbols list")
    return sequence
34
+
35
def sequence_to_text(sequence: List[str]) -> str:
    """Join the symbols in *sequence* into one space-separated string."""
    separator = ' '
    return separator.join(sequence)
GPT_SoVITS/text/tone_sandhi.py ADDED
@@ -0,0 +1,807 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from typing import List
15
+ from typing import Tuple
16
+
17
+ import jieba_fast as jieba
18
+ from pypinyin import lazy_pinyin
19
+ from pypinyin import Style
20
+
21
+
22
class ToneSandhi:
    """Rule-based Mandarin tone sandhi applied to pinyin finals.

    Each "final" is a string whose last character is the tone digit, e.g.
    ``['ia1', 'i3']`` for "家里". The methods rewrite that trailing digit
    according to neutral-tone, "不", "一" and third-tone sandhi rules, and
    provide helpers that re-merge a segmented sentence (a list of
    ``(word, pos)`` pairs) so the rules see the right word boundaries.
    """

    def __init__(self):
        # Words whose last syllable is conventionally read in the neutral tone.
        self.must_neural_tone_words = {
            "麻烦", "麻利", "鸳鸯", "高粱", "骨头", "骆驼", "马虎", "首饰",
            "馒头", "馄饨", "风筝", "难为", "队伍", "阔气", "闺女", "门道",
            "锄头", "铺盖", "铃铛", "铁匠", "钥匙", "里脊", "里头", "部分",
            "那么", "道士", "造化", "迷糊", "连累", "这么", "这个", "运气",
            "过去", "软和", "转悠", "踏实", "跳蚤", "跟头", "趔趄", "财主",
            "豆腐", "讲究", "记性", "记号", "认识", "规矩", "见识", "裁缝",
            "补丁", "衣裳", "衣服", "衙门", "街坊", "行李", "行当", "蛤蟆",
            "蘑菇", "薄荷", "葫芦", "葡萄", "萝卜", "荸荠", "苗条", "苗头",
            "苍蝇", "芝麻", "舒服", "舒坦", "舌头", "自在", "膏药", "脾气",
            "脑袋", "脊梁", "能耐", "胳膊", "胭脂", "胡萝", "胡琴", "胡同",
            "聪明", "耽误", "耽搁", "耷拉", "耳朵", "老爷", "老实", "老婆",
            "老头", "老太", "翻腾", "罗嗦", "罐头", "编辑", "结实", "红火",
            "累赘", "糨糊", "糊涂", "精神", "粮食", "簸箕", "篱笆", "算计",
            "算盘", "答应", "笤帚", "笑语", "笑话", "窟窿", "窝囊", "窗户",
            "稳当", "稀罕", "称呼", "秧歌", "秀气", "秀才", "福气", "祖宗",
            "砚台", "码头", "石榴", "石头", "石匠", "知识", "眼睛", "眯缝",
            "眨巴", "眉毛", "相声", "盘算", "白净", "痢疾", "痛快", "疟疾",
            "疙瘩", "疏忽", "畜生", "生意", "甘蔗", "琵琶", "琢磨", "琉璃",
            "玻璃", "玫瑰", "玄乎", "狐狸", "状元", "特务", "牲口", "牙碜",
            "牌楼", "爽快", "爱人", "热闹", "烧饼", "烟筒", "烂糊", "点心",
            "炊帚", "灯笼", "火候", "漂亮", "滑溜", "溜达", "温和", "清楚",
            "消息", "浪头", "活泼", "比方", "正经", "欺负", "模糊", "槟榔",
            "棺材", "棒槌", "棉花", "核桃", "栅栏", "柴火", "架势", "枕头",
            "枇杷", "机灵", "本事", "木头", "木匠", "朋友", "月饼", "月亮",
            "暖和", "明白", "时候", "新鲜", "故事", "收拾", "收成", "提防",
            "挖苦", "挑剔", "指甲", "指头", "拾掇", "拳头", "拨弄", "招牌",
            "招呼", "抬举", "护士", "折腾", "扫帚", "打量", "打算", "打点",
            "打扮", "打听", "打发", "扎实", "扁担", "戒指", "懒得", "意识",
            "意思", "情形", "悟性", "怪物", "思量", "怎么", "念头", "念叨",
            "快活", "忙活", "志气", "心思", "得罪", "张罗", "弟兄", "开通",
            "应酬", "庄稼", "干事", "帮手", "帐篷", "希罕", "师父", "师傅",
            "巴结", "巴掌", "差事", "工夫", "岁数", "屁股", "尾巴", "少爷",
            "小气", "小伙", "将就", "对头", "对付", "寡妇", "家伙", "客气",
            "实在", "官司", "学问", "学生", "字号", "嫁妆", "媳妇", "媒人",
            "婆家", "娘家", "委屈", "姑娘", "姐夫", "妯娌", "妥当", "妖精",
            "奴才", "女婿", "头发", "太阳", "大爷", "大方", "大意", "大夫",
            "多少", "多么", "外甥", "壮实", "地道", "地方", "在乎", "困难",
            "嘴巴", "嘱咐", "嘟囔", "嘀咕", "喜欢", "喇嘛", "喇叭", "商量",
            "唾沫", "哑巴", "哈欠", "哆嗦", "咳嗽", "和尚", "告诉", "告示",
            "含糊", "吓唬", "后头", "名字", "名堂", "合同", "吆喝", "叫唤",
            "口袋", "厚道", "厉害", "千斤", "包袱", "包涵", "匀称", "勤快",
            "动静", "动弹", "功夫", "力气", "前头", "刺猬", "刺激", "别扭",
            "利落", "利索", "利害", "分析", "出息", "凑合", "凉快", "冷战",
            "冤枉", "冒失", "养活", "关系", "先生", "兄弟", "便宜", "使唤",
            "佩服", "作坊", "体面", "位置", "似的", "伙计", "休息", "什么",
            "人家", "亲戚", "亲家", "交情", "云彩", "事情", "买卖", "主意",
            "丫头", "丧气", "两口", "东西", "东家", "世故", "不由", "不在",
            "下水", "下巴", "上头", "上司", "丈夫", "丈人", "一辈", "那个",
            "菩萨", "父亲", "母亲", "咕噜", "邋遢", "费用", "冤家", "甜头",
            "介绍", "荒唐", "大人", "泥鳅", "幸福", "熟悉", "计划", "扑腾",
            "蜡烛", "姥爷", "照顾", "喉咙", "吉他", "弄堂", "蚂蚱", "凤凰",
            "拖沓", "寒碜", "糟蹋", "倒腾", "报复", "逻辑", "盘缠", "喽啰",
            "牢骚", "咖喱", "扫把", "惦记",
        }
        # Words excluded from the reduplication and "们/子" suffix rules in
        # `_neural_sandhi`.
        self.must_not_neural_tone_words = {
            "男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子",
            "电子", "人人", "虎虎", "幺幺", "干嘛", "学子", "哈哈", "数数",
            "袅袅", "局地", "以下", "娃哈哈", "花花草草", "留得", "耕地", "想想",
            "熙熙", "攘攘", "卵子", "死死", "冉冉", "恳恳", "佼佼", "吵吵",
            "打打", "考考", "整整", "莘莘", "落地", "算子", "家家户户", "青青",
        }
        # Punctuation that blocks the "一" -> tone 4 rule in `_yi_sandhi`.
        self.punc = ":,;。?!“”‘’':,;.?!"

    @staticmethod
    def _set_tone(final: str, tone: str) -> str:
        """Return *final* with its trailing tone digit replaced by *tone*."""
        return final[:-1] + tone

    # the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041
    # e.g.
    # word: "家里"
    # pos: "s"
    # finals: ['ia1', 'i3']
    def _neural_sandhi(self, word: str, pos: str, finals: List[str]) -> List[str]:
        """Apply neutral-tone (tone 5) rules to *finals* of *word*.

        Args:
            word: The word being processed.
            pos: Its jieba part-of-speech tag.
            finals: One final per character; modified in place for the
                character-level rules.

        Returns:
            The updated list of finals (a new list built from the two
            sub-word slices).
        """
        neutral_words = self.must_neural_tone_words

        # reduplication words for n. and v. e.g. 奶奶, 试试, 旺旺
        for j, item in enumerate(word):
            if (
                j - 1 >= 0
                and item == word[j - 1]
                and pos[0] in {"n", "v", "a"}
                and word not in self.must_not_neural_tone_words
            ):
                finals[j] = self._set_tone(finals[j], "5")

        ge_idx = word.find("个")
        # sentence-final particles
        if len(word) >= 1 and word[-1] in "吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶":
            finals[-1] = self._set_tone(finals[-1], "5")
        # structural particles
        elif len(word) >= 1 and word[-1] in "的地得":
            finals[-1] = self._set_tone(finals[-1], "5")
        # e.g. 走了, 看着, 去过
        elif len(word) == 1 and word in "了着过" and pos in {"ul", "uz", "ug"}:
            finals[-1] = self._set_tone(finals[-1], "5")
        elif (
            len(word) > 1
            and word[-1] in "们子"
            and pos in {"r", "n"}
            and word not in self.must_not_neural_tone_words
        ):
            finals[-1] = self._set_tone(finals[-1], "5")
        # e.g. 桌上, 地下, 家里
        elif len(word) > 1 and word[-1] in "上下里" and pos in {"s", "l", "f"}:
            finals[-1] = self._set_tone(finals[-1], "5")
        # e.g. 上来, 下去
        elif len(word) > 1 and word[-1] in "来去" and word[-2] in "上下进出回过起开":
            finals[-1] = self._set_tone(finals[-1], "5")
        # "个" used as a measure word
        elif (
            ge_idx >= 1
            and (word[ge_idx - 1].isnumeric() or word[ge_idx - 1] in "几有两半多各整每做是")
        ) or word == "个":
            finals[ge_idx] = self._set_tone(finals[ge_idx], "5")
        else:
            if word in neutral_words or word[-2:] in neutral_words:
                finals[-1] = self._set_tone(finals[-1], "5")

        # Re-check the conventional neutral-tone list on each of the two
        # sub-words produced by the segmenter.
        word_list = self._split_word(word)
        head_len = len(word_list[0])
        finals_list = [finals[:head_len], finals[head_len:]]
        for i, sub_word in enumerate(word_list):
            # conventional neural in Chinese
            if sub_word in neutral_words or sub_word[-2:] in neutral_words:
                finals_list[i][-1] = self._set_tone(finals_list[i][-1], "5")
        finals = sum(finals_list, [])
        return finals

    def _bu_sandhi(self, word: str, finals: List[str]) -> List[str]:
        """Apply the "不" rules: neutral in X不Y, tone 2 before a tone-4 syllable."""
        # e.g. 看不懂
        if len(word) == 3 and word[1] == "不":
            finals[1] = self._set_tone(finals[1], "5")
        else:
            for i, char in enumerate(word):
                # "不" before tone4 should be bu2, e.g. 不怕
                if char == "不" and i + 1 < len(word) and finals[i + 1][-1] == "4":
                    finals[i] = self._set_tone(finals[i], "2")
        return finals

    def _yi_sandhi(self, word: str, finals: List[str]) -> List[str]:
        """Apply the "一" rules (number sequence, reduplication, ordinal, tone 2/4)."""
        # "一" in number sequences, e.g. 一零零, 二一零
        if word.find("一") != -1 and all(
            item.isnumeric() for item in word if item != "一"
        ):
            return finals
        # "一" between reduplication words should be yi5, e.g. 看一看
        elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
            finals[1] = self._set_tone(finals[1], "5")
        # when "一" is ordinal word, it should be yi1
        elif word.startswith("第一"):
            finals[1] = self._set_tone(finals[1], "1")
        else:
            for i, char in enumerate(word):
                if char == "一" and i + 1 < len(word):
                    # "一" before tone4 should be yi2, e.g. 一段
                    if finals[i + 1][-1] == "4":
                        finals[i] = self._set_tone(finals[i], "2")
                    # "一" before non-tone4 should be yi4, e.g. 一天
                    else:
                        # if "一" is followed by punctuation it keeps tone 1
                        if word[i + 1] not in self.punc:
                            finals[i] = self._set_tone(finals[i], "4")
        return finals

    def _split_word(self, word: str) -> List[str]:
        """Split *word* into two parts around the shortest jieba sub-word.

        Returns a two-element list ``[left, right]`` whose concatenation is
        *word*; one side may be empty when the word is not subdivided.
        """
        word_list = sorted(jieba.cut_for_search(word), key=len)
        if not word_list:
            # Nothing to segment (e.g. empty input): keep the word whole
            # instead of raising IndexError below.
            return [word, ""]
        first_subword = word_list[0]
        first_begin_idx = word.find(first_subword)
        if first_begin_idx == 0:
            second_subword = word[len(first_subword) :]
            new_word_list = [first_subword, second_subword]
        else:
            second_subword = word[: -len(first_subword)]
            new_word_list = [second_subword, first_subword]
        return new_word_list

    def _three_sandhi(self, word: str, finals: List[str]) -> List[str]:
        """Apply third-tone sandhi (tone 3 -> tone 2 before another tone 3)."""
        if len(word) == 2 and self._all_tone_three(finals):
            finals[0] = self._set_tone(finals[0], "2")
        elif len(word) == 3:
            word_list = self._split_word(word)
            if self._all_tone_three(finals):
                # disyllabic + monosyllabic, e.g. 蒙古/包
                if len(word_list[0]) == 2:
                    finals[0] = self._set_tone(finals[0], "2")
                    finals[1] = self._set_tone(finals[1], "2")
                # monosyllabic + disyllabic, e.g. 纸/老虎
                elif len(word_list[0]) == 1:
                    finals[1] = self._set_tone(finals[1], "2")
            else:
                head_len = len(word_list[0])
                finals_list = [finals[:head_len], finals[head_len:]]
                if len(finals_list) == 2:
                    for i, sub in enumerate(finals_list):
                        # e.g. 所有/人
                        if self._all_tone_three(sub) and len(sub) == 2:
                            finals_list[i][0] = self._set_tone(finals_list[i][0], "2")
                        # e.g. 好/喜欢
                        elif (
                            i == 1
                            and not self._all_tone_three(sub)
                            and finals_list[i][0][-1] == "3"
                            and finals_list[0][-1][-1] == "3"
                        ):
                            finals_list[0][-1] = self._set_tone(finals_list[0][-1], "2")
                    finals = sum(finals_list, [])
        # split idiom into two words whose length is 2
        elif len(word) == 4:
            finals_list = [finals[:2], finals[2:]]
            finals = []
            for sub in finals_list:
                if self._all_tone_three(sub):
                    sub[0] = self._set_tone(sub[0], "2")
                finals += sub

        return finals

    def _all_tone_three(self, finals: List[str]) -> bool:
        """Return True when every final ends in tone digit "3" (True for an empty list)."""
        return all(x[-1] == "3" for x in finals)

    # merge "不" and the word behind it
    # if don't merge, "不" sometimes appears alone according to jieba, which may occur sandhi error
    def _merge_bu(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """Glue a standalone "不" onto the word that follows it."""
        new_seg = []
        last_word = ""
        for word, pos in seg:
            if last_word == "不":
                word = last_word + word
            if word != "不":
                new_seg.append((word, pos))
            last_word = word[:]
        # A trailing "不" has nothing to attach to; keep it as an adverb.
        if last_word == "不":
            new_seg.append((last_word, "d"))
            last_word = ""
        return new_seg

    # function 1: merge "一" and reduplication words in it's left and right, e.g. "听","一","听" ->"听一听"
    # function 2: merge single "一" and the word behind it
    # if don't merge, "一" sometimes appears alone according to jieba, which may occur sandhi error
    # e.g.
    # input seg: [('听', 'v'), ('一', 'm'), ('听', 'v')]
    # output seg: [['听一听', 'v']]
    def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """Merge "一" with its neighbours (see the comment above for both passes).

        Returns a list of two-element ``[word, pos]`` lists.
        """
        new_seg = []
        # function 1
        for i, (word, pos) in enumerate(seg):
            if (
                i - 1 >= 0
                and word == "一"
                and i + 1 < len(seg)
                and seg[i - 1][0] == seg[i + 1][0]
                and seg[i - 1][1] == "v"
                and seg[i + 1][1] == "v"
            ):
                # Extend the most recently emitted entry. Indexing new_seg
                # with the seg index (i - 1) is wrong once an earlier merge
                # has made new_seg shorter than seg.
                new_seg[-1][0] = new_seg[-1][0] + "一" + seg[i + 1][0]
            else:
                if (
                    i - 2 >= 0
                    and seg[i - 1][0] == "一"
                    and seg[i - 2][0] == word
                    and pos == "v"
                    and seg[i - 2][1] == "v"
                ):
                    # Second verb of "X一X": already absorbed by the merge above.
                    continue
                else:
                    new_seg.append([word, pos])
        seg = new_seg
        new_seg = []
        # function 2
        for word, pos in seg:
            if new_seg and new_seg[-1][0] == "一":
                new_seg[-1][0] = new_seg[-1][0] + word
            else:
                new_seg.append([word, pos])
        return new_seg

    def _merge_adjacent_by_tone(self, seg, tones_match):
        """Merge neighbouring words when ``tones_match(prev_finals, cur_finals)`` holds.

        A word is merged into its predecessor only if the predecessor was not
        itself just merged, is not a reduplication, and the combined length
        is at most three characters. Returns a list of ``[word, pos]`` lists.
        """
        new_seg = []
        sub_finals_list = [
            lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
            for (word, pos) in seg
        ]
        merge_last = [False] * len(seg)
        for i, (word, pos) in enumerate(seg):
            if (
                i - 1 >= 0
                and tones_match(sub_finals_list[i - 1], sub_finals_list[i])
                and not merge_last[i - 1]
            ):
                # if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
                if (
                    not self._is_reduplication(seg[i - 1][0])
                    and len(seg[i - 1][0]) + len(seg[i][0]) <= 3
                ):
                    new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
                    merge_last[i] = True
                else:
                    new_seg.append([word, pos])
            else:
                new_seg.append([word, pos])
        return new_seg

    # the first and the second words are all_tone_three
    def _merge_continuous_three_tones(
        self, seg: List[Tuple[str, str]]
    ) -> List[Tuple[str, str]]:
        """Merge adjacent words that both consist solely of tone-3 syllables."""
        return self._merge_adjacent_by_tone(
            seg,
            lambda prev, cur: self._all_tone_three(prev) and self._all_tone_three(cur),
        )

    def _is_reduplication(self, word: str) -> bool:
        """Return True for a two-character word made of one repeated character."""
        return len(word) == 2 and word[0] == word[1]

    # the last char of first word and the first char of second word is tone_three
    def _merge_continuous_three_tones_2(
        self, seg: List[Tuple[str, str]]
    ) -> List[Tuple[str, str]]:
        """Merge adjacent words when a tone-3 syllable meets a tone-3 syllable at the boundary."""
        return self._merge_adjacent_by_tone(
            seg,
            lambda prev, cur: prev[-1][-1] == "3" and cur[0][-1] == "3",
        )

    def _merge_er(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """Attach a standalone "儿" to the preceding word unless that word is "#"."""
        new_seg = []
        for i, (word, pos) in enumerate(seg):
            if i - 1 >= 0 and word == "儿" and seg[i - 1][0] != "#":
                new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
            else:
                new_seg.append([word, pos])
        return new_seg

    def _merge_reduplication(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """Merge a word into the previous entry when the two are identical."""
        new_seg = []
        for word, pos in seg:
            if new_seg and word == new_seg[-1][0]:
                new_seg[-1][0] = new_seg[-1][0] + word
            else:
                new_seg.append([word, pos])
        return new_seg

    def pre_merge_for_modify(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """Run all merge passes on *seg* before tone modification.

        The "一" and third-tone passes are best-effort: if one raises, a
        message is printed and the segmentation from the previous step is
        kept.
        """
        seg = self._merge_bu(seg)
        try:
            seg = self._merge_yi(seg)
        except Exception:
            print("_merge_yi failed")
        seg = self._merge_reduplication(seg)
        try:
            seg = self._merge_continuous_three_tones(seg)
        except Exception:
            print("_merge_continuous_three_tones failed")
        try:
            seg = self._merge_continuous_three_tones_2(seg)
        except Exception:
            print("_merge_continuous_three_tones_2 failed")

        seg = self._merge_er(seg)
        return seg

    def modified_tone(self, word: str, pos: str, finals: List[str]) -> List[str]:
        """Apply the "不", "一", neutral-tone and third-tone rules, in that order."""
        finals = self._bu_sandhi(word, finals)
        finals = self._yi_sandhi(word, finals)
        finals = self._neural_sandhi(word, pos, finals)
        finals = self._three_sandhi(word, finals)
        return finals
GPT_SoVITS/text/zh_normalization/README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Supported NSW (Non-Standard-Word) Normalization
2
+
3
+ |NSW type|raw|normalized|
4
+ |:--|:-|:-|
5
+ |serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九|
6
+ |cardinal|这块黄金重达324.75克<br>我们班的最高总分为583分|这块黄金重达三百二十四点七五克<br>我们班的最高总分为五百八十三分|
7
+ |numeric range |12\~23<br>-1.5\~2|十二到二十三<br>负一点五到二|
8
+ |date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日, 她弟弟出生于一九九五年三月一日|
9
+ |time|等会请在12:05请通知我|等会请在十二点零五分请通知我|
10
+ |temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度|
11
+ |fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票|
12
+ |percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨|
13
+ |money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五,三十四点五元,二十点一万|
14
+ |telephone|这是固话0421-33441122<br>这是手机+86 18544139121|这是固话零四二一三三四四一一二二<br>这是手机八六一八五四四一三九一二一|
15
+ ## References
16
+ [Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files)
GPT_SoVITS/text/zh_normalization/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from text.zh_normalization.text_normlization import *
GPT_SoVITS/text/zh_normalization/char_convert.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Traditional and simplified Chinese conversion. A simplified character may correspond to multiple traditional characters.
16
+ """
17
+ simplified_charcters = '制咖片型超声盘鉴定仔点他命书歌粉巾字帐恤手指记忆棒形转弯沟光○〇㐄㐅㐆㐌㐖毒㐜㐡㐤㐰㐺㑇㑳㒳㒸㔾㗂㗎㝵㞎㞙㞞以㢲㢴㤅㥁㥯㨗㫺㬎㮎㮚㮸㲋㲱㲾㳮涧㵪㶸㷖㷭㹢㹴犬㺢狓㺵碗㽮㿝䍃䔢䖟䖸䗈䗥䗪䝓射䥯䦉䯝鲃鱼䲔䳗鹅䵹鼄䶑一对应映射丁不识下儿子做二休世丘之貉并中台原则串为甚谓干净了百事无成八变五十些人得道鸡升天代如并来去个国政策劲幽灵在欧洲游荡接样萝卜坑侧化传价元论醇共再准刀两断切分耕耘收获钱货物向看旧就绪险刻千金动劳永逸匙零夜半卡通回复返影踪反常态口咬气句话同吐快吹周味呼诺呜品红锅哄而散起唱和问三知生熟团漆黑火糟堆场空块面塌糊涂尘染壁厢夔已足多情露水大早到晚夫妻当关万莫开失古恨套所料既往孔见提师要家主审寸阴难买斗牛小撮部阵局展身层巴掌帆风顺席地带过年计于春头载四季期被蛇怕井绳度愿式份弹顷深前律径心意念差愁孤行俱全房厅交遮打技长把抓死拿眼泪鼻涕钥锁折段抿拍即合扫排掬挥拨拥上入击洞掷揽改故辙败文值名斑方面旁族日秋餐隔雅里终父旦时晌会霎间晃暴寒曝更月望垠际朝夕本正经利杯羹东西板枝独秀根筋杆进条龙服务概模次函数又性程总付步脚印趋登毛拔呵氧氮碳决雌雄波未平派谎言流清楚白准溜烟潭有获闻是处降琴鹤甲病发可拾沙目然了直以相眨穿睹瞥瞬矢的解石鸟神教秉虔诚秘种窝蜂穷窍笑置笔苟勾销抹杀煞等奖箍节吃箭仇双雕诗筹箩筐系列纸级士官统丝毫挂维网尽线微吭响股脑胎脉承腔臂力致效资源址器举功投般说讲规贸易叶障着慎满皆输号木电池衣倾钟高低视仁觉醒览遗角银币触溃九鼎蔽抄出驷马追重语破贫洗贯走路安蹴至几蹶振跃役胆汗较辈轮辞赞退六连遍递边针血锤音错门思闪真倒项栽雾类保护川先惊乍体哄鳞爪鸣滴泡邻域党专鼓作齐炒丑烯亥克内酯冬加奴卯肝炎基尺梁街裤镐客宠庭巳汝昌烷玲磊糖肇酉醛啷青县韪良香骨鲷丂七集河市弦喜嘴张舌堵区工业姊妹星架构巧彩扭歪拼凑余热曜武州爷浮屠美乡老阶树荤素碎落能魄鳃鳗珠丄丅丆万俟丈尚摸母娘量管群亚虎必我堂令申件装伏位博侠义界表女墟台戏臭皮匠胜诸葛亮赛顶倍催请运算包立叉戟离疫苗土史志演围揭瓦晒夷姑婆帝村宝烂尖杉碱屉桌山岔岛由纪峡坝库镇废从德后拗汤治旬食明昧曹朋友框栏极权幂曲归依猫民氟硼氯磷铁江侗自旅法司洋浦梅园温暖湾焦班幸用田略番叠皇炮捶硝苯酸腺苷棱草镜穗跳远索锦纲聚氰胺联店胚膲爱色堇紫罗兰芝茶饭菱云虫藏藩乱叛苏亲债凳学座恐恋柱测肌腹衩锥系貂企乌跪叩军车农题迭都甘油屯奏键短阿姨陪姐只顾茅庐槽驾魂鲜鹿页其菜单乘任供势午齿汉组织吊调泻唇坡城报坟外夸将尉建筑岸岗公床扬新剑升杭林栗校楼标款汽社浣海商馆剧院钢华港机械广媒环球融第医科证券综财乐育游涨犹岭疏瘾睑确兵领导缴肢膛船艾瑟尔苍蔡虞效衫覆访诉课谕议轨述野钩限敌鞋颌颔颚饶首龈站例修凡划垂届属崽颏厨拜挫摆放旋削棋榻槛礼沉注滑营狱画确仪聘花葬诏员跌辖周达酒锚闸陷陆雨雪飞威丌于丹久乏予理评产亢卑亦乎舞己悲矩圆词害志但住佞佳便俗信票案幅翁倦伦假偏倚斜亏鬼敲停备伤脾胃仅此像俭匮免宜穴焉戴兼容许冻伯仲负彼昼皂轩轾实刊划颠卫战哥比省非好黄饰别拘束掩奶睬选择摇扰烦苦枚写协厌及格受欢迎约只估侵犯割状告或缺抗拒挽撤救药喻磨灭端倪少逆逾越避靠适吉誉吝玉含延咎歹听啻渊善谋均匀堪忍够太惹妙妥妨孕症孝术室完纳推冠积宣疑辩栗碴称屈挠屑干涉衡待很忙恶忿怎么怠急耻恭息悦惑惜惟想愉愧怍慌愤启懂懈怀材才紧招认扣抵拉舍也罢插揣冒搭撞南墙扩核支攻敢雷攀敬里吗需景智暇曾罪遇朽枉止况竞争辱求愈渝溶济左右袒困补爽特寂寞示弱找谢畏强疾徐痛痒冤符眠睦瞅董何厚云措活疲羞者轻玻璃祥兆禁���稂莠稳佛换答简结果盟绝缕途给谈否羁翼耐肖胫毋宁兴舒若菲莱痕迹窠臼虚衰脸兔撒鹰棺范该详讳抬泰让须眉象众赀账费灰赖奇虑训辍辨菽麦辛近送透逞徒速续逮捕遂遑违逊斧钺艰醉锈随观弃显饱脂肪使丏丐帮丒且慢末丕替桃宗王尊凉爵各图屋脊粮署录坛吾禄职胄袭君厦丗北壑桐疹损逢陵鹬丙寅戌氨腈唑纶辰酮脱氢酶醚丞丢现掉纱帽弄扯炮碗丠両丣坐存激肩臻蒂莲悖序驱丨丩丫挺杈髻鬟细介俄伊犁京尼布订普渡央委监察检查剂圈设警队斯督剩震境航舶革防托播促质版蝾螈锋研艺历残消频谱精密制造陲邮候埔坚压坜凹汇执府究邦俘摄寮彬狼岳肺肿庸英讯诊埋粒胞括控码韩暑枪枢砥澳哇牟寿甸钻探篇签缀缝继耳肯照妇埃悬璧轴柜台辣搁浅邪跑纤阮阳私囊魔丮丰姿采丱烧丳丵丶丷丸参寨朗桂瑞砂衷霞貌凤仆舰因嫌宰峰干络牌持旨祭祷簿编罚宾办丼丿乀乂乃乄仰慕盛旷留考验阔乆乇么丑麽乊湖燃乑乒乓乕乖僻忤戾离谬迕乗危肥劫除隙浪婿乙炔肠酰吡咯盐乚乛乜嘢卿玄宫尾狐龟塔嶷兄弟泉章霄钉耙乞扎哀怜恕讨乢乣乤乥乧乨乩童乪乫乭乳晕汁液瑶浆牙癌突窦罩腐胶猪酪蛋糕菌瘤乴乵乶乷乸乹乺乼乾俸冰嘉哕嚎坤妈尸垒旱枯涸俐渴潮涩煸豆燥爹瘦瘪癣瞪袋脆姜贝隆馏乿亀亁叫咕攘扔搞男砸窜蓬麻亃亄亅却亇迟典今临繁累卵奉婚聪躬巨与迁添裂副宿岁怪恶尕仑愣杆硅硫钛铀锰芑杂异钠砷胂磺琥珀舱棍簧胡茬盗浩
盆贩郎腿亍洪亐互欠助勉惠操斥诿系户译亓墓碑刑铃卅渠缤纷斗米旗宪钒灯徽瘟祖拳福谷丰脏腑绑肉腌苓蕴桥铺霸颜闹判喷冈底蛙陉矿亖亘亜罕们娜桑那努哈喀弗烈曼松森杜氏杯奥琛敦戊穆圣裔汇薛孙亟亡佚虏羊牢奋释卷卸契媾感额睫缠谊趾塞挤纽阻还配驰庄亨洛祚亪享津沪畿郊慈菴枇杷膏亭阁锃丽亳亶亹诛初责翻疯偶杰丛稠妖拖寰居吸授慧蜗吞壮魅狗矛盾益渣患忧稀描猿梦暂涯畜祸缘沸搜引擎臣横纭谁混援蒸兽狮税剖亻亼亽亡什献刹邡么仂仃仄仆富怨仈仉毕昔晨壳绍仍仏仒仕宦仗欺恃腰叹叹炬梓讫施仙后琼逝仚仝仞仟悔仡佬偿填泊拓扑簇羔购顿钦佩发棻阃驭养亿儆尤借帧赈凌叙帖李柔刚沃眦睚戒讹取飨读仨仫仮著泳卧躺韶夏裁仳仵唯贤凭钓诞仿似宋佛讽伀硕盼鹅伄儅伈伉俪柯始娃迈戈坦堡帕茨萨庙玛莉莎藤霍姆伋伍奢胥廷芳豪伎俩侍汛勒希羲雏伐憩整谟闲闲伕伙伴颐伜伝伢叔恒兹恩翰伱伲侣伶俜悧鼬伸懒缩喇叭伹伺伻伽倻辐伾似佃伫布乔妮墨佉卢佌贷劣廉昂档浓矮伞洼缓耗胸谷迷挡率龋宅沫舍疗佐贰佑占优据铧尝呢须鲁晓佗佘余坪寺瓜铳僧蒙芒陀龛哼呕坊奸孽弊揖祟茧缚誓贼佝偻瞀佟你夺赶佡佢佣佤佧贾佪佫佯佰佱洁绩酿肴佴卷佶佷佸佹佺佻佼佽佾具唤窘坏娱怒慨硬习惯聋膨胀蔓骇贵痹侀侁侂侃侄侅鸿燕侇侈糜靡侉侌妾侏儒仓鼠侐侑侔仑侘侚链侜偎傍钴循柳葫芦附価侮骂蔑侯岩截蚀局贴壶嬛宴捷携桶笺酌俣狭膝狄俅俉俊俏俎俑俓俔谚俚俛黎健呈固墒增守康箱湿祐镖镳杠盒靖膜龄俞豹猎噪孚封札筒托衍鸽剪撰稿炼厂禊练缮葺俯瞰撑冲效俳俴俵俶俷俺备俾伥倂倅储卒惶敷猝逃颉蓄崇隐倌倏忽刺蜡烛噍嚼坍扁抽毙葱楣灌灶粪背薮卖赔闭霉腾倓倔幸倘倜傥倝借箸挹浇阅倡狂倢倣値倥偬倨傲倩匡嗣冲柝珍倬倭寇猩倮倶倷倹勤赞偁偃充伪吏嗓寐惺扮拱芫茜藉虢钞偈伟晶偌宕距析滤殿疼瘫注颇偓偕鸭歇滞偝偟偢忘怡旺偨偩逼偫偭偯偰偱偲侦缉蹄偷减惰漏窥窃偸偺迹傀儡傅傈僳骂篱傎奎琳迪叟芭傒傔傕伧悉荒傜傞傢傣芽逼佣婢傮睨寄檄诵谣颂伛担辜弓惨蒿悼疤傺傻屄臆巢泄箧羡盖轧颓傿㑩僄僇佥僊働僎侨僔僖僚僝伪僣僤侥僦猴偾僩僬僭僮僯僰雇僵殖签静僾僿征陇儁侬儃儇侩朴薄儊儋儌儍傧儓俦侪拟尽儜儞儤儦儩汰哉寡渥裕酷儭儱罐儳儵儹傩俨儽兀臬臲鹫允勋勋宙宵帅憝彝谐嫂阋畅沛溢盈饥赫凶悍狠猛顽愚妣斩秦遣鞭耀敏荣槃泽爆碟磁秃缆辉霁卤朵娄孜烽酱勃汀箕裘钳耶蒙蕾彻兑软遭黜兎児韵媳爸兕觥兖兙兛兜售鍪肚兝兞兟兡兢兣樽殓涅睡禀籍赘泌啡肽奸幕涵涝熵疚眷稃衬讧赴焕椒歼植跏没试误猜栖窗肋袖颊兪卦撇胡岐廓轿疸枫茴珑厕秩募勺吨寓斤历亩迫筷厘最淫螺韬兮宽匪筛襄赢轭复兲诈刃堰戎痞蚁饷它冀铸冂冃円冇冉册嫁厉砺竭醮冏牧冑冓冔冕冖冗冘冞冢窄抑诬冥冫烘菇蛰冷凝坨橇淇淋炭饼砖碛窖醋雕雹霜冱冶炉艳嘲峻滩淡漠煖飕饮冼冽凃凄怆梗凅凇净凊凋敝蒙凔凛遵汞脢凞几凢処凰凯凵凶焰凸折刷纹预丧喽奔巡榜殡芙蓉租笼辑鞘萃凼锯镬刁蛮刂娩崩批拆摊掰蘖骤歧颗秒袂赃勿嘱忌磋琢肤刈羽刎讼戮舂桨艇刓刖霹雳刜创犊刡恙墅帜筵致劫劫刨昏默攸尿欲熏润薰圭删刮痧铲刱刲刳刴刵踏磅戳柏槐绣芹苋猬舟铭鹄鹜劫剁剃辫刭锉履铅克剌姻咽哨廊掠桅沿召瞻翅赵卜渺茫郭剒剔剕沥剚愎毅讷才剜剥啄采剞剟剡剣剤䌽剐肾驶黏剰袍剀紊铲剸剺剽剿劁劂札劈啪柴扳啦刘奭姥夼昫涓熙禅禹锡翔雁鹗刽刿弩柄蜻蛉劒劓劖劘劙澜篑赏矶釜晋甜薪逐劦熔纣虐赤囚劬劭労劵效劻劼劾峭艮勅勇励勍勐腊脖庞漫饲荡粥辄勖勗勘骄馁碌泮雇捐竹骑殊阱绩朴恳谨剿勧勩勯勰劢勋勷劝惩慰诫谏勹芡践阑匁庇拯粟扎袱裹饺匆遽匈匉匊匋匍匐茎匏匕妆痰脓蛹斋苑烤蹈塘羌熊阀螳螂疆碚竿纬荷茵邙魏匚匜匝匟扶稷匣匦拢匸匹耦匽匾匿卂叮疮禧轸堤棚迢钧炼卄卆遐卉瓷盲瓶当胱腱裸卋卌卍卐怯污贱鄙龌龊陋卓溪唐梯渔陈枣泥漳浔涧梨芬谯赡辕迦郑単驴弈洽鳌卛占筮卝卞卟吩啉屎翠厄卣卨卪卬卮榫袄玺绶钮蚤惧殆笃耸卲帘帙绕恤卼卽厂厎厓厔厖厗奚厘厍厜厝谅厕厤厥厪腻孢厮厰厳厣厹厺粕垢芜菁厼厾叁悟茸薯叄吵笄悌哺讥坫垄弧芯杠潜婴刍袁诘贪谍煽馈驳収岳缔灾贿骗叚叡吻拦蘑蜜诀燧玩砚筝椎蔺铜逗骊另觅叨唠谒杵姓喊嚷嚣咚咛塑寻恼憎擦只泣渗蝠叱吒咄咤喝籀黛舵舷叵叶铎懿昭穰苴辽叻叼吁堑嫖赌瞧爬众抒吅吆夥卺橡涤抱纵摩郡唁坠扇篮膀袜颈吋忾谘酬哭妓媛暗表缰迩妃羿絮蕃浑拐葵暮隅吔吖啶嗪戚吜啬噬咽吟哦咏吠吧唧嗒咐吪隽咀征燐苞茹钙哧吮吰吱嘎吲哚吴栋娇窟孟箫忠晗淞阖闾趼宇呐睛嘘拂捧疵熄竽笛糠吼吽呀吕韦蒙呃呆笨呇贡呉罄呋喃呎呏呔呠呡痴呣呤呦呧瑛眩扒晬淑姬瑜璇鹃呪呫哔嚅嗫呬呯呰呱呲咧噌钝呴呶呷呸呺呻哱咻啸噜吁坎坷逻呿咁咂咆哮咇咈咋蟹煦珅蔼咍咑咒诅咔哒嚓咾哝哩喱咗咠咡咢咣咥咦咨嗟询咩咪咫啮啮咭咮咱咲咳呛嗽咴啕咸咹咺呙喉咿婉恸悯赋矜绿茗蓝哂抢瞒哆嗦啰噻啾滨彗哋哌哎唷哟哏哐哞哢哤哪里哫啼喘哰哲萎蚌哳咩哽哿呗唅唆唈唉唎唏哗尧棣殇璜睿肃唔睇唕吣唞唣喳唪唬唰喏唲唳唵嘛唶唸唹唻唼唾唿啁啃鹦鹉啅埠栈榷祺铺鞅飙啊啍啎啐啓啕啖啗啜哑祈啢衔啤啥啫啱啲啵啺饥啽噶昆沁喁喂喆裙喈咙喋喌喎喑喒喓喔粗喙幛庆滋鹊喟喣喤喥喦喧骚喨喩梆吃葡萄喭驼挑吓碰枞瓣
纯疱藻趟铬喵営喹喺喼喿嗀嗃嗄嗅嗈嗉嗊嗍嗐嗑嗔诟嗕嗖嗙嗛嗜痂癖嗝嗡嗤嗥嗨唢嗬嗯嗰嗲嗵叽嗷嗹嗾嗿嘀嘁嘂嘅惋嘈峪禾荫啀嘌嘏嘐嘒啯啧嘚唛嘞嘟囔嘣嘥嘦嘧嘬嘭这谑严敞馋松哓嘶嗥呒虾嘹嘻啴嘿噀噂噅噇噉噎噏噔噗噘噙噚咝噞噢噤蝉皿噩噫噭嗳噱哙噳嚏涌洒欲巫霏噷噼嚃嚄嚆抖哜尝嚔苏嚚嚜嚞嚟呖嚬嚭嚮嚯亸喾饬按竣苛嚵嘤啭冁呓膪谦囍囒囓囗囘萧酚飘溅谛囝溯眸纥銮鹘囟殉囡団囤囥囧囨囱囫囵囬囮囯囲図囶囷囸囹圄圉拟囻囿圀圂圃圊粹蠹赦圌垦圏滚鲱凿枘圕圛圜圞坯埂壤骸炕祠窑豚绅魠鲮鳖圧握圩圪垯圬圮圯炸岬幔毯祇窨菩溉圳圴圻圾坂坆沾坋坌舛壈昆垫墩椅坒坓坩埚坭坰坱坳坴坵坻坼杨挣涎帘垃垈垌垍垓垔垕垗垚垛垝垣垞垟垤垧垮垵垺垾垿埀畔埄埆埇埈埌殃隍埏埒埕埗埜垭埤埦埧埭埯埰埲埳埴埵埶绋埸培怖桩础辅埼埽堀诃侄庑堃堄摧磐贞韧砌堈堉垩堋堌堍堎垴堙堞堠礁堧堨舆堭堮蜓摘堲堳堽堿塁塄塈煤茔棵塍垲埘塓绸塕鸦沽虱塙冢塝缪塡坞埙塥塩塬塱场螨塼塽塾塿墀墁墈墉墐夯増毁墝墠墦渍钵墫墬堕墰墺墙橱壅壆壊壌壎壒榨蒜壔壕壖圹垆壜壝垅壡壬壭壱売壴壹壻壸寝壿夂夅夆変夊夌漱邑夓腕泄甥御骼夗夘夙衮瑙妊娠醣枭珊莺鹭戗幻魇夤蹀秘擂鸫姚宛闺屿庾挞拇賛蛤裨菠氅漓捞湄蚊霆鲨箐篆篷荆肆舅荔鲆巷惭骰辟邱镕镰阪漂烩鲵鲽鳄鸨胪鹏妒峨谭枰晏玑癸祝秤竺牡籁恢罡蝼蝎赐绒御梭夬夭砣榆怙枕夶夹馅奄崛葩谲奈贺祀赠奌奂奓奕䜣詝奘奜奠奡奣陶奨奁魁奫奬奰娲孩贬隶酥宄狡猾她姹嫣妁毡荼皋膻蝇嫔妄妍嫉媚娆妗趣妚妞妤碍妬娅妯娌妲妳妵妺姁姅姉姗姒姘姙姜姝姞姣姤姧姫姮娥姱姸姺姽婀娀诱慑胁娉婷娑娓娟娣娭娯娵娶娸娼婊婐婕婞婤婥溪孺婧婪婬婹婺婼婽媁媄媊媕媞媟媠媢媬媮妫媲媵媸媺媻媪眯媿嫄嫈袅嫏嫕妪嫘嫚嫜嫠嫡嫦嫩嫪毐嫫嫬嫰妩嫺娴嫽嫿妫嬃嬅嬉耍婵痴艳嬔嬖嬗嫱袅嫒嬢嬷嬦嬬嬭幼嬲嬴婶嬹嬾嬿孀娘孅娈孏曰癫屏孑孓雀孖斟篓谜摺孛矻鸠崮轲祜鸾孥邈毓棠膑孬孭孰孱孳孵泛罔衔孻孪宀宁冗拙株薇掣抚琪瓿榴谧弥宊濂祁瑕宍宏碁宓邸谳実潢町宥宧宨宬徵崎骏掖阙臊煮禽蚕宸豫寀寁寥寃檐庶寎暄碜寔寖寘寙寛寠苫寤肘洱滥蒗陕核寪弘绰螽宝擅疙瘩晷対檐専尃尅赎绌缭畴衅尌峙醌襟痲碧屁昊槌淘恵瀑牝畑莓缸羚觑蔻脏躁尔尓锐尗尙尜尟尢��尨尪尬尭尰擒尲尶尴尸尹潽蠖蛾尻扣梢蚴鳍脬蹲屇屌蚵屐屃挪屖屘屙屛屝屡屣峦嶂岩舄屧屦屩屪屃屮戍驻钾崖嵛巅旮旯楂榄榉芋茱萸靛麓屴屹屺屼岀岊岌岍阜岑彭巩岒岝岢岚岣岧岨岫岱岵岷峁峇峋峒峓峞峠嵋峨峰峱岘峹峿崀崁崆祯崋崌崃岖昆崒崔嵬巍萤颢崚崞崟崠峥巆崤崦崧殂岽崱崳崴崶崿嵂嵇嵊泗嵌嵎嵒嵓岁嵙嵞嵡嵩嵫嵯嵴嵼嵾嵝崭崭晴嶋嶌嶒嶓嵚崂嶙嶝嶞峤嶡嶢峄嶨嶭嶮嶰嶲岙嵘巂巃巇巉岿巌巓巘巛滇芎巟巠弋回巣巤炊擘蜥蟒蛊觋巰蜀彦淖杏茂甫楞巻巽帼巿帛斐鲫蕊帑帔帗帚琉汶帟帡帣帨裙帯帰帷帹暆帏幄帮幋幌幏帻幙帮幞幠幡幢幦幨幩幪帱幭幯幰遥蹉跎馀庚鉴幵幷稚邃庀庁広庄庈庉笠庋跋庖牺庠庤庥鲸庬庱庳庴庵馨衢庹庿廃厩廆廋廌廎廏廐廑廒荫廖廛厮搏锣廞弛袤廥廧廨廪廱绵踵髓廸迫瓯邺廻廼廾廿躔弁皱弇弌弍弎弐弑吊诡憾荐弝弢弣弤弨弭弮弰弪霖繇焘斌旭溥骞弶弸弼弾彀彄别累纠强彔彖彘彟彟陌彤贻彧绘虹彪炳雕蔚鸥彰瘅彲彳彴仿彷徉徨彸彽踩敛旆徂徇徊渭畲铉裼従筌徘徙徜徕膳苏萌渐徬徭醺徯徳徴潘徻徼忀瘁胖燎怦悸颤扉犀澎湃砰恍惚绞隘忉惮挨饿忐忑忒忖応忝忞耿忡忪忭忮忱忸怩忻悠懑怏遏怔怗怚怛怞怼黍讶怫怭懦怱怲恍怵惕怸怹恁恂恇恉恌恏恒恓恔恘恚恛恝恞恟恠恣恧眄恪恫恬澹恰恿悀悁悃悄悆悊悐悒晦悚悛悜悝悤您悩悪悮悰悱凄恻德悴怅惘闷悻悾惄愫钟蒐惆惇惌惎惏惓惔惙惛耄惝疟浊恿惦德恽惴蠢惸拈愀愃愆愈愊愍愐愑愒愓愔愕恪氓蠢騃昵惬赧悫愬愮愯恺愼慁恿慅慆慇霭慉慊愠慝慥怄怂慬慱悭慴慵慷戚焚憀灼郁憃惫憋憍眺捏轼愦憔憖憙憧憬憨憪憭怃憯憷憸憹憺懃懅懆邀懊懋怿懔懐懞懠懤懥恹懫懮懰懱毖懵遁梁雍忏懽戁戄戆戉戋戕戛戝戛戠戡戢戣戤戥戦戬戭戯轰戱披菊牖戸戹戺戻卯戽锹扂楔扃扆扈扊杖牵绢铐镯赉扐搂搅烊盹瞌跟趸镲靶鼾払扗玫腮扛扞扠扡扢盔押扤扦扱罾揄绥鞍郤窾扻扼扽抃抆抈抉抌抏瞎抔缳缢擞抜拗択抨摔歉蹿牾抶抻搐泵菸拃拄拊髀抛拌脯拎拏拑擢秧沓曳挛迂拚拝拠拡拫拭拮踢拴拶拷攒拽掇芥橐簪摹疔挈瓢骥捺蹻挌挍挎挐拣挓挖掘浚挙揍聩挲挶挟挿捂捃捄捅捆捉捋胳膊揎捌捍捎躯蛛捗捘捙捜捥捩扪捭据捱捻捼捽掀掂抡臀膘掊掎掏掐笙掔掗掞棉芍掤搪阐掫掮掯揉掱掲掽掾揃揅揆搓揌诨揕揗揘揜揝揞揠揥揩揪揫橥遒麈揰揲揵揶揸背揺搆搉搊搋搌搎搔搕撼橹捣搘搠搡搢搣搤搥搦搧搨搬楦裢讪赸掏搰搲搳搴揾搷搽搾搿摀摁摂摃摎掴摒摓跤摙摛掼摞摠摦喉羯摭摮挚摰摲抠摴抟摷掺摽撂撃撅稻撊撋挦锏泼撕撙撚㧑挢撢掸撦撅撩撬撱朔揿蚍蜉挝捡擀掳闯擉缶觚擐擕擖擗擡擣擤澡腚擧擨擩擫擭摈拧撷擸撸擽擿攃摅撵攉攥攐攓撄搀撺每攩攫辔澄攮攰攲攴轶攷砭讦攽碘敁敃敇敉叙敎筏敔敕敖闰诲敜煌敧敪敳敹敺敻敿斁衽斄牒绉诌斉斎斓鹑谰驳鳢斒筲斛斝斞斠斡斢斨斫斮晾沂潟颖绛邵斲斸釳於琅斾斿旀旗旃旄涡旌旎旐旒旓旖旛旝旟旡旣浴旰獭魃旴时旻旼旽昀昃昄昇昉晰躲澈熹皎皓矾昑昕
昜昝昞昡昤晖笋昦昨是昱昳昴昶昺昻晁蹇隧蔬髦晄晅晒晛晜晞晟晡晢晤晥曦晩萘莹顗晿暁暋暌暍暐暔暕煅旸暝暠暡曚暦暨暪朦胧昵暲殄冯暵暸暹暻暾曀晔昙曈曌曏曐暧曘曙曛叠昽曩骆曱甴肱曷牍禺锟曽沧耽朁朅朆杪栓夸竟粘绦朊膺朏朐朓朕朘朙瞄觐溘饔飧朠朢朣栅椆淀虱朩朮朰朱炆璋钰炽鹮朳槿朵朾朿杅杇杌陧欣钊湛漼楷瀍煜玟缨翱肇舜贽适逵杓杕杗杙荀蘅杝杞脩珓筊杰榔狍閦颦缅莞杲杳眇杴杶杸杻杼枋枌枒枓衾葄翘纾逋枙狸桠枟槁枲枳枴枵枷枸橼枹枻柁柂柃柅柈柊柎某柑橘柒柘柙柚柜柞栎柟柢柣柤柩柬柮柰柲橙柶柷柸柺査柿栃栄栒栔栘栝栟柏栩栫栭栱栲栳栴檀栵栻桀骜桁镁桄桉桋桎梏椹葚桓桔桕桜桟桫椤桭杯桯桲桴桷桹湘溟梃梊梍梐潼栀枧梜梠梡梣梧梩梱梲梳梴梵梹棁棃樱棐棑棕榈簑绷蓑枨棘棜棨棩棪棫棬棯棰棱棳棸棹椁棼碗椄苕椈椊椋椌椐椑椓椗検椤椪椰椳椴椵椷椸椽椿楀匾楅篪楋楍楎楗楘楙楛楝楟楠楢楥桢楩楪楫楬楮楯楰梅楸楹楻楽榀榃榊榎槺榕榖榘榛狉莽搒笞榠榡榤榥榦榧杩榭榰榱梿霰榼榾桤槊闩槎槑槔槖様槜槢槥椠槪槭椮槱槲槻槼槾樆樊樏樑樕樗樘樛樟樠樧樨権樲樴樵猢狲桦樻罍樾樿橁橄橆桡笥龠橕橚橛辆椭橤橧竖膈跨橾橿檩檃檇柽檍檎檑檖檗桧槚檠樯檨檫檬梼槟檴檵柠棹櫆櫌栉櫜椟櫡槠栌枥榇栊櫹棂茄櫽欀欂欃欐欑栾欙棂溴欨欬欱欵欶欷歔欸欹欻欼欿歁歃歆艎歈歊莳蝶歓歕歘歙歛歜欤歠蹦诠镶蹒跚升陟歩歮歯歰歳歴璞歺瞑歾殁夭殈殍殑殗殜殙殛殒殢殣殥殪殚僵殰殳荃殷殸殹蛟殻肴谤殴毈毉喂毎���蕈毗毘毚茛邓毧毬毳毷毹毽毾毵牦氄氆靴氉氊氇氍氐聊氕氖気氘氙氚氛氜氝氡汹焊痉氤氲氥氦铝锌氪烃氩铵痤汪浒漉痘盂碾菖蒲蕹蛭螅氵冰氹氺氽烫氾氿渚汆汊汋汍汎汏汐汔汕褟汙汚汜蓠沼秽蔑汧汨汩汭汲汳汴堤汾沄沅沆瀣沇沈葆浸沦湎溺痼疴沌沍沏沐沔沕沘浜畹砾沚沢沬沭沮沰沱灢沴沷籽沺烹濡洄泂肛泅泆涌肓泐泑泒泓泔泖泙泚泜泝泠漩馍涛粼泞藓鳅泩泫泭泯铢泱泲洇洊泾琵琶荽蓟箔洌洎洏洑潄濯洙洚洟洢洣洧洨洩痢滔洫洮洳洴洵洸洹洺洼洿淌蜚浄浉浙赣渫浠浡浤浥淼瀚浬浭翩萍浯浰蜃淀苔蛞蝓蜇螵蛸煲鲤浃浼浽溦涂涊涐涑涒涔滂莅涘涙涪涫涬涮涴涶涷涿淄淅淆淊凄黯淓淙涟淜淝淟淠淢淤渌淦淩猥藿亵淬淮淯淰淳诣涞纺淸淹炖癯绮渇済渉渋渓渕涣渟渢滓渤澥渧渨渮渰渲渶渼湅湉湋湍湑湓湔黔湜湝浈湟湢湣湩湫湮麟湱湲湴涅満沩溍溎溏溛舐漭溠溤溧驯溮溱溲溳溵溷溻溼溽溾滁滃滉滊荥滏稽滕滘汇滝滫滮羼耷卤滹浐煎漈漊漎绎漕漖漘漙沤漜漪漾漥漦漯漰溆漶漷濞潀颍潎潏潕潗潚潝潞潠潦祉疡潲潵滗潸潺潾涠澁澂澃澉澌澍澐澒澔澙渑澣澦澧澨澫澬浍澰澴澶澼熏郁濆濇濈濉濊貊濔疣濜濠濩觞浚濮盥潍濲泺瀁滢渎渖瀌浏瀒瀔濒泸瀛潇潆瀡潴泷濑瀬弥潋瀳瀵瀹瀺瀼沣滠灉灋灒漓灖灏灞灠滦灥灨滟灪蜴灮烬獴灴灸灺炁炅鱿炗炘炙炤炫疽烙钎炯炰炱炲炴炷毁炻烀烋瘴鲳烓烔焙烜烝烳饪烺焃焄耆焌焐焓焗焜焞焠焢焮焯焱焼煁煃煆煇煊熠煍熬煐炜煕暖熏硷霾煚煝煟煠茕矸煨琐炀萁煳煺煻熀熅熇熉罴荧穹炝熘熛熜稔谙烁熤熨熯熰眶蚂颎熳熸熿燀烨燂燄盏燊燋燏燔隼燖焖燠燡灿燨燮燹燻燽燿爇爊爓爚爝爟爨蟾爯爰为爻丬爿牀牁牂牄牋窗牏牓窗釉牚腩蒡虻牠虽蛎牣牤牮牯牲牳牴牷牸牼绊牿靬犂犄犆犇犉犍犎犒荦犗犛犟犠犨犩犪犮犰狳犴犵犺狁甩狃狆狎狒獾狘狙黠狨狩狫狴狷狺狻豕狈蜘猁猇猈猊猋猓猖獗猗猘狰狞犸猞猟獕猭猱猲猳猷猸猹猺玃獀獃獉獍獏獐獒毙獙獚獜獝獞獠獢獣獧鼇蹊狯猃獬豸狝獯鬻獳犷猕猡玁菟玅玆玈珉糁禛郅玍玎玓瓅玔玕玖玗玘玞玠玡玢玤玥玦珏瑰玭玳瑁玶玷玹玼珂珇珈瑚珌馐馔珔珖珙珛珞珡珣珥珧珩珪佩珶珷珺珽琀琁陨玡琇琖琚琠琤琦琨琫琬琭琮琯琰琱琲琅琴珐珲瑀瑂瑄瑉玮瑑瑔瑗瑢瑭瑱瑲瑳瑽瑾瑿璀璨璁璅璆璈琏璊璐璘璚璝璟璠璡璥瑷璩璪璫璯璲玙璸璺璿瓀璎瓖瓘瓒瓛脐瓞瓠瓤瓧瓩瓮瓰瓱瓴瓸瓻瓼甀甁甃甄甇甋甍甎甏甑甒甓甔瓮甖甗饴蔗甙诧钜粱盎锈团甡褥産甪甬甭甮宁铠甹甽甾甿畀畁畇畈畊畋畎畓畚畛畟鄂畤畦畧荻畯畳畵畷畸畽畾疃叠疋疍疎箪疐疒疕疘疝疢疥疧疳疶疿痁痄痊痌痍痏痐痒痔痗瘢痚痠痡痣痦痩痭痯痱痳痵痻痿瘀痖瘃瘈瘉瘊瘌瘏瘐痪瘕瘖瘙瘚瘛疭瘜瘝瘗瘠瘥瘨瘭瘆瘯瘰疬瘳疠瘵瘸瘺瘘瘼癃痨痫癈癎癐癔癙癜癠疖症癞蟆癪瘿痈発踔绀蔫酵皙砬砒翎翳蔹钨镴皑鹎驹暨粤褶皀皁荚皃镈皈皌皋皒朱皕皖皘皜皝皞皤皦皨皪皫皭糙绽皴皲皻皽盅盋碗盍盚盝踞盦盩秋千盬盭眦睁瞤盯盱眙裰盵盻睐眂眅眈眊県眑眕眚眛眞眢眣眭眳眴眵眹瞓眽郛睃睅睆睊睍睎困睒睖睙睟睠睢睥睪睾睯睽睾眯瞈瞋瞍逛瞏瞕瞖眍䁖瞟瞠瞢瞫瞭瞳瞵瞷瞹瞽阇瞿眬矉矍铄矔矗矙瞩矞矟矠矣矧矬矫矰矱硪碇磙罅舫阡、矼矽礓砃砅砆砉砍砑砕砝砟砠砢砦砧砩砫砮砳艏砵砹砼硇硌硍硎硏硐硒硜硖砗磲茚钡硭硻硾碃碉碏碣碓碔碞碡碪碫碬砀碯碲砜碻礴磈磉磎硙磔磕磖磛磟磠磡磤磥蹭磪磬磴磵磹磻硗礀硚礅礌礐礚礜礞礤礧礮砻礲礵礽礿祂祄祅祆禳祊祍祏祓祔祕祗祘祛祧祫祲祻祼饵脔锢禂禇禋祦禔祎隋禖禘禚禜禝禠祃禢禤禥禨禫祢禴禸秆秈秊闱飒秋秏秕笈蘵赁秠秣秪秫秬秭秷秸稊稌稍稑稗稙稛稞稬秸稲稹稼颡稿穂穄穇穈穉穋稣贮穏穜穟秾穑穣穤穧穨穭穮穵穸窿阒窀窂窅窆窈窕窊窋窌窒窗窔窞窣窬
黩蹙窑窳窴窵窭窸窗竁竃竈竑竜并竦竖篦篾笆鲛竾笉笊笎笏笐靥笓笤箓笪笫笭笮笰笱笲笳笵笸笻筀筅筇筈筎筑筘筠筤筥筦笕筒筭箸筰筱筳筴宴筸箂个箊箎箑箒箘箙箛箜篌箝箠箬镞箯箴箾篁筼筜篘篙篚篛篜篝篟篠篡篢篥篧篨篭篰篲筚篴篶篹篼箦簁簃簆簉簋簌簏簜簟簠簥簦簨簬簰簸簻籊藤籒籓籔签籚篯箨籣籥籧笾簖籫籯芾麴籵籸籹籼粁秕粋粑粔粝粛粞粢粧粨粲粳稗粻粽辟粿糅糆糈糌糍糒糔萼糗蛆蹋糢糨糬粽糯糱籴粜糸糺紃蹼鲣霉纡纨绔纫闽襻紑纰纮锭鸢鹞纴紞紟扎紩紬绂绁纻紽紾绐絁絃絅経絍绗絏缡褵絓絖絘絜绚絣螯絪絫聒絰絵绝絺絻絿綀绡綅绠绨绣綌綍綎捆綖綘継続缎绻綦綪线綮綯绾罟蝽綷縩绺绫緁绲緅緆缁绯緌緎総緑绱緖缃缄缂绵缗緤褓缌纂緪緰缑缈缏缇縁縃縄萦缙缒縏缣縕缞縚缜缟缛縠縡縢縦绦縯縰骋缧縳纤缦絷缥縻衙縿繄缫繈繊繋繐缯繖繘繙繠缋繣繨缰缲繸繻缱纁纆纇缬缵纩纑纕缵纙纚纛缾罃罆坛罋罂罎罏罖罘罛罝罠罣罥罦罨罫罭锾罳罶罹罻罽罿羂羃羇芈蕉51鸵羑羖羌羜羝羢羣羟羧羭羮羰羱羵羶羸藜鲐翀翃翅翊翌翏翕翛翟翡翣翥翦跹翪翫翚翮翯翱翽翾翿板饕鸹锨耋耇耎耏专耒耜耔耞耡耤耨耩耪耧耰鬓耵聍聃聆聎聝聡聦聱聴聂聼阈聿肄肏肐肕腋肙肜肟肧胛肫肬肭肰肴肵肸肼胊胍胏胑胔胗胙胝胠铨胤胦胩胬胭胯胰胲胴胹胻胼胾脇脘脝脞脡脣脤脥脧脰脲脳腆腊腌臜腍腒腓胨腜腠脶腥腧腬腯踝蹬镣腴腶蠕诽膂腽嗉膇膋膔腘膗膙膟黐膣膦膫膰膴膵膷脍臃臄臇臈臌臐臑臓膘臖臙臛臝臞臧蓐诩臽臾臿舀舁鳑鲏舋舎舔舗馆舝舠舡舢舨舭舲舳舴舸舺艁艄艅艉艋艑艕艖艗艘艚艜艟艣舣艨艩舻艬艭荏艴艳艸艹艻艿芃芄芊萰陂藭芏芔芘芚蕙芟芣芤茉芧芨芩芪芮芰鲢芴芷芸荛豢芼芿苄苒苘苙苜蓿苠苡苣荬苤苎苪镑苶苹苺苻苾茀茁范蠡萣茆茇茈茌茍茖茞茠茢茥茦菰茭茯茳藨茷藘茼荁荄荅荇荈菅蜢鸮荍荑荘豆荵荸荠莆莒莔莕莘莙莚莛莜莝莦莨菪莩莪莭莰莿菀菆菉菎菏菐菑菓菔芲菘菝菡菢菣菥蓂菧菫毂蓥菶菷菹醢菺菻菼菾萅萆苌萋萏萐萑萜萩萱萴莴扁萻葇葍葎葑荭葖葙葠葥苇葧葭药葳葴葶葸葹葽蒄蒎莼茏薹莅蒟蒻蒢蒦蒨蒭藁蒯蒱鉾蒴蒹蒺蒽荪蓁蓆蓇蓊蓌蓍蓏蓓蓖蓧蓪蓫荜跣藕苁蓰蓱莼蓷蓺蓼蔀蔂蔃蔆蔇蔉蔊蔋蔌蔎蔕蔘蔙蒌蔟锷蒋雯茑蔯蔳麻蔵蔸蔾荨蒇蕋蕍荞蕐蕑芸莸蕖蕗蕝蕞蕠蕡蒉蕣蕤蕨蕳蓣蕸蕺蕻薀薁薃薅薆荟薉芗薏薐蔷薖薘剃谔钗薜薠薢薤薧薨薫薬薳薶薷薸薽薾薿藄藇藋荩藐藙藚藟藦藳藴苈藷藾蘀蘁蕲苹蘗蘘蘝蘤蘧蘩蘸蘼虀虆虍蟠虒虓虖虡虣虥虩虬虰蛵蛇虷鳟虺虼蚆蚈蚋蚓蚔蚖蚘蚜蚡蚣蚧蚨蚩蚪蚯蚰蜒蚱蚳蚶蚹蚺蚻蚿蛀蛁蛄蛅蝮蛌蛍蛐蟮蛑蛓蛔蛘蛚蛜蛡蛣蜊蛩蛱蜕螫蜅蚬蜈蝣蜋蜍蜎蜑蠊蜛饯蜞蜣蜨蜩蜮蜱蜷蜺蜾蜿蝀蝃蝋蝌蝍蝎蝏蝗蝘蝙蝝鲼蝡蝤蝥猿蝰虻蝲蝴蝻螃蠏蛳螉螋螒螓螗螘螙螚蟥螟螣螥螬螭䗖螾螀蟀蟅蝈蟊蟋蟑蟓蟛蟜蟟蟢虮蟨蟪蟭蛲蟳蛏蟷蟺蟿蠁蠂蠃虿蠋蛴蠓蚝蠗蠙蠚蠛蠜蠧蟏蠩蜂蠮蠰蠲蠵蠸蠼蠽衁衄衄衇衈衉衋衎衒同衖胡衞裳钩衭衲衵衹衺衿袈裟袗袚袟袢袪袮袲袴袷袺袼褙袽裀裉袅裋夹裍裎裒裛裯裱裲裴裾褀褂褉褊裈褎褐褒褓褔褕袆褚褡褢褦褧褪褫袅褯褰褱裆褛褽褾襁褒襆裥襉襋襌襏襚襛襜裣襞襡襢褴襦襫襬襭襮襕襶襼襽襾覂覃覅霸覉覊覌覗觇覚覜觍觎覧覩觊觏覰観觌觔觕觖觜觽觝觡酲觩觫觭觱觳觯觷觼觾觿言赅讣訇訏訑訒诂讬訧訬訳訹证訾詀詅诋毁詈詊讵詑诒诐詗诎察詨诜詶詸詹詻诙诖誂誃诔锄诓誋诳诶悖誙诮诰誧説読誯谇訚谄谆諆諌诤诹诼諕谂谀諝谝諟喧谥諴諵谌谖誊謆謇歌謍謏謑谡谥謡謦謪谪讴謷謼谩哗譅譆譈譊讹譒撰谮鑫譞噪譩谵譬譱譲谴譸譹谫讅讆詟䜩雠讐谗谶讙谠讟谽豁豉豇岂豊豋豌豏豔豞豖豗豜豝豣豦豨豭豱豳豵豶豷豺豻貅貆狸猊貔貘䝙貜貤餍贳餸贶贲赂賏赊赇赒賝赓赕賨赍斗賮賵賸赚赙赜赟贉赆赑贕赝赬赭赱赳迄趁趂趄趐趑趒趔趡趦趫趮趯趱趴趵趷趹趺趿跁跂跅跆踬跄跐跕跖跗跙跛跦跧跩跫跬跮跱跲跴跺跼跽踅踆踈踉踊踒踖踘踜踟躇蹰踠踡踣踤踥踦踧跷踫踮逾踱踊踶踹踺踼踽躞蹁蹂躏蹎蹐蹓蹔跸蹚蹜蹝迹蹠蹡蹢跶蹧蹩蹪蹯鞠蹽躃躄躅踌跻躐踯跞躘躙躗躝躠蹑躜躧躩躭躰躬躶軃軆辊軏轫軘軜軝腭転軥軨軭軱轱辘軷轵轺軽軿輀輂辇辂辁輈挽輗辄辎辋輠輤輬輭輮辏輴輵輶輹輼辗辒轇轏轑轒辚轕轖轗轘轙轝轞轹轳罪辣辞辵辶辺込辿迅迋迍麿迓迣迤逦迥迨迮迸迺迻迿逄逅逌逍逑逓迳逖逡逭逯逴逶逹遄遅侦遘遛遝遢遨遫遯遰遴绕遹遻邂邅邉邋邎邕邗邘邛邠邢邧邨邯郸邰邲邳邴邶邷邽邾邿郃郄郇郈郔郕郗郙郚郜郝郞郏郠郢郪郫郯郰郲郳郴郷郹郾郿鄀鄄郓鄇鄈鄋鄍鄎鄏鄐鄑邹邬鄕郧鄗鄘鄚鄜鄞鄠鄢鄣鄤鄦鄩鄫鄬鄮鄯鄱郐鄷鄹邝鄻鄾鄿酃酅酆酇郦酊酋酎酏酐酣酔酕醄酖酗酞酡酢酤酩酴酹酺醁醅醆醊醍醐醑醓醖醝酝醡醤醨醪醭醯醰酦醲醴醵醸醹醼醽醾釂酾酽釆釈鲈镏阊钆钇钌钯钋鼢鼹钐钏釪釬釭釱钍釸钕钫鈃钭鈆鈇钚鈊鈌钤钣鈒鈤钬钪鈬铌铈钶铛钹铍钸钿鉄鉆铊铇鉌铋鉏铂钷铆钵鉥钲鉨钼钽鉱鉲鉶铰铒鉼铪銍銎铣銕镂铫铦铑铷銤铱铟銧铥铕铯銭銰焊銶锑锉汞鋂锒鋆鋈鋊铤鋍铗鋐鋑鋕鋘鋙锊锓锔锇铓鋭铖锆锂铽鋳鋹鋺鉴镚钎錀锞锖锫锩錍铔锕錔锱铮锛錞锬锜錤錩錬録铼錼锝钔锴鍉镀鍏鍐铡鍚锻锽锸锲锘鍫鍭鍱鍴锶鍹锗针锺锿镅鎉鎋鎌鎍鎏鎒鎓鎗镉鎚鎞镃鎤铩锼鎭鎯镒镍鎴镓
��鎹镎镟鏊镆镠镝鏖铿锵鏚镗镘镛鏠鏦錾镤鏸镪鏻鏽鏾铙鐄鐇鐏铹镦镡鐗馗镫镢镨鐡锎镄鐩镌鐬鐱镭鐶鐻鐽镱鑀鑅镔鑐鑕鑚鑛鑢鑤镥鑪镧鑯鑱鑴鑵镊镢钃镻闫闬闶闳閒闵閗閟阂関合閤哄阆閲阉閺阎阏阍阌暗闉阕阗闑闒闿闘闚阚闟闠闤闼阞阢阤阨阬阯阹阼阽陁陑陔陛陜陡陥陬骘陴険陼陾阴隃隈隒隗隞隠隣隤隩隮隰颧隳隷隹雂雈雉雊雎雑雒雗雘雚雝雟雩雰雱驿霂霅霈霊沾霒霓霙霝霢霣霤霨霩霪霫霮靁叇叆靑靓靣腼靪靮靰靳靷靸靺靼靿鞀鞃鞄鞍鞗鞙鞚鞝鞞鞡鞣鞨鞫鞬鞮鞶鞹鞾鞑韅鞯驮韍韎韔韖韘韝韫韡韣韭韭韱韹韺頀刮頄顸顼頍颀颃颁頖頞頠頫頬颅頯頲颕頼悴顋顑颙颛颜顕顚顜颟顣颥颞飐飑台飓颸飏飖颽颾颿飀飂飚飌翻飡飣饲飥饨饫飮飧飶餀餂饸饹餇餈饽哺馂餖餗餚馄馃餟餠餤餧餩餪餫糊餮糇餲饧馎糕饩馈馊馌馒饇馑馓膳饎饐饘饟馕馘馥馝馡馣骝骡馵馹駃駄駅駆駉駋驽駓驵駗骀驸駜骂骈駪駬骃駴骎駹駽駾騂騄骓騆騉騋骒骐麟騑騒験騕骛騠騢騣騤騧骧騵驺骟騺蓦骖骠骢驆驈骅驌骁驎骣驒驔驖驙驦驩驫骺鲠骫骭肮骱骴骶骷髅骾髁髂髄髆膀髇髑髌髋髙髝髞髟髡髣髧髪髫髭髯髲髳髹髺髽髾鬁鬃鬅鬈鬋鬎鬏鬐鬑鬒鬖鬗鬘鬙鬠鬣斗鬫鬬阄鬯鬰鬲鬵鬷魆魈魊魋魍魉魑魖鳔魛魟魣魦魨魬鲂魵魸鮀鲅鮆鲧鲇鲍鲋鮓鲒鲕鮟鱇鮠鮦鮨鲔鲑鮶鮸鮿鲧鯄鯆鲩鯈鲻鯕鲭鲞鯙鯠鲲鯥鲰鲶鳀鯸鳊鲗䲠鹣鳇鰋鳄鳆鰕鰛鰜鲥鰤鳏鰦鳎鳐鳁鳓鰶鲦鲡鰼鰽鱀鱄鳙鱆鳕鱎鱐鳝鳝鳜鲟鲎鱠鳣鱨鲚鱮鱲鱵鱻鲅鳦凫鳯鳲鳷鳻鴂鴃鴄鸩鴈鴎鸰鴔鴗鸳鸯鸲鹆鸱鴠鴢鸪鴥鸸鹋鴳鸻鴷鴽鵀鵁鸺鹁鵖鵙鹈鹕鹅鵟鵩鹌鵫鵵鵷鵻鹍鶂鶊鶏鶒鹙鶗鶡鶤鶦鶬鶱鹟鶵鶸鶹鹡鶿鹚鷁鷃鷄鷇䴘䴘鷊鷏鹧鷕鹥鸷鷞鷟鸶鹪鹩鷩鷫鷭鹇鹇鸴鷾䴙鸂鸇䴙鸏鸑鸒鸓鸬鹳鸜鹂鹸咸鹾麀麂麃麄麇麋麌麐麑麒麚麛麝麤麸面麫麮麯麰麺麾黁黈黉黢黒黓黕黙黝黟黥黦黧黮黰黱黪黶黹黻黼黾鼋鼂鼃鼅鼈鼍鼏鼐鼒冬鼖鼙鼚鼛鼡鼩鼱鼪鼫鼯鼷鼽齁齆齇齈齉齌赍齑龀齕齗龅齚龇齞龃龉龆齢出齧齩齮齯齰齱齵齾厐龑龒龚龖龘龝龡龢龤'
18
+
19
+ traditional_characters = '制咖片型超聲盤鑒定仔點他命書歌粉巾字帳恤手指記憶棒形轉彎溝光○〇㐄㐅㐆㐌㐖毒㐜㐡㐤㐰㐺㑇㑳㒳㒸㔾㗂㗎㝵㞎㞙㞞㠯㢲㢴㤅㥁㥯㨗㫺㬎㮎㮚㮸㲋㲱㲾㳮㵎㵪㶸㷖㷭㹢㹴犬㺢狓㺵㼝㽮㿝䍃䔢䖟䖸䗈䗥䗪䝓䠶䥯䦉䯝䰾魚䲔䳗䳘䵹鼄䶑一對應映射丁不識下兒子做二休世丘之貉並中台原則串為甚謂乾淨了百事無成八變五十些人得道雞升天代如併來去個國政策勁幽靈在歐洲遊蕩接樣蘿蔔坑側化傳價元論醇共再准刀兩斷切分耕耘收穫錢貨物向看舊就緒險刻千金動勞永逸匙零夜半卡通回復返影蹤反常態口咬氣句話同吐快吹周味呼諾嗚品紅鍋哄而散起唱和問三知生熟團漆黑火糟堆場空塊麵塌糊塗塵染壁廂夔已足多情露水大早到晚夫妻當關萬莫開失古恨套所料既往孔見提師要家主審寸陰難買鬥牛小撮部陣局展身層巴掌帆風順席地帶過年計於春頭載四季期被蛇怕井繩度願式份彈頃深前律徑心意念差愁孤行俱全房廳交遮打技長把抓死拿眼淚鼻涕鑰鎖折段抿拍即合掃排掬揮撥擁上入擊洞擲攬改故轍敗文值名斑方面旁族日秋餐隔雅里終父旦時晌會霎間晃暴寒曝更月望垠際朝夕本正經利杯羹東西板枝獨秀根筋桿進條龍服務概模次函數又性程總付步腳印趨登毛拔呵氧氮碳決雌雄波未平派謊言流清楚白準溜煙潭有獲聞是處降琴鶴甲病發可拾沙目然瞭直以相眨穿睹瞥瞬矢的解石鳥神教秉虔誠秘種窩蜂窮竅笑置筆苟勾銷抹殺煞等獎箍節吃箭仇雙鵰詩籌籮筐系列紙級士官統絲毫掛維網盡線微吭響股腦胎脈承腔臂力致效資源址器舉功投般說講規貿易葉障著慎滿皆輸號木電池衣傾鐘高低視仁覺醒覽遺角銀幣觸潰九鼎蔽抄出駟馬追重語破貧洗貫走路安蹴至幾蹶振躍役膽汗較輩輪辭贊退六連遍遞邊針血錘音錯門思閃真倒項栽霧類保護川先驚乍體鬨鱗爪鳴滴泡鄰域黨專鼓作齊炒丑烯亥克內酯冬加奴卯肝炎基尺梁街褲鎬客寵庭巳汝昌烷玲磊糖肇酉醛啷青縣韙良香骨鯛丂七集河市弦喜嘴張舌堵區工業姊妹星架構巧彩扭歪拼湊餘熱曜武州爺浮屠美鄉老階樹葷素碎落能魄鰓鰻珠丄丅丆万俟丈尚摸母娘量管群亞虎必我堂令申件裝伏位博俠義界表女墟臺戲臭皮匠勝諸葛亮賽頂倍催請運算包立叉戟離疫苗土史志演圍揭瓦曬夷姑婆帝村寶爛尖杉鹼屜桌山岔島由紀峽壩庫鎮廢從德後拗湯治旬食明昧曹朋友框欄極權冪曲歸依貓民氟硼氯磷鐵江侗自旅法司洋浦梅園溫暖灣焦班幸用田略番疊皇炮捶硝苯酸腺苷稜草鏡穗跳遠索錦綱聚氰胺聯店胚膲愛色堇紫羅蘭芝茶飯菱雲蟲藏藩亂叛蘇親債凳學座恐戀柱測肌腹衩錐係貂企烏跪叩軍車農題迭都甘油屯奏鍵短阿姨陪姐隻顧茅廬槽駕魂鮮鹿頁其菜單乘任供勢午齒漢組織吊調瀉唇坡城報墳外夸將尉建築岸崗公床揚新劍昇杭林栗校樓標款汽社浣海商館劇院鋼華港機械廣媒環球融第醫科證券綜財樂育游漲猶嶺疏癮瞼確兵領導繳肢膛船艾瑟爾蒼蔡虞傚衫覆訪訴課諭議軌述野鉤限敵鞋頜頷顎饒首齦站例修凡劃垂屆屬崽頦廚拜挫擺放旋削棋榻檻禮沉注滑營獄畫确儀聘花葬詔員跌轄週達酒錨閘陷陸雨雪飛威丌于丹久乏予理評產亢卑亦乎舞己悲矩圓詞害誌但住佞佳便俗信票案幅翁倦倫假偏倚斜虧鬼敲停備傷脾胃僅此像儉匱免宜穴焉戴兼容許凍伯仲負彼晝皂軒輊實刊划顛衛戰哥比省非好黃飾別拘束掩奶睬選擇搖擾煩苦枚寫協厭及格受歡迎約只估侵犯割狀告或缺抗拒挽撤救藥喻磨滅端倪少逆逾越避靠適吉譽吝玉含延咎歹聽啻淵善謀均勻堪忍夠太惹妙妥妨孕症孝術室完納推冠積宣疑辯慄碴稱屈撓屑干涉衡待很忙惡忿怎麼怠急恥恭息悅惑惜惟想愉愧怍慌憤啟懂懈懷材才緊招認扣抵拉捨也罷插揣冒搭撞南牆擴核支攻敢雷攀敬裡嗎需景智暇曾罪遇朽枉止況競爭辱求癒渝溶濟左右袒困補爽特寂寞示弱找謝畏強疾徐痛癢冤符眠睦瞅董何厚云措活疲羞者輕玻璃祥兆禁移稂莠穩佛換答簡結果盟絕縷途給談否羈翼耐肖脛毋寧興舒若菲萊痕跡窠臼虛衰臉兔撒鷹棺範該詳諱抬泰讓鬚眉象眾貲賬費灰賴奇慮訓輟辨菽麥辛近送透逞徒速續逮捕遂遑違遜斧鉞艱醉鏽隨觀棄顯飽脂肪使丏丐幫丒且慢末丕替桃宗王尊涼爵各圖屋脊糧署錄壇吾祿職胄襲君廈丗北壑桐疹損逢陵鷸丙寅戌氨腈唑綸辰酮脫氫酶醚丞丟現掉紗帽弄扯砲碗丠両丣坐存激肩臻蒂蓮悖序驅丨丩丫挺杈髻鬟細介俄伊犁京尼布訂普渡央委監察檢查劑圈設警隊斯督剩震境航舶革防托播促質版蠑螈鋒研藝歷殘消頻譜精密製造陲郵候埔堅壓壢凹匯執府究邦俘攝寮彬狼嶽肺腫庸英訊診埋粒胞括控碼韓暑槍樞砥澳哇牟壽甸鑽探篇簽綴縫繼耳肯照婦埃懸璧軸櫃檯辣擱淺邪跑纖阮陽私囊魔丮丰姿采丱燒丳丵丶丷丸參寨朗桂瑞砂衷霞貌鳳僕艦因嫌宰峰幹絡牌持旨祭禱簿編罰賓辦丼丿乀乂乃乄仰慕盛曠留考驗闊乆乇么醜麼乊湖燃乑乒乓乕乖僻忤戾离謬迕乗危肥劫除隙浪婿乙炔腸酰吡咯鹽乚乛乜嘢卿玄宮尾狐龜塔嶷兄弟泉章霄釘耙乞扎哀憐恕討乢乣乤乥乧乨乩童乪乫乭乳暈汁液瑤漿牙癌突竇罩腐膠豬酪蛋糕菌瘤乴乵乶乷乸乹乺乼乾俸冰嘉噦嚎坤媽屍壘旱枯涸俐渴潮澀煸豆燥爹瘦癟癬瞪袋脆薑貝隆餾乿亀亁叫咕攘扔搞男砸竄蓬麻亃亄亅卻亇遲典今臨繁累卵奉婚聰躬巨與遷添裂副宿歲怪噁尕崙愣杆硅硫鈦鈾錳芑雜異鈉砷胂磺琥珀艙棍簧胡茬盜浩
盆販郎腿亍洪亐互欠助勉惠操斥諉繫戶譯亓墓碑刑鈴卅渠繽紛斗米旗憲釩燈徽瘟祖拳福穀豐臟腑綁肉醃苓蘊橋鋪霸顏鬧判噴岡底蛙陘礦亖亙亜罕們娜桑那努哈喀弗烈曼松森杜氏盃奧琛敦戊穆聖裔彙薛孫亟亡佚虜羊牢奮釋卷卸契媾感額睫纏誼趾塞擠紐阻還配馳莊亨洛祚亪享津滬畿郊慈菴枇杷膏亭閣鋥麗亳亶亹誅初責翻瘋偶傑叢稠妖拖寰居吸授慧蝸吞壯魅狗矛盾益渣患憂稀描猿夢暫涯畜禍緣沸搜引擎臣橫紜誰混援蒸獸獅稅剖亻亼亽亾什獻剎邡麽仂仃仄仆富怨仈仉畢昔晨殼紹仍仏仒仕宦仗欺恃腰嘆歎炬梓訖施仙后瓊逝仚仝仞仟悔仡佬償填泊拓撲簇羔購頓欽佩髮棻閫馭養億儆尤藉幀賑凌敘帖李柔剛沃眥睚戒訛取饗讀仨仫仮著泳臥躺韶夏裁仳仵唯賢憑釣誕仿似宋彿諷伀碩盼鵝伄儅伈伉儷柯始娃邁戈坦堡帕茨薩廟瑪莉莎藤霍姆伋伍奢胥廷芳豪伎倆侍汛勒希羲雛伐憩整謨閑閒伕伙伴頤伜伝伢叔恆茲恩翰伱伲侶伶俜悧鼬伸懶縮喇叭伹伺伻伽倻輻伾佀佃佇佈喬妮墨佉盧佌貸劣廉昂檔濃矮傘窪緩耗胸谷迷擋率齲宅沫舍療佐貳佑佔優據鏵嘗呢須魯曉佗佘余坪寺瓜銃僧蒙芒陀龕哼嘔坊姦孽弊揖祟繭縛誓賊佝僂瞀佟你奪趕佡佢佣佤佧賈佪佫佯佰佱潔績釀餚佴捲佶佷佸佹佺佻佼佽佾具喚窘壞娛怒慨硬習慣聾膨脹蔓駭貴痺侀侁侂侃侄侅鴻燕侇侈糜靡侉侌妾侏儒倉鼠侐侑侔侖侘侚鏈侜偎傍鈷循柳葫蘆附価侮罵蔑侯岩截蝕侷貼壺嬛宴捷攜桶箋酌俁狹膝狄俅俉俊俏俎俑俓俔諺俚俛黎健呈固墒增守康箱濕祐鏢鑣槓盒靖膜齡俞豹獵噪孚封札筒託衍鴿剪撰稿煉廠禊練繕葺俯瞰撐衝俲俳俴俵俶俷俺俻俾倀倂倅儲卒惶敷猝逃頡蓄崇隱倌倏忽刺蠟燭噍嚼坍扁抽斃蔥楣灌灶糞背藪賣賠閉霉騰倓倔倖倘倜儻倝借箸挹澆閱倡狂倢倣値倥傯倨��倩匡嗣沖柝珍倬倭寇猩倮倶倷倹勤讚偁偃充偽吏嗓寐惺扮拱芫茜藉虢鈔偈偉晶偌宕距析濾殿疼癱註頗偓偕鴨歇滯偝偟偢忘怡旺偨偩偪偫偭偯偰偱偲偵緝蹄偷減惰漏窺竊偸偺迹傀儡傅傈僳傌籬傎奎琳迪叟芭傒傔傕傖悉荒傜傞傢傣芽逼傭婢傮睨寄檄誦謠頌傴擔辜弓慘蒿悼疤傺傻屄臆巢洩篋羨蓋軋頹傿儸僄僇僉僊働僎僑僔僖僚僝僞僣僤僥僦猴僨僩僬僭僮僯僰僱僵殖籤靜僾僿征隴儁儂儃儇儈朴薄儊儋儌儍儐儓儔儕儗儘儜儞儤儦儩汰哉寡渥裕酷儭儱罐儳儵儹儺儼儽兀臬臲鷲允勛勳宙宵帥憝彞諧嫂鬩暢沛溢盈飢赫兇悍狠猛頑愚妣斬秦遣鞭耀敏榮槃澤爆碟磁禿纜輝霽鹵朵婁孜烽醬勃汀箕裘鉗耶懞蕾徹兌軟遭黜兎児韻媳爸兕觥兗兙兛兜售鍪肚兝兞兟兡兢兣樽殮涅睡稟籍贅泌啡肽奸幕涵澇熵疚眷稃襯訌赴煥椒殲植跏沒試誤猜棲窗肋袖頰兪卦撇鬍岐廓轎疸楓茴瓏廁秩募勺噸寓斤曆畝迫筷釐最淫螺韜兮寬匪篩襄贏軛複兲詐刃堰戎痞蟻餉它冀鑄冂冃円冇冉冊嫁厲礪竭醮冏牧冑冓冔冕冖冗冘冞冢窄抑誣冥冫烘菇蟄冷凝坨橇淇淋炭餅磚磧窖醋雕雹霜冱冶爐艷嘲峻灘淡漠煖颼飲冼冽凃凄愴梗凅凇凈凊凋敝濛凔凜遵汞脢凞几凢処凰凱凵凶焰凸摺刷紋預喪嘍奔巡榜殯芙蓉租籠輯鞘萃凼鋸鑊刁蠻刂娩崩批拆攤掰櫱驟歧顆秒袂贓勿囑忌磋琢膚刈羽刎訟戮舂槳艇刓刖霹靂刜創犢刡恙墅幟筵緻刦刧刨昏默攸尿慾薰潤薰圭刪刮痧鏟刱刲刳刴刵踏磅戳柏槐繡芹莧蝟舟銘鵠鶩刼剁剃辮剄剉履鉛剋剌姻咽哨廊掠桅沿召瞻翅趙卜渺茫郭剒剔剕瀝剚愎毅訥纔剜剝啄採剞剟剡剣剤綵剮腎駛黏剰袍剴紊剷剸剺剽剿劁劂劄劈啪柴扳啦劉奭姥夼昫涓熙禪禹錫翔雁鶚劊劌弩柄蜻蛉劒劓劖劘劙瀾簣賞磯釜晉甜薪逐劦熔紂虐赤囚劬劭労劵効劻劼劾峭艮勅勇勵勍勐臘脖龐漫飼盪粥輒勖勗勘驕餒碌泮雇捐竹騎殊阱勣樸懇謹勦勧勩勯勰勱勲勷勸懲慰誡諫勹芡踐闌匁庇拯粟紮袱裹餃匆遽匈匉匊匋匍匐莖匏匕妝痰膿蛹齋苑烤蹈塘羌熊閥螳螂疆碚竿緯荷茵邙魏匚匜匝匟扶稷匣匭攏匸匹耦匽匾匿卂叮瘡禧軫堤棚迢鈞鍊卄卆遐卉瓷盲瓶噹胱腱裸卋卌卍卐怯污賤鄙齷齪陋卓溪唐梯漁陳棗泥漳潯澗梨芬譙贍轅迦鄭単驢弈洽鰲卛占筮卝卞卟吩啉屎翠厄卣卨卪卬卮榫襖璽綬鈕蚤懼殆篤聳卲帘帙繞卹卼卽厂厎厓厔厖厗奚厘厙厜厝諒厠厤厥厪膩孢厮厰厳厴厹厺粕垢蕪菁厼厾叁悟茸薯叄吵笄悌哺譏坫壟弧芯杠潛嬰芻袁詰貪諜煽饋駁収岳締災賄騙叚叡吻攔蘑蜜訣燧玩硯箏椎藺銅逗驪另覓叨嘮謁杵姓喊嚷囂咚嚀塑尋惱憎擦祇泣滲蝠叱吒咄咤喝籀黛舵舷叵叶鐸懿昭穰苴遼叻叼吁塹嫖賭瞧爬衆抒吅吆夥巹橡滌抱縱摩郡唁墜扇籃膀襪頸吋愾諮酬哭妓媛暗錶韁邇妃羿絮蕃渾拐葵暮隅吔吖啶嗪戚吜嗇噬嚥吟哦詠吠吧唧嗒咐吪雋咀徵燐苞茹鈣哧吮吰吱嘎吲哚吳棟嬌窟孟簫忠晗淞闔閭趼宇吶睛噓拂捧疵熄竽笛糠吼吽呀呂韋矇呃呆笨呇貢呉罄呋喃呎呏呔呠呡癡呣呤呦呧瑛眩扒晬淑姬瑜璇鵑呪呫嗶嚅囁呬呯呰呱呲咧噌鈍呴呶呷呸呺呻哱咻嘯嚕籲坎坷邏呿咁咂咆哮咇咈咋蟹煦珅藹咍咑咒詛咔噠嚓咾噥哩喱咗咠咡咢咣咥咦咨嗟詢咩咪咫嚙齧咭咮咱咲咳嗆嗽咴咷咸咹咺咼喉咿婉慟憫賦矜綠茗藍哂搶瞞哆嗦囉噻啾濱彗哋哌哎唷喲哏哐哞哢哤哪裏哫啼喘哰哲萎蚌哳哶哽哿唄唅唆唈唉唎唏嘩堯棣殤璜睿肅唔睇唕唚唞唣喳唪唬唰喏唲唳唵嘛唶唸唹唻唼唾唿啁啃鸚鵡啅埠棧榷祺舖鞅飆啊啍啎啐啓啕啖啗啜啞祈啢啣啤啥啫啱啲啵啺饑啽噶崑沁喁喂喆裙喈嚨喋喌喎喑喒喓喔粗喙幛慶滋鵲喟喣喤喥喦喧騷喨喩梆喫葡萄喭駝挑嚇碰樅
瓣純皰藻趟鉻喵営喹喺喼喿嗀嗃嗄嗅嗈嗉嗊嗍嗐嗑嗔詬嗕嗖嗙嗛嗜痂癖嗝嗡嗤嗥嗨嗩嗬嗯嗰嗲嗵嘰嗷嗹嗾嗿嘀嘁嘂嘅惋嘈峪禾蔭嘊嘌嘏嘐嘒嘓嘖嘚嘜嘞嘟囔嘣嘥嘦嘧嘬嘭這謔嚴敞饞鬆嘵嘶嘷嘸蝦嘹嘻嘽嘿噀噂噅噇噉噎噏噔噗噘噙噚噝噞噢噤蟬皿噩噫噭噯噱噲噳嚏涌灑欲巫霏噷噼嚃嚄嚆抖嚌嚐嚔囌嚚嚜嚞嚟嚦嚬嚭嚮嚯嚲嚳飭按竣苛嚵嚶囀囅囈膪謙囍囒囓囗囘蕭酚飄濺諦囝溯眸紇鑾鶻囟殉囡団囤囥囧囨囪囫圇囬囮囯囲図囶囷囸囹圄圉擬囻囿圀圂圃圊粹蠹赦圌墾圏滾鯡鑿枘圕圛圜圞坯埂壤骸炕祠窯豚紳魠鯪鱉圧握圩圪垯圬圮圯炸岬幔毯祇窨菩溉圳圴圻圾坂坆沾坋坌舛壈昆墊墩椅坒坓坩堝坭坰坱坳坴坵坻坼楊掙涎簾垃垈垌垍垓垔垕垗垚垛垝垣垞垟垤垧垮垵垺垾垿埀畔埄埆埇埈埌殃隍埏埒埕埗埜埡埤埦埧埭埯埰埲埳埴埵埶紼埸培怖樁礎輔埼埽堀訶姪廡堃堄摧磐貞韌砌堈堉堊堋堌堍堎堖堙堞堠礁堧堨輿堭堮蜓摘堲堳堽堿塁塄塈煤塋棵塍塏塒塓綢���鴉沽虱塙塚塝繆塡塢塤塥塩塬塱塲蟎塼塽塾塿墀墁墈墉墐夯増毀墝墠墦漬缽墫墬墮墰墺墻櫥壅壆壊壌壎壒榨蒜壔壕壖壙壚壜壝壠壡壬壭壱売壴壹壻壼寢壿夂夅夆変夊夌漱邑夓腕泄甥禦骼夗夘夙袞瑙妊娠醣梟珊鶯鷺戧幻魘夤蹀祕擂鶇姚宛閨嶼庾撻拇賛蛤裨菠氅漓撈湄蚊霆鯊箐篆篷荊肆舅荔鮃巷慚骰辟邱鎔鐮阪漂燴鯢鰈鱷鴇臚鵬妒峨譚枰晏璣癸祝秤竺牡籟恢罡螻蠍賜絨御梭夬夭砣榆怙枕夶夾餡奄崛葩譎奈賀祀贈奌奐奓奕訢詝奘奜奠奡奣陶奨奩魁奫奬奰媧孩貶隸酥宄狡猾她奼嫣妁氈荼皋膻蠅嬪妄妍嫉媚嬈妗趣妚妞妤礙妬婭妯娌妲妳妵妺姁姅姉姍姒姘姙姜姝姞姣姤姧姫姮娥姱姸姺姽婀娀誘懾脅娉婷娑娓娟娣娭娯娵娶娸娼婊婐婕婞婤婥谿孺婧婪婬婹婺婼婽媁媄媊媕媞媟媠媢媬媮媯媲媵媸媺媻媼眯媿嫄嫈嫋嫏嫕嫗嫘嫚嫜嫠嫡嫦嫩嫪毐嫫嫬嫰嫵嫺嫻嫽嫿嬀嬃嬅嬉耍嬋痴豔嬔嬖嬗嬙嬝嬡嬢嬤嬦嬬嬭幼嬲嬴嬸嬹嬾嬿孀孃孅孌孏曰癲屏孑孓雀孖斟簍謎摺孛矻鳩崮軻祜鸞孥邈毓棠臏孬孭孰孱孳孵泛罔銜孻孿宀宁宂拙株薇掣撫琪瓿榴謐彌宊濂祁瑕宍宏碁宓邸讞実潢町宥宧宨宬徵崎駿掖闕臊煮禽蠶宸豫寀寁寥寃簷庶寎暄磣寔寖寘寙寛寠苫寤肘洱濫蒗陝覈寪弘綽螽寳擅疙瘩晷対檐専尃尅贖絀繚疇釁尌峙醌襟痲碧屁昊槌淘恵瀑牝畑莓缸羚覷蔻髒躁尒尓銳尗尙尜尟尢尥尨尪尬尭尰擒尲尶尷尸尹潽蠖蛾尻釦梢蚴鰭脬蹲屇屌蚵屐屓挪屖屘屙屛屝屢屣巒嶂巖舄屧屨屩屪屭屮戍駐鉀崖嵛巔旮旯楂欖櫸芋茱萸靛麓屴屹屺屼岀岊岌岍阜岑彭鞏岒岝岢嵐岣岧岨岫岱岵岷峁峇峋峒峓峞峠嵋峩峯峱峴峹峿崀崁崆禎崋崌崍嶇崐崒崔嵬巍螢顥崚崞崟崠崢巆崤崦崧殂崬崱崳崴崶崿嵂嵇嵊泗嵌嵎嵒嵓嵗嵙嵞嵡嵩嵫嵯嵴嵼嵾嶁嶃嶄晴嶋嶌嶒嶓嶔嶗嶙嶝嶞嶠嶡嶢嶧嶨嶭嶮嶰嶲嶴嶸巂巃巇巉巋巌巓巘巛滇芎巟巠弋迴巣巤炊擘蜥蟒蠱覡巰蜀彥淖杏茂甫楞巻巽幗巿帛斐鯽蕊帑帔帗帚琉汶帟帡帣帨帬帯帰帷帹暆幃幄幇幋幌幏幘幙幚幞幠幡幢幦幨幩幪幬幭幯幰遙蹉跎餘庚鑑幵幷稚邃庀庁広庄庈庉笠庋跋庖犧庠庤庥鯨庬庱庳庴庵馨衢庹庿廃廄廆廋廌廎廏廐廑廒廕廖廛廝搏鑼廞弛袤廥廧廨廩廱綿踵髓廸廹甌鄴廻廼廾廿躔弁皺弇弌弍弎弐弒弔詭憾薦弝弢弣弤弨弭弮弰弳霖繇燾斌旭溥騫弶弸弼弾彀彄彆纍糾彊彔彖彘彟彠陌彤貽彧繪虹彪炳彫蔚鷗彰癉彲彳彴彷彷徉徨彸彽踩斂旆徂徇徊渭畬鉉裼従筌徘徙徜徠膳甦萌漸徬徭醺徯徳徴潘徻徼忀瘁胖燎怦悸顫扉犀澎湃砰恍惚絞隘忉憚挨餓忐忑忒忖応忝忞耿忡忪忭忮忱忸怩忻悠懣怏遏怔怗怚怛怞懟黍訝怫怭懦怱怲怳怵惕怸怹恁恂恇恉恌恏恒恓恔恘恚恛恝恞恟恠恣恧眄恪恫恬澹恰恿悀悁悃悄悆悊悐悒晦悚悛悜悝悤您悩悪悮悰悱悽惻悳悴悵惘悶悻悾惄愫鍾蒐惆惇惌惎惏惓惔惙惛耄惝瘧濁惥惦惪惲惴惷惸拈愀愃愆愈愊愍愐愑愒愓愔愕愙氓蠢騃昵愜赧愨愬愮愯愷愼慁慂慅慆慇靄慉慊慍慝慥慪慫慬慱慳慴慵慷慼焚憀灼鬱憃憊憋憍眺捏軾憒憔憖憙憧憬憨憪憭憮憯憷憸憹憺懃懅懆邀懊懋懌懍懐懞懠懤懥懨懫懮懰懱毖懵遁樑雍懺懽戁戄戇戉戔戕戛戝戞戠戡戢戣戤戥戦戩戭戯轟戱披菊牖戸戹戺戻戼戽鍬扂楔扃扆扈扊杖牽絹銬鐲賚扐摟攪烊盹瞌跟躉鑔靶鼾払扗玫腮扛扞扠扡扢盔押扤扦扱罾揄綏鞍郤窾扻扼扽抃抆抈抉抌抏瞎抔繯縊擻抜抝択抨摔歉躥牾抶抻搐泵菸拃拄拊髀拋拌脯拎拏拑擢秧沓曳攣迂拚拝拠拡拫拭拮踢拴拶拷攢拽掇芥橐簪摹疔挈瓢驥捺蹻挌挍挎挐揀挓挖掘浚挙揍聵挲挶挾挿捂捃捄捅捆捉捋胳膊揎捌捍捎軀蛛捗捘捙捜捥捩捫捭据捱捻捼捽掀掂掄臀膘掊掎掏掐笙掔掗掞棉芍掤搪闡掫掮掯揉掱掲掽掾揃揅揆搓揌諢揕揗揘揜揝揞揠揥揩揪揫櫫遒麈揰揲揵揶揸揹揺搆搉搊搋搌搎搔搕撼櫓搗搘搠搡搢搣搤搥搦搧搨搬楦褳訕赸搯搰搲搳搴搵搷搽搾搿摀摁摂摃摎摑摒摓跤摙摛摜摞摠摦睺羯摭摮摯摰摲摳摴摶摷摻摽撂撃撅稻撊撋撏鐧潑撕撙撚撝撟撢撣撦撧撩撬撱朔撳蚍蜉撾撿擀擄闖擉缶觚擐擕擖擗擡擣擤澡腚擧擨擩擫擭擯擰擷擸擼擽擿攃攄攆攉攥攐攓攖攙攛每攩攫轡澄攮攰攲攴軼攷砭訐攽碘敁敃敇敉敍敎筏敔敕敖閏誨敜煌敧敪敱敹敺敻敿斁衽斄牒縐謅斉斎斕鶉讕駮鱧斒筲斛斝斞斠斡斢斨斫斮晾沂潟穎絳邵斲斸釳於琅斾斿旀旂旃旄渦旌旎旐旒旓旖旛旝旟旡旣浴旰獺魃旴旹旻旼旽昀昃昄昇昉晰躲澈熹皎皓礬
昑昕昜昝昞昡昤暉筍昦昨昰昱昳昴昶昺昻晁蹇隧蔬髦晄晅晒晛晜晞晟晡晢晤晥曦晩萘瑩顗晿暁暋暌暍暐暔暕煅暘暝暠暡曚暦暨暪朦朧暱暲殄馮暵暸暹暻暾曀曄曇曈曌曏曐曖曘曙曛曡曨曩駱曱甴肱曷牘禺錕曽滄耽朁朅朆杪栓誇竟粘絛朊膺朏朐朓朕朘朙瞄覲溘饔飧朠朢朣柵椆澱蝨朩朮朰朱炆璋鈺熾鹮朳槿朶朾朿杅杇杌隉欣釗湛漼楷瀍煜玟纓翱肈舜贄适逵杓杕杗杙荀蘅杝杞脩珓筊杰榔狍閦顰緬莞杲杳眇杴杶杸杻杼枋枌枒枓衾葄翹紓逋枙狸椏枟槁枲枳枴枵枷枸櫞枹枻柁柂柃柅柈柊柎某柑橘柒柘柙柚柜柞櫟柟柢柣柤柩柬柮柰柲橙柶柷柸柺査柿栃栄栒栔栘栝栟栢栩栫栭栱栲栳栴檀栵栻桀驁桁鎂桄桉桋桎梏椹葚桓桔桕桜桟桫欏桭桮桯桲桴桷桹湘溟梃梊梍梐潼梔梘梜梠梡梣梧梩梱梲梳梴梵梹棁棃櫻棐棑棕櫚簑繃蓑棖棘棜棨棩棪棫棬棯棰棱棳棸棹槨棼椀椄苕椈椊椋椌椐椑椓椗検椤椪椰椳椴椵椷椸椽椿楀楄楅篪楋楍楎楗楘楙楛楝楟楠楢楥楨楩楪楫楬楮楯楰楳楸楹楻楽榀榃榊榎槺榕榖榘榛狉莽榜笞榠榡榤榥榦榧榪榭榰榱槤霰榼榾榿槊閂槎槑槔槖様槜槢槥槧槪槭槮槱槲槻槼槾樆樊樏樑樕樗樘樛樟樠樧樨権樲樴樵猢猻樺樻罍樾樿橁橄橆橈笥龠橕橚橛輛橢橤橧豎膈跨橾橿檁檃檇檉檍檎檑檖檗檜檟檠檣檨檫檬檮檳檴檵檸櫂櫆櫌櫛櫜櫝櫡櫧櫨櫪櫬櫳櫹櫺茄櫽欀欂欃欐欑欒欙欞溴欨欬欱欵欶欷歔欸欹欻欼欿歁歃歆艎歈歊蒔蝶歓歕歘歙歛歜歟歠蹦詮鑲蹣跚陞陟歩歮歯歰歳歴璞歺瞑歾歿殀殈殍殑殗殜殙殛殞殢殣殥殪殫殭殰殳荃殷殸殹蛟殻殽謗毆毈毉餵毎毑蕈毗毘毚茛鄧毧毬毳毷毹毽毾毿氂氄氆靴氉氊氌氍氐聊氕氖気氘氙氚氛氜氝氡洶焊痙氤氳氥氦鋁鋅氪烴氬銨痤汪滸漉痘盂碾菖蒲蕹蛭螅氵氷氹氺氽燙氾氿渚汆汊汋汍汎汏汐汔汕褟汙汚汜蘺沼穢衊汧汨汩汭汲汳汴隄汾沄沅沆瀣沇沈葆浸淪湎溺痼痾沌沍沏沐沔沕沘浜畹礫沚沢沬沭沮沰沱灢沴沷籽沺烹濡洄泂肛泅泆湧肓泐泑泒泓泔泖泙泚泜泝泠漩饃濤粼濘蘚鰍泩泫泭泯銖泱泲洇洊涇琵琶荽薊箔洌洎洏洑潄濯洙洚洟洢洣洧洨洩痢滔洫洮洳洴洵洸洹洺洼洿淌蜚浄浉浙贛渫浠浡浤浥淼瀚浬浭翩萍浯浰蜃淀苔蛞蝓蜇螵蛸煲鯉浹浼浽溦涂涊涐涑涒涔滂涖涘涙涪涫涬涮涴涶涷涿淄淅淆淊淒黯淓淙漣淜淝淟淠淢淤淥淦淩猥藿褻淬淮淯淰淳詣淶紡淸淹燉癯綺渇済渉渋渓渕渙渟渢滓渤澥渧渨渮渰渲渶渼湅湉湋湍湑湓湔黔湜湝湞湟湢湣湩湫湮麟湱湲湴湼満溈溍溎溏溛舐漭溠溤溧馴溮溱溲溳溵溷溻溼溽溾滁滃滉滊滎滏稽滕滘滙滝滫滮羼耷滷滹滻煎漈漊漎繹漕漖漘漙漚漜漪漾漥漦漯漰漵漶漷濞潀潁潎潏潕潗潚潝潞潠潦祉瘍潲潵潷潸潺潾潿澁澂澃澉澌澍澐澒澔澙澠澣澦澧澨澫澬澮澰澴澶澼熏郁濆濇濈濉濊貊濔疣濜濠濩觴濬濮盥濰濲濼瀁瀅瀆瀋瀌瀏瀒瀔瀕瀘瀛瀟瀠瀡瀦瀧瀨瀬瀰瀲瀳瀵瀹瀺瀼灃灄灉灋灒灕灖灝灞灠灤灥灨灩灪蜴灮燼獴灴灸灺炁炅魷炗炘炙炤炫疽烙釺炯炰炱炲炴炷燬炻烀烋瘴鯧烓烔焙烜烝烳飪烺焃焄耆焌焐焓焗焜焞焠焢焮焯焱焼煁煃煆煇煊熠煍熬煐煒煕煗燻礆霾煚煝煟煠煢矸煨瑣煬萁煳煺煻熀熅熇熉羆熒穹熗熘熛熜稔諳爍熤熨熯熰眶螞熲熳熸熿燀燁燂燄盞燊燋燏燔隼燖燜燠燡燦燨燮燹燻燽燿爇爊爓爚爝爟爨蟾爯爰爲爻爿爿牀牁牂牄牋牎牏牓牕釉牚腩蒡虻牠雖蠣牣牤牮牯牲牳牴牷牸牼絆牿靬犂犄犆犇犉犍犎犒犖犗犛犟犠犨犩犪犮犰狳犴犵犺狁甩狃狆狎狒獾狘狙黠狨狩狫狴狷狺狻豕狽蜘猁猇猈猊猋猓猖獗猗猘猙獰獁猞猟獕猭猱猲猳猷猸猹猺玃獀獃獉獍獏獐獒獘獙獚獜獝獞獠獢獣獧鼇蹊獪獫獬豸獮獯鬻獳獷獼玀玁菟玅玆玈珉糝禛郅玍玎玓瓅玔玕玖玗玘玞玠玡玢玤玥玦玨瑰玭玳瑁玶玷玹玼珂珇珈瑚珌饈饌珔珖珙珛珞珡珣珥珧珩珪珮珶珷珺珽琀琁隕琊琇琖琚琠琤琦琨琫琬琭琮琯琰琱琲瑯琹琺琿瑀瑂瑄瑉瑋瑑瑔瑗瑢瑭瑱瑲瑳瑽瑾瑿璀璨璁璅璆璈璉璊璐璘璚璝璟璠璡璥璦璩璪璫璯璲璵璸璺璿瓀瓔瓖瓘瓚瓛臍瓞瓠瓤瓧瓩瓮瓰瓱瓴瓸瓻瓼甀甁甃甄甇甋甍甎甏甑甒甓甔甕甖甗飴蔗甙詫鉅粱盎銹糰甡褥産甪甬甭甮甯鎧甹甽甾甿畀畁畇畈畊畋畎畓畚畛畟鄂畤畦畧荻畯畳畵畷畸畽畾疃疉疋疍疎簞疐疒疕疘疝疢疥疧疳疶疿痁痄痊痌痍痏痐痒痔痗瘢痚痠痡痣痦痩痭痯痱痳痵痻痿瘀瘂瘃瘈瘉瘊瘌瘏瘐瘓瘕瘖瘙瘚瘛瘲瘜瘝瘞瘠瘥瘨瘭瘮瘯瘰癧瘳癘瘵瘸瘺瘻瘼癃癆癇癈癎癐癔癙癜癠癤癥癩蟆癪癭癰発踔紺蔫酵皙砬砒翎翳蘞鎢鑞皚鵯駒鱀粵褶皀皁莢皃鎛皈皌皐皒硃皕皖皘皜皝皞皤皦皨皪皫皭糙綻皴皸皻皽盅盋盌盍盚盝踞盦盩鞦韆盬盭眦睜瞤盯盱眙裰盵盻睞眂眅眈眊県眑眕眚眛眞眢眣眭眳眴眵眹瞓眽郛睃睅睆睊睍睎睏睒睖睙睟睠睢睥睪睪睯睽睾瞇瞈瞋瞍逛瞏瞕瞖瞘瞜瞟瞠瞢瞫瞭瞳瞵瞷瞹瞽闍瞿矓矉矍鑠矔矗矙矚矞矟矠矣矧矬矯矰矱硪碇磙��舫阡、矼矽礓砃砅砆砉砍砑砕砝砟砠砢砦砧砩砫砮砳艏砵砹砼硇硌硍硎硏硐硒硜硤硨磲茚鋇硭硻硾碃碉碏碣碓碔碞碡碪碫碬碭碯碲碸碻礡磈磉磎磑磔磕磖磛磟磠磡磤磥蹭磪磬磴磵磹磻磽礀礄礅礌礐礚礜礞礤礧礮礱礲礵礽礿祂祄祅祆禳祊祍祏祓祔祕祗祘祛祧祫祲祻祼餌臠錮禂禇禋禑禔禕隋禖禘禚禜禝禠禡禢禤禥禨禫禰禴禸稈秈秊闈颯秌秏秕笈蘵賃秠秣秪秫秬秭秷秸稊稌稍稑稗稙稛稞稬稭稲稹稼顙稾穂穄穇穈穉穋穌貯穏穜穟穠穡穣穤穧穨穭穮穵穸窿闃窀窂窅窆窈窕窊窋窌窒窓窔窞窣
窬黷蹙窰窳窴窵窶窸窻竁竃竈竑竜竝竦竪篦篾笆鮫竾笉笊笎笏笐靨笓笤籙笪笫笭笮笰笱笲笳笵笸笻筀筅筇筈筎筑筘筠筤筥筦筧筩筭筯筰筱筳筴讌筸箂箇箊箎箑箒箘箙箛箜篌箝箠箬鏃箯箴箾篁篔簹篘篙篚篛篜篝篟篠篡篢篥篧篨篭篰篲篳篴篶篹篼簀簁簃簆簉簋簌簏簜簟簠簥簦簨簬簰簸簻籊籐籒籓籔籖籚籛籜籣籥籧籩籪籫籯芾麴籵籸籹籼粁粃粋粑粔糲粛粞粢粧粨粲粳粺粻粽闢粿糅糆糈糌糍糒糔萼糗蛆蹋糢糨糬糭糯糱糴糶糸糺紃蹼鰹黴紆紈絝紉閩襻紑紕紘錠鳶鷂紝紞紟紥紩紬紱紲紵紽紾紿絁絃絅経絍絎絏縭褵絓絖絘絜絢絣螯絪絫聒絰絵絶絺絻絿綀綃綅綆綈綉綌綍綎綑綖綘継続緞綣綦綪綫綮綯綰罟蝽綷縩綹綾緁緄緅緆緇緋緌緎総緑緔緖緗緘緙緜緡緤緥緦纂緪緰緱緲緶緹縁縃縄縈縉縋縏縑縕縗縚縝縞縟縠縡縢縦縧縯縰騁縲縳縴縵縶縹縻衙縿繄繅繈繊繋繐繒繖繘繙繠繢繣繨繮繰繸繻繾纁纆纇纈纉纊纑纕纘纙纚纛缾罃罆罈罋罌罎罏罖罘罛罝罠罣罥罦罨罫罭鍰罳罶罹罻罽罿羂羃羇羋蕉51鴕羑羖羗羜羝羢羣羥羧羭羮羰羱羵羶羸藜鮐翀翃翄翊翌翏翕翛翟翡翣翥翦躚翪翫翬翮翯翺翽翾翿闆饕鴰鍁耋耇耎耏耑耒耜耔耞耡耤耨耩耪耬耰鬢耵聹聃聆聎聝聡聦聱聴聶聼閾聿肄肏肐肕腋肙肜肟肧胛肫肬肭肰肴肵肸肼胊胍胏胑胔胗胙胝胠銓胤胦胩胬胭胯胰胲胴胹胻胼胾脇脘脝脞脡脣脤脥脧脰脲脳腆腊腌臢腍腒腓腖腜腠腡腥腧腬腯踝蹬鐐腴腶蠕誹膂膃膆膇膋膔膕膗膙膟黐膣膦膫膰膴膵膷膾臃臄臇臈臌臐臑臓臕臖臙臛臝臞臧蓐詡臽臾臿舀舁鰟鮍舋舎舔舗舘舝舠舡舢舨舭舲舳舴舸舺艁艄艅艉艋艑艕艖艗艘艚艜艟艣艤艨艩艫艬艭荏艴艶艸艹艻艿芃芄芊萰陂藭芏芔芘芚蕙芟芣芤茉芧芨芩芪芮芰鰱芴芷芸蕘豢芼芿苄苒苘苙苜蓿苠苡苣蕒苤苧苪鎊苶苹苺苻苾茀茁范蠡萣茆茇茈茌茍茖茞茠茢茥茦菰茭茯茳藨茷藘茼荁荄荅荇荈菅蜢鴞荍荑荘荳荵荸薺莆莒莔莕莘莙莚莛莜莝莦莨菪莩莪莭莰莿菀菆菉菎菏菐菑菓菔菕菘菝菡菢菣菥蓂菧菫轂鎣菶菷菹醢菺菻菼菾萅萆萇萋萏萐萑萜萩萱萴萵萹萻葇葍葎葑葒葖葙葠葥葦葧葭葯葳葴葶葸葹葽蒄蒎蒓蘢薹蒞蒟蒻蒢蒦蒨蒭藁蒯蒱鉾蒴蒹蒺蒽蓀蓁蓆蓇蓊蓌蓍蓏蓓蓖蓧蓪蓫蓽跣藕蓯蓰蓱蓴蓷蓺蓼蔀蔂蔃蔆蔇蔉蔊蔋蔌蔎蔕蔘蔙蔞蔟鍔蔣雯蔦蔯蔳蔴蔵蔸蔾蕁蕆蕋蕍蕎蕐蕑蕓蕕蕖蕗蕝蕞蕠蕡蕢蕣蕤蕨蕳蕷蕸蕺蕻薀薁薃薅薆薈薉薌薏薐薔薖薘薙諤釵薜薠薢薤薧薨薫薬薳薶薷薸薽薾薿藄藇藋藎藐藙藚藟藦藳藴藶藷藾蘀蘁蘄蘋蘗蘘蘝蘤蘧蘩蘸蘼虀虆虍蟠虒虓虖虡虣虥虩虯虰蛵虵虷鱒虺虼蚆蚈蚋蚓蚔蚖蚘蚜蚡蚣蚧蚨蚩蚪蚯蚰蜒蚱蚳蚶蚹蚺蚻蚿蛀蛁蛄蛅蝮蛌蛍蛐蟮蛑蛓蛔蛘蛚蛜蛡蛣蜊蛩蛺蛻螫蜅蜆蜈蝣蜋蜍蜎蜑蠊蜛餞蜞蜣蜨蜩蜮蜱蜷蜺蜾蜿蝀蝃蝋蝌蝍蝎蝏蝗蝘蝙蝝鱝蝡蝤蝥蝯蝰蝱蝲蝴蝻螃蠏螄螉螋螒螓螗螘螙螚蟥螟螣螥螬螭螮螾螿蟀蟅蟈蟊蟋蟑蟓蟛蟜蟟蟢蟣蟨蟪蟭蟯蟳蟶蟷蟺蟿蠁蠂蠃蠆蠋蠐蠓蠔蠗蠙蠚蠛蠜蠧蠨蠩蠭蠮蠰蠲蠵蠸蠼蠽衁衂衄衇衈衉衋衎衒衕衖衚衞裳鈎衭衲衵衹衺衿袈裟袗袚袟袢袪袮袲袴袷袺袼褙袽裀裉裊裋裌裍裎裒裛裯裱裲裴裾褀褂褉褊褌褎褐褒褓褔褕褘褚褡褢褦褧褪褫褭褯褰褱襠褸褽褾襁襃襆襇襉襋襌襏襚襛襜襝襞襡襢襤襦襫襬襭襮襴襶襼襽襾覂覃覅覇覉覊覌覗覘覚覜覥覦覧覩覬覯覰観覿觔觕觖觜觽觝觡酲觩觫觭觱觳觶觷觼觾觿言賅訃訇訏訑訒詁託訧訬訳訹証訾詀詅詆譭詈詊詎詑詒詖詗詘詧詨詵詶詸詹詻詼詿誂誃誄鋤誆誋誑誒誖誙誚誥誧説読誯誶誾諂諄諆諌諍諏諑諕諗諛諝諞諟諠諡諴諵諶諼謄謆謇謌謍謏謑謖謚謡謦謪謫謳謷謼謾譁譅譆譈譊譌譒譔譖鑫譞譟譩譫譬譱譲譴譸譹譾讅讆讋讌讎讐讒讖讙讜讟谽豁豉豇豈豊豋豌豏豔豞豖豗豜豝豣豦豨豭豱豳豵豶豷豺豻貅貆貍貎貔貘貙貜貤饜貰餸貺賁賂賏賒賕賙賝賡賧賨賫鬭賮賵賸賺賻賾贇贉贐贔贕贗赬赭赱赳迄趁趂趄趐趑趒趔趡趦趫趮趯趲趴趵趷趹趺趿跁跂跅跆躓蹌跐跕跖跗跙跛跦跧跩跫跬跮跱跲跴跺跼跽踅踆踈踉踊踒���踘踜踟躇躕踠踡踣踤踥踦踧蹺踫踮踰踱踴踶踹踺踼踽躞蹁蹂躪蹎蹐蹓蹔蹕蹚蹜蹝蹟蹠蹡蹢躂蹧蹩蹪蹯鞠蹽躃躄躅躊躋躐躑躒躘躙躛躝躠躡躦躧躩躭躰躳躶軃軆輥軏軔軘軜軝齶転軥軨軭軱軲轆軷軹軺軽軿輀輂輦輅輇輈輓輗輙輜輞輠輤輬輭輮輳輴輵輶輹輼輾轀轇轏轑轒轔轕轖轗轘轙轝轞轢轤辠辢辤辵辶辺込辿迅迋迍麿迓迣迤邐迥迨迮迸迺迻迿逄逅逌逍逑逓逕逖逡逭逯逴逶逹遄遅遉遘遛遝遢遨遫遯遰遴遶遹遻邂邅邉邋邎邕邗邘邛邠邢邧邨邯鄲邰邲邳邴邶邷邽邾邿郃郄郇郈郔郕郗郙郚郜郝郞郟郠郢郪郫郯郰郲郳郴郷郹郾郿鄀鄄鄆鄇鄈鄋鄍鄎鄏鄐鄑鄒鄔鄕鄖鄗鄘鄚鄜鄞鄠鄢鄣鄤鄦鄩鄫鄬鄮鄯鄱鄶鄷鄹鄺鄻鄾鄿酃酅酆酇酈酊酋酎酏酐酣酔酕醄酖酗酞酡酢酤酩酴酹酺醁醅醆醊醍醐醑醓醖醝醞醡醤醨醪醭醯醰醱醲醴醵醸醹醼醽醾釂釃釅釆釈鱸鎦閶釓釔釕鈀釙鼢鼴釤釧釪釬釭釱釷釸釹鈁鈃鈄鈆鈇鈈鈊鈌鈐鈑鈒鈤鈥鈧鈬鈮鈰鈳鐺鈸鈹鈽鈿鉄鉆鉈鉋鉌鉍鉏鉑鉕鉚鉢鉥鉦鉨鉬鉭鉱鉲鉶鉸鉺鉼鉿銍銎銑銕鏤銚銛銠銣銤銥銦銧銩銪銫銭銰銲銶銻銼銾鋂鋃鋆鋈鋊鋌鋍鋏鋐鋑鋕鋘鋙鋝鋟鋦鋨鋩鋭鋮鋯鋰鋱鋳鋹鋺鋻鏰鐱錀錁錆錇錈錍錏錒錔錙錚錛錞錟錡錤錩錬録錸錼鍀鍆鍇鍉鍍鍏鍐鍘鍚鍛鍠鍤鍥鍩鍫鍭鍱鍴鍶鍹鍺鍼鍾鎄鎇鎉鎋鎌鎍鎏鎒鎓鎗鎘鎚鎞鎡鎤鎩鎪鎭鎯鎰
鎳鎴鎵鎸鎹鎿鏇鏊鏌鏐鏑鏖鏗鏘鏚鏜鏝鏞鏠鏦鏨鏷鏸鏹鏻鏽鏾鐃鐄鐇鐏鐒鐓鐔鐗馗鐙鐝鐠鐡鐦鐨鐩鐫鐬鐱鐳鐶鐻鐽鐿鑀鑅鑌鑐鑕鑚鑛鑢鑤鑥鑪鑭鑯鑱鑴鑵鑷钁钃镻閆閈閌閎閒閔閗閟閡関閤閤閧閬閲閹閺閻閼閽閿闇闉闋闐闑闒闓闘闚闞闟闠闤闥阞阢阤阨阬阯阹阼阽陁陑陔陛陜陡陥陬騭陴険陼陾隂隃隈隒隗隞隠隣隤隩隮隰顴隳隷隹雂雈雉雊雎雑雒雗雘雚雝雟雩雰雱驛霂霅霈霊霑霒霓霙霝霢霣霤霨霩霪霫霮靁靆靉靑靚靣靦靪靮靰靳靷靸靺靼靿鞀鞃鞄鞌鞗鞙鞚鞝鞞鞡鞣鞨鞫鞬鞮鞶鞹鞾韃韅韉馱韍韎韔韖韘韝韞韡韣韭韮韱韹韺頀颳頄頇頊頍頎頏頒頖頞頠頫頬顱頯頲頴頼顇顋顑顒顓顔顕顚顜顢顣顬顳颭颮颱颶颸颺颻颽颾颿飀飂飈飌飜飡飣飤飥飩飫飮飱飶餀餂餄餎餇餈餑餔餕餖餗餚餛餜餟餠餤餧餩餪餫餬餮餱餲餳餺餻餼餽餿饁饅饇饉饊饍饎饐饘饟饢馘馥馝馡馣騮騾馵馹駃駄駅駆駉駋駑駓駔駗駘駙駜駡駢駪駬駰駴駸駹駽駾騂騄騅騆騉騋騍騏驎騑騒験騕騖騠騢騣騤騧驤騵騶騸騺驀驂驃驄驆驈驊驌驍驎驏驒驔驖驙驦驩驫骺鯁骫骭骯骱骴骶骷髏骾髁髂髄髆髈髐髑髕髖髙髝髞髟髡髣髧髪髫髭髯髲髳髹髺髽髾鬁鬃鬅鬈鬋鬎鬏鬐鬑鬒鬖鬗鬘鬙鬠鬣鬪鬫鬬鬮鬯鬰鬲鬵鬷魆魈魊魋魍魎魑魖鰾魛魟魣魦魨魬魴魵魸鮀鮁鮆鮌鮎鮑鮒鮓鮚鮞鮟鱇鮠鮦鮨鮪鮭鮶鮸鮿鯀鯄鯆鯇鯈鯔鯕鯖鯗鯙鯠鯤鯥鯫鯰鯷鯸鯿鰂鰆鶼鰉鰋鰐鰒鰕鰛鰜鰣鰤鰥鰦鰨鰩鰮鰳鰶鰷鱺鰼鰽鱀鱄鱅鱆鱈鱎鱐鱓鱔鱖鱘鱟鱠鱣鱨鱭鱮鱲鱵鱻鲅鳦鳧鳯鳲鳷鳻鴂鴃鴄鴆鴈鴎鴒鴔鴗鴛鴦鴝鵒鴟鴠鴢鴣鴥鴯鶓鴳鴴鴷鴽鵀鵁鵂鵓鵖鵙鵜鶘鵞鵟鵩鵪鵫鵵鵷鵻鵾鶂鶊鶏鶒鶖鶗鶡鶤鶦鶬鶱鶲鶵鶸鶹鶺鶿鷀鷁鷃鷄鷇鷈鷉鷊鷏鷓鷕鷖鷙鷞鷟鷥鷦鷯鷩鷫鷭鷳鷴鷽鷾鷿鸂鸇鸊鸏鸑鸒鸓鸕鸛鸜鸝鹸鹹鹺麀麂麃麄麇麋麌麐麑麒麚麛麝麤麩麪麫麮麯麰麺麾黁黈黌黢黒黓黕黙黝黟黥黦黧黮黰黱黲黶黹黻黼黽黿鼂鼃鼅鼈鼉鼏鼐鼒鼕鼖鼙鼚鼛鼡鼩鼱鼪鼫鼯鼷鼽齁齆齇齈齉齌齎齏齔齕齗齙齚齜齞齟齬齠齢齣齧齩齮齯齰齱齵齾龎龑龒龔龖龘龝龡龢龤'
20
+
21
# The two tables are paired position by position, so they must have the same
# length. (The check used to compare simplified_charcters with itself and
# therefore could never fail.)
assert len(simplified_charcters) == len(traditional_characters)

# simplified -> traditional and traditional -> simplified lookup tables.
# When a character occurs more than once, the last pairing wins.
s2t_dict = dict(zip(simplified_charcters, traditional_characters))
t2s_dict = dict(zip(traditional_characters, simplified_charcters))
28
+
29
+
30
def tranditional_to_simplified(text: str) -> str:
    """Map each traditional character in ``text`` to its simplified form.

    Characters without an entry in ``t2s_dict`` are kept unchanged.
    """
    return "".join(t2s_dict.get(char, char) for char in text)
33
+
34
+
35
def simplified_to_traditional(text: str) -> str:
    """Map each simplified character in ``text`` to its traditional form.

    Characters without an entry in ``s2t_dict`` are kept unchanged.
    """
    return "".join(s2t_dict.get(char, char) for char in text)
38
+
39
+
40
if __name__ == "__main__":
    # Round-trip demo: traditional -> simplified -> traditional.
    original = "一般是指存取一個應用程式啟動時始終顯示在網站或網頁瀏覽器中的一個或多個初始網頁等畫面存在的站點"
    print(original)
    simplified = tranditional_to_simplified(original)
    print(simplified)
    print(simplified_to_traditional(simplified))
GPT_SoVITS/text/zh_normalization/chronology.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import re
15
+
16
+ from .num import DIGITS
17
+ from .num import num2str
18
+ from .num import verbalize_cardinal
19
+ from .num import verbalize_digit
20
+
21
+
22
def _time_num2str(num_string: str) -> str:
    """Verbalize a minute/second field, reading a leading zero aloud."""
    spoken = num2str(num_string.lstrip('0'))
    # "05" is read with an explicit zero digit in front of the value.
    return DIGITS['0'] + spoken if num_string.startswith('0') else spoken
28
+
29
+
30
+ # 时刻表达式
31
+ RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
32
+ r':([0-5][0-9])'
33
+ r'(:([0-5][0-9]))?')
34
+
35
+ # 时间范围,如8:30-12:30
36
+ RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
37
+ r':([0-5][0-9])'
38
+ r'(:([0-5][0-9]))?'
39
+ r'(~|-)'
40
+ r'([0-1]?[0-9]|2[0-3])'
41
+ r':([0-5][0-9])'
42
+ r'(:([0-5][0-9]))?')
43
+
44
+
45
def _verbalize_clock(hour: str, minute: str, second) -> str:
    """Verbalize one hour/minute/second triple; ``second`` may be None."""
    result = f"{num2str(hour)}点"
    # A minute field made only of zeros ("00") is not read out.
    if minute.lstrip('0'):
        if int(minute) == 30:
            result += "半"
        else:
            result += f"{_time_num2str(minute)}分"
    if second and second.lstrip('0'):
        result += f"{_time_num2str(second)}秒"
    return result


def replace_time(match) -> str:
    """Verbalize a clock time or a time range.

    Works with matches of both RE_TIME (4 groups) and RE_TIME_RANGE
    (9 groups); the number of groups tells the two apart.

    Args:
        match (re.Match)
    Returns:
        str
    """
    is_range = len(match.groups()) > 5

    result = _verbalize_clock(match.group(1), match.group(2), match.group(4))
    if is_range:
        # The end of the range is verbalized from its own fields; the "半"
        # decision used to look at the start minute by mistake.
        result += "至"
        result += _verbalize_clock(
            match.group(6), match.group(7), match.group(9))
    return result
85
+
86
+
87
+ RE_DATE = re.compile(r'(\d{4}|\d{2})年'
88
+ r'((0?[1-9]|1[0-2])月)?'
89
+ r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')
90
+
91
+
92
def replace_date(match) -> str:
    """Verbalize a date match: year digit by digit, month/day as cardinals.

    Args:
        match (re.Match)
    Returns:
        str
    """
    year, month, day = match.group(1, 3, 5)
    pieces = []
    if year:
        pieces.append(f"{verbalize_digit(year)}年")
    if month:
        pieces.append(f"{verbalize_cardinal(month)}月")
    if day:
        # group(9) is the day suffix character captured by the pattern.
        pieces.append(f"{verbalize_cardinal(day)}{match.group(9)}")
    return "".join(pieces)
110
+
111
+
112
+ # 用 / 或者 - 分隔的 YY/MM/DD 或者 YY-MM-DD 日期
113
+ RE_DATE2 = re.compile(
114
+ r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')
115
+
116
+
117
def replace_date2(match) -> str:
    """Verbalize a separator-delimited date match (groups 1, 3, 4).

    Args:
        match (re.Match)
    Returns:
        str
    """
    year, month, day = match.group(1, 3, 4)
    pieces = []
    if year:
        pieces.append(f"{verbalize_digit(year)}年")
    if month:
        pieces.append(f"{verbalize_cardinal(month)}月")
    if day:
        pieces.append(f"{verbalize_cardinal(day)}日")
    return "".join(pieces)
GPT_SoVITS/text/zh_normalization/constants.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import re
15
+ import string
16
+
17
+ from pypinyin.constants import SUPPORT_UCS4
18
+
19
# Full-width <-> half-width conversion tables, keyed by code point.
# A full-width form sits 65248 (0xFEE0) code points above its ASCII character.
_FULLWIDTH_OFFSET = 0xFEE0


def _full_to_half(chars):
    """Build a {full-width code point: half-width code point} table."""
    return {ord(char) + _FULLWIDTH_OFFSET: ord(char) for char in chars}


def _inverted(table):
    """Swap the keys and values of a conversion table."""
    return {half: full for full, half in table.items()}


# ASCII letters, full-width -> half-width (52 entries) and back.
F2H_ASCII_LETTERS = _full_to_half(string.ascii_letters)
H2F_ASCII_LETTERS = _inverted(F2H_ASCII_LETTERS)

# Digits, full-width -> half-width (10 entries) and back.
F2H_DIGITS = _full_to_half(string.digits)
H2F_DIGITS = _inverted(F2H_DIGITS)

# Punctuation, full-width -> half-width (32 entries) and back.
F2H_PUNCTUATIONS = _full_to_half(string.punctuation)
H2F_PUNCTUATIONS = _inverted(F2H_PUNCTUATIONS)
38
+
39
# Space (1 entry). Keyed by code point, like the other tables in this module,
# because str.translate() looks characters up by ordinal; a table keyed by
# str is silently ignored.
F2H_SPACE = {ord('\u3000'): ord(' ')}
H2F_SPACE = {ord(' '): ord('\u3000')}
42
+
43
# Matches runs of characters that are NOT "Chinese characters with a pinyin
# reading"; can be used to extract non-standard words (NSW).
# NOTE(review): some range bounds in the patterns differ from the block
# bounds quoted in the trailing comments (e.g. 2A703 vs 2A700) - confirm.
if SUPPORT_UCS4:
    RE_NSW = re.compile(r'(?:[^'
                        r'\u3007'  # 〇
                        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
                        r'\u4e00-\u9fff'  # CJK basic: [4E00-9FFF]
                        r'\uf900-\ufaff'  # CJK compatibility: [F900-FAFF]
                        r'\U00020000-\U0002A6DF'  # CJK Extension B: [20000-2A6DF]
                        r'\U0002A703-\U0002B73F'  # CJK Extension C: [2A700-2B73F]
                        r'\U0002B740-\U0002B81D'  # CJK Extension D: [2B740-2B81D]
                        r'\U0002F80A-\U0002FA1F'  # CJK compatibility supplement: [2F800-2FA1F]
                        r'])+')
else:
    # Without UCS4 support only the BMP ranges are listed.
    RE_NSW = re.compile(  # pragma: no cover
        r'(?:[^'
        r'\u3007'  # 〇
        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
        r'\u4e00-\u9fff'  # CJK basic: [4E00-9FFF]
        r'\uf900-\ufaff'  # CJK compatibility: [F900-FAFF]
        r'])+')
GPT_SoVITS/text/zh_normalization/num.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Rules to verbalize numbers into Chinese characters.
16
+ https://zh.wikipedia.org/wiki/中文数字#現代中文
17
+ """
18
+ import re
19
+ from collections import OrderedDict
20
+ from typing import List
21
+
22
# Arabic digit character -> Chinese numeral.
DIGITS = dict(zip('0123456789', '零一二三四五六七八九'))

# Power of ten -> Chinese unit character, in ascending order.
UNITS = OrderedDict([
    (1, '十'),
    (2, '百'),
    (3, '千'),
    (4, '万'),
    (8, '亿'),
])
30
+
31
+ COM_QUANTIFIERS = '(处|台|架|枚|趟|幅|平|方|堵|间|床|株|批|项|例|列|篇|栋|注|亩|封|艘|把|目|套|段|人|所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|十|)吨|(亿|千万|百万|万|千|百|)块|角|毛|分)'
32
+
33
+ # 分数表达式
34
+ RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')
35
+
36
+
37
def replace_frac(match) -> str:
    """Verbalize a fraction match as "<denominator>分之<numerator>".

    Args:
        match (re.Match)
    Returns:
        str
    """
    minus, top, bottom = match.group(1, 2, 3)
    prefix = "负" if minus else ""
    spoken_top = num2str(top)
    spoken_bottom = num2str(bottom)
    # Chinese reads the denominator first.
    return f"{prefix}{spoken_bottom}分之{spoken_top}"
52
+
53
+
54
+ # 百分数表达式
55
+ RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')
56
+
57
+
58
def replace_percentage(match) -> str:
    """Verbalize a percentage match as "百分之<number>".

    Args:
        match (re.Match)
    Returns:
        str
    """
    minus, magnitude = match.group(1, 2)
    prefix = "负" if minus else ""
    spoken = num2str(magnitude)
    return f"{prefix}百分之{spoken}"
71
+
72
+
73
+ # 整数表达式
74
+ # 带负号的整数 -10
75
+ RE_INTEGER = re.compile(r'(-)' r'(\d+)')
76
+
77
+
78
def replace_negative_num(match) -> str:
    """Verbalize a signed-integer match, reading the sign as "负".

    Args:
        match (re.Match)
    Returns:
        str
    """
    minus, magnitude = match.group(1, 2)
    prefix = "负" if minus else ""
    spoken = num2str(magnitude)
    return f"{prefix}{spoken}"
91
+
92
+
93
+ # 编号-无符号整形
94
+ # 00078
95
+ RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')
96
+
97
+
98
def replace_default_num(match):
    """Read the matched digit string digit by digit, using "幺" for 1.

    Args:
        match (re.Match)
    Returns:
        str
    """
    digits = match.group(0)
    return verbalize_digit(digits, alt_one=True)
107
+
108
+
109
+ # 加减乘除
110
+ # RE_ASMD = re.compile(
111
+ # r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))([\+\-\×÷=])((-?)((\d+)(\.\d+)?)|(\.(\d+)))')
112
+ RE_ASMD = re.compile(
113
+ r'((-?)((\d+)(\.\d+)?[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*)|(\.\d+[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*)|([A-Za-z][⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*))([\+\-\×÷=])((-?)((\d+)(\.\d+)?[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*)|(\.\d+[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*)|([A-Za-z][⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]*))')
114
+
115
# Arithmetic operator -> spoken Chinese word.
asmd_map = dict([
    ('+', '加'),
    ('-', '减'),
    ('×', '乘'),
    ('÷', '除'),
    ('=', '等于'),
])


def replace_asmd(match) -> str:
    """Replace the operator between two operands with its spoken word.

    Group 1 is the left operand, group 8 the operator and group 9 the right
    operand; the operands are passed through unchanged.

    Args:
        match (re.Match)
    Returns:
        str
    """
    left, operator, right = match.group(1, 8, 9)
    return "".join((left, asmd_map[operator], right))
132
+
133
+
134
+ # 次方专项
135
+ RE_POWER = re.compile(r'[⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ]+')
136
+
137
# Superscript character -> plain character.
power_map = dict(zip('⁰¹²³⁴⁵⁶⁷⁸⁹ˣʸⁿ', '0123456789xyn'))


def replace_power(match) -> str:
    """Turn a run of superscripts into "的<exponent>次方".

    Args:
        match (re.Match)
    Returns:
        str
    """
    exponent = "".join(power_map[char] for char in match.group(0))
    return f"的{exponent}次方"
165
+
166
+
167
+ # 数字表达式
168
+ # 纯小数
169
+ RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))')
170
+ # 正整数 + 量词
171
+ RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" + COM_QUANTIFIERS)
172
+ RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))')
173
+
174
+
175
def replace_positive_quantifier(match) -> str:
    """Verbalize "<number>[approximation mark]<quantifier>".

    Args:
        match (re.Match)
    Returns:
        str
    """
    digits, approx, quantifier = match.group(1, 2, 3)
    if approx == "+":
        # A trailing plus sign is read as "多".
        approx = "多"
    elif not approx:
        approx = ""
    spoken = num2str(digits)
    # A lone "二" is replaced by "两".
    if spoken == "二":
        spoken = "两"
    return f"{spoken}{approx}{quantifier}"
192
+
193
+
194
def replace_number(match) -> str:
    """Verbalize a number match (signed integer/decimal, or a bare ".dd").

    Args:
        match (re.Match)
    Returns:
        str
    """
    minus, magnitude, bare_fraction = match.group(1, 2, 5)
    if bare_fraction:
        # Group 5 only matches for the ".dd" alternative, which has no sign.
        return num2str(bare_fraction)
    prefix = "负" if minus else ""
    return f"{prefix}{num2str(magnitude)}"
211
+
212
+
213
+ # 范围表达式
214
# match.group(1) and match.group(6) each follow the RE_NUMBER pattern
215
+
216
+ RE_RANGE = re.compile(
217
+ r"""
218
+ (?<![\d\+\-\×÷=]) # 使用反向前瞻以确保数字范围之前没有其他数字和操作符
219
+ ((-?)((\d+)(\.\d+)?)) # 匹配范围起始的负数或正数(整数或小数)
220
+ [-~] # 匹配范围分隔符
221
+ ((-?)((\d+)(\.\d+)?)) # 匹配范围结束的负数或正数(整数或小数)
222
+ (?![\d\+\-\×÷=]) # 使用正向前瞻以确保数字范围之后没有其他数字和操作符
223
+ """, re.VERBOSE)
224
+
225
+
226
def replace_range(match) -> str:
    """Verbalize a numeric range as "<start>到<end>".

    Args:
        match (re.Match)
    Returns:
        str
    """
    bounds = match.group(1, 6)
    # Each bound is verbalized with the generic number rule.
    start, end = (RE_NUMBER.sub(replace_number, bound) for bound in bounds)
    return f"{start}到{end}"
238
+
239
+
240
+ # ~至表达式
241
+ RE_TO_RANGE = re.compile(
242
+ r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))(%|°C|℃|度|摄氏度|cm2|cm²|cm3|cm³|cm|db|ds|kg|km|m2|m²|m³|m3|ml|m|mm|s)[~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))(%|°C|℃|度|摄氏度|cm2|cm²|cm3|cm³|cm|db|ds|kg|km|m2|m²|m³|m3|ml|m|mm|s)')
243
+
244
def replace_to_range(match) -> str:
    """Replace every "~" in the matched text with "至".

    Args:
        match (re.Match)
    Returns:
        str
    """
    matched_text = match.group(0)
    return '至'.join(matched_text.split('~'))
253
+
254
+
255
+ def _get_value(value_string: str, use_zero: bool=True) -> List[str]:
256
+ stripped = value_string.lstrip('0')
257
+ if len(stripped) == 0:
258
+ return []
259
+ elif len(stripped) == 1:
260
+ if use_zero and len(stripped) < len(value_string):
261
+ return [DIGITS['0'], DIGITS[stripped]]
262
+ else:
263
+ return [DIGITS[stripped]]
264
+ else:
265
+ largest_unit = next(
266
+ power for power in reversed(UNITS.keys()) if power < len(stripped))
267
+ first_part = value_string[:-largest_unit]
268
+ second_part = value_string[-largest_unit:]
269
+ return _get_value(first_part) + [UNITS[largest_unit]] + _get_value(
270
+ second_part)
271
+
272
+
273
def verbalize_cardinal(value_string: str) -> str:
    """Verbalize a non-negative integer string as a Chinese cardinal."""
    if not value_string:
        return ''

    # '000' -> '零', '0' -> '零'
    digits = value_string.lstrip('0')
    if not digits:
        return DIGITS['0']

    symbols = _get_value(digits)
    # A number starting with '一十*' is abbreviated to '十*'.
    if symbols[:2] == [DIGITS['1'], UNITS[1]]:
        symbols = symbols[1:]
    return ''.join(symbols)
288
+
289
+
290
def verbalize_digit(value_string: str, alt_one=False) -> str:
    """Read a digit string digit by digit.

    With ``alt_one`` set, every "一" in the result is replaced by "幺".
    """
    spoken = ''.join(DIGITS[digit] for digit in value_string)
    return spoken.replace("一", "幺") if alt_one else spoken
296
+
297
+
298
def num2str(value_string: str) -> str:
    """Verbalize an unsigned integer or decimal string in Chinese.

    The integer part is read as a cardinal; the decimal part is read digit by
    digit after "点", with trailing zeros dropped.

    Raises:
        ValueError: if ``value_string`` contains more than one '.'.
    """
    integer, _, decimal = value_string.partition('.')
    if '.' in decimal:
        # The message used to carry a stray '$' before the value.
        raise ValueError(
            f"The value string: '{value_string}' has more than one point in it."
        )

    result = verbalize_cardinal(integer)

    decimal = decimal.rstrip('0')
    if decimal:
        # '.22' is verbalized as '零点二二'
        # '3.20' is verbalized as '三点二'
        result = result if result else "零"
        result += '点' + verbalize_digit(decimal)
    return result
GPT_SoVITS/text/zh_normalization/phonecode.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import re
15
+
16
+ from .num import verbalize_digit
17
+
18
+ # 规范化固话/手机号码
19
+ # 手机
20
+ # http://www.jihaoba.com/news/show/13680
21
+ # 移动:139、138、137、136、135、134、159、158、157、150、151、152、188、187、182、183、184、178、198
22
+ # 联通:130、131、132、156、155、186、185、176
23
+ # 电信:133、153、189、180、181、177
24
+ RE_MOBILE_PHONE = re.compile(
25
+ r"(?<!\d)((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})(?!\d)")
26
+ RE_TELEPHONE = re.compile(
27
+ r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})(?!\d)")
28
+
29
+ # 全国统一的号码400开头
30
+ RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"(400)(-)?\d{3}(-)?\d{4}")
31
+
32
+
33
def phone2str(phone_string: str, mobile=True) -> str:
    """Read a phone number digit by digit, with "," between its segments.

    Mobile numbers are stripped of '+' and split on whitespace; other numbers
    are split on '-'.
    """
    if mobile:
        segments = phone_string.strip('+').split()
    else:
        segments = phone_string.split('-')
    return ','.join(
        verbalize_digit(segment, alt_one=True) for segment in segments)
44
+
45
+
46
def replace_phone(match) -> str:
    """Verbalize the matched text as a non-mobile phone number.

    Args:
        match (re.Match)
    Returns:
        str
    """
    matched_text = match.group(0)
    return phone2str(matched_text, mobile=False)
54
+
55
+
56
def replace_mobile(match) -> str:
    """Verbalize the matched text as a mobile phone number.

    Args:
        match (re.Match)
    Returns:
        str
    """
    matched_text = match.group(0)
    return phone2str(matched_text)
GPT_SoVITS/text/zh_normalization/quantifier.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import re
15
+
16
+ from .num import num2str
17
+
18
+ # 温度表达式,温度会影响负号的读法
19
+ # -3°C 零下三度
20
+ RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)')
21
+ measure_dict = {
22
+ "cm2": "平方厘米",
23
+ "cm²": "平方厘米",
24
+ "cm3": "立方厘米",
25
+ "cm³": "立方厘米",
26
+ "cm": "厘米",
27
+ "db": "分贝",
28
+ "ds": "毫秒",
29
+ "kg": "千克",
30
+ "km": "千米",
31
+ "m2": "平方米",
32
+ "m²": "平方米",
33
+ "m³": "立方米",
34
+ "m3": "立方米",
35
+ "ml": "毫升",
36
+ "m": "米",
37
+ "mm": "毫米",
38
+ "s": "秒"
39
+ }
40
+
41
+
42
def replace_temperature(match) -> str:
    """Verbalize a temperature match; a leading minus is read as "零下".

    Args:
        match (re.Match)
    Returns:
        str
    """
    sign = "零下" if match.group(1) else ""
    temperature = num2str(match.group(2))
    # The unit is group 4; group 3 is the optional decimal part of the
    # number, which the unit check used to read by mistake.
    unit = "摄氏度" if match.group(4) == "摄氏度" else "度"
    return f"{sign}{temperature}{unit}"
57
+
58
+
59
def replace_measure(sentence) -> str:
    """Replace every unit notation from ``measure_dict`` with its Chinese name.

    Longer notations are replaced first so that a short one cannot split a
    longer one (e.g. "m" turning "mm" into "米米"). Notations of equal length
    keep their order in ``measure_dict``.
    """
    for q_notation in sorted(measure_dict, key=len, reverse=True):
        sentence = sentence.replace(q_notation, measure_dict[q_notation])
    return sentence
GPT_SoVITS/text/zh_normalization/text_normlization.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import re
15
+ from typing import List
16
+
17
+ from .char_convert import tranditional_to_simplified
18
+ from .chronology import RE_DATE
19
+ from .chronology import RE_DATE2
20
+ from .chronology import RE_TIME
21
+ from .chronology import RE_TIME_RANGE
22
+ from .chronology import replace_date
23
+ from .chronology import replace_date2
24
+ from .chronology import replace_time
25
+ from .constants import F2H_ASCII_LETTERS
26
+ from .constants import F2H_DIGITS
27
+ from .constants import F2H_SPACE
28
+ from .num import RE_DECIMAL_NUM
29
+ from .num import RE_DEFAULT_NUM
30
+ from .num import RE_FRAC
31
+ from .num import RE_INTEGER
32
+ from .num import RE_NUMBER
33
+ from .num import RE_PERCENTAGE
34
+ from .num import RE_POSITIVE_QUANTIFIERS
35
+ from .num import RE_RANGE
36
+ from .num import RE_TO_RANGE
37
+ from .num import RE_ASMD
38
+ from .num import RE_POWER
39
+ from .num import replace_default_num
40
+ from .num import replace_frac
41
+ from .num import replace_negative_num
42
+ from .num import replace_number
43
+ from .num import replace_percentage
44
+ from .num import replace_positive_quantifier
45
+ from .num import replace_range
46
+ from .num import replace_to_range
47
+ from .num import replace_asmd
48
+ from .num import replace_power
49
+ from .phonecode import RE_MOBILE_PHONE
50
+ from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
51
+ from .phonecode import RE_TELEPHONE
52
+ from .phonecode import replace_mobile
53
+ from .phonecode import replace_phone
54
+ from .quantifier import RE_TEMPERATURE
55
+ from .quantifier import replace_measure
56
+ from .quantifier import replace_temperature
57
+
58
+
59
class TextNormalizer():
    """Rule-based normalizer that splits text into sentences and rewrites
    non-standard words (dates, times, numbers, units, symbols) in each one.
    """

    # Single-character substitutions applied by _post_replace. None of the
    # replacement strings contains a key, so one translate() pass gives the
    # same result as applying the replacements one after another.
    _POST_REPLACE_TABLE = str.maketrans({
        '/': '每',
        # circled digits
        '①': '一',
        '②': '二',
        '③': '三',
        '④': '四',
        '⑤': '五',
        '⑥': '六',
        '⑦': '七',
        '⑧': '八',
        '⑨': '九',
        '⑩': '十',
        # Greek letters
        'α': '阿尔法',
        'β': '贝塔',
        'γ': '伽玛',
        'Γ': '伽玛',
        'δ': '德尔塔',
        'Δ': '德尔塔',
        'ε': '艾普西龙',
        'ζ': '捷塔',
        'η': '依塔',
        'θ': '西塔',
        'Θ': '西塔',
        'ι': '艾欧塔',
        'κ': '喀帕',
        'λ': '拉姆达',
        'Λ': '拉姆达',
        'μ': '缪',
        'ν': '拗',
        'ξ': '克西',
        'Ξ': '克西',
        'ο': '欧米克伦',
        'π': '派',
        'Π': '派',
        'ρ': '肉',
        'ς': '西格玛',
        'Σ': '西格玛',
        'σ': '西格玛',
        'τ': '套',
        'υ': '宇普西龙',
        'φ': '服艾',
        'Φ': '服艾',
        'χ': '器',
        'ψ': '普赛',
        'Ψ': '普赛',
        'ω': '欧米伽',
        'Ω': '欧米伽',
        # fallback readings for arithmetic operators left over after the
        # dedicated math rules have run
        '+': '加',
        '-': '减',
        '×': '乘',
        '÷': '除',
        '=': '等',
    })

    def __init__(self):
        # A sentence-ending punctuation mark, optionally followed by a
        # closing quote; captured so it can be kept with its sentence.
        self.SENTENCE_SPLITOR = re.compile(r'([:、,;。?!,;?!][”’]?)')

    def _split(self, text: str, lang="zh") -> List[str]:
        """Break a long text into sentences at sentence-ending punctuation.

        Args:
            text (str): The input text.
            lang (str): Language tag; for "zh" spaces and a set of special
                characters are removed before splitting.
        Returns:
            List[str]: Sentences, each keeping its trailing punctuation.
        """
        if lang == "zh":
            # Only for pure Chinese: drop spaces, then filter out special
            # characters.
            text = text.replace(" ", "")
            text = re.sub(r'[——《》【】<>{}()()#&@“”^_|\\]', '', text)
        # Put a newline after every splitting punctuation, then cut on the
        # newlines.
        marked = self.SENTENCE_SPLITOR.sub(r'\1\n', text).strip()
        return [piece.strip() for piece in re.split(r'\n+', marked)]

    def _post_replace(self, sentence: str) -> str:
        """Apply the final symbol clean-up to a sentence.

        Leftover symbols (slash, circled digits, Greek letters, arithmetic
        operators) are replaced by their readings, then the remaining
        special characters are removed.
        """
        spoken = sentence.translate(self._POST_REPLACE_TABLE)
        # Same filter as in _split plus "-" and "="; both of those have
        # already been turned into readings by the table above.
        return re.sub(r'[-——《》【】<=>{}()()#&@“”^_|\\]', '', spoken)

    def normalize_sentence(self, sentence: str) -> str:
        """Normalize one sentence by running the ordered rule pipeline.

        The order of the rules matters: each rule sees the output of the
        previous one.
        """
        # basic character conversions
        sentence = tranditional_to_simplified(sentence)
        for table in (F2H_ASCII_LETTERS, F2H_DIGITS, F2H_SPACE):
            sentence = sentence.translate(table)

        # number related NSW verbalization
        leading_rules = (
            (RE_DATE, replace_date),
            (RE_DATE2, replace_date2),
            # time ranges before single times
            (RE_TIME_RANGE, replace_time),
            (RE_TIME, replace_time),
            # tilde used as "to" in a range
            (RE_TO_RANGE, replace_to_range),
            (RE_TEMPERATURE, replace_temperature),
        )
        for pattern, verbalize in leading_rules:
            sentence = pattern.sub(verbalize, sentence)
        sentence = replace_measure(sentence)

        # arithmetic expressions: repeat until no operation is left
        while RE_ASMD.search(sentence):
            sentence = RE_ASMD.sub(replace_asmd, sentence)

        trailing_rules = (
            (RE_POWER, replace_power),
            (RE_FRAC, replace_frac),
            (RE_PERCENTAGE, replace_percentage),
            (RE_MOBILE_PHONE, replace_mobile),
            (RE_TELEPHONE, replace_phone),
            (RE_NATIONAL_UNIFORM_NUMBER, replace_phone),
            (RE_RANGE, replace_range),
            (RE_INTEGER, replace_negative_num),
            (RE_DECIMAL_NUM, replace_number),
            (RE_POSITIVE_QUANTIFIERS, replace_positive_quantifier),
            (RE_DEFAULT_NUM, replace_default_num),
            (RE_NUMBER, replace_number),
        )
        for pattern, verbalize in trailing_rules:
            sentence = pattern.sub(verbalize, sentence)

        return self._post_replace(sentence)

    def normalize(self, text: str) -> List[str]:
        """Split ``text`` into sentences and normalize each of them."""
        return [self.normalize_sentence(chunk) for chunk in self._split(text)]