jhansss committed on
Commit
e285e98
·
1 Parent(s): b20ddc7

Update Japanese text preprocessing and formatting to support natural Japanese text input without spaces between characters

Browse files
Files changed (2) hide show
  1. svs_utils.py +6 -6
  2. util.py +2 -0
svs_utils.py CHANGED
@@ -52,8 +52,8 @@ def svs_text_preprocessor(model_path, texts, lang):
52
  texts = preprocess_input(texts, "")
53
  text_list = get_pinyin(texts)
54
  elif lang == "jp":
55
- texts = preprocess_input(texts, " ")
56
- text_list = texts.strip().split()
57
 
58
  # text to phoneme
59
  tokenizer = get_tokenizer(model_path, lang)
@@ -286,12 +286,12 @@ if __name__ == "__main__":
286
  # load model
287
  model = svs_warmup(config)
288
 
289
- if config.lang=="zh":
290
  answer_text = "天气真好\n空气清新\n气温温和\n风和日丽\n天高气爽\n阳光明媚"
291
- elif config.lang=="jp":
292
- answer_text = "せ か い で い ち ば ん お ひ め さ ま\nそ う い う あ つ か い\nこ こ ろ え て よ ね" #
293
  else:
294
- print(f'Currently system does not support {config.lang}')
295
  exit(1)
296
 
297
  sample_rate = 44100
 
52
  texts = preprocess_input(texts, "")
53
  text_list = get_pinyin(texts)
54
  elif lang == "jp":
55
+ texts = preprocess_input(texts, "")
56
+ text_list = list(texts)
57
 
58
  # text to phoneme
59
  tokenizer = get_tokenizer(model_path, lang)
 
286
  # load model
287
  model = svs_warmup(config)
288
 
289
+ if config.lang == "zh":
290
  answer_text = "天气真好\n空气清新\n气温温和\n风和日丽\n天高气爽\n阳光明媚"
291
+ elif config.lang == "jp":
292
+ answer_text = "せかいでいちばんおひめさま\nそういうあつかい\nこころえてよね"
293
  else:
294
+ print(f"Currently system does not support {config.lang}")
295
  exit(1)
296
 
297
  sample_rate = 44100
util.py CHANGED
@@ -24,6 +24,8 @@ def pyopenjtalk_g2p(text) -> List[str]:
24
  import pyopenjtalk
25
  with warnings.catch_warnings(record=True) as w:
26
  warnings.simplefilter("always")
 
 
27
  # phones is a str object separated by space
28
  phones = pyopenjtalk.g2p(text, kana=False)
29
  if len(w) > 0:
 
24
  import pyopenjtalk
25
  with warnings.catch_warnings(record=True) as w:
26
  warnings.simplefilter("always")
27
+ # add space between each character
28
+ text = " ".join(list(text))
29
  # phones is a str object separated by space
30
  phones = pyopenjtalk.g2p(text, kana=False)
31
  if len(w) > 0: