ms180 committed on
Commit
ca2d805
·
1 Parent(s): d191039

fugashi -> kanjiconv and add touhou music

Browse files
Files changed (1) hide show
  1. svs_utils.py +37 -18
svs_utils.py CHANGED
@@ -9,9 +9,11 @@ from espnet_model_zoo.downloader import ModelDownloader
9
 
10
  from util import get_pinyin, get_tokenizer, postprocess_phn, preprocess_input
11
 
12
- import fugashi
13
  import unicodedata
14
- import pykakasi
 
 
15
 
16
 
17
  def svs_warmup(config):
@@ -74,22 +76,10 @@ def is_small_kana(kana): # ょ True よ False
74
 
75
 
76
  def kanji_to_SVSDictKana(text):
77
- tagger = fugashi.Tagger()
78
-
79
- katagana_text = " ".join(word.feature.pron if word.feature.pron else word.surface for word in tagger(text))
80
- print(katagana_text) # ['トーキョー', 'ダイガク', 'ト', 'キョート', 'ダイガク'] # NOTE(yiwen) the svs predefined dict does not support ー
81
-
82
- kks = pykakasi.kakasi()
83
- kks.setMode("K", "H") # 片仮名 → 平仮名
84
- conv = kks.getConverter()
85
-
86
- hiragana_text = " ".join(
87
- conv.do(word.feature.pron) if word.feature.pron else word.surface
88
- for word in tagger(katagana_text)
89
- )
90
 
91
  hiragana_text_wl = replace_chouonpu(hiragana_text).split(" ") # list
92
- # print(f'debug -- hiragana_text {hiragana_text_wl}')
93
 
94
  final_ls = []
95
  for subword in hiragana_text_wl:
@@ -253,6 +243,11 @@ def singmos_evaluation(predictor, wav_info, fs):
253
 
254
 
255
  def estimate_sentence_length(query, config, song2note_lengths):
 
 
 
 
 
256
  if config.melody_source.startswith("random_select"):
257
  song_name = random.choice(list(song2note_lengths.keys()))
258
  phrase_length = song2note_lengths[song_name]
@@ -323,6 +318,25 @@ def align_score_and_text(segment_iterator, lyric_ls, sybs, labels, config):
323
  return batch
324
 
325
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  def song_segment_iterator(song_db, metadata):
327
  song_name = metadata["song_name"]
328
  if song_name.startswith("kising_"):
@@ -331,6 +345,11 @@ def song_segment_iterator(song_db, metadata):
331
  while f"{song_name}_{segment_id:03d}" in song_db.index:
332
  yield song_db.loc[f"{song_name}_{segment_id:03d}"]
333
  segment_id += 1
 
 
 
 
 
334
  else:
335
  raise NotImplementedError(f"song name {song_name} not supported")
336
 
@@ -360,7 +379,7 @@ if __name__ == "__main__":
360
  model_path="espnet/mixdata_svs_visinger2_spkembed_lang_pretrained",
361
  cache_dir="cache",
362
  device="cuda", # "cpu"
363
- melody_source="random_generate", # "random_select.take_lyric_continuation"
364
  lang="jp",
365
  speaker="resource/singer/singer_embedding_ace-2.npy",
366
  )
@@ -371,7 +390,7 @@ if __name__ == "__main__":
371
  if config.lang == "zh":
372
  answer_text = "天气真好\n空气清新\n气温温和\n风和日丽\n天高气爽\n阳光明媚"
373
  elif config.lang == "jp":
374
- answer_text = "世界で一番おひめさま そういう扱い心得てよね\n私を誰だと思ってるの"
375
  else:
376
  print(f"Currently system does not support {config.lang}")
377
  exit(1)
 
9
 
10
  from util import get_pinyin, get_tokenizer, postprocess_phn, preprocess_input
11
 
12
+ from kanjiconv import KanjiConv
13
  import unicodedata
14
+
15
+
16
+ kanji_to_kana = KanjiConv()
17
 
18
 
19
  def svs_warmup(config):
 
76
 
77
 
78
  def kanji_to_SVSDictKana(text):
79
+ hiragana_text = kanji_to_kana.to_hiragana(text.replace(" ", ""))
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  hiragana_text_wl = replace_chouonpu(hiragana_text).split(" ") # list
82
+ # print(f'debug -- hiragana_text {hiragana_text_wl}')
83
 
84
  final_ls = []
85
  for subword in hiragana_text_wl:
 
243
 
244
 
245
  def estimate_sentence_length(query, config, song2note_lengths):
246
+ if config.melody_source == "random_select.touhou":
247
+ song_name = "touhou"
248
+ phrase_length = None
249
+ metadata = {"song_name": song_name}
250
+ return phrase_length, metadata
251
  if config.melody_source.startswith("random_select"):
252
  song_name = random.choice(list(song2note_lengths.keys()))
253
  phrase_length = song2note_lengths[song_name]
 
318
  return batch
319
 
320
 
321
def load_list_from_json(json_path):
    """Load note segments from a JSON file and convert them to score dicts.

    The top-level JSON value must be a list of objects. Each object is read
    through its ``"tempo"`` value and its ``"score"`` sequence, whose entries
    are indexed as ``[start, end, midi, ...]``.

    Args:
        json_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A list with one dict per input entry, holding the keys ``"tempo"``,
        ``"note_start_times"``, ``"note_end_times"``, ``"note_lyrics"`` (one
        empty string per note) and ``"note_midi"``.

    Raises:
        ValueError: If the top-level JSON value is not a list.
    """
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Validate the raw document *before* transforming it. Checking after the
    # conversion is pointless: the converted value is always a list, and a
    # non-list document would already have failed with an unrelated TypeError.
    if not isinstance(data, list):
        raise ValueError("The contents of the json is not list.")

    segments = []
    for entry in data:
        score = entry["score"]
        # Start/end times are rescaled by 145 / tempo.
        # NOTE(review): 145 is presumably the tempo the stored times were
        # authored at -- confirm against the data source.
        scale = 145 / entry["tempo"]
        segments.append(
            {
                "tempo": entry["tempo"],
                "note_start_times": [note[0] * scale for note in score],
                "note_end_times": [note[1] * scale for note in score],
                "note_lyrics": ["" for _ in score],
                "note_midi": [note[2] for note in score],
            }
        )
    return segments
338
+
339
+
340
  def song_segment_iterator(song_db, metadata):
341
  song_name = metadata["song_name"]
342
  if song_name.startswith("kising_"):
 
345
  while f"{song_name}_{segment_id:03d}" in song_db.index:
346
  yield song_db.loc[f"{song_name}_{segment_id:03d}"]
347
  segment_id += 1
348
+ elif song_name.startswith("touhou"):
349
+ # return an iterator that loads segments from the touhou music data
350
+ data = load_list_from_json("data/touhou/note_data.json")
351
+ for d in data:
352
+ yield d
353
  else:
354
  raise NotImplementedError(f"song name {song_name} not supported")
355
 
 
379
  model_path="espnet/mixdata_svs_visinger2_spkembed_lang_pretrained",
380
  cache_dir="cache",
381
  device="cuda", # "cpu"
382
+ melody_source="random_select.touhou", #"random_generate" "random_select.take_lyric_continuation", "random_select.touhou"
383
  lang="jp",
384
  speaker="resource/singer/singer_embedding_ace-2.npy",
385
  )
 
390
  if config.lang == "zh":
391
  answer_text = "天气真好\n空气清新\n气温温和\n风和日丽\n天高气爽\n阳光明媚"
392
  elif config.lang == "jp":
393
+ answer_text = "世界で一番おひめさま そういう扱い心得てよね"
394
  else:
395
  print(f"Currently system does not support {config.lang}")
396
  exit(1)