SWivid committed on
Commit
0b0aff1
·
1 Parent(s): 78ca190

minor fix for vocab check in finetune_gradio

Browse files
src/f5_tts/train/finetune_gradio.py CHANGED
@@ -1099,7 +1099,7 @@ def vocab_extend(project_name, symbols, model_type):
1099
  return f"vocab old size : {size_vocab}\nvocab new size : {size}\nvocab add : {vocab_size_new}\nnew symbols :\n{vocab_new}"
1100
 
1101
 
1102
- def vocab_check(project_name):
1103
  name_project = project_name
1104
  path_project = os.path.join(path_data, name_project)
1105
 
@@ -1128,6 +1128,8 @@ def vocab_check(project_name):
1128
  continue
1129
 
1130
  text = sp[1].lower().strip()
 
 
1131
 
1132
  for t in text:
1133
  if t not in vocab and t not in miss_symbols_keep:
@@ -1498,7 +1500,9 @@ Using the extended model, you can finetune to a new language that is missing sym
1498
  txt_info_extend = gr.Textbox(label="Info", value="")
1499
 
1500
  txt_extend.change(vocab_count, inputs=[txt_extend], outputs=[txt_count_symbol])
1501
- check_button.click(fn=vocab_check, inputs=[cm_project], outputs=[txt_info_check, txt_extend])
 
 
1502
  extend_button.click(
1503
  fn=vocab_extend, inputs=[cm_project, txt_extend, exp_name_extend], outputs=[txt_info_extend]
1504
  )
 
1099
  return f"vocab old size : {size_vocab}\nvocab new size : {size}\nvocab add : {vocab_size_new}\nnew symbols :\n{vocab_new}"
1100
 
1101
 
1102
+ def vocab_check(project_name, tokenizer_type):
1103
  name_project = project_name
1104
  path_project = os.path.join(path_data, name_project)
1105
 
 
1128
  continue
1129
 
1130
  text = sp[1].lower().strip()
1131
+ if tokenizer_type == "pinyin":
1132
+ text = convert_char_to_pinyin([text], polyphone=True)[0]
1133
 
1134
  for t in text:
1135
  if t not in vocab and t not in miss_symbols_keep:
 
1500
  txt_info_extend = gr.Textbox(label="Info", value="")
1501
 
1502
  txt_extend.change(vocab_count, inputs=[txt_extend], outputs=[txt_count_symbol])
1503
+ check_button.click(
1504
+ fn=vocab_check, inputs=[cm_project, tokenizer_type], outputs=[txt_info_check, txt_extend]
1505
+ )
1506
  extend_button.click(
1507
  fn=vocab_extend, inputs=[cm_project, txt_extend, exp_name_extend], outputs=[txt_info_extend]
1508
  )