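Minor fix for vocab check in finetune_gradio: vocab_check now takes tokenizer_type and converts the text to pinyin before checking symbols when the tokenizer is "pinyin"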
Browse files
src/f5_tts/train/finetune_gradio.py
CHANGED
@@ -1099,7 +1099,7 @@ def vocab_extend(project_name, symbols, model_type):
|
|
1099 |
return f"vocab old size : {size_vocab}\nvocab new size : {size}\nvocab add : {vocab_size_new}\nnew symbols :\n{vocab_new}"
|
1100 |
|
1101 |
|
1102 |
-
def vocab_check(project_name):
|
1103 |
name_project = project_name
|
1104 |
path_project = os.path.join(path_data, name_project)
|
1105 |
|
@@ -1128,6 +1128,8 @@ def vocab_check(project_name):
|
|
1128 |
continue
|
1129 |
|
1130 |
text = sp[1].lower().strip()
|
|
|
|
|
1131 |
|
1132 |
for t in text:
|
1133 |
if t not in vocab and t not in miss_symbols_keep:
|
@@ -1498,7 +1500,9 @@ Using the extended model, you can finetune to a new language that is missing sym
|
|
1498 |
txt_info_extend = gr.Textbox(label="Info", value="")
|
1499 |
|
1500 |
txt_extend.change(vocab_count, inputs=[txt_extend], outputs=[txt_count_symbol])
|
1501 |
-
check_button.click(
|
|
|
|
|
1502 |
extend_button.click(
|
1503 |
fn=vocab_extend, inputs=[cm_project, txt_extend, exp_name_extend], outputs=[txt_info_extend]
|
1504 |
)
|
|
|
1099 |
return f"vocab old size : {size_vocab}\nvocab new size : {size}\nvocab add : {vocab_size_new}\nnew symbols :\n{vocab_new}"
|
1100 |
|
1101 |
|
1102 |
+
def vocab_check(project_name, tokenizer_type):
|
1103 |
name_project = project_name
|
1104 |
path_project = os.path.join(path_data, name_project)
|
1105 |
|
|
|
1128 |
continue
|
1129 |
|
1130 |
text = sp[1].lower().strip()
|
1131 |
+
if tokenizer_type == "pinyin":
|
1132 |
+
text = convert_char_to_pinyin([text], polyphone=True)[0]
|
1133 |
|
1134 |
for t in text:
|
1135 |
if t not in vocab and t not in miss_symbols_keep:
|
|
|
1500 |
txt_info_extend = gr.Textbox(label="Info", value="")
|
1501 |
|
1502 |
txt_extend.change(vocab_count, inputs=[txt_extend], outputs=[txt_count_symbol])
|
1503 |
+
check_button.click(
|
1504 |
+
fn=vocab_check, inputs=[cm_project, tokenizer_type], outputs=[txt_info_check, txt_extend]
|
1505 |
+
)
|
1506 |
extend_button.click(
|
1507 |
fn=vocab_extend, inputs=[cm_project, txt_extend, exp_name_extend], outputs=[txt_info_extend]
|
1508 |
)
|