Spaces:

zhouzhou363
/

f5-tts

Configuration error

SWivid committed on Oct 10, 2024

Commit

39ce201

1 Parent(s): f6e3b78

disable mask for single infer to save mem; add custom trans for vocab to address oov

Files changed (2) hide show

model/cfm.py CHANGED Viewed

@@ -142,7 +142,10 @@ class CFM(nn.Module):
         cond_mask = rearrange(cond_mask, '... -> ... 1')
         step_cond = torch.where(cond_mask, cond, torch.zeros_like(cond))  # allow direct control (cut cond audio) with lens passed in
-        mask = lens_to_mask(duration)
         # test for no ref audio
         if no_ref_audio:

         cond_mask = rearrange(cond_mask, '... -> ... 1')
         step_cond = torch.where(cond_mask, cond, torch.zeros_like(cond))  # allow direct control (cut cond audio) with lens passed in
+        if batch > 1:
+            mask = lens_to_mask(duration)
+        else:  # save memory and speed up, as single inference need no mask currently
+            mask = None
         # test for no ref audio
         if no_ref_audio:

model/utils.py CHANGED Viewed

@@ -153,9 +153,11 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
 def convert_char_to_pinyin(text_list, polyphone = True):
     final_text_list = []
     god_knows_why_en_testset_contains_zh_quote = str.maketrans({'“': '"', '”': '"', '‘': "'", '’': "'"})  # in case librispeech (orig no-pc) test-clean
     for text in text_list:
         char_list = []
         text = text.translate(god_knows_why_en_testset_contains_zh_quote)
         for seg in jieba.cut(text):
             seg_byte_len = len(bytes(seg, 'UTF-8'))
             if seg_byte_len == len(seg):  # if pure alphabets and symbols

 def convert_char_to_pinyin(text_list, polyphone = True):
     final_text_list = []
     god_knows_why_en_testset_contains_zh_quote = str.maketrans({'“': '"', '”': '"', '‘': "'", '’': "'"})  # in case librispeech (orig no-pc) test-clean
+    custom_trans = str.maketrans({';': ','})  # add custom trans here, to address oov
     for text in text_list:
         char_list = []
         text = text.translate(god_knows_why_en_testset_contains_zh_quote)
+        text = text.translate(custom_trans)
         for seg in jieba.cut(text):
             seg_byte_len = len(bytes(seg, 'UTF-8'))
             if seg_byte_len == len(seg):  # if pure alphabets and symbols