vietnamese-tts

Running

Hưng committed on Apr 2

Commit

41b7348

1 Parent(s): a0fbc02

update model link

Files changed (1) hide show

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ model = load_model(
     DiT,
     dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4),
     ckpt_path=str(
-        cached_path("hf://zalopay/vietnamese-tts/model_960000.pt")
     ),
     mel_spec_type="vocos",
     vocab_file=str(cached_path("hf://zalopay/vietnamese-tts/vocab.txt")),
@@ -38,6 +38,8 @@ def infer(ref_audio_orig: str, ref_text: str, gen_text: str, speed: float = 1.0)
     if gen_text is None or gen_text.strip() == "":
         raise gr.Error("Text to generate is required.")
     if ref_text is None or ref_text.strip() == "":
         raise gr.Error("Ref Text is required.")
@@ -46,7 +48,7 @@ def infer(ref_audio_orig: str, ref_text: str, gen_text: str, speed: float = 1.0)
         gr.Info("Generated audio text: {} with audio file {} ".format(ref_text, ref_audio_orig))
         final_wave, final_sample_rate, combined_spectrogram = infer_process(
             ref_audio,
-            ref_text,
             gen_text,
             model,
             vocoder,
@@ -103,6 +105,12 @@ iface = gr.Interface(
             "Bạn đã nhận được thanh toán thành công số tiền ba mươi ngàn đồng",
             1.0,
         ],
     ],
 )

     DiT,
     dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4),
     ckpt_path=str(
+        cached_path("hf://zalopay/vietnamese-tts/model_1290000.pt")
     ),
     mel_spec_type="vocos",
     vocab_file=str(cached_path("hf://zalopay/vietnamese-tts/vocab.txt")),
     if gen_text is None or gen_text.strip() == "":
         raise gr.Error("Text to generate is required.")
+    gen_text = " " + gen_text.lower()
     if ref_text is None or ref_text.strip() == "":
         raise gr.Error("Ref Text is required.")
         gr.Info("Generated audio text: {} with audio file {} ".format(ref_text, ref_audio_orig))
         final_wave, final_sample_rate, combined_spectrogram = infer_process(
             ref_audio,
+            ref_text.lower(),
             gen_text,
             model,
             vocoder,
             "Bạn đã nhận được thanh toán thành công số tiền ba mươi ngàn đồng",
             1.0,
         ],
+        [
+            "examples/pc-02.wav",
+            "sông Đồng Nai là con sông nội địa dài nhất Việt Nam, lớn thứ nhì nam bộ về lưu vực chỉ sau sông Cửu long",
+            "Ứng dụng thanh toán Zalopay chính thức ra mắt phiên bản mới với các thay đổi hấp dẫn, làm mới mọi trải nghiệm về tiền",
+            1.0,
+        ],
     ],
 )