Spaces:

Emmiq
/

EmmiSpace

Build error

App Files Files Community

zkniu committed on Nov 23, 2024

Commit

4dd981f

1 Parent(s): 9894489

support command line set args

Browse files

Files changed (4) hide show

src/f5_tts/eval/README.md +2 -2
src/f5_tts/eval/eval_infer_batch.py +2 -2
src/f5_tts/eval/eval_librispeech_test_clean.py +63 -52
src/f5_tts/eval/eval_seedtts_testset.py +63 -54

src/f5_tts/eval/README.md CHANGED Viewed

@@ -42,8 +42,8 @@ Then update in the following scripts with the paths you put evaluation model ckp
 Update the path with your batch-inferenced results, and carry out WER / SIM evaluations:
 ```bash
 # Evaluation for Seed-TTS test set
-python src/f5_tts/eval/eval_seedtts_testset.py
 # Evaluation for LibriSpeech-PC test-clean (cross-sentence)
-python src/f5_tts/eval/eval_librispeech_test_clean.py
 ```

 Update the path with your batch-inferenced results, and carry out WER / SIM evaluations:
 ```bash
 # Evaluation for Seed-TTS test set
+python src/f5_tts/eval/eval_seedtts_testset.py --gen_wav_dir <GEN_WAVE_DIR>
 # Evaluation for LibriSpeech-PC test-clean (cross-sentence)
+python src/f5_tts/eval/eval_librispeech_test_clean.py --gen_wav_dir <GEN_WAVE_DIR> --librispeech_test_clean_path <SOME_PATH>/LibriSpeech/test-clean
 ```

src/f5_tts/eval/eval_infer_batch.py CHANGED Viewed

@@ -34,8 +34,6 @@ win_length = 1024
 n_fft = 1024
 target_rms = 0.1
-tokenizer = "pinyin"
 rel_path = str(files("f5_tts").joinpath("../../"))
@@ -49,6 +47,7 @@ def main():
     parser.add_argument("-n", "--expname", required=True)
     parser.add_argument("-c", "--ckptstep", default=1200000, type=int)
     parser.add_argument("-m", "--mel_spec_type", default="vocos", type=str, choices=["bigvgan", "vocos"])
     parser.add_argument("-nfe", "--nfestep", default=32, type=int)
     parser.add_argument("-o", "--odemethod", default="euler")
@@ -64,6 +63,7 @@ def main():
     ckpt_step = args.ckptstep
     ckpt_path = rel_path + f"/ckpts/{exp_name}/model_{ckpt_step}.pt"
     mel_spec_type = args.mel_spec_type
     nfe_step = args.nfestep
     ode_method = args.odemethod

 n_fft = 1024
 target_rms = 0.1
 rel_path = str(files("f5_tts").joinpath("../../"))
     parser.add_argument("-n", "--expname", required=True)
     parser.add_argument("-c", "--ckptstep", default=1200000, type=int)
     parser.add_argument("-m", "--mel_spec_type", default="vocos", type=str, choices=["bigvgan", "vocos"])
+    parser.add_argument("-to", "--tokenizer", default="pinyin", type=str, choices=["pinyin", "char"])
     parser.add_argument("-nfe", "--nfestep", default=32, type=int)
     parser.add_argument("-o", "--odemethod", default="euler")
     ckpt_step = args.ckptstep
     ckpt_path = rel_path + f"/ckpts/{exp_name}/model_{ckpt_step}.pt"
     mel_spec_type = args.mel_spec_type
+    tokenizer = args.tokenizer
     nfe_step = args.nfestep
     ode_method = args.odemethod

src/f5_tts/eval/eval_librispeech_test_clean.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import sys
 import os
 sys.path.append(os.getcwd())
@@ -19,55 +20,65 @@ from f5_tts.eval.utils_eval import (
 rel_path = str(files("f5_tts").joinpath("../../"))
-eval_task = "wer"  # sim | wer
-lang = "en"
-metalst = rel_path + "/data/librispeech_pc_test_clean_cross_sentence.lst"
-librispeech_test_clean_path = "<SOME_PATH>/LibriSpeech/test-clean"  # test-clean path
-gen_wav_dir = "PATH_TO_GENERATED"  # generated wavs
-gpus = [0, 1, 2, 3, 4, 5, 6, 7]
-test_set = get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path)
-## In LibriSpeech, some speakers utilized varying voice characteristics for different characters in the book,
-## leading to a low similarity for the ground truth in some cases.
-# test_set = get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path, eval_ground_truth = True)  # eval ground truth
-local = False
-if local:  # use local custom checkpoint dir
-    asr_ckpt_dir = "../checkpoints/Systran/faster-whisper-large-v3"
-else:
-    asr_ckpt_dir = ""  # auto download to cache dir
-wavlm_ckpt_dir = "../checkpoints/UniSpeech/wavlm_large_finetune.pth"
-# --------------------------- WER ---------------------------
-if eval_task == "wer":
-    wers = []
-    with mp.Pool(processes=len(gpus)) as pool:
-        args = [(rank, lang, sub_test_set, asr_ckpt_dir) for (rank, sub_test_set) in test_set]
-        results = pool.map(run_asr_wer, args)
-        for wers_ in results:
-            wers.extend(wers_)
-    wer = round(np.mean(wers) * 100, 3)
-    print(f"\nTotal {len(wers)} samples")
-    print(f"WER      : {wer}%")
-# --------------------------- SIM ---------------------------
-if eval_task == "sim":
-    sim_list = []
-    with mp.Pool(processes=len(gpus)) as pool:
-        args = [(rank, sub_test_set, wavlm_ckpt_dir) for (rank, sub_test_set) in test_set]
-        results = pool.map(run_sim, args)
-        for sim_ in results:
-            sim_list.extend(sim_)
-    sim = round(sum(sim_list) / len(sim_list), 3)
-    print(f"\nTotal {len(sim_list)} samples")
-    print(f"SIM      : {sim}")

 import sys
 import os
+import argparse
 sys.path.append(os.getcwd())
 rel_path = str(files("f5_tts").joinpath("../../"))
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--eval_task", type=str, default="wer", choices=["sim", "wer"])
+    parser.add_argument("-l", "--lang", type=str, default="en")
+    parser.add_argument("-g", "--gen_wav_dir", type=str, required=True)
+    parser.add_argument("-p", "--librispeech_test_clean_path", type=str, required=True)
+    parser.add_argument("-n", "--gpu_nums", type=int, default=8, help="Number of GPUs to use")
+    parser.add_argument("--local", action="store_true", help="Use local custom checkpoint directory")
+    return parser.parse_args()
+def main():
+    args = get_args()
+    eval_task = args.eval_task
+    lang = args.lang
+    librispeech_test_clean_path = args.librispeech_test_clean_path  # test-clean path
+    gen_wav_dir = args.gen_wav_dir
+    metalst = rel_path + "/data/librispeech_pc_test_clean_cross_sentence.lst"
+    gpus = list(range(args.gpu_nums))
+    test_set = get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path)
+    ## In LibriSpeech, some speakers utilized varying voice characteristics for different characters in the book,
+    ## leading to a low similarity for the ground truth in some cases.
+    # test_set = get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path, eval_ground_truth = True)  # eval ground truth
+    local = args.local
+    if local:  # use local custom checkpoint dir
+        asr_ckpt_dir = "../checkpoints/Systran/faster-whisper-large-v3"
+    else:
+        asr_ckpt_dir = ""  # auto download to cache dir
+    wavlm_ckpt_dir = "../checkpoints/UniSpeech/wavlm_large_finetune.pth"
+    # --------------------------- WER ---------------------------
+    if eval_task == "wer":
+        wers = []
+        with mp.Pool(processes=len(gpus)) as pool:
+            args = [(rank, lang, sub_test_set, asr_ckpt_dir) for (rank, sub_test_set) in test_set]
+            results = pool.map(run_asr_wer, args)
+            for wers_ in results:
+                wers.extend(wers_)
+        wer = round(np.mean(wers) * 100, 3)
+        print(f"\nTotal {len(wers)} samples")
+        print(f"WER      : {wer}%")
+    # --------------------------- SIM ---------------------------
+    if eval_task == "sim":
+        sim_list = []
+        with mp.Pool(processes=len(gpus)) as pool:
+            args = [(rank, sub_test_set, wavlm_ckpt_dir) for (rank, sub_test_set) in test_set]
+            results = pool.map(run_sim, args)
+            for sim_ in results:
+                sim_list.extend(sim_)
+        sim = round(sum(sim_list) / len(sim_list), 3)
+        print(f"\nTotal {len(sim_list)} samples")
+        print(f"SIM      : {sim}")
+if __name__ == "__main__":
+    main()

src/f5_tts/eval/eval_seedtts_testset.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import sys
 import os
 sys.path.append(os.getcwd())
@@ -19,57 +20,65 @@ from f5_tts.eval.utils_eval import (
 rel_path = str(files("f5_tts").joinpath("../../"))
-eval_task = "wer"  # sim | wer
-lang = "zh"  # zh | en
-metalst = rel_path + f"/data/seedtts_testset/{lang}/meta.lst"  # seed-tts testset
-# gen_wav_dir = rel_path + f"/data/seedtts_testset/{lang}/wavs"  # ground truth wavs
-gen_wav_dir = "PATH_TO_GENERATED"  # generated wavs
-# NOTE. paraformer-zh result will be slightly different according to the number of gpus, cuz batchsize is different
-#       zh 1.254 seems a result of 4 workers wer_seed_tts
-gpus = [0, 1, 2, 3, 4, 5, 6, 7]
-test_set = get_seed_tts_test(metalst, gen_wav_dir, gpus)
-local = False
-if local:  # use local custom checkpoint dir
-    if lang == "zh":
-        asr_ckpt_dir = "../checkpoints/funasr"  # paraformer-zh dir under funasr
-    elif lang == "en":
-        asr_ckpt_dir = "../checkpoints/Systran/faster-whisper-large-v3"
-else:
-    asr_ckpt_dir = ""  # auto download to cache dir
-wavlm_ckpt_dir = "../checkpoints/UniSpeech/wavlm_large_finetune.pth"
-# --------------------------- WER ---------------------------
-if eval_task == "wer":
-    wers = []
-    with mp.Pool(processes=len(gpus)) as pool:
-        args = [(rank, lang, sub_test_set, asr_ckpt_dir) for (rank, sub_test_set) in test_set]
-        results = pool.map(run_asr_wer, args)
-        for wers_ in results:
-            wers.extend(wers_)
-    wer = round(np.mean(wers) * 100, 3)
-    print(f"\nTotal {len(wers)} samples")
-    print(f"WER      : {wer}%")
-# --------------------------- SIM ---------------------------
-if eval_task == "sim":
-    sim_list = []
-    with mp.Pool(processes=len(gpus)) as pool:
-        args = [(rank, sub_test_set, wavlm_ckpt_dir) for (rank, sub_test_set) in test_set]
-        results = pool.map(run_sim, args)
-        for sim_ in results:
-            sim_list.extend(sim_)
-    sim = round(sum(sim_list) / len(sim_list), 3)
-    print(f"\nTotal {len(sim_list)} samples")
-    print(f"SIM      : {sim}")

 import sys
 import os
+import argparse
 sys.path.append(os.getcwd())
 rel_path = str(files("f5_tts").joinpath("../../"))
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--eval_task", type=str, default="wer", choices=["sim", "wer"])
+    parser.add_argument("-l", "--lang", type=str, default="en", choices=["zh", "en"])
+    parser.add_argument("-g", "--gen_wav_dir", type=str, required=True)
+    parser.add_argument("-n", "--gpu_nums", type=int, default=8, help="Number of GPUs to use")
+    parser.add_argument("--local", action="store_true", help="Use local custom checkpoint directory")
+    return parser.parse_args()
+def main():
+    args = get_args()
+    eval_task = args.eval_task
+    lang = args.lang
+    gen_wav_dir = args.gen_wav_dir
+    metalst = rel_path + f"/data/seedtts_testset/{lang}/meta.lst"  # seed-tts testset
+    # NOTE. paraformer-zh result will be slightly different according to the number of gpus, cuz batchsize is different
+    #       zh 1.254 seems a result of 4 workers wer_seed_tts
+    gpus = list(range(args.gpu_nums))
+    test_set = get_seed_tts_test(metalst, gen_wav_dir, gpus)
+    local = args.local
+    if local:  # use local custom checkpoint dir
+        if lang == "zh":
+            asr_ckpt_dir = "../checkpoints/funasr"  # paraformer-zh dir under funasr
+        elif lang == "en":
+            asr_ckpt_dir = "../checkpoints/Systran/faster-whisper-large-v3"
+    else:
+        asr_ckpt_dir = ""  # auto download to cache dir
+    wavlm_ckpt_dir = "../checkpoints/UniSpeech/wavlm_large_finetune.pth"
+    # --------------------------- WER ---------------------------
+    if eval_task == "wer":
+        wers = []
+        with mp.Pool(processes=len(gpus)) as pool:
+            args = [(rank, lang, sub_test_set, asr_ckpt_dir) for (rank, sub_test_set) in test_set]
+            results = pool.map(run_asr_wer, args)
+            for wers_ in results:
+                wers.extend(wers_)
+        wer = round(np.mean(wers) * 100, 3)
+        print(f"\nTotal {len(wers)} samples")
+        print(f"WER      : {wer}%")
+    # --------------------------- SIM ---------------------------
+    if eval_task == "sim":
+        sim_list = []
+        with mp.Pool(processes=len(gpus)) as pool:
+            args = [(rank, sub_test_set, wavlm_ckpt_dir) for (rank, sub_test_set) in test_set]
+            results = pool.map(run_sim, args)
+            for sim_ in results:
+                sim_list.extend(sim_)
+        sim = round(sum(sim_list) / len(sim_list), 3)
+        print(f"\nTotal {len(sim_list)} samples")
+        print(f"SIM      : {sim}")
+if __name__ == "__main__":
+    main()