File size: 1,628 Bytes
f3ecff1 1dd0b5c f3ecff1 1dd0b5c f3ecff1 1dd0b5c f3ecff1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os, sys
import argparse
from SenseVoiceAx import SenseVoiceAx
from tokenizer import SentencepiecesTokenizer
from print_utils import rich_transcription_postprocess, rich_print_asr_res
from download_utils import download_model
def get_args(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is what
            the script entry point relies on.

    Returns:
        argparse.Namespace with two attributes:
            input: the value given to ``--input`` / ``-i`` (required).
            language: one of "auto", "zh", "en", "yue", "ja", "ko";
                defaults to "auto".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input", "-i", required=True, type=str, help="Input audio file"
    )
    parser.add_argument(
        "--language",
        "-l",
        required=False,
        type=str,
        default="auto",
        choices=["auto", "zh", "en", "yue", "ja", "ko"],
        help="Language code handed to the recognizer (default: auto)",
    )
    # Passing argv through lets callers and tests supply arguments explicitly.
    return parser.parse_args(argv)
def main():
    """Transcribe the audio file named on the command line and print the result.

    Steps, in order: parse the CLI arguments, obtain the model directory via
    ``download_model("SenseVoice")``, verify that the model file and the
    tokenizer model exist, build the tokenizer and the ``SenseVoiceAx``
    pipeline, run ``infer`` on the input and print the post-processed
    results as a list.

    Raises:
        FileNotFoundError: if the ``.axmodel`` file or the BPE tokenizer
            model is missing under the downloaded model directory.
    """
    args = get_args()
    input_audio = args.input
    language = args.language
    use_itn = True  # punctuation prediction
    # Forwarded unchanged to SenseVoiceAx.
    # NOTE(review): presumably tied to the compiled model's input length - confirm.
    max_len = 68

    model_path_root = download_model("SenseVoice")
    model_path = os.path.join(model_path_root, "sensevoice_ax650", "sensevoice.axmodel")
    bpemodel = os.path.join(model_path_root, "chn_jpn_yue_eng_ko_spectok.bpe.model")

    # Explicit raises instead of `assert`: asserts are removed under
    # `python -O`, which would silently skip these checks.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"model {model_path} not exist")
    if not os.path.exists(bpemodel):
        raise FileNotFoundError(f"bpe model {bpemodel} not exist")

    print(f"input_audio: {input_audio}")
    print(f"language: {language}")
    print(f"use_itn: {use_itn}")
    print(f"model_path: {model_path}")

    tokenizer = SentencepiecesTokenizer(bpemodel=bpemodel)
    pipeline = SenseVoiceAx(
        model_path,
        max_len=max_len,
        language=language,
        use_itn=use_itn,
        tokenizer=tokenizer,
    )

    asr_res = pipeline.infer(input_audio, print_rtf=True)
    print([rich_transcription_postprocess(i) for i in asr_res])
    # rich_print_asr_res(asr_res) is the alternative printer imported by this file.
# Run the transcription only when executed as a script, not on import.
if __name__ == "__main__":
    main()