|
import os, sys |
|
import argparse |
|
from SenseVoiceAx import SenseVoiceAx |
|
from tokenizer import SentencepiecesTokenizer |
|
from print_utils import rich_transcription_postprocess, rich_print_asr_res |
|
from download_utils import download_model |
|
|
|
|
|
def get_args(argv=None):
    """Parse the command-line options of the SenseVoice demo.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        argparse.Namespace with two attributes:
            input: value of ``--input`` / ``-i`` (required).
            language: value of ``--language`` / ``-l``; one of "auto",
                "zh", "en", "yue", "ja", "ko" (default "auto").

    Raises:
        SystemExit: Raised by argparse when ``--input`` is missing or
            ``--language`` is not one of the allowed choices.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input", "-i",
        required=True,
        type=str,
        help="Input audio file",
    )
    parser.add_argument(
        "--language", "-l",
        type=str,
        default="auto",
        choices=["auto", "zh", "en", "yue", "ja", "ko"],
        help="Language passed to the recognizer (default: %(default)s)",
    )
    # Forwarding argv lets callers and tests supply arguments explicitly;
    # parse_args(None) falls back to sys.argv[1:].
    return parser.parse_args(argv)
|
|
|
|
|
def main():
    """Transcribe one audio file with SenseVoice and print the result.

    Steps: parse the command line, obtain the "SenseVoice" model bundle via
    ``download_model``, build the tokenizer and the ``SenseVoiceAx``
    pipeline, run inference on the input file, and print the list of
    post-processed transcriptions.

    Raises:
        FileNotFoundError: If the ``.axmodel`` file or the BPE tokenizer
            model is not present under the downloaded bundle directory.
    """
    args = get_args()

    input_audio = args.input
    language = args.language
    # Fixed settings handed unchanged to SenseVoiceAx below.
    # NOTE(review): use_itn presumably toggles inverse text normalization and
    # max_len presumably has to match the compiled axmodel - confirm against
    # SenseVoiceAx before changing either value.
    use_itn = True
    max_len = 68

    model_path_root = download_model("SenseVoice")
    model_path = os.path.join(model_path_root, "sensevoice_ax650", "sensevoice.axmodel")
    bpemodel = os.path.join(model_path_root, "chn_jpn_yue_eng_ko_spectok.bpe.model")

    # Explicit checks instead of `assert`: assertions are removed when Python
    # runs with -O, which would let a missing file slip through to the
    # model/tokenizer loaders.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"model {model_path} not exist")
    if not os.path.exists(bpemodel):
        raise FileNotFoundError(f"bpe model {bpemodel} not exist")

    print(f"input_audio: {input_audio}")
    print(f"language: {language}")
    print(f"use_itn: {use_itn}")
    print(f"model_path: {model_path}")

    tokenizer = SentencepiecesTokenizer(bpemodel=bpemodel)
    pipeline = SenseVoiceAx(
        model_path,
        max_len=max_len,
        language=language,
        use_itn=use_itn,
        tokenizer=tokenizer,
    )

    # infer() is iterated below, one raw result per element; each element is
    # cleaned up by rich_transcription_postprocess before printing.
    asr_res = pipeline.infer(input_audio, print_rtf=True)
    print([rich_transcription_postprocess(i) for i in asr_res])
|
|
|
|
|
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()