Spaces:
Runtime error
Runtime error
| from funasr import AutoModel | |
| from funasr.utils.postprocess_utils import rich_transcription_postprocess | |
| from modelscope import snapshot_download | |
| import datetime | |
| import math | |
| import io | |
| import os | |
| import tempfile | |
| import json | |
| from typing import Optional | |
| import torch | |
| import gradio as gr | |
| from config import model_config | |
# Run on the first CUDA device when torch reports one, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Fetch the model snapshot named by the project config.
model_dir = snapshot_download(model_config['model_dir'])

# Constructor options for the single, module-wide FunASR pipeline.
# NOTE(review): remote_code is supplied while trust_remote_code is False;
# presumably the custom model.py is then not loaded - confirm against FunASR.
_automodel_options = {
    "model": model_dir,
    "trust_remote_code": False,
    "remote_code": "./model.py",
    "vad_model": "fsmn-vad",
    "punc_model": "ct-punc",
    "spk_model": "cam++",
    "vad_kwargs": {"max_single_segment_time": 15000},
    "ncpu": torch.get_num_threads(),
    "batch_size": 1,
    "hub": "hf",
    "device": device,
}

# Built once at import time and reused by every transcription request.
model = AutoModel(**_automodel_options)
def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc",
                     spk_model="cam++",
                     vad_kwargs='{"max_single_segment_time": 15000}',
                     batch_size=1, language="auto", use_itn=True,
                     batch_size_s=60, merge_vad=True, merge_length_s=15,
                     batch_size_threshold_s=50, hotword=" ",
                     ban_emo_unk=True):
    """Transcribe one audio file with the module-level FunASR ``model``.

    The function is used as a Gradio callback, so every outcome (including
    failures) is reported as a string that can be shown in the output box.

    Args:
        file_path: Path of the audio file to transcribe. ``None`` or an
            empty string yields a "no audio" message instead of a call
            into the model.
        vad_model, punc_model, spk_model, batch_size: Accepted so the
            signature lines up with the UI inputs, but not forwarded to
            ``model.generate``; the pipeline built at import time is used
            as-is.
        vad_kwargs: JSON text. It is only validated here; the parsed value
            is not forwarded to the model.
        language, use_itn, batch_size_s, merge_vad, merge_length_s,
        batch_size_threshold_s, hotword, ban_emo_unk: Passed through
            unchanged as keyword arguments to ``model.generate``.

    Returns:
        The post-processed transcription text, or a human-readable message
        describing why no transcription could be produced.
    """
    # Gradio hands over None when the user submits without any audio.
    if not file_path:
        return "No audio provided: record or upload a file first."

    # Reject malformed JSON up front with a message that names the field,
    # rather than surfacing a bare decoder error.
    try:
        json.loads(vad_kwargs)
    except (TypeError, ValueError) as e:
        return f"Invalid VAD Kwargs JSON: {e}"

    try:
        res = model.generate(
            input=file_path,
            cache={},
            language=language,
            use_itn=use_itn,
            batch_size_s=batch_size_s,
            merge_vad=merge_vad,
            merge_length_s=merge_length_s,
            batch_size_threshold_s=batch_size_threshold_s,
            hotword=hotword,
            ban_emo_unk=ban_emo_unk,
        )
        # An empty result would otherwise surface as "list index out of range".
        if not res:
            return "No transcription result was produced for this audio."
        return rich_transcription_postprocess(res[0]["text"])
    except Exception as e:
        # UI boundary: show the failure text in the output box instead of
        # letting the exception escape the callback.
        return str(e)
# Input widgets, listed in the same order as the positional parameters of
# transcribe_audio so Gradio maps each value onto the matching argument.
inputs = [
    # Audio is delivered to the callback as a file path.
    gr.Audio(type="filepath"),
    # Model selectors and VAD options.
    gr.Textbox(label="VAD Model", value="fsmn-vad"),
    gr.Textbox(label="PUNC Model", value="ct-punc"),
    gr.Textbox(label="SPK Model", value="cam++"),
    gr.Textbox(label="VAD Kwargs", value='{"max_single_segment_time": 15000}'),
    gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Batch Size"),
    # Decoding options.
    gr.Textbox(label="Language", value="auto"),
    gr.Checkbox(label="Use ITN", value=True),
    gr.Slider(minimum=30, maximum=120, value=60, step=1,
              label="Batch Size (seconds)"),
    gr.Checkbox(label="Merge VAD", value=True),
    gr.Slider(minimum=5, maximum=60, value=15, step=1,
              label="Merge Length (seconds)"),
    gr.Slider(minimum=10, maximum=100, value=50, step=1,
              label="Batch Size Threshold (seconds)"),
    gr.Textbox(label="Hotword", value=" "),
    gr.Checkbox(label="Ban Emotional Unknown", value=True),
]

# Single text box that shows the string returned by transcribe_audio.
outputs = gr.Textbox(label="Transcription")

# Assemble the interface, then start serving it.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=inputs,
    outputs=outputs,
    title="ASR Transcription with FunASR",
)
demo.launch()