Spaces:
Runtime error
Runtime error
| from pprint import pformat | |
| from huggingface_hub import hf_hub_download | |
| import librosa | |
| import gradio as gr | |
| from pipeline import PreTrainedPipeline | |
# Sampling rate (in Hz) that recorded audio is resampled to before recognition.
MODEL_SAMPLING_RATE = 16000  # 16kHz

# Hugging Face Hub repository id; used both as the model path and as the
# repository the language model file is downloaded from.
HF_HUB_URL = "ales/wav2vec2-cv-be"

# Location of the language model file inside the HF_HUB_URL repository.
LM_HUB_FP = "language_model/cv8be_5gram.bin"
# Lazily-built recognition pipeline shared by all requests (see _get_pipeline).
_PIPELINE = None


def _get_pipeline():
    """Return the shared speech-recognition pipeline, building it on first use.

    Fetching the language model file and constructing the pipeline is done
    once instead of on every request.
    NOTE(review): assumes a PreTrainedPipeline instance can be reused across
    calls -- confirm against pipeline.py.
    """
    global _PIPELINE
    if _PIPELINE is None:
        # download Language Model from HF Hub
        lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
        # init pipeline
        _PIPELINE = PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)
    return _PIPELINE


def main(audio_fp: str):
    """Recognize speech in an audio file and report technical details.

    Args:
        audio_fp: Path to the audio file to transcribe. A missing value
            (``None`` or an empty string, e.g. when the form is submitted
            without a recording) is handled without running the model.

    Returns:
        A ``(text, tech_data_str)`` tuple: the recognized text and a
        pretty-printed string with the remaining pipeline output plus basic
        statistics about the loaded audio.
    """
    # Nothing was recorded: return early instead of failing inside librosa.
    if not audio_fp:
        return '', 'No audio was provided.'

    # read audio file: resampled to the model sampling rate, mixed down to mono
    inputs = librosa.load(audio_fp, sr=MODEL_SAMPLING_RATE, mono=True)[0]

    # recognize speech
    pipeline_res = _get_pipeline()(inputs=inputs)
    text = pipeline_res['text'][0]  # unpack batch of size 1

    # add technical information to the output; the pipeline result is not
    # needed afterwards, so it is extended in place
    tech_data = pipeline_res
    del tech_data['text']
    tech_data['model_sampling_rate'] = MODEL_SAMPLING_RATE
    tech_data['inputs_shape'] = inputs.shape
    tech_data['inputs_max'] = inputs.max().item()
    tech_data['inputs_min'] = inputs.min().item()

    return text, pformat(tech_data)
# Markdown passed to the interface as its `article`; links to the model card.
article = """
The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)

"""

# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces and the
# `enable_queue` launch argument are deprecated in newer Gradio releases --
# confirm which Gradio version this app is pinned to.

# Microphone recording handed to `main` as a file path.
microphone_input = gr.inputs.Audio(
    source='microphone',
    type='filepath',
    label='Запішыце аўдыяфайл, каб распазнаць маўленьне',
)

# Two text outputs, in the same order as the tuple returned by `main`.
recognized_text_box = gr.outputs.Textbox(type='str', label='Распазнаны тэкст')
tech_info_box = gr.outputs.Textbox(type='str', label='Тэхнічная інфармацыя')

demo_description = (
    'Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
    'Акустычная мадэль + моўная мадэль.'
)

iface = gr.Interface(
    fn=main,
    inputs=microphone_input,
    outputs=[recognized_text_box, tech_info_box],
    title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
    description=demo_description,
    article=article,
)

# Start the web app with request queueing enabled.
iface.launch(enable_queue=True)