Spaces:
Runtime error
Runtime error
File size: 2,345 Bytes
aca9f3d feb2a2b 51f7123 feb2a2b ded23d4 51f7123 feb2a2b 44daa8d feb2a2b 44daa8d 51f7123 3702096 44daa8d d71b5df 44daa8d feb2a2b 5b4ea6e a2498ab 5b4ea6e feb2a2b ded23d4 d71b5df 1022fd5 ded23d4 44daa8d 1022fd5 44daa8d cff8d27 1022fd5 5b4ea6e feb2a2b 5b4ea6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
from functools import lru_cache
from pprint import pformat

import datasets as hfd
import gradio as gr
from huggingface_hub import hf_hub_download

from pipeline import PreTrainedPipeline
# Hugging Face Hub repository id. Despite the name it is not a URL: it is
# passed as `repo_id` when downloading the language model and as `model_path`
# when building the pipeline.
HF_HUB_URL: str = 'ales/wav2vec2-cv-be'

# Path of the language model file inside the repository above.
LM_HUB_FP: str = 'language_model/cv8be_5gram.bin'
@lru_cache(maxsize=1)
def _load_pipeline():
    """Download the language model and build the recognition pipeline once.

    The result is memoized so that the model is not re-initialized on every
    request handled by `main`.
    """
    # download Language Model from HF Hub
    lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)

    # init pipeline
    return PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)


def main(audio_fp: str):
    """Transcribe one audio file and report technical details about the run.

    Args:
        audio_fp: Path to the audio file to recognize. A missing value
            (``None`` or an empty string) is tolerated and short-circuits.

    Returns:
        A ``(text, tech_data_str)`` tuple of two strings: the recognized text
        and a pretty-printed dump of the remaining pipeline output plus a few
        statistics about the input signal. When no audio path is given, the
        text is empty and the second element is a short explanatory message.
    """
    # Nothing to recognize: return a readable message instead of failing
    # further down with an opaque subscript error.
    if not audio_fp:
        return '', 'No audio input received.'

    # read and preprocess audio with huggingface.datasets
    ds = hfd.Dataset.from_dict({'path': [audio_fp]})
    ds = ds.cast_column('path', hfd.Audio(sampling_rate=16_000, mono=True))
    ds = ds.rename_column('path', 'audio')

    # Fetch the row a single time; indexing the dataset again would repeat
    # the audio loading for the same file.
    audio = ds[0]['audio']
    inputs = audio['array']
    sampling_rate = audio['sampling_rate']

    # recognize speech with the shared (lazily created) pipeline
    pipeline = _load_pipeline()
    pipeline_res = pipeline(inputs=inputs)
    text = pipeline_res['text'][0]  # unpack batch of size 1

    # add technical information to the output
    tech_data = pipeline_res
    del tech_data['text']
    # NOTE(review): this is the rate reported after the cast to 16 kHz above,
    # so it is presumably not the file's original rate - confirm.
    tech_data['sampling_rate_orig'] = sampling_rate
    tech_data['inputs_shape'] = inputs.shape
    tech_data['inputs_max'] = inputs.max().item()
    tech_data['inputs_min'] = inputs.min().item()
    tech_data_str = pformat(tech_data)

    return text, tech_data_str
# Markdown footer shown with the interface; links to the model card.
article = """
The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)

"""

# Microphone recording handed to `main` as a file path.
# Label (Belarusian): "Record an audio file to recognize speech".
_audio_input = gr.inputs.Audio(
    source='microphone',
    type='filepath',
    label='Запішыце аўдыяфайл, каб распазнаць маўленьне',
)

# Two text boxes matching the two strings returned by `main`:
# "Recognized text" and "Technical information".
_text_outputs = [
    gr.outputs.Textbox(type='str', label='Распазнаны тэкст'),
    gr.outputs.Textbox(type='str', label='Тэхнічная інфармацыя'),
]

# Two-line description of the demo (Belarusian).
_description = (
    'Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
    'Акустычная мадэль + моўная мадэль.'
)

# Wire the recognizer to the widgets defined above.
iface = gr.Interface(
    fn=main,
    inputs=_audio_input,
    outputs=_text_outputs,
    title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
    description=_description,
    article=article,
)

# Start serving the app with request queueing turned on.
iface.launch(enable_queue=True)
|