Spaces:

ales
/

wav2vec2-cv-be-lm

Runtime error

App Files Files Community

wav2vec2-cv-be-lm / app.py

ales

change audio loading

51f7123 over 3 years ago

raw

history blame

2.35 kB

	import functools
	from pprint import pformat

	import datasets as hfd
	import gradio as gr
	from huggingface_hub import hf_hub_download

	from pipeline import PreTrainedPipeline


	# Hugging Face Hub repo id (not a full URL despite the name): used as `repo_id` when
	# downloading the language model and as `model_path` when building the pipeline.
	HF_HUB_URL = 'ales/wav2vec2-cv-be'
	# Path of the language-model file inside that repo (passed as `filename` to hf_hub_download).
	LM_HUB_FP = 'language_model/cv8be_5gram.bin'


	@functools.lru_cache(maxsize=1)
	def _get_pipeline():
	    """Build the speech-recognition pipeline once and reuse it for later requests.

	    Fetches the language-model file from the HF Hub and passes it, together
	    with the model repo id, to ``PreTrainedPipeline``. The result is cached so
	    the download/initialisation is not repeated on every call to ``main``.
	    If construction raises, nothing is cached and the next call retries.
	    """
	    # download Language Model from HF Hub
	    lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)

	    # init pipeline
	    return PreTrainedPipeline(model_path=HF_HUB_URL, language_model_fp=lm_fp)


	def main(audio_fp: str):
	    """Transcribe one audio file and collect technical details about the run.

	    Args:
	        audio_fp: Path to the audio file to recognize. A falsy value
	            (``None`` or ``''``, e.g. nothing was recorded) is treated as
	            "no audio" instead of failing inside the dataset code.

	    Returns:
	        A ``(text, tech_data_str)`` tuple: the first element of the
	        pipeline's ``'text'`` output, and a ``pformat``-ed dump of the
	        remaining pipeline output plus basic statistics of the input array.
	    """
	    if not audio_fp:
	        return '', 'No audio was provided.'

	    # read and preprocess audio with huggingface.datasets
	    ds = hfd.Dataset.from_dict({'path': [audio_fp]})
	    ds = ds.cast_column('path', hfd.Audio(sampling_rate=16_000, mono=True))
	    ds = ds.rename_column('path', 'audio')

	    # Index the row a single time so the audio is not fetched/decoded twice.
	    audio = ds[0]['audio']
	    inputs = audio['array']
	    sampling_rate = audio['sampling_rate']

	    # recognize speech
	    pipeline_res = _get_pipeline()(inputs=inputs)
	    text = pipeline_res['text'][0]  # unpack batch of size 1

	    # add technical information to the output
	    # (reuses the pipeline's own result object, minus the transcript)
	    tech_data = pipeline_res
	    del tech_data['text']
	    # NOTE(review): this value is read after the cast to 16 kHz, so it
	    # presumably reflects the resampled rate rather than the file's original
	    # one, despite the key name -- confirm before relying on it.
	    tech_data['sampling_rate_orig'] = sampling_rate
	    tech_data['inputs_shape'] = inputs.shape
	    tech_data['inputs_max'] = inputs.max().item()
	    tech_data['inputs_min'] = inputs.min().item()

	    return text, pformat(tech_data)

	# Markdown passed to the Interface as its `article` argument:
	# a link to the model card followed by a visitor-counter badge image.
	article = """
	The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)

	![visitors](https://visitor-badge.glitch.me/badge?page_id=huggingface.co/spaces/ales/wav2vec2-cv-be-lm&left_color=darkgray&right_color=crimson)
	"""

	# Microphone recorder; with type='filepath' the recording reaches `main` as a path.
	_audio_input = gr.inputs.Audio(
	    source='microphone',
	    type='filepath',
	    label='Запішыце аўдыяфайл, каб распазнаць маўленьне',
	)

	# Two text boxes, in the order `main` returns its values:
	# recognized text first, technical information second.
	_text_outputs = [
	    gr.outputs.Textbox(type='str', label=caption)
	    for caption in ('Распазнаны тэкст', 'Тэхнічная інфармацыя')
	]

	_description = (
	    'Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
	    'Акустычная мадэль + моўная мадэль.'
	)

	# Assemble the web UI around `main`.
	iface = gr.Interface(
	    fn=main,
	    inputs=_audio_input,
	    outputs=_text_outputs,
	    title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
	    description=_description,
	    article=article,
	)

	# Start serving the app with request queueing enabled.
	iface.launch(enable_queue=True)