osanseviero
/

asr-with-transformers-wav2vec2

Automatic Speech Recognition

Model card Files Files and versions Community

asr-with-transformers-wav2vec2 / model.py

osanseviero's picture

Update model.py

efb0bee about 4 years ago

1.4 kB

	import numpy as np

	from transformers import AutomaticSpeechRecognitionPipeline, AutoTokenizer, Wav2Vec2FeatureExtractor, Wav2Vec2ForCTC
	from typing import Dict
	from pathlib import Path

	from datasets import load_dataset


	class PreTrainedModel:
	    """Speech-recognition wrapper around the Wav2Vec2 checkpoint stored beside this file."""

	    def __init__(self):
	        """
	        Load the model, tokenizer and feature extractor from this file's directory.

	        The three components are combined into a single
	        ``AutomaticSpeechRecognitionPipeline`` that is kept on ``self.model``.
	        """
	        # Every artifact is read from the directory that contains this module.
	        model_dir = Path(__file__).parent

	        ctc_model = Wav2Vec2ForCTC.from_pretrained(model_dir)
	        ctc_tokenizer = AutoTokenizer.from_pretrained(model_dir)
	        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_dir)

	        self.model = AutomaticSpeechRecognitionPipeline(
	            model=ctc_model,
	            tokenizer=ctc_tokenizer,
	            feature_extractor=feature_extractor,
	        )

	    def __call__(self, inputs) -> Dict[str, str]:
	        """
	        Transcribe audio by delegating to the wrapped pipeline.

	        Args:
	            inputs (:obj:`np.array`):
	                The raw waveform of the received audio. By default at 16 kHz.
	        Return:
	            A :obj:`dict` shaped like ``{"text": "XXX"}`` that contains the
	            text detected in the input audio.
	        """
	        transcription = self.model(inputs)
	        return transcription


	"""
	# Example usage (kept inside a string literal so it never runs on import).

	model = PreTrainedModel()
	ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
	filename = ds[0]["file"]
	with open(filename, "rb") as f:
	    data = f.read()
	print(model(data))
	"""