# osanseviero's picture
# Update model.py
# efb0bee
# raw
# history blame
# 1.4 kB
import numpy as np
from transformers import AutomaticSpeechRecognitionPipeline, AutoTokenizer, Wav2Vec2FeatureExtractor, Wav2Vec2ForCTC
from typing import Dict
from pathlib import Path
from datasets import load_dataset
class PreTrainedModel():
    """
    Speech-to-text wrapper around a Wav2Vec2 CTC checkpoint.

    The checkpoint, tokenizer and feature extractor are all read from the
    directory that contains this file, then combined into a single
    ``AutomaticSpeechRecognitionPipeline`` that instances delegate to when
    called.
    """

    def __init__(self):
        """
        Loads model, tokenizer and feature extractor from the local directory
        and assembles them into one speech recognition pipeline.
        """
        # All artifacts are expected to sit next to this module.
        model_dir = Path(__file__).parent

        model = Wav2Vec2ForCTC.from_pretrained(model_dir)
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
        extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_dir)

        # NOTE: despite its name, `self.model` holds the assembled pipeline,
        # not the bare Wav2Vec2ForCTC network loaded above.
        self.model = AutomaticSpeechRecognitionPipeline(
            model=model,
            feature_extractor=extractor,
            tokenizer=tokenizer,
        )

    def __call__(self, inputs) -> Dict[str, str]:
        """
        Transcribes the given audio by forwarding it to the pipeline.

        Args:
            inputs (:obj:`np.array`):
                The raw waveform of audio received. By default at 16KHz.
                The value is handed to the pipeline unchanged.
        Return:
            A :obj:`dict`. The returned object should look like
            {"text": "XXX"}, containing the text detected in the input audio.
        """
        return self.model(inputs)
"""
# Just an example using this.
model = PreTrainedModel()
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
filename = ds[0]["file"]
with open(filename, "rb") as f:
data = f.read()
print(model(data))
"""