"""Whisper transcription test, adapted by Michele De Stefano to run locally."""
import importlib.resources
import json
from pathlib import Path

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

from question_retriever import get_question
from tools.data_helpers import get_file_path

# Root of the packaged `data` resources shipped with the project.
__resources_path = Path(str(importlib.resources.files("data")))


def test_whisper() -> None:
    # Retrieve the question record and the path to its attached audio file.
    task_id = "1f975693-876d-457b-a649-393859e79bf3"
    question = json.loads(get_question(task_id=task_id))
    audio_file = get_file_path(file_name=question["file_name"])

    # Force CPU execution so the test also runs on machines without a GPU;
    # restore the commented line to pick CUDA automatically when available.
    # cuda_available = torch.cuda.is_available()
    cuda_available = False
    device = "cuda:0" if cuda_available else "cpu"
    torch_dtype = torch.float16 if cuda_available else torch.float32

    # Load Whisper and wrap it in a ready-to-use ASR pipeline.
    model_id = "openai/whisper-large-v3-turbo"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        torch_dtype=torch_dtype,
        device=device,
    )

    # return_timestamps=True is required by transformers when the audio
    # exceeds Whisper's 30-second window (long-form transcription).
    generate_kwargs = {
        "return_timestamps": True,
    }
    result = pipe(audio_file, generate_kwargs=generate_kwargs)
    print(result["text"])
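

if __name__ == "__main__":
    # Convenience entry point, an addition not present in the original
    # script: it lets the file be executed directly with `python <this file>`
    # in addition to being collected by pytest.
    test_whisper()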