File size: 1,313 Bytes
617df14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import numpy as np
import logging
from scipy import signal

logger = logging.getLogger(__name__)

def warmup_model(processor, model, device, np_dtype, torch_dtype, logger):
    # Warm up the model with empty audio
    logger.info("Warming up Whisper model with dummy input")
    warmup_audio = np.zeros((16000,), dtype=np_dtype)  # 1s of silence
    input_features = processor(warmup_audio, sampling_rate=16000, return_tensors="pt").input_features
    input_features = input_features.to(device=device, dtype=torch_dtype)  # Convert to correct dtype
    model.generate(input_features)
    logger.info("Model warmup complete")


def resample_audio(audio, sample_rate):
    # Check if audio is of type np.int16 and convert to np.float32 in range -1 to 1
    if audio.dtype == np.int16:
        logger.info("Converting audio from np.int16 to np.float32 and normalizing to range -1 to 1")
        audio = audio.squeeze()
        audio = audio.astype(np.float32) / 32768.0  # 32768 is the max value for int16

    if sample_rate != 16000:
        logger.info(f"Resampling audio from {sample_rate}Hz to 16000Hz len:{len(audio)}")
        number_of_samples = round(len(audio) * float(16000) / sample_rate)
        audio = signal.resample(audio, number_of_samples)
        sample_rate = 16000
    return audio, sample_rate