File size: 1,179 Bytes
9556d07 7edc534 c8c786a 9556d07 7edc534 9556d07 7edc534 d543411 7edc534 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import torch
from silero_vad import VADIterator
import time
class VoiceActivityController:
    """Wrap a Silero VAD model and a streaming ``VADIterator``.

    The model is fetched with ``torch.hub.load`` when the controller is
    constructed.  Calling the controller with an audio chunk forwards that
    chunk to the iterator and returns whatever the iterator reports.
    """

    # Sampling rate constant (presumably in Hz).  It is exposed for callers;
    # this class does not pass it to the iterator.
    SAMPLING_RATE = 16000

    def __init__(self):
        """Load the ``silero_vad`` model and create the iterator around it."""
        # The hub call yields a pair; only the first element (the model) is
        # kept, the second one is discarded.
        self.model, _ = torch.hub.load(
            repo_or_dir='snakers4/silero-vad',
            model='silero_vad',
        )
        # we use the default options: 500ms silence, etc.
        self.iterator = VADIterator(self.model)

    def reset(self):
        """Reset the iterator's internal state via ``reset_states()``."""
        self.iterator.reset_states()

    def __call__(self, audio):
        """Run voice activity detection on one audio chunk.

        audio: audio chunk in the current np.array format.  It is handed to
            the iterator unchanged; any conversion is left to the iterator.

        returns the iterator's result unchanged, documented as one of
        (NOTE(review): confirm against the VADIterator implementation):
        - { 'start': time_frame } ... when voice start was detected.
          time_frame is number of frame (can be converted to seconds)
        - { 'end': time_frame } ... when voice end is detected
        - None ... when no change detected by current chunk

        Side effect: prints the time spent inside the iterator call.
        """
        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted (or go negative) when the system clock is adjusted.
        started = time.perf_counter()
        result = self.iterator(audio)
        print("VAD took ", time.perf_counter() - started, "seconds")
        return result
|