|
import torch |
|
from silero_vad import VADIterator |
|
import time |
|
|
|
class VoiceActivityController:
    '''
    Thin streaming wrapper around the Silero VAD model.

    On construction the ``silero_vad`` model is fetched through
    ``torch.hub`` and wrapped in a ``VADIterator``. Audio chunks are then
    fed one at a time by calling the instance; ``reset`` clears the
    iterator state between independent streams.
    '''

    # Sampling rate (Hz) handed to the VAD iterator.
    SAMPLING_RATE = 16000

    def __init__(self):
        '''
        Load the Silero VAD model and create the streaming iterator.
        '''
        # torch.hub.load returns a (model, utils) pair; the utils are unused.
        self.model, _ = torch.hub.load(
            repo_or_dir='snakers4/silero-vad',
            model='silero_vad'
        )

        # Pass the class constant explicitly so that SAMPLING_RATE is the
        # single source of truth rather than relying on the library default.
        self.iterator = VADIterator(
            self.model,
            sampling_rate=self.SAMPLING_RATE,
        )

    def reset(self):
        '''
        Reset the internal state of the VAD iterator.
        '''
        self.iterator.reset_states()

    def __call__(self, audio):
        '''
        Run voice activity detection on one audio chunk.

        audio: audio chunk in the current np.array format

        returns (whatever the underlying iterator reports):
            - { 'start': time_frame } ... when voice start was detected.
              time_frame is the frame position (can be converted to seconds)
            - { 'end': time_frame } ... when voice end is detected
            - None ... when no change is detected by the current chunk
        '''
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by system clock adjustments during the call.
        started = time.perf_counter()
        event = self.iterator(audio)
        print("VAD took ",time.perf_counter()-started,"seconds")
        return event
|
|