File size: 1,179 Bytes
9556d07
7edc534
 
c8c786a
9556d07
7edc534
 
 
9556d07
 
 
7edc534
 
 
 
 
 
 
 
 
 
 
 
 
 
d543411
7edc534
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import logging
import time

import torch
from silero_vad import VADIterator

class VoiceActivityController:
    '''
    Thin streaming wrapper around the silero-vad ``VADIterator``.

    Constructing an instance loads the ``silero_vad`` model through
    ``torch.hub.load``; calling the instance feeds one audio chunk to the
    iterator and hands back whatever event the iterator reports.
    '''

    SAMPLING_RATE = 16000

    # Timing diagnostics go through logging (DEBUG) instead of stdout, so
    # they never mix with output the host application writes to stdout.
    _log = logging.getLogger(__name__)

    def __init__(self):
        self.model, _ = torch.hub.load(
            repo_or_dir='snakers4/silero-vad',
            model='silero_vad'
        )
        # Pass the rate explicitly so the class constant and the iterator
        # stay in agreement; all other iterator options are left at the
        # library defaults.
        self.iterator = VADIterator(
            self.model,
            sampling_rate=self.SAMPLING_RATE,
        )

    def reset(self):
        '''
        Reset the iterator's streaming state by calling its ``reset_states``.
        '''
        self.iterator.reset_states()

    def __call__(self, audio):
        '''
        Run voice activity detection on one audio chunk.

        audio: audio chunk in the current np.array format; it is passed to
               the iterator unchanged (any tensor conversion is left to the
               iterator -- NOTE(review): confirm for the installed
               silero-vad version)
        returns:
        - { 'start': time_frame } ... when voice start was detected. time_frame is number of frame (can be converted to seconds)
        - { 'end': time_frame }   ... when voice end is detected
        - None                    ... when no change detected by current chunk
        '''
        # perf_counter is meant for measuring intervals; time.time() is a
        # wall clock and may jump if the system clock is adjusted.
        started = time.perf_counter()
        event = self.iterator(audio)
        self._log.debug("VAD took %.6f seconds", time.perf_counter() - started)
        return event