correct error when using VAC
Browse files
whisperlivekit/whisper_streaming_custom/online_asr.py
CHANGED
@@ -343,15 +343,15 @@ class OnlineASRProcessor:
|
|
343 |
)
|
344 |
sentences.append(sentence)
|
345 |
return sentences
|
346 |
-
|
|
|
347 |
"""
|
348 |
Flush the remaining transcript when processing ends.
|
349 |
"""
|
350 |
remaining_tokens = self.transcript_buffer.buffer
|
351 |
-
|
352 |
-
logger.debug(f"Final non-committed transcript: {final_transcript}")
|
353 |
self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE
|
354 |
-
return
|
355 |
|
356 |
def concatenate_tokens(
|
357 |
self,
|
@@ -384,7 +384,8 @@ class VACOnlineASRProcessor:
|
|
384 |
def __init__(self, online_chunk_size: float, *args, **kwargs):
|
385 |
self.online_chunk_size = online_chunk_size
|
386 |
self.online = OnlineASRProcessor(*args, **kwargs)
|
387 |
-
|
|
|
388 |
# Load a VAD model (e.g. Silero VAD)
|
389 |
import torch
|
390 |
model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
|
@@ -455,7 +456,7 @@ class VACOnlineASRProcessor:
|
|
455 |
self.buffer_offset += max(0, len(self.audio_buffer) - self.SAMPLING_RATE)
|
456 |
self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:]
|
457 |
|
458 |
-
def process_iter(self) ->
|
459 |
"""
|
460 |
Depending on the VAD status and the amount of accumulated audio,
|
461 |
process the current audio chunk.
|
@@ -467,9 +468,9 @@ class VACOnlineASRProcessor:
|
|
467 |
return self.online.process_iter()
|
468 |
else:
|
469 |
logger.debug("No online update, only VAD")
|
470 |
-
return
|
471 |
|
472 |
-
def finish(self) ->
|
473 |
"""Finish processing by flushing any remaining text."""
|
474 |
result = self.online.finish()
|
475 |
self.current_online_chunk_buffer_size = 0
|
@@ -480,4 +481,4 @@ class VACOnlineASRProcessor:
|
|
480 |
"""
|
481 |
Get the unvalidated buffer in string format.
|
482 |
"""
|
483 |
-
return self.online.concatenate_tokens(self.online.transcript_buffer.buffer)
|
|
|
343 |
)
|
344 |
sentences.append(sentence)
|
345 |
return sentences
|
346 |
+
|
347 |
+
def finish(self) -> List[ASRToken]:
|
348 |
"""
|
349 |
Flush the remaining transcript when processing ends.
|
350 |
"""
|
351 |
remaining_tokens = self.transcript_buffer.buffer
|
352 |
+
logger.debug(f"Final non-committed tokens: {remaining_tokens}")
|
|
|
353 |
self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE
|
354 |
+
return remaining_tokens
|
355 |
|
356 |
def concatenate_tokens(
|
357 |
self,
|
|
|
384 |
def __init__(self, online_chunk_size: float, *args, **kwargs):
|
385 |
self.online_chunk_size = online_chunk_size
|
386 |
self.online = OnlineASRProcessor(*args, **kwargs)
|
387 |
+
self.asr = self.online.asr
|
388 |
+
|
389 |
# Load a VAD model (e.g. Silero VAD)
|
390 |
import torch
|
391 |
model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
|
|
|
456 |
self.buffer_offset += max(0, len(self.audio_buffer) - self.SAMPLING_RATE)
|
457 |
self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:]
|
458 |
|
459 |
+
def process_iter(self) -> List[ASRToken]:
|
460 |
"""
|
461 |
Depending on the VAD status and the amount of accumulated audio,
|
462 |
process the current audio chunk.
|
|
|
468 |
return self.online.process_iter()
|
469 |
else:
|
470 |
logger.debug("No online update, only VAD")
|
471 |
+
return []
|
472 |
|
473 |
+
def finish(self) -> List[ASRToken]:
|
474 |
"""Finish processing by flushing any remaining text."""
|
475 |
result = self.online.finish()
|
476 |
self.current_online_chunk_buffer_size = 0
|
|
|
481 |
"""
|
482 |
Get the unvalidated buffer in string format.
|
483 |
"""
|
484 |
+
return self.online.concatenate_tokens(self.online.transcript_buffer.buffer)
|