qfuxa committed on
Commit
3ccfa18
·
1 Parent(s): f86e733

correct error when using VAC

Browse files
whisperlivekit/whisper_streaming_custom/online_asr.py CHANGED
@@ -343,15 +343,15 @@ class OnlineASRProcessor:
343
  )
344
  sentences.append(sentence)
345
  return sentences
346
- def finish(self) -> Transcript:
 
347
  """
348
  Flush the remaining transcript when processing ends.
349
  """
350
  remaining_tokens = self.transcript_buffer.buffer
351
- final_transcript = self.concatenate_tokens(remaining_tokens)
352
- logger.debug(f"Final non-committed transcript: {final_transcript}")
353
  self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE
354
- return final_transcript
355
 
356
  def concatenate_tokens(
357
  self,
@@ -384,7 +384,8 @@ class VACOnlineASRProcessor:
384
  def __init__(self, online_chunk_size: float, *args, **kwargs):
385
  self.online_chunk_size = online_chunk_size
386
  self.online = OnlineASRProcessor(*args, **kwargs)
387
-
 
388
  # Load a VAD model (e.g. Silero VAD)
389
  import torch
390
  model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
@@ -455,7 +456,7 @@ class VACOnlineASRProcessor:
455
  self.buffer_offset += max(0, len(self.audio_buffer) - self.SAMPLING_RATE)
456
  self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:]
457
 
458
- def process_iter(self) -> Transcript:
459
  """
460
  Depending on the VAD status and the amount of accumulated audio,
461
  process the current audio chunk.
@@ -467,9 +468,9 @@ class VACOnlineASRProcessor:
467
  return self.online.process_iter()
468
  else:
469
  logger.debug("No online update, only VAD")
470
- return Transcript(None, None, "")
471
 
472
- def finish(self) -> Transcript:
473
  """Finish processing by flushing any remaining text."""
474
  result = self.online.finish()
475
  self.current_online_chunk_buffer_size = 0
@@ -480,4 +481,4 @@ class VACOnlineASRProcessor:
480
  """
481
  Get the unvalidated buffer in string format.
482
  """
483
- return self.online.concatenate_tokens(self.online.transcript_buffer.buffer).text
 
343
  )
344
  sentences.append(sentence)
345
  return sentences
346
+
347
+ def finish(self) -> List[ASRToken]:
348
  """
349
  Flush the remaining transcript when processing ends.
350
  """
351
  remaining_tokens = self.transcript_buffer.buffer
352
+ logger.debug(f"Final non-committed tokens: {remaining_tokens}")
 
353
  self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE
354
+ return remaining_tokens
355
 
356
  def concatenate_tokens(
357
  self,
 
384
  def __init__(self, online_chunk_size: float, *args, **kwargs):
385
  self.online_chunk_size = online_chunk_size
386
  self.online = OnlineASRProcessor(*args, **kwargs)
387
+ self.asr = self.online.asr
388
+
389
  # Load a VAD model (e.g. Silero VAD)
390
  import torch
391
  model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
 
456
  self.buffer_offset += max(0, len(self.audio_buffer) - self.SAMPLING_RATE)
457
  self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:]
458
 
459
+ def process_iter(self) -> List[ASRToken]:
460
  """
461
  Depending on the VAD status and the amount of accumulated audio,
462
  process the current audio chunk.
 
468
  return self.online.process_iter()
469
  else:
470
  logger.debug("No online update, only VAD")
471
+ return []
472
 
473
+ def finish(self) -> List[ASRToken]:
474
  """Finish processing by flushing any remaining text."""
475
  result = self.online.finish()
476
  self.current_online_chunk_buffer_size = 0
 
481
  """
482
  Get the unvalidated buffer in string format.
483
  """
484
+ return self.online.concatenate_tokens(self.online.transcript_buffer.buffer)