Spaces:

langtech-innovation
/

WhisperLiveKitDiarization

Paused

SilasKieser committed on Jan 27

Commit

33d0ade

1 Parent(s): a6bbce6

chunk_completed_sentence now takes an argument (= self.commited)

Files changed (1) hide show

src/whisper_streaming/online_asr.py CHANGED Viewed

@@ -173,9 +173,11 @@ class OnlineASRProcessor:
         self.transcript_buffer.insert(tsw, self.buffer_time_offset)
         o = self.transcript_buffer.flush()
         self.commited.extend(o)
         completed = self.to_flush(o)
         logger.debug(f">>>>COMPLETE NOW: {completed[2]}")
         the_rest = self.to_flush(self.transcript_buffer.complete())
         logger.debug(f"INCOMPLETE: {the_rest[2]}")
@@ -183,11 +185,12 @@ class OnlineASRProcessor:
         if self.buffer_trimming_way == "sentence":
-            self.chunk_completed_sentence()
         if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
@@ -219,13 +222,13 @@ class OnlineASRProcessor:
-        return self.to_flush(o)
-    def chunk_completed_sentence(self):
-        if self.commited == []:
             return
-        sents = self.words_to_sentences(self.commited)
@@ -436,7 +439,7 @@ class VACOnlineASRProcessor(OnlineASRProcessor):
             ret = self.online.process_iter()
             return ret
         else:
-            print("no online update, only VAD", self.status, file=self.logfile)
             return (None, None, "")
     def finish(self):

         self.transcript_buffer.insert(tsw, self.buffer_time_offset)
         o = self.transcript_buffer.flush()
+        # Completed words
         self.commited.extend(o)
         completed = self.to_flush(o)
         logger.debug(f">>>>COMPLETE NOW: {completed[2]}")
+        ## The rest is incomplete
         the_rest = self.to_flush(self.transcript_buffer.complete())
         logger.debug(f"INCOMPLETE: {the_rest[2]}")
         if self.buffer_trimming_way == "sentence":
+            self.chunk_completed_sentence(self.commited)
+        # TODO: new words in `completed` should not be returned unless they form a sentence
+        # TODO: only complete sentences should go to completed
         if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
+        return completed
+    def chunk_completed_sentence(self, commited_text):
+        if commited_text == []:
             return
+        sents = self.words_to_sentences(commited_text)
             ret = self.online.process_iter()
             return ret
         else:
+            logger.debug("no online update, only VAD")
             return (None, None, "")
     def finish(self):