Commit
·
33d0ade
1
Parent(s):
a6bbce6
chunk at sentence now takes an argument = self.commited
Browse files
src/whisper_streaming/online_asr.py
CHANGED
|
@@ -173,9 +173,11 @@ class OnlineASRProcessor:
|
|
| 173 |
|
| 174 |
self.transcript_buffer.insert(tsw, self.buffer_time_offset)
|
| 175 |
o = self.transcript_buffer.flush()
|
|
|
|
| 176 |
self.commited.extend(o)
|
| 177 |
completed = self.to_flush(o)
|
| 178 |
logger.debug(f">>>>COMPLETE NOW: {completed[2]}")
|
|
|
|
| 179 |
the_rest = self.to_flush(self.transcript_buffer.complete())
|
| 180 |
logger.debug(f"INCOMPLETE: {the_rest[2]}")
|
| 181 |
|
|
@@ -183,11 +185,12 @@ class OnlineASRProcessor:
|
|
| 183 |
|
| 184 |
if self.buffer_trimming_way == "sentence":
|
| 185 |
|
| 186 |
-
self.chunk_completed_sentence()
|
| 187 |
|
| 188 |
|
| 189 |
|
| 190 |
-
|
|
|
|
| 191 |
|
| 192 |
|
| 193 |
if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
|
|
@@ -219,13 +222,13 @@ class OnlineASRProcessor:
|
|
| 219 |
|
| 220 |
|
| 221 |
|
| 222 |
-
return
|
| 223 |
|
| 224 |
-
def chunk_completed_sentence(self):
|
| 225 |
-
if
|
| 226 |
return
|
| 227 |
|
| 228 |
-
sents = self.words_to_sentences(
|
| 229 |
|
| 230 |
|
| 231 |
|
|
@@ -436,7 +439,7 @@ class VACOnlineASRProcessor(OnlineASRProcessor):
|
|
| 436 |
ret = self.online.process_iter()
|
| 437 |
return ret
|
| 438 |
else:
|
| 439 |
-
|
| 440 |
return (None, None, "")
|
| 441 |
|
| 442 |
def finish(self):
|
|
|
|
| 173 |
|
| 174 |
self.transcript_buffer.insert(tsw, self.buffer_time_offset)
|
| 175 |
o = self.transcript_buffer.flush()
|
| 176 |
+
# Completed words
|
| 177 |
self.commited.extend(o)
|
| 178 |
completed = self.to_flush(o)
|
| 179 |
logger.debug(f">>>>COMPLETE NOW: {completed[2]}")
|
| 180 |
+
## The rest is incomplete
|
| 181 |
the_rest = self.to_flush(self.transcript_buffer.complete())
|
| 182 |
logger.debug(f"INCOMPLETE: {the_rest[2]}")
|
| 183 |
|
|
|
|
| 185 |
|
| 186 |
if self.buffer_trimming_way == "sentence":
|
| 187 |
|
| 188 |
+
self.chunk_completed_sentence(self.commited)
|
| 189 |
|
| 190 |
|
| 191 |
|
| 192 |
+
# TODO: new words in `completed` should not be returned unless they form a sentence
|
| 193 |
+
# TODO: only complete sentences should go to completed
|
| 194 |
|
| 195 |
|
| 196 |
if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
|
|
|
|
| 222 |
|
| 223 |
|
| 224 |
|
| 225 |
+
return completed
|
| 226 |
|
| 227 |
+
def chunk_completed_sentence(self, commited_text):
|
| 228 |
+
if commited_text == []:
|
| 229 |
return
|
| 230 |
|
| 231 |
+
sents = self.words_to_sentences(commited_text)
|
| 232 |
|
| 233 |
|
| 234 |
|
|
|
|
| 439 |
ret = self.online.process_iter()
|
| 440 |
return ret
|
| 441 |
else:
|
| 442 |
+
logger.debug("no online update, only VAD")
|
| 443 |
return (None, None, "")
|
| 444 |
|
| 445 |
def finish(self):
|