Commit
·
33d0ade
1
Parent(s):
a6bbce6
chunk_completed_sentence now takes an argument (= self.commited)
Browse files
src/whisper_streaming/online_asr.py
CHANGED
@@ -173,9 +173,11 @@ class OnlineASRProcessor:
|
|
173 |
|
174 |
self.transcript_buffer.insert(tsw, self.buffer_time_offset)
|
175 |
o = self.transcript_buffer.flush()
|
|
|
176 |
self.commited.extend(o)
|
177 |
completed = self.to_flush(o)
|
178 |
logger.debug(f">>>>COMPLETE NOW: {completed[2]}")
|
|
|
179 |
the_rest = self.to_flush(self.transcript_buffer.complete())
|
180 |
logger.debug(f"INCOMPLETE: {the_rest[2]}")
|
181 |
|
@@ -183,11 +185,12 @@ class OnlineASRProcessor:
|
|
183 |
|
184 |
if self.buffer_trimming_way == "sentence":
|
185 |
|
186 |
-
self.chunk_completed_sentence()
|
187 |
|
188 |
|
189 |
|
190 |
-
|
|
|
191 |
|
192 |
|
193 |
if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
|
@@ -219,13 +222,13 @@ class OnlineASRProcessor:
|
|
219 |
|
220 |
|
221 |
|
222 |
-
return
|
223 |
|
224 |
-
def chunk_completed_sentence(self):
|
225 |
-
if
|
226 |
return
|
227 |
|
228 |
-
sents = self.words_to_sentences(
|
229 |
|
230 |
|
231 |
|
@@ -436,7 +439,7 @@ class VACOnlineASRProcessor(OnlineASRProcessor):
|
|
436 |
ret = self.online.process_iter()
|
437 |
return ret
|
438 |
else:
|
439 |
-
|
440 |
return (None, None, "")
|
441 |
|
442 |
def finish(self):
|
|
|
173 |
|
174 |
self.transcript_buffer.insert(tsw, self.buffer_time_offset)
|
175 |
o = self.transcript_buffer.flush()
|
176 |
+
# Completed words
|
177 |
self.commited.extend(o)
|
178 |
completed = self.to_flush(o)
|
179 |
logger.debug(f">>>>COMPLETE NOW: {completed[2]}")
|
180 |
+
## The rest is incomplete
|
181 |
the_rest = self.to_flush(self.transcript_buffer.complete())
|
182 |
logger.debug(f"INCOMPLETE: {the_rest[2]}")
|
183 |
|
|
|
185 |
|
186 |
if self.buffer_trimming_way == "sentence":
|
187 |
|
188 |
+
self.chunk_completed_sentence(self.commited)
|
189 |
|
190 |
|
191 |
|
192 |
+
# TODO: new words in `completed` should not be returned unless they form a sentence
|
193 |
+
# TODO: only complete sentences should go to completed
|
194 |
|
195 |
|
196 |
if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
|
|
|
222 |
|
223 |
|
224 |
|
225 |
+
return completed
|
226 |
|
227 |
+
def chunk_completed_sentence(self, commited_text):
|
228 |
+
if commited_text == []:
|
229 |
return
|
230 |
|
231 |
+
sents = self.words_to_sentences(commited_text)
|
232 |
|
233 |
|
234 |
|
|
|
439 |
ret = self.online.process_iter()
|
440 |
return ret
|
441 |
else:
|
442 |
+
logger.debug("no online update, only VAD")
|
443 |
return (None, None, "")
|
444 |
|
445 |
def finish(self):
|