SilasKieser committed on
Commit
33d0ade
·
1 Parent(s): a6bbce6

chunk_completed_sentence now takes an argument (= self.commited)

Browse files
Files changed (1) hide show
  1. src/whisper_streaming/online_asr.py +10 -7
src/whisper_streaming/online_asr.py CHANGED
@@ -173,9 +173,11 @@ class OnlineASRProcessor:
173
 
174
  self.transcript_buffer.insert(tsw, self.buffer_time_offset)
175
  o = self.transcript_buffer.flush()
 
176
  self.commited.extend(o)
177
  completed = self.to_flush(o)
178
  logger.debug(f">>>>COMPLETE NOW: {completed[2]}")
 
179
  the_rest = self.to_flush(self.transcript_buffer.complete())
180
  logger.debug(f"INCOMPLETE: {the_rest[2]}")
181
 
@@ -183,11 +185,12 @@ class OnlineASRProcessor:
183
 
184
  if self.buffer_trimming_way == "sentence":
185
 
186
- self.chunk_completed_sentence()
187
 
188
 
189
 
190
-
 
191
 
192
 
193
  if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
@@ -219,13 +222,13 @@ class OnlineASRProcessor:
219
 
220
 
221
 
222
- return self.to_flush(o)
223
 
224
- def chunk_completed_sentence(self):
225
- if self.commited == []:
226
  return
227
 
228
- sents = self.words_to_sentences(self.commited)
229
 
230
 
231
 
@@ -436,7 +439,7 @@ class VACOnlineASRProcessor(OnlineASRProcessor):
436
  ret = self.online.process_iter()
437
  return ret
438
  else:
439
- print("no online update, only VAD", self.status, file=self.logfile)
440
  return (None, None, "")
441
 
442
  def finish(self):
 
173
 
174
  self.transcript_buffer.insert(tsw, self.buffer_time_offset)
175
  o = self.transcript_buffer.flush()
176
+ # Completed words
177
  self.commited.extend(o)
178
  completed = self.to_flush(o)
179
  logger.debug(f">>>>COMPLETE NOW: {completed[2]}")
180
+ ## The rest is incomplete
181
  the_rest = self.to_flush(self.transcript_buffer.complete())
182
  logger.debug(f"INCOMPLETE: {the_rest[2]}")
183
 
 
185
 
186
  if self.buffer_trimming_way == "sentence":
187
 
188
+ self.chunk_completed_sentence(self.commited)
189
 
190
 
191
 
192
+ # TODO: new words in `completed` should not be returned unless they form a sentence
193
+ # TODO: only complete sentences should go to completed
194
 
195
 
196
  if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
 
222
 
223
 
224
 
225
+ return completed
226
 
227
+ def chunk_completed_sentence(self, commited_text):
228
+ if commited_text == []:
229
  return
230
 
231
+ sents = self.words_to_sentences(commited_text)
232
 
233
 
234
 
 
439
  ret = self.online.process_iter()
440
  return ret
441
  else:
442
+ logger.debug("no online update, only VAD")
443
  return (None, None, "")
444
 
445
  def finish(self):