qfuxa committed
Commit beb3190 · 2 Parent(s): bfbbded 9413b32

Merge pull request #20 from SilasK/clean-main


In my limited experience with French, "" should also be the sep for mlx-whisper

src/whisper_streaming/backends.py CHANGED
@@ -164,7 +164,7 @@ class MLXWhisper(ASRBase):
     Significantly faster than faster-whisper (without CUDA) on Apple M1.
     """
 
-    sep = " "
+    sep = ""  # In my experience in french it should also be no space.
 
     def load_model(self, modelsize=None, cache_dir=None, model_dir=None):
         """
src/whisper_streaming/online_asr.py CHANGED
@@ -166,7 +166,13 @@ class OnlineASRProcessor:
             if (
                 len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec
             ):  # longer than this
+
+                logger.debug("chunking sentence")
                 self.chunk_completed_sentence()
+
+
+            else:
+                logger.debug("not enough audio to trim as a sentence")
 
         if self.buffer_trimming_way == "segment":
             s = self.buffer_trimming_sec  # trim the completed segments longer than s,
@@ -194,7 +200,10 @@ class OnlineASRProcessor:
     def chunk_completed_sentence(self):
         if self.commited == []:
             return
-        logger.debug("COMPLETED SENTENCE: ", [s[2] for s in self.commited])
+
+        import pdb; pdb.set_trace()
+        raw_text = self.asr.sep.join([s[2] for s in self.commited])
+        logger.debug(f"COMPLETED SENTENCE: {raw_text}")
         sents = self.words_to_sentences(self.commited)
         for s in sents:
             logger.debug(f"\t\tSENT: {s}")
@@ -243,7 +252,7 @@ class OnlineASRProcessor:
         """
 
         cwords = [w for w in words]
-        t = " ".join(o[2] for o in cwords)
+        t = self.asr.sep.join(o[2] for o in cwords)
        s = self.tokenize(t)
         out = []
         while s:
@@ -269,7 +278,7 @@ class OnlineASRProcessor:
         """
         o = self.transcript_buffer.complete()
        f = self.to_flush(o)
-        logger.debug(f"last, noncommited: {f}")
+        logger.debug(f"last, noncommited: {f[0]*1000:.0f}-{f[1]*1000:.0f}: {f[2]}")
         self.buffer_time_offset += len(self.audio_buffer) / 16000
         return f
 
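
Both online_asr.py edits replace a hard-coded " ".join with the backend's own separator, so sentence chunking and tokenization rebuild text exactly the way the backend emits it. A rough, self-contained sketch of that idea (FakeASR and words_to_text are hypothetical stand-ins; only the sep attribute mirrors the real ASRBase):

    class FakeASR:
        # Hypothetical stand-in backend; only sep mirrors the real ASRBase attribute.
        sep = ""  # mlx-whisper style: word strings already include their own spacing

    def words_to_text(asr, commited):
        # commited holds (begin, end, text) tuples, as in OnlineASRProcessor.commited
        return asr.sep.join(w[2] for w in commited)

    words = [(0.0, 0.5, " In"), (0.5, 0.9, " my"), (0.9, 1.4, " experience")]
    print(words_to_text(FakeASR(), words))  # -> " In my experience"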