Merge pull request #20 from SilasK/clean-main
Browse files
In my limited experience with French, "" should also be the sep for mlx-whisper
src/whisper_streaming/backends.py
CHANGED
@@ -164,7 +164,7 @@ class MLXWhisper(ASRBase):
|
|
164 |
Significantly faster than faster-whisper (without CUDA) on Apple M1.
|
165 |
"""
|
166 |
|
167 |
-
sep = " "
|
168 |
|
169 |
def load_model(self, modelsize=None, cache_dir=None, model_dir=None):
|
170 |
"""
|
|
|
164 |
Significantly faster than faster-whisper (without CUDA) on Apple M1.
|
165 |
"""
|
166 |
|
167 |
+
sep = "" # In my experience in french it should also be no space.
|
168 |
|
169 |
def load_model(self, modelsize=None, cache_dir=None, model_dir=None):
|
170 |
"""
|
src/whisper_streaming/online_asr.py
CHANGED
@@ -166,7 +166,13 @@ class OnlineASRProcessor:
|
|
166 |
if (
|
167 |
len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec
|
168 |
): # longer than this
|
|
|
|
|
169 |
self.chunk_completed_sentence()
|
|
|
|
|
|
|
|
|
170 |
|
171 |
if self.buffer_trimming_way == "segment":
|
172 |
s = self.buffer_trimming_sec # trim the completed segments longer than s,
|
@@ -194,7 +200,10 @@ class OnlineASRProcessor:
|
|
194 |
def chunk_completed_sentence(self):
|
195 |
if self.commited == []:
|
196 |
return
|
197 |
-
|
|
|
|
|
|
|
198 |
sents = self.words_to_sentences(self.commited)
|
199 |
for s in sents:
|
200 |
logger.debug(f"\t\tSENT: {s}")
|
@@ -243,7 +252,7 @@ class OnlineASRProcessor:
|
|
243 |
"""
|
244 |
|
245 |
cwords = [w for w in words]
|
246 |
-
t = " ".join(o[2] for o in cwords)
|
247 |
s = self.tokenize(t)
|
248 |
out = []
|
249 |
while s:
|
@@ -269,7 +278,7 @@ class OnlineASRProcessor:
|
|
269 |
"""
|
270 |
o = self.transcript_buffer.complete()
|
271 |
f = self.to_flush(o)
|
272 |
-
logger.debug(f"last, noncommited: {f}")
|
273 |
self.buffer_time_offset += len(self.audio_buffer) / 16000
|
274 |
return f
|
275 |
|
|
|
166 |
if (
|
167 |
len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec
|
168 |
): # longer than this
|
169 |
+
|
170 |
+
logger.debug("chunking sentence")
|
171 |
self.chunk_completed_sentence()
|
172 |
+
|
173 |
+
|
174 |
+
else:
|
175 |
+
logger.debug("not enough audio to trim as a sentence")
|
176 |
|
177 |
if self.buffer_trimming_way == "segment":
|
178 |
s = self.buffer_trimming_sec # trim the completed segments longer than s,
|
|
|
200 |
def chunk_completed_sentence(self):
|
201 |
if self.commited == []:
|
202 |
return
|
203 |
+
|
204 |
+
import pdb; pdb.set_trace()
|
205 |
+
raw_text = self.asr.sep.join([s[2] for s in self.commited])
|
206 |
+
logger.debug(f"COMPLETED SENTENCE: {raw_text}")
|
207 |
sents = self.words_to_sentences(self.commited)
|
208 |
for s in sents:
|
209 |
logger.debug(f"\t\tSENT: {s}")
|
|
|
252 |
"""
|
253 |
|
254 |
cwords = [w for w in words]
|
255 |
+
t = self.asr.sep.join(o[2] for o in cwords)
|
256 |
s = self.tokenize(t)
|
257 |
out = []
|
258 |
while s:
|
|
|
278 |
"""
|
279 |
o = self.transcript_buffer.complete()
|
280 |
f = self.to_flush(o)
|
281 |
+
logger.debug(f"last, noncommited: {f[0]*1000:.0f}-{f[1]*1000:.0f}: {f[2]}")
|
282 |
self.buffer_time_offset += len(self.audio_buffer) / 16000
|
283 |
return f
|
284 |
|