Commit
·
c5b9103
1
Parent(s):
944f370
Buffer length in sentence segmentation is now also capped at a maximum, as in segment mode.
Browse files
src/whisper_streaming/online_asr.py
CHANGED
|
@@ -110,6 +110,15 @@ class OnlineASRProcessor:
|
|
| 110 |
|
| 111 |
self.buffer_trimming_way, self.buffer_trimming_sec = buffer_trimming
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
def init(self, offset=None):
|
| 114 |
"""run this when starting or restarting processing"""
|
| 115 |
self.audio_buffer = np.array([], dtype=np.float32)
|
|
@@ -171,35 +180,40 @@ class OnlineASRProcessor:
|
|
| 171 |
|
| 172 |
# there is a newly confirmed text
|
| 173 |
|
| 174 |
-
if
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
): # longer than this
|
| 178 |
-
|
| 179 |
-
logger.debug("chunking sentence")
|
| 180 |
-
self.chunk_completed_sentence()
|
| 181 |
|
| 182 |
|
| 183 |
-
else:
|
| 184 |
-
logger.debug("not enough audio to trim as a sentence")
|
| 185 |
|
| 186 |
-
|
| 187 |
-
s = self.buffer_trimming_sec # trim the completed segments longer than s,
|
| 188 |
-
else:
|
| 189 |
-
s = 30 # if the audio buffer is longer than 30s, trim it
|
| 190 |
|
| 191 |
-
if len(self.audio_buffer) / self.SAMPLING_RATE >
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
self.chunk_completed_segment(res)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
-
# alternative: on any word
|
| 195 |
-
# l = self.buffer_time_offset + len(self.audio_buffer)/self.SAMPLING_RATE - 10
|
| 196 |
-
# let's find commited word that is less
|
| 197 |
-
# k = len(self.commited)-1
|
| 198 |
-
# while k>0 and self.commited[k][1] > l:
|
| 199 |
-
# k -= 1
|
| 200 |
-
# t = self.commited[k][1]
|
| 201 |
-
logger.debug("chunking segment")
|
| 202 |
-
# self.chunk_at(t)
|
| 203 |
|
| 204 |
logger.debug(
|
| 205 |
f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}"
|
|
|
|
| 110 |
|
| 111 |
self.buffer_trimming_way, self.buffer_trimming_sec = buffer_trimming
|
| 112 |
|
| 113 |
+
if self.buffer_trimming_way not in ["sentence", "segment"]:
|
| 114 |
+
raise ValueError("buffer_trimming must be either 'sentence' or 'segment'")
|
| 115 |
+
if self.buffer_trimming_sec <= 0:
|
| 116 |
+
raise ValueError("buffer_trimming_sec must be positive")
|
| 117 |
+
elif self.buffer_trimming_sec > 30:
|
| 118 |
+
logger.warning(
|
| 119 |
+
f"buffer_trimming_sec is set to {self.buffer_trimming_sec}, which is very long. It may cause OOM."
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
def init(self, offset=None):
|
| 123 |
"""run this when starting or restarting processing"""
|
| 124 |
self.audio_buffer = np.array([], dtype=np.float32)
|
|
|
|
| 180 |
|
| 181 |
# there is a newly confirmed text
|
| 182 |
|
| 183 |
+
if self.buffer_trimming_way == "sentence":
|
| 184 |
+
|
| 185 |
+
self.chunk_completed_sentence()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
|
|
|
|
|
|
|
| 188 |
|
| 189 |
+
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
+
if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
|
| 192 |
+
|
| 193 |
+
if self.buffer_trimming_way == "sentence":
|
| 194 |
+
logger.warning(f"Chunck segment after {self.buffer_trimming_sec} seconds!"
|
| 195 |
+
" Even if no sentence was found!"
|
| 196 |
+
)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
self.chunk_completed_segment(res)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
# alternative: on any word
|
| 203 |
+
# l = self.buffer_time_offset + len(self.audio_buffer)/self.SAMPLING_RATE - 10
|
| 204 |
+
# let's find commited word that is less
|
| 205 |
+
# k = len(self.commited)-1
|
| 206 |
+
# while k>0 and self.commited[k][1] > l:
|
| 207 |
+
# k -= 1
|
| 208 |
+
# t = self.commited[k][1]
|
| 209 |
+
# self.chunk_at(t)
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
logger.debug(
|
| 219 |
f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}"
|