SilasKieser committed on
Commit
c5b9103
·
1 Parent(s): 944f370

Buffer length in sentence segmentation is now also capped at a maximum (buffer_trimming_sec), as in segment mode.

Browse files
Files changed (1) hide show
  1. src/whisper_streaming/online_asr.py +37 -23
src/whisper_streaming/online_asr.py CHANGED
@@ -110,6 +110,15 @@ class OnlineASRProcessor:
110
 
111
  self.buffer_trimming_way, self.buffer_trimming_sec = buffer_trimming
112
 
 
 
 
 
 
 
 
 
 
113
  def init(self, offset=None):
114
  """run this when starting or restarting processing"""
115
  self.audio_buffer = np.array([], dtype=np.float32)
@@ -171,35 +180,40 @@ class OnlineASRProcessor:
171
 
172
  # there is a newly confirmed text
173
 
174
- if o and self.buffer_trimming_way == "sentence": # trim the completed sentences
175
- if (
176
- len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec
177
- ): # longer than this
178
-
179
- logger.debug("chunking sentence")
180
- self.chunk_completed_sentence()
181
 
182
 
183
- else:
184
- logger.debug("not enough audio to trim as a sentence")
185
 
186
- if self.buffer_trimming_way == "segment":
187
- s = self.buffer_trimming_sec # trim the completed segments longer than s,
188
- else:
189
- s = 30 # if the audio buffer is longer than 30s, trim it
190
 
191
- if len(self.audio_buffer) / self.SAMPLING_RATE > s:
 
 
 
 
 
 
 
192
  self.chunk_completed_segment(res)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
- # alternative: on any word
195
- # l = self.buffer_time_offset + len(self.audio_buffer)/self.SAMPLING_RATE - 10
196
- # let's find commited word that is less
197
- # k = len(self.commited)-1
198
- # while k>0 and self.commited[k][1] > l:
199
- # k -= 1
200
- # t = self.commited[k][1]
201
- logger.debug("chunking segment")
202
- # self.chunk_at(t)
203
 
204
  logger.debug(
205
  f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}"
 
110
 
111
  self.buffer_trimming_way, self.buffer_trimming_sec = buffer_trimming
112
 
113
+ if self.buffer_trimming_way not in ["sentence", "segment"]:
114
+ raise ValueError("buffer_trimming must be either 'sentence' or 'segment'")
115
+ if self.buffer_trimming_sec <= 0:
116
+ raise ValueError("buffer_trimming_sec must be positive")
117
+ elif self.buffer_trimming_sec > 30:
118
+ logger.warning(
119
+ f"buffer_trimming_sec is set to {self.buffer_trimming_sec}, which is very long. It may cause OOM."
120
+ )
121
+
122
  def init(self, offset=None):
123
  """run this when starting or restarting processing"""
124
  self.audio_buffer = np.array([], dtype=np.float32)
 
180
 
181
  # there is a newly confirmed text
182
 
183
+ if self.buffer_trimming_way == "sentence":
184
+
185
+ self.chunk_completed_sentence()
 
 
 
 
186
 
187
 
 
 
188
 
189
+
 
 
 
190
 
191
+ if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec :
192
+
193
+ if self.buffer_trimming_way == "sentence":
194
+ logger.warning(f"Chunck segment after {self.buffer_trimming_sec} seconds!"
195
+ " Even if no sentence was found!"
196
+ )
197
+
198
+
199
  self.chunk_completed_segment(res)
200
+
201
+
202
+ # alternative: on any word
203
+ # l = self.buffer_time_offset + len(self.audio_buffer)/self.SAMPLING_RATE - 10
204
+ # let's find commited word that is less
205
+ # k = len(self.commited)-1
206
+ # while k>0 and self.commited[k][1] > l:
207
+ # k -= 1
208
+ # t = self.commited[k][1]
209
+ # self.chunk_at(t)
210
+
211
+
212
+
213
+
214
+
215
+
216
 
 
 
 
 
 
 
 
 
 
217
 
218
  logger.debug(
219
  f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}"