regularfry committed
Commit 380c30d · Parent(s): 5ebbed3

Further tidying of print output, so by default there's little on the console

Files changed (2):
  1. whisper_online.py +38 -34
  2. whisper_online_server.py +4 -6
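With the per-call `file=self.logfile` prints replaced by module-level `logging` calls at DEBUG/INFO level, console verbosity becomes a configuration decision rather than a code edit. A minimal sketch of how a caller of these modules might set that up (the handler setup here is illustrative, not part of this commit):

    import logging

    # Default: only WARNING and above reach the console, so the
    # chatty DEBUG trace from whisper_online stays hidden.
    logging.basicConfig(level=logging.WARNING)

    # For debugging, lower the root level instead:
    #   logging.basicConfig(level=logging.DEBUG)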
whisper_online.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
 import librosa
 from functools import lru_cache
 import time
+import logging



@@ -57,7 +58,7 @@ class WhisperTimestampedASR(ASRBase):
         from whisper_timestamped import transcribe_timestamped
         self.transcribe_timestamped = transcribe_timestamped
         if model_dir is not None:
-            print("ignoring model_dir, not implemented",file=self.logfile)
+            logging.debug("ignoring model_dir, not implemented")
         return whisper.load_model(modelsize, download_root=cache_dir)

     def transcribe(self, audio, init_prompt=""):
@@ -97,7 +98,7 @@ class FasterWhisperASR(ASRBase):
     def load_model(self, modelsize=None, cache_dir=None, model_dir=None):
         from faster_whisper import WhisperModel
         if model_dir is not None:
-            print(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.",file=self.logfile)
+            logging.debug(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.")
             model_size_or_path = model_dir
         elif modelsize is not None:
             model_size_or_path = modelsize
@@ -173,9 +174,11 @@ class HypothesisBuffer:
                         c = " ".join([self.commited_in_buffer[-j][2] for j in range(1,i+1)][::-1])
                         tail = " ".join(self.new[j-1][2] for j in range(1,i+1))
                         if c == tail:
-                            print("removing last",i,"words:",file=self.logfile)
+                            words = []
                             for j in range(i):
-                                print("\t",self.new.pop(0),file=self.logfile)
+                                words.append(repr(self.new.pop(0)))
+                            words_msg = "\t".join(words)
+                            logging.debug(f"removing last {i} words: {words_msg}")
                             break

     def flush(self):
@@ -267,9 +270,9 @@ class OnlineASRProcessor:
         """

         prompt, non_prompt = self.prompt()
-        print("PROMPT:", prompt, file=self.logfile)
-        print("CONTEXT:", non_prompt, file=self.logfile)
-        print(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}",file=self.logfile)
+        logging.debug(f"PROMPT: {prompt}")
+        logging.debug(f"CONTEXT: {non_prompt}")
+        logging.debug(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}")
         res = self.asr.transcribe(self.audio_buffer, init_prompt=prompt)

         # transform to [(beg,end,"word1"), ...]
@@ -278,8 +281,10 @@ class OnlineASRProcessor:
         self.transcript_buffer.insert(tsw, self.buffer_time_offset)
         o = self.transcript_buffer.flush()
         self.commited.extend(o)
-        print(">>>>COMPLETE NOW:",self.to_flush(o),file=self.logfile,flush=True)
-        print("INCOMPLETE:",self.to_flush(self.transcript_buffer.complete()),file=self.logfile,flush=True)
+        completed = self.to_flush(o)
+        logging.debug(f">>>>COMPLETE NOW: {completed}")
+        the_rest = self.to_flush(self.transcript_buffer.complete())
+        logging.debug(f"INCOMPLETE: {the_rest}")

         # there is a newly confirmed text

@@ -303,18 +308,18 @@ class OnlineASRProcessor:
             #while k>0 and self.commited[k][1] > l:
             # k -= 1
             #t = self.commited[k][1]
-            print(f"chunking segment",file=self.logfile)
+            logging.debug(f"chunking segment")
             #self.chunk_at(t)

-        print(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}",file=self.logfile)
+        logging.debug(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}")
         return self.to_flush(o)

     def chunk_completed_sentence(self):
         if self.commited == []: return
-        print(self.commited,file=self.logfile)
+        logging.debug(self.commited)
         sents = self.words_to_sentences(self.commited)
         for s in sents:
-            print("\t\tSENT:",s,file=self.logfile)
+            logging.debug(f"\t\tSENT: {s}")
         if len(sents) < 2:
             return
         while len(sents) > 2:
@@ -322,7 +327,7 @@ class OnlineASRProcessor:
         # we will continue with audio processing at this timestamp
         chunk_at = sents[-2][1]

-        print(f"--- sentence chunked at {chunk_at:2.2f}",file=self.logfile)
+        logging.debug(f"--- sentence chunked at {chunk_at:2.2f}")
         self.chunk_at(chunk_at)

     def chunk_completed_segment(self, res):
@@ -339,12 +344,12 @@ class OnlineASRProcessor:
             ends.pop(-1)
             e = ends[-2]+self.buffer_time_offset
             if e <= t:
-                print(f"--- segment chunked at {e:2.2f}",file=self.logfile)
+                logging.debug(f"--- segment chunked at {e:2.2f}")
                 self.chunk_at(e)
             else:
-                print(f"--- last segment not within commited area",file=self.logfile)
+                logging.debug(f"--- last segment not within commited area")
         else:
-            print(f"--- not enough segments to chunk",file=self.logfile)
+            logging.debug(f"--- not enough segments to chunk")



@@ -391,7 +396,7 @@ class OnlineASRProcessor:
         """
         o = self.transcript_buffer.complete()
         f = self.to_flush(o)
-        print("last, noncommited:",f,file=self.logfile)
+        logging.debug(f"last, noncommited: {f}")
         return f


@@ -431,7 +436,7 @@ def create_tokenizer(lan):

     # the following languages are in Whisper, but not in wtpsplit:
     if lan in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split():
-        print(f"{lan} code is not supported by wtpsplit. Going to use None lang_code option.", file=sys.stderr)
+        logging.debug(f"{lan} code is not supported by wtpsplit. Going to use None lang_code option.")
         lan = None

     from wtpsplit import WtP
@@ -476,20 +481,20 @@ if __name__ == "__main__":
     logfile = sys.stderr

     if args.offline and args.comp_unaware:
-        print("No or one option from --offline and --comp_unaware are available, not both. Exiting.",file=logfile)
+        logging.error("No or one option from --offline and --comp_unaware are available, not both. Exiting.")
         sys.exit(1)

     audio_path = args.audio_path

     SAMPLING_RATE = 16000
     duration = len(load_audio(audio_path))/SAMPLING_RATE
-    print("Audio duration is: %2.2f seconds" % duration, file=logfile)
+    logging.info("Audio duration is: %2.2f seconds" % duration)

     size = args.model
     language = args.lan

     t = time.time()
-    print(f"Loading Whisper {size} model for {language}...",file=logfile,end=" ",flush=True)
+    logging.info(f"Loading Whisper {size} model for {language}...")

     if args.backend == "faster-whisper":
         asr_cls = FasterWhisperASR
@@ -506,10 +511,10 @@ if __name__ == "__main__":


     e = time.time()
-    print(f"done. It took {round(e-t,2)} seconds.",file=logfile)
+    logging.info(f"done. It took {round(e-t,2)} seconds.")

     if args.vad:
-        print("setting VAD filter",file=logfile)
+        logging.info("setting VAD filter")
         asr.use_vad()


@@ -543,16 +548,15 @@ if __name__ == "__main__":
             print("%1.4f %1.0f %1.0f %s" % (now*1000, o[0]*1000,o[1]*1000,o[2]),file=logfile,flush=True)
             print("%1.4f %1.0f %1.0f %s" % (now*1000, o[0]*1000,o[1]*1000,o[2]),flush=True)
         else:
-            print(o,file=logfile,flush=True)
+            print("here?", o,file=logfile,flush=True)

     if args.offline: ## offline mode processing (for testing/debugging)
         a = load_audio(audio_path)
         online.insert_audio_chunk(a)
         try:
             o = online.process_iter()
-        except AssertionError:
-            print("assertion error",file=logfile)
-            pass
+        except AssertionError as e:
+            logging.error(f"assertion error: {repr(e)}")
         else:
             output_transcript(o)
         now = None
@@ -563,13 +567,13 @@ if __name__ == "__main__":
             online.insert_audio_chunk(a)
             try:
                 o = online.process_iter()
-            except AssertionError:
-                print("assertion error",file=logfile)
+            except AssertionError as e:
+                logging.error(f"assertion error: {repr(e)}")
                 pass
             else:
                 output_transcript(o, now=end)

-            print(f"## last processed {end:.2f}s",file=logfile,flush=True)
+            logging.debug(f"## last processed {end:.2f}s")

             if end >= duration:
                 break
@@ -595,13 +599,13 @@ if __name__ == "__main__":

         try:
             o = online.process_iter()
-        except AssertionError:
-            print("assertion error",file=logfile)
+        except AssertionError as e:
+            logging.error(f"assertion error: {e}")
             pass
         else:
             output_transcript(o)
         now = time.time() - start
-        print(f"## last processed {end:.2f} s, now is {now:.2f}, the latency is {now-end:.2f}",file=logfile,flush=True)
+        logging.debug(f"## last processed {end:.2f} s, now is {now:.2f}, the latency is {now-end:.2f}")

         if end >= duration:
             break
 
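One note on the pattern above: the new `logging.debug(f"...")` calls build their strings eagerly even when DEBUG records are filtered out. The stdlib also supports deferred %-style formatting, where the message is only rendered if the record passes the level filter; a self-contained sketch (the variables are stand-ins for the buffer state used above, not part of this commit):

    import logging

    SAMPLING_RATE = 16000                 # same constant as in whisper_online.py
    audio_buffer = [0.0] * SAMPLING_RATE  # placeholder one-second buffer
    buffer_time_offset = 0.0

    # Arguments are formatted only if a handler accepts DEBUG records.
    logging.debug("transcribing %2.2f seconds from %2.2f",
                  len(audio_buffer) / SAMPLING_RATE, buffer_time_offset)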
whisper_online_server.py CHANGED
@@ -39,6 +39,7 @@ logging.debug(f"Loading Whisper {size} model for {language}...")
 if args.backend == "faster-whisper":
     from faster_whisper import WhisperModel
     asr_cls = FasterWhisperASR
+    logging.getLogger("faster_whisper").setLevel(logging.WARNING)
 else:
     import whisper
     import whisper_timestamped
@@ -80,7 +81,7 @@ if os.path.exists(demo_audio_path):
     # warm up the ASR, because the very first transcribe takes much more time than the other
     asr.transcribe(a)
 else:
-    logging.info("Whisper is not warmed up")
+    logging.debug("Whisper is not warmed up")


 ######### Server objects
@@ -135,8 +136,6 @@ class ServerProcessor:
         out = []
         while sum(len(x) for x in out) < self.min_chunk*SAMPLING_RATE:
             raw_bytes = self.connection.non_blocking_receive_audio()
-            print(raw_bytes[:10])
-            print(len(raw_bytes))
             if not raw_bytes:
                 break
             sf = soundfile.SoundFile(io.BytesIO(raw_bytes), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW")
@@ -167,7 +166,7 @@ class ServerProcessor:
             print("%1.0f %1.0f %s" % (beg,end,o[2]),flush=True,file=sys.stderr)
             return "%1.0f %1.0f %s" % (beg,end,o[2])
         else:
-            print(o,file=sys.stderr,flush=True)
+            # No text, so no output
             return None

     def send_result(self, o):
@@ -181,14 +180,13 @@ class ServerProcessor:
         while True:
             a = self.receive_audio_chunk()
             if a is None:
-                print("break here",file=sys.stderr)
                 break
             self.online_asr_proc.insert_audio_chunk(a)
             o = online.process_iter()
             try:
                 self.send_result(o)
             except BrokenPipeError:
-                print("broken pipe -- connection closed?",file=sys.stderr)
+                logging.info("broken pipe -- connection closed?")
                 break

     # o = online.finish() # this should be working
 
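The `logging.getLogger("faster_whisper").setLevel(logging.WARNING)` line above uses the standard per-logger override: a third-party library that logs through its own named logger can be quieted independently of the application's root level. A small illustrative sketch (only "faster_whisper" is taken from this commit; the rest is assumed setup):

    import logging

    # Verbose root logger for the application's own diagnostics...
    logging.basicConfig(level=logging.DEBUG)

    # ...while the noisy dependency is pinned to warnings only.
    logging.getLogger("faster_whisper").setLevel(logging.WARNING)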