Dominik Macháček committed on
Commit
f32eeef
·
2 Parent(s): ce215e6 bb93952

Merge branch 'tijszwinkels-online-from-factory'

Browse files
Files changed (2) hide show
  1. whisper_online.py +18 -17
  2. whisper_online_server.py +2 -8
whisper_online.py CHANGED
@@ -551,7 +551,7 @@ def add_shared_args(parser):
551
 
552
  def asr_factory(args, logfile=sys.stderr):
553
  """
554
- Creates and configures an ASR instance based on the specified backend and arguments.
555
  """
556
  backend = args.backend
557
  if backend == "openai-api":
@@ -576,8 +576,23 @@ def asr_factory(args, logfile=sys.stderr):
576
  print("Setting VAD filter", file=logfile)
577
  asr.use_vad()
578
 
579
- return asr
 
 
 
 
 
 
 
 
 
 
 
580
 
 
 
 
 
581
  ## main:
582
 
583
  if __name__ == "__main__":
@@ -605,22 +620,8 @@ if __name__ == "__main__":
605
  duration = len(load_audio(audio_path))/SAMPLING_RATE
606
  print("Audio duration is: %2.2f seconds" % duration, file=logfile)
607
 
608
- asr = asr_factory(args, logfile=logfile)
609
- language = args.lan
610
- if args.task == "translate":
611
- asr.set_translate_task()
612
- tgt_language = "en" # Whisper translates into English
613
- else:
614
- tgt_language = language # Whisper transcribes in this language
615
-
616
-
617
  min_chunk = args.min_chunk_size
618
- if args.buffer_trimming == "sentence":
619
- tokenizer = create_tokenizer(tgt_language)
620
- else:
621
- tokenizer = None
622
- online = OnlineASRProcessor(asr,tokenizer,logfile=logfile,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec))
623
-
624
 
625
  # load the audio into the LRU cache before we start the timer
626
  a = load_audio_chunk(audio_path,0,1)
 
551
 
552
  def asr_factory(args, logfile=sys.stderr):
553
  """
554
+ Creates and configures an ASR and ASR Online instance based on the specified backend and arguments.
555
  """
556
  backend = args.backend
557
  if backend == "openai-api":
 
576
  print("Setting VAD filter", file=logfile)
577
  asr.use_vad()
578
 
579
+ language = args.lan
580
+ if args.task == "translate":
581
+ asr.set_translate_task()
582
+ tgt_language = "en" # Whisper translates into English
583
+ else:
584
+ tgt_language = language # Whisper transcribes in this language
585
+
586
+ # Create the tokenizer
587
+ if args.buffer_trimming == "sentence":
588
+ tokenizer = create_tokenizer(tgt_language)
589
+ else:
590
+ tokenizer = None
591
 
592
+ # Create the OnlineASRProcessor
593
+ online = OnlineASRProcessor(asr,tokenizer,logfile=logfile,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec))
594
+
595
+ return asr, online
596
  ## main:
597
 
598
  if __name__ == "__main__":
 
620
  duration = len(load_audio(audio_path))/SAMPLING_RATE
621
  print("Audio duration is: %2.2f seconds" % duration, file=logfile)
622
 
623
+ asr, online = asr_factory(args, logfile=logfile)
 
 
 
 
 
 
 
 
624
  min_chunk = args.min_chunk_size
 
 
 
 
 
 
625
 
626
  # load the audio into the LRU cache before we start the timer
627
  a = load_audio_chunk(audio_path,0,1)
whisper_online_server.py CHANGED
@@ -25,16 +25,10 @@ SAMPLING_RATE = 16000
25
 
26
  size = args.model
27
  language = args.lan
28
-
29
- asr = asr_factory(args)
30
- if args.task == "translate":
31
- asr.set_translate_task()
32
- tgt_language = "en"
33
- else:
34
- tgt_language = language
35
-
36
  min_chunk = args.min_chunk_size
37
 
 
38
  if args.buffer_trimming == "sentence":
39
  tokenizer = create_tokenizer(tgt_language)
40
  else:
 
25
 
26
  size = args.model
27
  language = args.lan
28
+ asr, online = asr_factory(args)
 
 
 
 
 
 
 
29
  min_chunk = args.min_chunk_size
30
 
31
+
32
  if args.buffer_trimming == "sentence":
33
  tokenizer = create_tokenizer(tgt_language)
34
  else: