Merge branch 'tijszwinkels-online-from-factory'
- whisper_online.py  +18 -17
- whisper_online_server.py  +2 -8
whisper_online.py
CHANGED
@@ -551,7 +551,7 @@ def add_shared_args(parser):
 
 def asr_factory(args, logfile=sys.stderr):
     """
-    Creates and configures an ASR instance based on the specified backend and arguments.
+    Creates and configures an ASR and ASR Online instance based on the specified backend and arguments.
     """
     backend = args.backend
     if backend == "openai-api":
@@ -576,8 +576,23 @@ def asr_factory(args, logfile=sys.stderr):
         print("Setting VAD filter", file=logfile)
         asr.use_vad()
 
-    return asr
+    language = args.lan
+    if args.task == "translate":
+        asr.set_translate_task()
+        tgt_language = "en"  # Whisper translates into English
+    else:
+        tgt_language = language  # Whisper transcribes in this language
+
+    # Create the tokenizer
+    if args.buffer_trimming == "sentence":
+        tokenizer = create_tokenizer(tgt_language)
+    else:
+        tokenizer = None
 
+    # Create the OnlineASRProcessor
+    online = OnlineASRProcessor(asr,tokenizer,logfile=logfile,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec))
+
+    return asr, online
 
 ## main:
 
 if __name__ == "__main__":
@@ -605,22 +620,8 @@ if __name__ == "__main__":
     duration = len(load_audio(audio_path))/SAMPLING_RATE
     print("Audio duration is: %2.2f seconds" % duration, file=logfile)
 
-    asr = asr_factory(args, logfile=logfile)
-    language = args.lan
-    if args.task == "translate":
-        asr.set_translate_task()
-        tgt_language = "en"  # Whisper translates into English
-    else:
-        tgt_language = language  # Whisper transcribes in this language
-
-
+    asr, online = asr_factory(args, logfile=logfile)
     min_chunk = args.min_chunk_size
-    if args.buffer_trimming == "sentence":
-        tokenizer = create_tokenizer(tgt_language)
-    else:
-        tokenizer = None
-    online = OnlineASRProcessor(asr,tokenizer,logfile=logfile,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec))
-
 
     # load the audio into the LRU cache before we start the timer
     a = load_audio_chunk(audio_path,0,1)
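With this change, asr_factory builds both the backend ASR object and the OnlineASRProcessor, so the __main__ block here (and the server below) no longer duplicates the translate-task, tokenizer, and buffer-trimming setup. A minimal usage sketch of the new return value; it assumes the defaults registered by add_shared_args cover every option asr_factory reads (backend, model, lan, task, vad, buffer_trimming, buffer_trimming_sec), and the insert_audio_chunk()/process_iter() calls are the OnlineASRProcessor interface assumed from the rest of whisper_online.py, not something this diff shows:

import argparse
import sys

parser = argparse.ArgumentParser()
add_shared_args(parser)        # adds the shared CLI options (model, lan, task, backend, ...) with defaults
args = parser.parse_args([])   # rely on those defaults for this sketch

# One call now loads the Whisper backend and wires up the streaming processor.
asr, online = asr_factory(args, logfile=sys.stderr)

# Feed audio incrementally and read partial hypotheses
# (assumed OnlineASRProcessor methods from elsewhere in this file):
# online.insert_audio_chunk(chunk)
# beg, end, text = online.process_iter()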
whisper_online_server.py
CHANGED
@@ -25,16 +25,10 @@ SAMPLING_RATE = 16000
 
 size = args.model
 language = args.lan
-
-asr = asr_factory(args)
-if args.task == "translate":
-    asr.set_translate_task()
-    tgt_language = "en"
-else:
-    tgt_language = language
-
+asr, online = asr_factory(args)
 min_chunk = args.min_chunk_size
 
+
 if args.buffer_trimming == "sentence":
     tokenizer = create_tokenizer(tgt_language)
 else: