Dominik Macháček committed on
Commit
878f11c
·
1 Parent(s): 483badf

create_tokenizer in documentation

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -126,14 +126,14 @@ from whisper_online import *
126
  src_lan = "en" # source language
127
  tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used
128
 
129
-
130
  asr = FasterWhisperASR(lan, "large-v2") # loads and wraps Whisper model
131
  # set options:
132
  # asr.set_translate_task() # it will translate from lan into English
133
- # asr.use_vad() # set using VAD
134
 
 
135
 
136
- online = OnlineASRProcessor(tgt_lan, asr) # create processing object
137
 
138
 
139
  while audio_has_not_ended: # processing loop:
 
126
  src_lan = "en" # source language
127
  tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used
128
 
 
129
  asr = FasterWhisperASR(lan, "large-v2") # loads and wraps Whisper model
130
  # set options:
131
  # asr.set_translate_task() # it will translate from lan into English
132
+ # asr.use_vad() # set using VAD
133
 
134
+ tokenizer = create_tokenizer(tgt_lan) # sentence segmenter for the target language
135
 
136
+ online = OnlineASRProcessor(asr, tokenizer) # create processing object
137
 
138
 
139
  while audio_has_not_ended: # processing loop: