Merge branch 'main' into TIAGo-WE-COBOT
Browse files
README.md
CHANGED
@@ -42,7 +42,7 @@ The unused one does not have to be installed. We integrate the following segment
|
|
42 |
|
43 |
## Usage
|
44 |
|
45 |
-
###
|
46 |
|
47 |
```
|
48 |
usage: whisper_online.py [-h] [--min-chunk-size MIN_CHUNK_SIZE] [--model {tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,large-v1,large-v2,large}] [--model_cache_dir MODEL_CACHE_DIR] [--model_dir MODEL_DIR] [--lan LAN] [--task {transcribe,translate}]
|
@@ -126,14 +126,14 @@ from whisper_online import *
|
|
126 |
src_lan = "en" # source language
|
127 |
tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used
|
128 |
|
129 |
-
|
130 |
asr = FasterWhisperASR(src_lan, "large-v2") # loads and wraps Whisper model
|
131 |
# set options:
|
132 |
# asr.set_translate_task() # it will translate from the source language into English
|
133 |
-
# asr.use_vad() # set using VAD
|
134 |
|
|
|
135 |
|
136 |
-
online = OnlineASRProcessor(
|
137 |
|
138 |
|
139 |
while audio_has_not_ended: # processing loop:
|
@@ -149,7 +149,7 @@ print(o) # do something with the last output
|
|
149 |
online.init() # refresh if you're going to re-use the object for the next audio
|
150 |
```
|
151 |
|
152 |
-
### Server
|
153 |
|
154 |
`whisper_online_server.py` has the same model options as `whisper_online.py`, plus `--host` and `--port` of the TCP connection. See the help message (`-h` option).
|
155 |
|
|
|
42 |
|
43 |
## Usage
|
44 |
|
45 |
+
### Real-time simulation from audio file
|
46 |
|
47 |
```
|
48 |
usage: whisper_online.py [-h] [--min-chunk-size MIN_CHUNK_SIZE] [--model {tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,large-v1,large-v2,large}] [--model_cache_dir MODEL_CACHE_DIR] [--model_dir MODEL_DIR] [--lan LAN] [--task {transcribe,translate}]
|
|
|
126 |
src_lan = "en" # source language
|
127 |
tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used
|
128 |
|
|
|
129 |
asr = FasterWhisperASR(src_lan, "large-v2") # loads and wraps Whisper model
|
130 |
# set options:
|
131 |
# asr.set_translate_task() # it will translate from the source language into English
|
132 |
+
# asr.use_vad() # enable voice activity detection (VAD)
|
133 |
|
134 |
+
tokenizer = create_tokenizer(tgt_lan) # sentence segmenter for the target language
|
135 |
|
136 |
+
online = OnlineASRProcessor(asr, tokenizer) # create processing object
|
137 |
|
138 |
|
139 |
while audio_has_not_ended: # processing loop:
|
|
|
149 |
online.init() # refresh if you're going to re-use the object for the next audio
|
150 |
```
|
151 |
|
152 |
+
### Server -- real-time from mic
|
153 |
|
154 |
`whisper_online_server.py` has the same model options as `whisper_online.py`, plus `--host` and `--port` of the TCP connection. See the help message (`-h` option).
|
155 |
|