Commit
·
4e95a39
1
Parent(s):
c879353
Update WAV File Naming and Dependencies (#1091)
Browse files
* Update infer_cli.py
* Update pyproject.toml
* formalized
---------
Co-authored-by: SWivid <[email protected]>
- pyproject.toml +1 -0
- src/f5_tts/infer/infer_cli.py +14 -0
pyproject.toml
CHANGED
@@ -38,6 +38,7 @@ dependencies = [
|
|
38 |
"tqdm>=4.65.0",
|
39 |
"transformers",
|
40 |
"transformers_stream_generator",
|
|
|
41 |
"vocos",
|
42 |
"wandb",
|
43 |
"x_transformers>=1.31.14",
|
|
|
38 |
"tqdm>=4.65.0",
|
39 |
"transformers",
|
40 |
"transformers_stream_generator",
|
41 |
+
"unidecode",
|
42 |
"vocos",
|
43 |
"wandb",
|
44 |
"x_transformers>=1.31.14",
|
src/f5_tts/infer/infer_cli.py
CHANGED
@@ -12,6 +12,7 @@ import tomli
|
|
12 |
from cached_path import cached_path
|
13 |
from hydra.utils import get_class
|
14 |
from omegaconf import OmegaConf
|
|
|
15 |
|
16 |
from f5_tts.infer.utils_infer import (
|
17 |
cfg_strength,
|
@@ -112,6 +113,11 @@ parser.add_argument(
|
|
112 |
action="store_true",
|
113 |
help="To save each audio chunks during inference",
|
114 |
)
|
|
|
|
|
|
|
|
|
|
|
115 |
parser.add_argument(
|
116 |
"--remove_silence",
|
117 |
action="store_true",
|
@@ -197,6 +203,12 @@ output_file = args.output_file or config.get(
|
|
197 |
)
|
198 |
|
199 |
save_chunk = args.save_chunk or config.get("save_chunk", False)
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
remove_silence = args.remove_silence or config.get("remove_silence", False)
|
201 |
load_vocoder_from_local = args.load_vocoder_from_local or config.get("load_vocoder_from_local", False)
|
202 |
|
@@ -344,6 +356,8 @@ def main():
|
|
344 |
if save_chunk:
|
345 |
if len(gen_text_) > 200:
|
346 |
gen_text_ = gen_text_[:200] + " ... "
|
|
|
|
|
347 |
sf.write(
|
348 |
os.path.join(output_chunk_dir, f"{len(generated_audio_segments) - 1}_{gen_text_}.wav"),
|
349 |
audio_segment,
|
|
|
12 |
from cached_path import cached_path
|
13 |
from hydra.utils import get_class
|
14 |
from omegaconf import OmegaConf
|
15 |
+
from unidecode import unidecode
|
16 |
|
17 |
from f5_tts.infer.utils_infer import (
|
18 |
cfg_strength,
|
|
|
113 |
action="store_true",
|
114 |
help="To save each audio chunks during inference",
|
115 |
)
|
116 |
+
parser.add_argument(
|
117 |
+
"--no_legacy_text",
|
118 |
+
action="store_false",
|
119 |
+
help="Not to use lossy ASCII transliterations of unicode text in saved file names.",
|
120 |
+
)
|
121 |
parser.add_argument(
|
122 |
"--remove_silence",
|
123 |
action="store_true",
|
|
|
203 |
)
|
204 |
|
205 |
save_chunk = args.save_chunk or config.get("save_chunk", False)
|
206 |
+
use_legacy_text = args.no_legacy_text or config.get("no_legacy_text", False) # no_legacy_text is a store_false arg
|
207 |
+
if save_chunk and use_legacy_text:
|
208 |
+
print(
|
209 |
+
"\nWarning to --save_chunk: lossy ASCII transliterations of unicode text for legacy (.wav) file names, --no_legacy_text to disable.\n"
|
210 |
+
)
|
211 |
+
|
212 |
remove_silence = args.remove_silence or config.get("remove_silence", False)
|
213 |
load_vocoder_from_local = args.load_vocoder_from_local or config.get("load_vocoder_from_local", False)
|
214 |
|
|
|
356 |
if save_chunk:
|
357 |
if len(gen_text_) > 200:
|
358 |
gen_text_ = gen_text_[:200] + " ... "
|
359 |
+
if use_legacy_text:
|
360 |
+
gen_text_ = unidecode(gen_text_)
|
361 |
sf.write(
|
362 |
os.path.join(output_chunk_dir, f"{len(generated_audio_segments) - 1}_{gen_text_}.wav"),
|
363 |
audio_segment,
|