Spaces:
Configuration error
Configuration error
add more detailed instructions on inference; addresses #49 and #50
Browse files- README.md +6 -0
- gradio_app.py +2 -0
- model/utils.py +2 -0
- test_infer_single.py +2 -0
README.md
CHANGED
@@ -57,11 +57,17 @@ Once your datasets are prepared, you can start the training process.
|
|
57 |
accelerate config
|
58 |
accelerate launch test_train.py
|
59 |
```
|
|
|
60 |
|
61 |
## Inference
|
62 |
|
63 |
To run inference with pretrained models, download the checkpoints from [🤗 Hugging Face](https://huggingface.co/SWivid/F5-TTS).
|
64 |
|
|
|
|
|
|
|
|
|
|
|
65 |
### Single Inference
|
66 |
|
67 |
You can test single inference using the following command. Before running the command, modify the config to suit your needs.
|
|
|
57 |
accelerate config
|
58 |
accelerate launch test_train.py
|
59 |
```
|
60 |
+
Initial guidance on fine-tuning is provided in #57.
|
61 |
|
62 |
## Inference
|
63 |
|
64 |
To run inference with pretrained models, download the checkpoints from [🤗 Hugging Face](https://huggingface.co/SWivid/F5-TTS).
|
65 |
|
66 |
+
Currently supports up to 30 seconds of generation, which is the **TOTAL** length of the prompt audio and the generated audio. Batch inference with chunks is now supported by the Gradio app.
|
67 |
+
- To avoid inference failures, make sure you have read through the following instructions.
|
68 |
+
- Uppercase letters will be uttered letter by letter, so use lowercase letters for normal words.
|
69 |
+
- Add some spaces (blank: " ") or punctuation marks (e.g. "," ".") to explicitly introduce some pauses.
|
70 |
+
|
71 |
### Single Inference
|
72 |
|
73 |
You can test single inference using the following command. Before running the command, modify the config to suit your needs.
|
gradio_app.py
CHANGED
@@ -218,6 +218,8 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
|
|
218 |
|
219 |
for i, gen_text in enumerate(progress.tqdm(gen_text_batches)):
|
220 |
# Prepare the text
|
|
|
|
|
221 |
text_list = [ref_text + gen_text]
|
222 |
final_text_list = convert_char_to_pinyin(text_list)
|
223 |
|
|
|
218 |
|
219 |
for i, gen_text in enumerate(progress.tqdm(gen_text_batches)):
|
220 |
# Prepare the text
|
221 |
+
if len(ref_text[-1].encode('utf-8')) == 1:
|
222 |
+
ref_text = ref_text + " "
|
223 |
text_list = [ref_text + gen_text]
|
224 |
final_text_list = convert_char_to_pinyin(text_list)
|
225 |
|
model/utils.py
CHANGED
@@ -275,6 +275,8 @@ def get_inference_prompt(
|
|
275 |
ref_audio = resampler(ref_audio)
|
276 |
|
277 |
# Text
|
|
|
|
|
278 |
text = [prompt_text + gt_text]
|
279 |
if tokenizer == "pinyin":
|
280 |
text_list = convert_char_to_pinyin(text, polyphone = polyphone)
|
|
|
275 |
ref_audio = resampler(ref_audio)
|
276 |
|
277 |
# Text
|
278 |
+
if len(prompt_text[-1].encode('utf-8')) == 1:
|
279 |
+
prompt_text = prompt_text + " "
|
280 |
text = [prompt_text + gt_text]
|
281 |
if tokenizer == "pinyin":
|
282 |
text_list = convert_char_to_pinyin(text, polyphone = polyphone)
|
test_infer_single.py
CHANGED
@@ -116,6 +116,8 @@ if sr != target_sample_rate:
|
|
116 |
audio = audio.to(device)
|
117 |
|
118 |
# Text
|
|
|
|
|
119 |
text_list = [ref_text + gen_text]
|
120 |
if tokenizer == "pinyin":
|
121 |
final_text_list = convert_char_to_pinyin(text_list)
|
|
|
116 |
audio = audio.to(device)
|
117 |
|
118 |
# Text
|
119 |
+
if len(ref_text[-1].encode('utf-8')) == 1:
|
120 |
+
ref_text = ref_text + " "
|
121 |
text_list = [ref_text + gen_text]
|
122 |
if tokenizer == "pinyin":
|
123 |
final_text_list = convert_char_to_pinyin(text_list)
|