Spaces:
Running
on
Zero
Running
on
Zero
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
src/f5_tts/infer/utils_infer.py
CHANGED
|
@@ -186,17 +186,17 @@ def preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=print, device=
|
|
| 186 |
non_silent_segs = silence.split_on_silence(aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000)
|
| 187 |
non_silent_wave = AudioSegment.silent(duration=0)
|
| 188 |
for non_silent_seg in non_silent_segs:
|
| 189 |
-
if len(non_silent_wave) >
|
| 190 |
show_info("Audio is over 15s, clipping short.")
|
| 191 |
break
|
| 192 |
non_silent_wave += non_silent_seg
|
| 193 |
|
| 194 |
# 2. try to find short silence for clipping if 1. failed
|
| 195 |
if len(non_silent_wave) > 15000:
|
| 196 |
-
non_silent_segs = silence.split_on_silence(aseg, min_silence_len=
|
| 197 |
non_silent_wave = AudioSegment.silent(duration=0)
|
| 198 |
for non_silent_seg in non_silent_segs:
|
| 199 |
-
if len(non_silent_wave) >
|
| 200 |
show_info("Audio is over 15s, clipping short.")
|
| 201 |
break
|
| 202 |
non_silent_wave += non_silent_seg
|
|
|
|
| 186 |
non_silent_segs = silence.split_on_silence(aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000)
|
| 187 |
non_silent_wave = AudioSegment.silent(duration=0)
|
| 188 |
for non_silent_seg in non_silent_segs:
|
| 189 |
+
if len(non_silent_wave) > 6000 and len(non_silent_wave + non_silent_seg) > 16000:
|
| 190 |
show_info("Audio is over 15s, clipping short.")
|
| 191 |
break
|
| 192 |
non_silent_wave += non_silent_seg
|
| 193 |
|
| 194 |
# 2. try to find short silence for clipping if 1. failed
|
| 195 |
if len(non_silent_wave) > 15000:
|
| 196 |
+
non_silent_segs = silence.split_on_silence(aseg, min_silence_len=100, silence_thresh=-40, keep_silence=1000)
|
| 197 |
non_silent_wave = AudioSegment.silent(duration=0)
|
| 198 |
for non_silent_seg in non_silent_segs:
|
| 199 |
+
if len(non_silent_wave) > 6000 and len(non_silent_wave + non_silent_seg) > 16000:
|
| 200 |
show_info("Audio is over 15s, clipping short.")
|
| 201 |
break
|
| 202 |
non_silent_wave += non_silent_seg
|