Update utils.py
Browse files
utils.py
CHANGED
@@ -8,10 +8,41 @@ import re
|
|
8 |
import torch
|
9 |
import numpy as np
|
10 |
import os
|
|
|
|
|
11 |
|
12 |
_ref_audio_cache = {}
|
13 |
asr_pipe = None
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def chunk_text(text, max_chars=135):
|
16 |
|
17 |
# print(text)
|
@@ -129,9 +160,7 @@ def preprocess_ref_audio_text(ref_audio_orig, ref_text, clip_short=True, show_in
|
|
129 |
|
130 |
show_info("Converting audio...")
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
os.system("sox " + ref_audio_orig + " -r 24000 -c 1 " + ref_audio_orig_converted)
|
135 |
|
136 |
ref_audio_orig = ref_audio_orig_converted
|
137 |
|
|
|
8 |
import torch
|
9 |
import numpy as np
|
10 |
import os
|
11 |
+
from scipy.io import wavfile
|
12 |
+
from scipy.signal import resample_poly
|
13 |
|
14 |
_ref_audio_cache = {}
|
15 |
asr_pipe = None
|
16 |
|
17 |
+
def resample_to_24khz(input_path: str, output_path: str):
    """
    Resample a WAV audio file to mono, 24,000 Hz, 16-bit PCM using scipy.

    Parameters:
    - input_path (str): Path to the input WAV file.
    - output_path (str): Path to save the output WAV file.

    Raises whatever scipy.io.wavfile.read / write raise for unreadable or
    unwritable paths.
    """
    target_sr = 24000

    # Load WAV file
    orig_sr, audio = wavfile.read(input_path)

    # Convert to float32 in [-1, 1] for processing. This must happen BEFORE
    # the mono downmix: averaging integer channels produces float64, after
    # which the original integer dtype (and its full-scale value) is lost
    # and np.iinfo() would raise on the float dtype.
    if audio.dtype == np.uint8:
        # 8-bit PCM is unsigned with its zero level at 128.
        audio = (audio.astype(np.float32) - 128.0) / 128.0
    elif np.issubdtype(audio.dtype, np.integer):
        audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
    else:
        # Float WAV data (float32 / float64) is used as-is, without rescaling.
        audio = audio.astype(np.float32)

    # Convert to mono if multi-channel (samples x channels)
    if audio.ndim == 2:
        audio = audio.mean(axis=1)

    # Resample
    resampled = resample_poly(audio, target_sr, orig_sr)

    # Convert back to int16 for saving. The resampling filter can overshoot
    # full scale, so clip first; an unclipped cast would wrap around and
    # produce loud clicks instead of saturating.
    resampled_int16 = (np.clip(resampled, -1.0, 1.0) * 32767).astype(np.int16)

    # Save output
    wavfile.write(output_path, target_sr, resampled_int16)
|
45 |
+
|
46 |
def chunk_text(text, max_chars=135):
|
47 |
|
48 |
# print(text)
|
|
|
160 |
|
161 |
show_info("Converting audio...")
|
162 |
|
163 |
+
resample_to_24khz(ref_audio_orig, ref_audio_orig_converted)
|
|
|
|
|
164 |
|
165 |
ref_audio_orig = ref_audio_orig_converted
|
166 |
|