hynt committed on
Commit
0ef75b8
·
verified ·
1 Parent(s): e11fe04

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +32 -3
utils.py CHANGED
@@ -8,10 +8,41 @@ import re
8
  import torch
9
  import numpy as np
10
  import os
 
 
11
 
12
  _ref_audio_cache = {}
13
  asr_pipe = None
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def chunk_text(text, max_chars=135):
16
 
17
  # print(text)
@@ -129,9 +160,7 @@ def preprocess_ref_audio_text(ref_audio_orig, ref_text, clip_short=True, show_in
129
 
130
  show_info("Converting audio...")
131
 
132
- ref_audio_orig_converted = ref_audio_orig.replace(".wav", "_24k.wav").replace(".mp3", "_24k.mp3").replace(".m4a", "_24k.m4a").replace(".flac", "_24k.flac")
133
-
134
- os.system("sox " + ref_audio_orig + " -r 24000 -c 1 " + ref_audio_orig_converted)
135
 
136
  ref_audio_orig = ref_audio_orig_converted
137
 
 
8
  import torch
9
  import numpy as np
10
  import os
11
+ from scipy.io import wavfile
12
+ from scipy.signal import resample_poly
13
 
14
  _ref_audio_cache = {}
15
  asr_pipe = None
16
 
17
def resample_to_24khz(input_path: str, output_path: str) -> None:
    """
    Resample a WAV audio file to 24,000 Hz mono, 16-bit PCM, using scipy.

    Parameters:
    - input_path (str): Path to the input WAV file.
    - output_path (str): Path to save the output WAV file.

    Raises:
    - Whatever ``scipy.io.wavfile.read`` raises for a missing or non-WAV
      input (this function does not decode mp3/m4a/flac).
    """
    target_sr = 24000

    # Load WAV file
    orig_sr, audio = wavfile.read(input_path)

    # Normalise to float32 in [-1, 1] based on the *stored* sample dtype.
    # This has to happen before the down-mix: mean() promotes integer data
    # to float64, and np.iinfo() rejects float dtypes.
    if np.issubdtype(audio.dtype, np.integer):
        if audio.dtype == np.uint8:
            # 8-bit PCM is unsigned with its zero level at 128.
            audio = (audio.astype(np.float32) - 128.0) / 128.0
        else:
            audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
    else:
        # Float WAVs (32- or 64-bit) are already on a [-1, 1] scale.
        audio = audio.astype(np.float32)

    # Convert to mono if multi-channel (samples are rows, channels columns)
    if audio.ndim == 2:
        audio = audio.mean(axis=1)

    # Resample; resample_poly reduces the up/down ratio by their GCD itself
    resampled = resample_poly(audio, target_sr, orig_sr)

    # The anti-aliasing filter can overshoot slightly beyond [-1, 1];
    # clip so the int16 cast saturates instead of wrapping around.
    resampled = np.clip(resampled, -1.0, 1.0)

    # Convert back to int16 for saving
    resampled_int16 = (resampled * 32767).astype(np.int16)

    # Save output
    wavfile.write(output_path, target_sr, resampled_int16)
45
+
46
  def chunk_text(text, max_chars=135):
47
 
48
  # print(text)
 
160
 
161
  show_info("Converting audio...")
162
 
163
+ resample_to_24khz(ref_audio_orig, ref_audio_orig_converted)
 
 
164
 
165
  ref_audio_orig = ref_audio_orig_converted
166