Update app.py
Browse files
attempt to fix voice distortion when stretch is applied
app.py
CHANGED
@@ -102,7 +102,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
102 |
if target_duration_ms is not None and os.path.exists(audio_path):
|
103 |
audio = AudioSegment.from_mp3(audio_path)
|
104 |
audio_duration_ms = len(audio)
|
105 |
-
|
106 |
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
107 |
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
108 |
#print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
|
@@ -111,7 +111,24 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
111 |
speed_factor = 1.0
|
112 |
y, sr = librosa.load(audio_path, sr=None)
|
113 |
y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
sf.write(audio_path, y_stretched, sr)
|
|
|
115 |
else:
|
116 |
print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
|
117 |
return audio_path
|
|
|
102 |
if target_duration_ms is not None and os.path.exists(audio_path):
|
103 |
audio = AudioSegment.from_mp3(audio_path)
|
104 |
audio_duration_ms = len(audio)
|
105 |
+
""" #print(f"Generated audio duration: {audio_duration_ms}ms, Target duration: {target_duration_ms}ms") # Debug
|
106 |
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
107 |
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
108 |
#print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
|
|
|
111 |
speed_factor = 1.0
|
112 |
y, sr = librosa.load(audio_path, sr=None)
|
113 |
y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
|
114 |
+
sf.write(audio_path, y_stretched, sr) """
|
115 |
+
|
116 |
+
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
117 |
+
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
118 |
+
if speed_factor > 0:
|
119 |
+
if speed_factor < 1.0:
|
120 |
+
speed_factor = 1.0
|
121 |
+
y, sr = librosa.load(audio_path, sr=None)
|
122 |
+
|
123 |
+
# Use the phase vocoder for time stretching without pitch change
|
124 |
+
hop_length = 512 # You can adjust this parameter
|
125 |
+
phase_vocoder_output = librosa.phase_vocoder(y, rate=speed_factor, hop_length=hop_length)
|
126 |
+
|
127 |
+
# Reconstruct the audio signal from the phase vocoder output
|
128 |
+
y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length, length=len(y) if speed_factor < 1 else None)
|
129 |
+
|
130 |
sf.write(audio_path, y_stretched, sr)
|
131 |
+
|
132 |
else:
|
133 |
print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
|
134 |
return audio_path
|