Update app.py
Browse files
app.py
CHANGED
@@ -103,51 +103,25 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
103 |
audio = AudioSegment.from_mp3(audio_path)
|
104 |
audio_duration_ms = len(audio)
|
105 |
#print(f"Generated audio duration: {audio_duration_ms}ms, Target duration: {target_duration_ms}ms") # Debug
|
106 |
-
Offtext = """
|
107 |
-
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
108 |
-
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
109 |
-
#print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
|
110 |
-
if speed_factor > 0:
|
111 |
-
if speed_factor < 1.0:
|
112 |
-
speed_factor = 1.0
|
113 |
-
y, sr = librosa.load(audio_path, sr=None)
|
114 |
-
y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
|
115 |
-
sf.write(audio_path, y_stretched, sr)
|
116 |
-
"""
|
117 |
|
118 |
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
119 |
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
|
|
120 |
if speed_factor > 0:
|
121 |
if speed_factor < 1.0:
|
122 |
speed_factor = 1.0
|
123 |
-
|
124 |
-
# Load the audio file
|
125 |
y, sr = librosa.load(audio_path, sr=None)
|
126 |
-
|
127 |
-
# Check if audio loading was successful
|
128 |
-
if y is None or sr is None:
|
129 |
-
raise ValueError(f"Error loading audio file: {audio_path}")
|
130 |
|
131 |
-
#
|
132 |
-
hop_length = 512 #
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
#
|
140 |
-
|
141 |
-
# Check if length is properly handled, based on speed_factor
|
142 |
-
if speed_factor < 1:
|
143 |
-
y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length, length=len(y))
|
144 |
-
else:
|
145 |
-
y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length)
|
146 |
-
except Exception as e:
|
147 |
-
raise ValueError(f"Error during istft: {e}")
|
148 |
-
|
149 |
-
# Save the time-stretched audio to the file
|
150 |
-
sf.write(audio_path, y_stretched, sr)
|
151 |
|
152 |
else:
|
153 |
print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
|
|
|
103 |
audio = AudioSegment.from_mp3(audio_path)
|
104 |
audio_duration_ms = len(audio)
|
105 |
#print(f"Generated audio duration: {audio_duration_ms}ms, Target duration: {target_duration_ms}ms") # Debug
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
108 |
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
109 |
+
#print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
|
110 |
if speed_factor > 0:
|
111 |
if speed_factor < 1.0:
|
112 |
speed_factor = 1.0
|
|
|
|
|
113 |
y, sr = librosa.load(audio_path, sr=None)
|
|
|
|
|
|
|
|
|
114 |
|
115 |
+
# Apply phase vocoder to stretch the audio
|
116 |
+
hop_length = 512 # Hop length, you can experiment with this
|
117 |
+
# phase_vocoder operates on a complex STFT matrix, not on the raw waveform, so transform y first; the result is an STFT that istft can invert.
y_stretched = librosa.phase_vocoder(librosa.stft(y, hop_length=hop_length), rate=speed_factor, hop_length=hop_length)
|
118 |
+
# Reconstruct the audio using ISTFT
|
119 |
+
y_reconstructed = librosa.istft(y_stretched, hop_length=hop_length)
|
120 |
+
# Save the stretched audio back to a file
|
121 |
+
sf.write(audio_path, y_reconstructed, sr)
|
122 |
+
|
123 |
+
#y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
|
124 |
+
#sf.write(audio_path, y_stretched, sr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
else:
|
127 |
print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
|