cnph001 committed on
Commit
ed3d5af
·
verified ·
1 Parent(s): d358db3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -37
app.py CHANGED
@@ -103,51 +103,25 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
103
  audio = AudioSegment.from_mp3(audio_path)
104
  audio_duration_ms = len(audio)
105
  #print(f"Generated audio duration: {audio_duration_ms}ms, Target duration: {target_duration_ms}ms") # Debug
106
- Offtext = """
107
- if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
108
- speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
109
- #print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
110
- if speed_factor > 0:
111
- if speed_factor < 1.0:
112
- speed_factor = 1.0
113
- y, sr = librosa.load(audio_path, sr=None)
114
- y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
115
- sf.write(audio_path, y_stretched, sr)
116
- """
117
 
118
  if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
119
  speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
 
120
  if speed_factor > 0:
121
  if speed_factor < 1.0:
122
  speed_factor = 1.0
123
-
124
- # Load the audio file
125
  y, sr = librosa.load(audio_path, sr=None)
126
-
127
- # Check if audio loading was successful
128
- if y is None or sr is None:
129
- raise ValueError(f"Error loading audio file: {audio_path}")
130
 
131
- # Use the phase vocoder for time stretching without pitch change
132
- hop_length = 512 # You can adjust this parameter
133
- phase_vocoder_output = librosa.phase_vocoder(y, rate=speed_factor, hop_length=hop_length)
134
-
135
- # Check the shape of the phase vocoder output
136
- if phase_vocoder_output is None or len(phase_vocoder_output) == 0:
137
- raise ValueError("Phase vocoder output is empty or None.")
138
-
139
- # Reconstruct the audio signal from the phase vocoder output
140
- try:
141
- # Check if length is properly handled, based on speed_factor
142
- if speed_factor < 1:
143
- y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length, length=len(y))
144
- else:
145
- y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length)
146
- except Exception as e:
147
- raise ValueError(f"Error during istft: {e}")
148
-
149
- # Save the time-stretched audio to the file
150
- sf.write(audio_path, y_stretched, sr)
151
 
152
  else:
153
  print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
 
103
  audio = AudioSegment.from_mp3(audio_path)
104
  audio_duration_ms = len(audio)
105
  #print(f"Generated audio duration: {audio_duration_ms}ms, Target duration: {target_duration_ms}ms") # Debug
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
108
  speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
109
+ #print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
110
  if speed_factor > 0:
111
  if speed_factor < 1.0:
112
  speed_factor = 1.0
 
 
113
  y, sr = librosa.load(audio_path, sr=None)
 
 
 
 
114
 
115
+ # Apply phase vocoder to stretch the audio
116
+ hop_length = 512 # Hop length, you can experiment with this
117
+ y_stretched = librosa.phase_vocoder(y, rate=speed_factor, hop_length=hop_length)
118
+ # Reconstruct the audio using ISTFT
119
+ y_reconstructed = librosa.istft(y_stretched, hop_length=hop_length)
120
+ # Save the stretched audio back to a file
121
+ sf.write(audio_path, y_reconstructed, sr)
122
+
123
+ #y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
124
+ #sf.write(audio_path, y_stretched, sr)
 
 
 
 
 
 
 
 
 
 
125
 
126
  else:
127
  print("Generated audio is not longer than target duration, no speed adjustment.") # Debug