cnph001 committed on
Commit
ad48cb2
·
verified ·
1 Parent(s): 2899f8f

Update app.py

Browse files

attempt to fix voice distortion when stretch is applied

Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -102,7 +102,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
102
  if target_duration_ms is not None and os.path.exists(audio_path):
103
  audio = AudioSegment.from_mp3(audio_path)
104
  audio_duration_ms = len(audio)
105
- #print(f"Generated audio duration: {audio_duration_ms}ms, Target duration: {target_duration_ms}ms") # Debug
106
  if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
107
  speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
108
  #print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
@@ -111,7 +111,24 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
111
  speed_factor = 1.0
112
  y, sr = librosa.load(audio_path, sr=None)
113
  y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  sf.write(audio_path, y_stretched, sr)
 
115
  else:
116
  print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
117
  return audio_path
 
102
  if target_duration_ms is not None and os.path.exists(audio_path):
103
  audio = AudioSegment.from_mp3(audio_path)
104
  audio_duration_ms = len(audio)
105
+ """ #print(f"Generated audio duration: {audio_duration_ms}ms, Target duration: {target_duration_ms}ms") # Debug
106
  if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
107
  speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
108
  #print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
 
111
  speed_factor = 1.0
112
  y, sr = librosa.load(audio_path, sr=None)
113
  y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
114
+ sf.write(audio_path, y_stretched, sr) """
115
+
116
+ if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
117
+ speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
118
+ if speed_factor > 0:
119
+ if speed_factor < 1.0:
120
+ speed_factor = 1.0
121
+ y, sr = librosa.load(audio_path, sr=None)
122
+
123
+ # Use the phase vocoder for time stretching without pitch change
124
+ hop_length = 512 # You can adjust this parameter
125
+ phase_vocoder_output = librosa.phase_vocoder(y, rate=speed_factor, hop_length=hop_length)
126
+
127
+ # Reconstruct the audio signal from the phase vocoder output
128
+ y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length, length=len(y) if speed_factor < 1 else None)
129
+
130
  sf.write(audio_path, y_stretched, sr)
131
+
132
  else:
133
  print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
134
  return audio_path