NGHIA_Test_Edge_TTS_transcript_w_timestamp

Sleeping

App Files Files Community

cnph001 committed on May 11

Commit

d358db3

verified ·

1 Parent(s): 26d60e8

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -2

app.py CHANGED Viewed

@@ -120,15 +120,33 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
                     if speed_factor > 0:
                         if speed_factor < 1.0:
                             speed_factor = 1.0
                         y, sr = librosa.load(audio_path, sr=None)
                         # Use the phase vocoder for time stretching without pitch change
                         hop_length = 512  # You can adjust this parameter
                         phase_vocoder_output = librosa.phase_vocoder(y, rate=speed_factor, hop_length=hop_length)
-                        # Reconstruct the audio signal from the phase vocoder output
-                        y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length, length=len(y) if speed_factor < 1 else None)
                         sf.write(audio_path, y_stretched, sr)
                 else:

                     if speed_factor > 0:
                         if speed_factor < 1.0:
                             speed_factor = 1.0
+                        # Load the audio file
                         y, sr = librosa.load(audio_path, sr=None)
+                        # Check if audio loading was successful
+                        if y is None or sr is None:
+                            raise ValueError(f"Error loading audio file: {audio_path}")
                         # Use the phase vocoder for time stretching without pitch change
                         hop_length = 512  # You can adjust this parameter
                         phase_vocoder_output = librosa.phase_vocoder(y, rate=speed_factor, hop_length=hop_length)
+                        # Check the shape of the phase vocoder output
+                        if phase_vocoder_output is None or len(phase_vocoder_output) == 0:
+                            raise ValueError("Phase vocoder output is empty or None.")
+                        # Reconstruct the audio signal from the phase vocoder output
+                        try:
+                            # Check if length is properly handled, based on speed_factor
+                            if speed_factor < 1:
+                                y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length, length=len(y))
+                            else:
+                                y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length)
+                        except Exception as e:
+                            raise ValueError(f"Error during istft: {e}")
+                        # Save the time-stretched audio to the file
                         sf.write(audio_path, y_stretched, sr)
                 else: