Spaces:

fahadqazi
/

Sindhi-Text-to-Speech

Running

fahadqazi committed on Mar 28

Commit

0b3b1b2

verified ·

1 Parent(s): 5fc9763

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -172,26 +172,30 @@ def text_to_speech(text, audio_file=None):
     combined_audio = np.array([], dtype=np.int16)
     for segment in combined_segments:
-        if segment.strip() == "":
-            continue
-        with io.BytesIO() as buffer:
-            voice.synthesize(segment, buffer, **synthesize_args)
-            buffer.seek(0)
-            audio_segment, _ = sf.read(buffer, dtype='int16')
         combined_audio = np.concatenate((combined_audio, audio_segment))
-        # Add appropriate silence
-        if segment.endswith("..."):
             combined_audio = np.concatenate((combined_audio, long_silence))
-        else:
             combined_audio = np.concatenate((combined_audio, short_silence))
     # Save the final output to a WAV file
     output_file = f"{uuid.uuid4()}.wav"
     sf.write(output_file, combined_audio, 22050)  # Assuming 22050 Hz sample rate
     return output_file
 # def text_to_speech(text, audio_file=None):

     combined_audio = np.array([], dtype=np.int16)
     for segment in combined_segments:
+        # Create a BytesIO buffer to store synthesized speech
+        buffer = io.BytesIO()
+        # Use wave.open() to provide a compatible object
+        with wave.open(buffer, "wb") as wav_file:
+            voice.synthesize(segment, wav_file, **synthesize_args)
+        # Read the synthesized speech from the buffer
+        buffer.seek(0)
+        audio_segment, _ = sf.read(buffer, dtype='int16')
+        # Append synthesized audio
         combined_audio = np.concatenate((combined_audio, audio_segment))
+        # Add appropriate silence after each segment
+        if segment.endswith("...") or segment.endswith("…"):
             combined_audio = np.concatenate((combined_audio, long_silence))
+        elif segment.endswith(".") or segment.endswith("\n"):
             combined_audio = np.concatenate((combined_audio, short_silence))
     # Save the final output to a WAV file
     output_file = f"{uuid.uuid4()}.wav"
     sf.write(output_file, combined_audio, 22050)  # Assuming 22050 Hz sample rate
     return output_file
 # def text_to_speech(text, audio_file=None):