fahadqazi committed on
Commit
462ab9a
·
verified ·
1 Parent(s): a43b53b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -25
app.py CHANGED
@@ -144,7 +144,7 @@ def text_to_speech(text, audio_file=None):
144
  segments = [x.strip() for x in segments]
145
 
146
  print("segments: ", segments)
147
-
148
  # Merge back the ellipsis with previous segment
149
  combined_segments = []
150
  temp_segment = ""
@@ -166,48 +166,46 @@ def text_to_speech(text, audio_file=None):
166
 
167
  print("combined_segments: ", combined_segments)
168
 
169
- # Silence lengths (50ms for ".", "\n", 150ms for "...")
170
  short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16) # 50ms pause
171
  long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16) # 150ms pause for "..."
172
 
173
  # Create a temporary directory for storing individual segment WAV files
174
  temp_dir = tempfile.mkdtemp()
175
- temp_files = []
176
 
177
  try:
178
- # Synthesize and save each segment to a WAV file
179
- for i, segment in enumerate(combined_segments):
180
- segment_path = os.path.join(temp_dir, f"segment_{i}.wav")
181
 
182
- with wave.open(segment_path, "wb") as wav_file:
183
- voice.synthesize(segment, wav_file, **synthesize_args)
184
 
185
- temp_files.append(segment_path)
 
 
186
 
187
- # Concatenate all WAV files with appropriate silence
188
- combined_audio = np.array([], dtype=np.int16)
189
 
190
- for i, file_path in enumerate(temp_files):
191
- audio_segment, _ = sf.read(file_path, dtype='int16')
192
- combined_audio = np.concatenate((combined_audio, audio_segment))
193
 
194
- # Add silence after each segment
195
- segment = combined_segments[i]
196
- if segment.endswith("...") or segment.endswith("…"):
197
- combined_audio = np.concatenate((combined_audio, long_silence))
198
- elif segment.endswith(".") or segment.endswith("\n") or segment.endswith("۔"):
199
- combined_audio = np.concatenate((combined_audio, short_silence))
200
 
201
- # Save the final output to a WAV file
202
- output_file = f"{uuid.uuid4()}.wav"
203
- sf.write(output_file, combined_audio, 22050) # Assuming 22050 Hz sample rate
 
 
204
 
205
  finally:
206
  # Clean up the temporary directory
207
  shutil.rmtree(temp_dir)
208
 
209
-
210
- return output_file
 
211
 
212
  # def text_to_speech(text, audio_file=None):
213
  # # Normalize the input text
 
144
  segments = [x.strip() for x in segments]
145
 
146
  print("segments: ", segments)
147
+
148
  # Merge back the ellipsis with previous segment
149
  combined_segments = []
150
  temp_segment = ""
 
166
 
167
  print("combined_segments: ", combined_segments)
168
 
169
+ # Silence lengths (50ms for '.', '\n', '۔', 150ms for '...')
170
  short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16) # 50ms pause
171
  long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16) # 150ms pause for "..."
172
 
173
  # Create a temporary directory for storing individual segment WAV files
174
  temp_dir = tempfile.mkdtemp()
 
175
 
176
  try:
177
+ output_file = f"{uuid.uuid4()}.wav"
 
 
178
 
179
+ # Open the final output WAV file
180
+ with sf.SoundFile(output_file, 'w', samplerate=22050, channels=1, subtype='PCM_16') as output:
181
 
182
+ # Synthesize and save each segment to a WAV file
183
+ for i, segment in enumerate(combined_segments):
184
+ segment_path = os.path.join(temp_dir, f"segment_{i}.wav")
185
 
186
+ with wave.open(segment_path, "wb") as wav_file:
187
+ voice.synthesize(segment, wav_file, **synthesize_args)
188
 
189
+ # Read the segment and write it to the final output
190
+ audio_segment, _ = sf.read(segment_path, dtype='int16')
191
+ output.write(audio_segment)
192
 
193
+ # Stream the current progress
194
+ yield output_file
 
 
 
 
195
 
196
+ # Add silence after each segment
197
+ if segment.endswith("...") or segment.endswith("…"):
198
+ output.write(long_silence)
199
+ elif segment.endswith(".") or segment.endswith("\n") or segment.endswith("۔"):
200
+ output.write(short_silence)
201
 
202
  finally:
203
  # Clean up the temporary directory
204
  shutil.rmtree(temp_dir)
205
 
206
+ # Return the final WAV file
207
+ yield output_file
208
+
209
 
210
  # def text_to_speech(text, audio_file=None):
211
  # # Normalize the input text