Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -172,26 +172,30 @@ def text_to_speech(text, audio_file=None):
|
|
172 |
combined_audio = np.array([], dtype=np.int16)
|
173 |
|
174 |
for segment in combined_segments:
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
with io.BytesIO() as buffer:
|
179 |
-
voice.synthesize(segment, buffer, **synthesize_args)
|
180 |
-
buffer.seek(0)
|
181 |
-
audio_segment, _ = sf.read(buffer, dtype='int16')
|
182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
combined_audio = np.concatenate((combined_audio, audio_segment))
|
184 |
|
185 |
-
# Add appropriate silence
|
186 |
-
if segment.endswith("..."):
|
187 |
combined_audio = np.concatenate((combined_audio, long_silence))
|
188 |
-
|
189 |
combined_audio = np.concatenate((combined_audio, short_silence))
|
190 |
|
191 |
# Save the final output to a WAV file
|
192 |
output_file = f"{uuid.uuid4()}.wav"
|
193 |
sf.write(output_file, combined_audio, 22050) # Assuming 22050 Hz sample rate
|
194 |
-
|
195 |
return output_file
|
196 |
|
197 |
# def text_to_speech(text, audio_file=None):
|
|
|
172 |
combined_audio = np.array([], dtype=np.int16)
|
173 |
|
174 |
for segment in combined_segments:
|
175 |
+
# Create a BytesIO buffer to store synthesized speech
|
176 |
+
buffer = io.BytesIO()
|
|
|
|
|
|
|
|
|
|
|
177 |
|
178 |
+
# Use wave.open() to provide a compatible object
|
179 |
+
with wave.open(buffer, "wb") as wav_file:
|
180 |
+
voice.synthesize(segment, wav_file, **synthesize_args)
|
181 |
+
|
182 |
+
# Read the synthesized speech from the buffer
|
183 |
+
buffer.seek(0)
|
184 |
+
audio_segment, _ = sf.read(buffer, dtype='int16')
|
185 |
+
|
186 |
+
# Append synthesized audio
|
187 |
combined_audio = np.concatenate((combined_audio, audio_segment))
|
188 |
|
189 |
+
# Add appropriate silence after each segment
|
190 |
+
if segment.endswith("...") or segment.endswith("…"):
|
191 |
combined_audio = np.concatenate((combined_audio, long_silence))
|
192 |
+
elif segment.endswith(".") or segment.endswith("\n"):
|
193 |
combined_audio = np.concatenate((combined_audio, short_silence))
|
194 |
|
195 |
# Save the final output to a WAV file
|
196 |
output_file = f"{uuid.uuid4()}.wav"
|
197 |
sf.write(output_file, combined_audio, 22050) # Assuming 22050 Hz sample rate
|
198 |
+
|
199 |
return output_file
|
200 |
|
201 |
# def text_to_speech(text, audio_file=None):
|