Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -164,38 +164,49 @@ def text_to_speech(text, audio_file=None):
|
|
164 |
if temp_segment:
|
165 |
combined_segments.append(temp_segment.strip())
|
166 |
|
167 |
-
|
168 |
-
short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16) # 50ms for normal pause
|
169 |
-
long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16) # 150ms for "..."
|
170 |
|
171 |
-
#
|
172 |
-
|
|
|
173 |
|
174 |
-
for segment
|
175 |
-
|
176 |
-
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
181 |
|
182 |
-
|
183 |
-
|
184 |
-
audio_segment, _ = sf.read(buffer, dtype='int16')
|
185 |
|
186 |
-
|
187 |
-
combined_audio = np.concatenate((combined_audio, audio_segment))
|
188 |
|
189 |
-
#
|
190 |
-
|
191 |
-
combined_audio = np.concatenate((combined_audio, long_silence))
|
192 |
-
elif segment.endswith(".") or segment.endswith("\n"):
|
193 |
-
combined_audio = np.concatenate((combined_audio, short_silence))
|
194 |
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
return output_file
|
200 |
|
201 |
# def text_to_speech(text, audio_file=None):
|
|
|
164 |
if temp_segment:
|
165 |
combined_segments.append(temp_segment.strip())
|
166 |
|
167 |
+
print(combined_segments)
|
|
|
|
|
168 |
|
169 |
+
# Silence lengths (50ms for ".", "\n", 150ms for "...")
|
170 |
+
short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16) # 50ms pause
|
171 |
+
long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16) # 150ms pause for "..."
|
172 |
|
173 |
+
# Create a temporary directory for storing individual segment WAV files
|
174 |
+
temp_dir = tempfile.mkdtemp()
|
175 |
+
temp_files = []
|
176 |
|
177 |
+
try:
|
178 |
+
# Synthesize and save each segment to a WAV file
|
179 |
+
for i, segment in enumerate(combined_segments):
|
180 |
+
segment_path = os.path.join(temp_dir, f"segment_{i}.wav")
|
181 |
|
182 |
+
with wave.open(segment_path, "wb") as wav_file:
|
183 |
+
voice.synthesize(segment, wav_file, **synthesize_args)
|
|
|
184 |
|
185 |
+
temp_files.append(segment_path)
|
|
|
186 |
|
187 |
+
# Concatenate all WAV files with appropriate silence
|
188 |
+
combined_audio = np.array([], dtype=np.int16)
|
|
|
|
|
|
|
189 |
|
190 |
+
for i, file_path in enumerate(temp_files):
|
191 |
+
audio_segment, _ = sf.read(file_path, dtype='int16')
|
192 |
+
combined_audio = np.concatenate((combined_audio, audio_segment))
|
193 |
|
194 |
+
# Add silence after each segment
|
195 |
+
segment = combined_segments[i]
|
196 |
+
if segment.endswith("...") or segment.endswith("…"):
|
197 |
+
combined_audio = np.concatenate((combined_audio, long_silence))
|
198 |
+
elif segment.endswith(".") or segment.endswith("\n"):
|
199 |
+
combined_audio = np.concatenate((combined_audio, short_silence))
|
200 |
+
|
201 |
+
# Save the final output to a WAV file
|
202 |
+
output_file = f"{uuid.uuid4()}.wav"
|
203 |
+
sf.write(output_file, combined_audio, 22050) # Assuming 22050 Hz sample rate
|
204 |
+
|
205 |
+
finally:
|
206 |
+
# Clean up the temporary directory
|
207 |
+
shutil.rmtree(temp_dir)
|
208 |
+
|
209 |
+
|
210 |
return output_file
|
211 |
|
212 |
# def text_to_speech(text, audio_file=None):
|