Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -144,7 +144,7 @@ def text_to_speech(text, audio_file=None):
|
|
144 |
segments = [x.strip() for x in segments]
|
145 |
|
146 |
print("segments: ", segments)
|
147 |
-
|
148 |
# Merge back the ellipsis with previous segment
|
149 |
combined_segments = []
|
150 |
temp_segment = ""
|
@@ -166,48 +166,46 @@ def text_to_speech(text, audio_file=None):
|
|
166 |
|
167 |
print("combined_segments: ", combined_segments)
|
168 |
|
169 |
-
# Silence lengths (50ms for
|
170 |
short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16) # 50ms pause
|
171 |
long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16) # 150ms pause for "..."
|
172 |
|
173 |
# Create a temporary directory for storing individual segment WAV files
|
174 |
temp_dir = tempfile.mkdtemp()
|
175 |
-
temp_files = []
|
176 |
|
177 |
try:
|
178 |
-
|
179 |
-
for i, segment in enumerate(combined_segments):
|
180 |
-
segment_path = os.path.join(temp_dir, f"segment_{i}.wav")
|
181 |
|
182 |
-
|
183 |
-
|
184 |
|
185 |
-
|
|
|
|
|
186 |
|
187 |
-
|
188 |
-
|
189 |
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
if segment.endswith("...") or segment.endswith("…"):
|
197 |
-
combined_audio = np.concatenate((combined_audio, long_silence))
|
198 |
-
elif segment.endswith(".") or segment.endswith("\n") or segment.endswith("۔"):
|
199 |
-
combined_audio = np.concatenate((combined_audio, short_silence))
|
200 |
|
201 |
-
|
202 |
-
|
203 |
-
|
|
|
|
|
204 |
|
205 |
finally:
|
206 |
# Clean up the temporary directory
|
207 |
shutil.rmtree(temp_dir)
|
208 |
|
209 |
-
|
210 |
-
|
|
|
211 |
|
212 |
# def text_to_speech(text, audio_file=None):
|
213 |
# # Normalize the input text
|
|
|
144 |
segments = [x.strip() for x in segments]
|
145 |
|
146 |
print("segments: ", segments)
|
147 |
+
|
148 |
# Merge back the ellipsis with previous segment
|
149 |
combined_segments = []
|
150 |
temp_segment = ""
|
|
|
166 |
|
167 |
print("combined_segments: ", combined_segments)
|
168 |
|
169 |
+
# Silence lengths (50ms for '.', '\n', '۔', 150ms for '...')
|
170 |
short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16) # 50ms pause
|
171 |
long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16) # 150ms pause for "..."
|
172 |
|
173 |
# Create a temporary directory for storing individual segment WAV files
|
174 |
temp_dir = tempfile.mkdtemp()
|
|
|
175 |
|
176 |
try:
|
177 |
+
output_file = f"{uuid.uuid4()}.wav"
|
|
|
|
|
178 |
|
179 |
+
# Open the final output WAV file
|
180 |
+
with sf.SoundFile(output_file, 'w', samplerate=22050, channels=1, subtype='PCM_16') as output:
|
181 |
|
182 |
+
# Synthesize and save each segment to a WAV file
|
183 |
+
for i, segment in enumerate(combined_segments):
|
184 |
+
segment_path = os.path.join(temp_dir, f"segment_{i}.wav")
|
185 |
|
186 |
+
with wave.open(segment_path, "wb") as wav_file:
|
187 |
+
voice.synthesize(segment, wav_file, **synthesize_args)
|
188 |
|
189 |
+
# Read the segment and write it to the final output
|
190 |
+
audio_segment, _ = sf.read(segment_path, dtype='int16')
|
191 |
+
output.write(audio_segment)
|
192 |
|
193 |
+
# Stream the current progress
|
194 |
+
yield output_file
|
|
|
|
|
|
|
|
|
195 |
|
196 |
+
# Add silence after each segment
|
197 |
+
if segment.endswith("...") or segment.endswith("…"):
|
198 |
+
output.write(long_silence)
|
199 |
+
elif segment.endswith(".") or segment.endswith("\n") or segment.endswith("۔"):
|
200 |
+
output.write(short_silence)
|
201 |
|
202 |
finally:
|
203 |
# Clean up the temporary directory
|
204 |
shutil.rmtree(temp_dir)
|
205 |
|
206 |
+
# Return the final WAV file
|
207 |
+
yield output_file
|
208 |
+
|
209 |
|
210 |
# def text_to_speech(text, audio_file=None):
|
211 |
# # Normalize the input text
|