fahadqazi committed
Commit 4dbf45b · verified · 1 Parent(s): 0b3b1b2

Update app.py

Files changed (1):
  1. app.py (+35 -24)
app.py CHANGED
@@ -164,38 +164,49 @@ def text_to_speech(text, audio_file=None):
     if temp_segment:
         combined_segments.append(temp_segment.strip())
 
-    # Prepare silences
-    short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16)  # 50ms for normal pause
-    long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16)  # 150ms for "..."
-
-    # Synthesize and concatenate audio
-    combined_audio = np.array([], dtype=np.int16)
-
-    for segment in combined_segments:
-        # Create a BytesIO buffer to store synthesized speech
-        buffer = io.BytesIO()
-
-        # Use wave.open() to provide a compatible object
-        with wave.open(buffer, "wb") as wav_file:
-            voice.synthesize(segment, wav_file, **synthesize_args)
-
-        # Read the synthesized speech from the buffer
-        buffer.seek(0)
-        audio_segment, _ = sf.read(buffer, dtype='int16')
-
-        # Append synthesized audio
-        combined_audio = np.concatenate((combined_audio, audio_segment))
-
-        # Add appropriate silence after each segment
-        if segment.endswith("...") or segment.endswith("…"):
-            combined_audio = np.concatenate((combined_audio, long_silence))
-        elif segment.endswith(".") or segment.endswith("\n"):
-            combined_audio = np.concatenate((combined_audio, short_silence))
-
-    # Save the final output to a WAV file
-    output_file = f"{uuid.uuid4()}.wav"
-    sf.write(output_file, combined_audio, 22050)  # Assuming 22050 Hz sample rate
-
+    print(combined_segments)
+
+    # Silence lengths (50ms for ".", "\n", 150ms for "...")
+    short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16)  # 50ms pause
+    long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16)  # 150ms pause for "..."
+
+    # Create a temporary directory for storing individual segment WAV files
+    temp_dir = tempfile.mkdtemp()
+    temp_files = []
+
+    try:
+        # Synthesize and save each segment to a WAV file
+        for i, segment in enumerate(combined_segments):
+            segment_path = os.path.join(temp_dir, f"segment_{i}.wav")
+
+            with wave.open(segment_path, "wb") as wav_file:
+                voice.synthesize(segment, wav_file, **synthesize_args)
+
+            temp_files.append(segment_path)
+
+        # Concatenate all WAV files with appropriate silence
+        combined_audio = np.array([], dtype=np.int16)
+
+        for i, file_path in enumerate(temp_files):
+            audio_segment, _ = sf.read(file_path, dtype='int16')
+            combined_audio = np.concatenate((combined_audio, audio_segment))
+
+            # Add silence after each segment
+            segment = combined_segments[i]
+            if segment.endswith("...") or segment.endswith("…"):
+                combined_audio = np.concatenate((combined_audio, long_silence))
+            elif segment.endswith(".") or segment.endswith("\n"):
+                combined_audio = np.concatenate((combined_audio, short_silence))
+
+        # Save the final output to a WAV file
+        output_file = f"{uuid.uuid4()}.wav"
+        sf.write(output_file, combined_audio, 22050)  # Assuming 22050 Hz sample rate
+
+    finally:
+        # Clean up the temporary directory
+        shutil.rmtree(temp_dir)
+
+
     return output_file
 
 # def text_to_speech(text, audio_file=None):
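
For reference, below is a minimal, self-contained sketch of the strategy the new code adopts: synthesize each segment into its own temporary WAV file, concatenate the segments with punctuation-dependent silence, and remove the temp directory in a finally block. Here fake_synthesize is a hypothetical stand-in for voice.synthesize() so the sketch runs without a Piper model, and the 22050 Hz sample rate mirrors the assumption in the diff.

    import os
    import shutil
    import tempfile
    import wave

    import numpy as np
    import soundfile as sf

    SAMPLE_RATE = 22050  # assumed rate, matching the diff


    def fake_synthesize(text: str, wav_file: wave.Wave_write) -> None:
        """Hypothetical stand-in for voice.synthesize(): writes a 0.3s tone."""
        t = np.linspace(0, 0.3, int(SAMPLE_RATE * 0.3), endpoint=False)
        tone = (0.2 * np.sin(2 * np.pi * 220 * t) * 32767).astype(np.int16)
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(SAMPLE_RATE)
        wav_file.writeframes(tone.tobytes())


    def concatenate_segments(segments: list[str]) -> np.ndarray:
        # Silence lengths follow the diff: 50ms for "."/"\n", 150ms for "..."
        short_silence = np.zeros(int(SAMPLE_RATE * 0.05), dtype=np.int16)
        long_silence = np.zeros(int(SAMPLE_RATE * 0.15), dtype=np.int16)

        temp_dir = tempfile.mkdtemp()
        try:
            combined = np.array([], dtype=np.int16)
            for i, segment in enumerate(segments):
                # Synthesize each segment to its own WAV file on disk
                path = os.path.join(temp_dir, f"segment_{i}.wav")
                with wave.open(path, "wb") as wav_file:
                    fake_synthesize(segment, wav_file)

                # Read it back and append, then pad by ending punctuation
                audio, _ = sf.read(path, dtype="int16")
                combined = np.concatenate((combined, audio))
                if segment.endswith(("...", "…")):
                    combined = np.concatenate((combined, long_silence))
                elif segment.endswith((".", "\n")):
                    combined = np.concatenate((combined, short_silence))
            return combined
        finally:
            shutil.rmtree(temp_dir)


    if __name__ == "__main__":
        audio = concatenate_segments(["Hello there.", "Wait...", "Done."])
        sf.write("combined.wav", audio, SAMPLE_RATE)

One plausible motivation for the commit, visible in the diff itself: the old code round-tripped each segment through an in-memory BytesIO opened with wave.open() and re-read with sf.read(), while the new code writes each segment to a real file in a temp directory, which is simpler to inspect and is always cleaned up via try/finally.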