fahadqazi committed on
Commit
462ab9a
·
verified ·
1 Parent(s): a43b53b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -25
app.py CHANGED
@@ -144,7 +144,7 @@ def text_to_speech(text, audio_file=None):
144
  segments = [x.strip() for x in segments]
145
 
146
  print("segments: ", segments)
147
-
148
  # Merge back the ellipsis with previous segment
149
  combined_segments = []
150
  temp_segment = ""
@@ -166,48 +166,46 @@ def text_to_speech(text, audio_file=None):
166
 
167
  print("combined_segments: ", combined_segments)
168
 
169
- # Silence lengths (50ms for ".", "\n", 150ms for "...")
170
  short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16) # 50ms pause
171
  long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16) # 150ms pause for "..."
172
 
173
  # Create a temporary directory for storing individual segment WAV files
174
  temp_dir = tempfile.mkdtemp()
175
- temp_files = []
176
 
177
  try:
178
- # Synthesize and save each segment to a WAV file
179
- for i, segment in enumerate(combined_segments):
180
- segment_path = os.path.join(temp_dir, f"segment_{i}.wav")
181
 
182
- with wave.open(segment_path, "wb") as wav_file:
183
- voice.synthesize(segment, wav_file, **synthesize_args)
184
 
185
- temp_files.append(segment_path)
 
 
186
 
187
- # Concatenate all WAV files with appropriate silence
188
- combined_audio = np.array([], dtype=np.int16)
189
 
190
- for i, file_path in enumerate(temp_files):
191
- audio_segment, _ = sf.read(file_path, dtype='int16')
192
- combined_audio = np.concatenate((combined_audio, audio_segment))
193
 
194
- # Add silence after each segment
195
- segment = combined_segments[i]
196
- if segment.endswith("...") or segment.endswith("…"):
197
- combined_audio = np.concatenate((combined_audio, long_silence))
198
- elif segment.endswith(".") or segment.endswith("\n") or segment.endswith("۔"):
199
- combined_audio = np.concatenate((combined_audio, short_silence))
200
 
201
- # Save the final output to a WAV file
202
- output_file = f"{uuid.uuid4()}.wav"
203
- sf.write(output_file, combined_audio, 22050) # Assuming 22050 Hz sample rate
 
 
204
 
205
  finally:
206
  # Clean up the temporary directory
207
  shutil.rmtree(temp_dir)
208
 
209
-
210
- return output_file
 
211
 
212
  # def text_to_speech(text, audio_file=None):
213
  # # Normalize the input text
 
144
  segments = [x.strip() for x in segments]
145
 
146
  print("segments: ", segments)
147
+
148
  # Merge back the ellipsis with previous segment
149
  combined_segments = []
150
  temp_segment = ""
 
166
 
167
  print("combined_segments: ", combined_segments)
168
 
169
+ # Silence lengths (50ms for '.', '\n', '۔', 150ms for '...')
170
  short_silence = np.zeros(int(22050 * 0.05), dtype=np.int16) # 50ms pause
171
  long_silence = np.zeros(int(22050 * 0.15), dtype=np.int16) # 150ms pause for "..."
172
 
173
  # Create a temporary directory for storing individual segment WAV files
174
  temp_dir = tempfile.mkdtemp()
 
175
 
176
  try:
177
+ output_file = f"{uuid.uuid4()}.wav"
 
 
178
 
179
+ # Open the final output WAV file
180
+ with sf.SoundFile(output_file, 'w', samplerate=22050, channels=1, subtype='PCM_16') as output:
181
 
182
+ # Synthesize and save each segment to a WAV file
183
+ for i, segment in enumerate(combined_segments):
184
+ segment_path = os.path.join(temp_dir, f"segment_{i}.wav")
185
 
186
+ with wave.open(segment_path, "wb") as wav_file:
187
+ voice.synthesize(segment, wav_file, **synthesize_args)
188
 
189
+ # Read the segment and write it to the final output
190
+ audio_segment, _ = sf.read(segment_path, dtype='int16')
191
+ output.write(audio_segment)
192
 
193
+ # Stream the current progress
194
+ yield output_file
 
 
 
 
195
 
196
+ # Add silence after each segment
197
+ if segment.endswith("...") or segment.endswith("…"):
198
+ output.write(long_silence)
199
+ elif segment.endswith(".") or segment.endswith("\n") or segment.endswith("۔"):
200
+ output.write(short_silence)
201
 
202
  finally:
203
  # Clean up the temporary directory
204
  shutil.rmtree(temp_dir)
205
 
206
+ # Return the final WAV file
207
+ yield output_file
208
+
209
 
210
  # def text_to_speech(text, audio_file=None):
211
  # # Normalize the input text