cnph001 committed
Commit 9733dac · verified · 1 Parent(s): d474bed

Update app.py

Restore working version without post filter

Files changed (1)
  1. app.py +2 -91
app.py CHANGED

@@ -12,45 +12,6 @@ import soundfile as sf
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
-import math
-from scipy.signal import butter, sosfiltfilt
-
-
-def apply_low_pass_filter(audio_segment, cutoff_freq, order=6):
-    """
-    Applies a low-pass filter to an AudioSegment.
-
-    Args:
-        audio_segment: The AudioSegment to filter.
-        cutoff_freq: The cutoff frequency in Hz.
-        order: The order of the Butterworth filter.
-
-    Returns:
-        A new AudioSegment with the filtered audio.
-    """
-    segment_array = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
-    frame_rate = audio_segment.frame_rate
-    nyquist_freq = 0.5 * frame_rate
-    normalized_cutoff = cutoff_freq / nyquist_freq
-    sos = butter(order, normalized_cutoff, btype='low', output='sos')
-    filtered_array = sosfiltfilt(sos, segment_array)
-
-    sample_width = audio_segment.sample_width
-    dtype = None
-    if sample_width == 1:
-        dtype = np.int8
-    elif sample_width == 2:
-        dtype = np.int16
-    elif sample_width == 3:
-        dtype = np.int32 # Or potentially a custom type depending on the library
-    elif sample_width == 4:
-        dtype = np.int32
-
-    if dtype is not None:
-        return audio_segment._spawn(filtered_array.astype(dtype))
-    else:
-        raise ValueError(f"Unsupported sample width: {sample_width}")
-


def get_silence(duration_ms=1000):
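For reference, the helper removed above converts the segment's samples to a float array, filters them with a Butterworth low-pass expressed as second-order sections, and spawns a new AudioSegment from the result. Below is a minimal standalone sketch of the same idea (not code from this repo): it assumes 16-bit PCM, de-interleaves stereo before filtering, and passes raw bytes back to `_spawn` instead of a NumPy array.

```python
# Hedged sketch only: a self-contained Butterworth low-pass for a pydub segment.
# Assumes sample_width == 2 (16-bit PCM); other widths would need their own dtype handling.
import numpy as np
from pydub import AudioSegment
from scipy.signal import butter, sosfiltfilt

def low_pass(segment: AudioSegment, cutoff_hz: float, order: int = 6) -> AudioSegment:
    samples = np.array(segment.get_array_of_samples(), dtype=np.float64)
    samples = samples.reshape((-1, segment.channels))          # de-interleave channels
    sos = butter(order, cutoff_hz / (0.5 * segment.frame_rate), btype="low", output="sos")
    filtered = sosfiltfilt(sos, samples, axis=0)               # zero-phase filter per channel
    out = np.clip(filtered, -32768, 32767).astype(np.int16)    # avoid integer wrap-around
    return segment._spawn(out.reshape(-1).tobytes())           # re-interleave, raw bytes in
```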
@@ -128,19 +89,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
    match = re.search(r'([A-Za-z]+)([-]?\d*)', processed_text)
    if match:
        prefix_pitch = match.group(1)
-        number_str = match.group(2)
-        if number_str: # Check if the second group (number part) is not empty
-            try:
-                number = int(number_str)
-                # Now you can use the 'number' variable
-                print(f"Prefix: {prefix_pitch}, Number: {number}") # Example usage
-            except ValueError as e:
-                print(f"Error converting number string to int: {e}")
-                number = 0 # Or some other default value
-        else:
-            number = 0 # Or some other default value if no number is found
-            print(f"Prefix: {prefix_pitch}, No number found.") # Example handling
-
+        number = int(match.group(2))
        if prefix_pitch in voice_map:
            current_pitch += number
            #processed_text = re.sub(r'[A-Za-z]+-?\d+', '', processed_text, count=1).strip()
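One caveat with the restored one-liner: the second group of `([A-Za-z]+)([-]?\d*)` can match the empty string, and `int('')` raises `ValueError`. If prefixes without digits ever reach this branch, a guarded variant keeps the simple form; the input string below is hypothetical:

```python
import re

processed_text = "Alice-3 hello there"            # hypothetical input: prefix plus pitch offset
match = re.search(r'([A-Za-z]+)([-]?\d*)', processed_text)
if match:
    prefix_pitch = match.group(1)
    number = int(match.group(2) or 0)             # "" falls back to 0 instead of raising
    print(prefix_pitch, number)                   # -> Alice -3
```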
@@ -278,32 +227,18 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
                            os.remove(path)
                        except FileNotFoundError:
                            print(f"Warning: Audio file not found: {path}")
-            Rem1='''
-            if combined_line_audio and overall_duration_ms is not None and overall_duration_ms > 0 and total_generated_duration_ms > overall_duration_ms:
-                speed_factor = (total_generated_duration_ms / overall_duration_ms) * speed_adjustment_factor
-                if speed_factor > 0:
-                    if speed_factor < 1.0:
-                        speed_factor = 1.0
-                    combined_line_audio = combined_line_audio.speedup(playback_speed=speed_factor)

-            if combined_line_audio:
-                timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
-                max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))
-            '''
            if combined_line_audio and overall_duration_ms is not None and overall_duration_ms > 0 and total_generated_duration_ms > overall_duration_ms:
                speed_factor = (total_generated_duration_ms / overall_duration_ms) * speed_adjustment_factor
                if speed_factor > 0:
                    if speed_factor < 1.0:
                        speed_factor = 1.0
                    combined_line_audio = combined_line_audio.speedup(playback_speed=speed_factor)
-                # Apply low-pass filter AFTER speed adjustment
-                cutoff_freq = 7000.0 # Adjust as needed
-                combined_line_audio = apply_low_pass_filter(combined_line_audio, cutoff_freq)

            if combined_line_audio:
                timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
                max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))
-
+
            elif audio_paths:
                for path in audio_paths:
                    if path:
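The timing rule this hunk keeps is: when the audio generated for a line overruns its slot (`overall_duration_ms`), speed it up by the overrun ratio times `speed_adjustment_factor`, clamped so it is never slowed below normal speed. A small sketch of that rule in isolation (the function name and signature are illustrative, not from app.py):

```python
from pydub import AudioSegment

def fit_to_slot(line_audio: AudioSegment, slot_ms: int, adjustment: float = 1.0) -> AudioSegment:
    """Speed a line up just enough to fit its time slot; never slow it down."""
    if slot_ms <= 0 or len(line_audio) <= slot_ms:     # len() of a segment is milliseconds
        return line_audio
    factor = max(1.0, (len(line_audio) / slot_ms) * adjustment)
    return line_audio.speedup(playback_speed=factor)
```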
@@ -311,38 +246,14 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
                            os.remove(path)
                        except FileNotFoundError:
                            pass # Clean up even if no timestamp
-
-

    if not timed_audio_segments:
        return None, "No processable audio segments found."

-    oldx= '''
    final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
    for segment in timed_audio_segments:
        final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
-    '''
-    final_audio = AudioSegment.silent(duration=int(max_end_time_ms * 1000 + 500), frame_rate=24000)
-
-    for segment in timed_audio_segments:
-        start_position_ms = int(segment['start'] * 1000)
-        audio_to_overlay = segment['audio']
-
-        if start_position_ms + len(audio_to_overlay) > len(final_audio):
-            padding_needed = (start_position_ms + len(audio_to_overlay)) - len(final_audio)
-            final_audio += AudioSegment.silent(duration=padding_needed + 100, frame_rate=final_audio.frame_rate)
-
-        try:
-            final_audio = final_audio.overlay(audio_to_overlay, position=start_position_ms)
-        except Exception as e:
-            print(f"Error during overlay: {e}")
-            print(f" - Start position (ms): {start_position_ms}")
-            print(f" - Length of audio to overlay (ms): {len(audio_to_overlay)}")
-            print(f" - Length of final_audio (ms): {len(final_audio)}")
-            # Consider adding logic here to handle the error, e.g., truncating audio_to_overlay
-            # or skipping the overlay if it consistently fails.

-
    combined_audio_path = tempfile.mktemp(suffix=".mp3")
    final_audio.export(combined_audio_path, format="mp3")
    return combined_audio_path, None
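The restored assembly keeps every value in milliseconds: `max_end_time_ms` sizes the silent canvas and `segment['start']` is used directly as the overlay offset, whereas the removed variant multiplied both by 1000 and padded the canvas as it went. A compact sketch of the restored approach (the wrapper name is illustrative, not from app.py):

```python
import tempfile
from pydub import AudioSegment

def assemble(timed_audio_segments):
    """Overlay each timed segment onto a silent 24 kHz canvas; offsets are in ms."""
    max_end_ms = max(seg['start'] + len(seg['audio']) for seg in timed_audio_segments)
    final_audio = AudioSegment.silent(duration=max_end_ms, frame_rate=24000)
    for seg in timed_audio_segments:
        final_audio = final_audio.overlay(seg['audio'], position=seg['start'])
    out_path = tempfile.mktemp(suffix=".mp3")     # mirrors the script's existing mktemp usage
    final_audio.export(out_path, format="mp3")    # mp3 export needs ffmpeg, as in app.py
    return out_path
```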
 