cnph001 committed on
Commit
33573d8
·
verified ·
1 Parent(s): 49d3ed3

Update app.py

Browse files

Send multiple requests to Edge TTS to speed up generation

Files changed (1) hide show
  1. app.py +7 -11
app.py CHANGED
@@ -1,5 +1,3 @@
1
- ## Low pass filtering applied to final Audio
2
-
3
  import spaces
4
  import gradio as gr
5
  import edge_tts
@@ -36,9 +34,9 @@ def apply_low_pass_filter(audio_segment, cutoff_freq, sample_rate, order=5):
36
 
37
  filtered_data_int16 = (filtered_data * (2**15 - 1)).astype(np.int16)
38
  filtered_audio = AudioSegment(filtered_data_int16.tobytes(),
39
- frame_rate=sample_rate,
40
- sample_width=audio_segment.sample_width,
41
- channels=audio_segment.channels)
42
  return filtered_audio
43
 
44
 
@@ -113,7 +111,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
113
  processed_text = processed_text[len(prefix):].strip() #this removes the Prefix and leave only number or text after it.
114
  break
115
  #match = re.search(r'([A-Za-z]+)-?(\d+)', processed_text)
116
- match = re.search(r"^(-?\d+)\s*(.*)", processed_text)
117
  if match:
118
  #prefix_pitch = match.group(1)
119
  number = match.group(1)
@@ -123,7 +121,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
123
  #processed_text = re.sub(r'([A-Za-z]+)([-]?\d*)', '', processed_text, count=1).strip()
124
  processed_text = match.group(2)
125
  #elif detect:
126
- # processed_text = processed_text.lstrip('-0123456789').strip() # Remove potential leftover numbers
127
 
128
  if processed_text:
129
  rate_str = f"{current_rate:+d}%"
@@ -281,10 +279,9 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
281
  final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
282
  for segment in timed_audio_segments:
283
  final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
284
-
285
  # Apply the low-pass filter here
286
  cutoff_frequency = 3500 # 3.5 kHz (you can make this a user-configurable parameter later)
287
- print(f"Applying Low pass filter, cut off frequency: {cutoff_frequency}")
288
  filtered_final_audio = apply_low_pass_filter(final_audio, cutoff_frequency, final_audio.frame_rate)
289
 
290
  combined_audio_path = tempfile.mktemp(suffix=".mp3")
@@ -292,7 +289,6 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
292
  filtered_final_audio.export(combined_audio_path, format="mp3")
293
  return combined_audio_path, None
294
 
295
-
296
  @spaces.GPU
297
  def tts_interface(transcript, voice, rate, pitch, speed_adjustment_factor):
298
  audio, warning = asyncio.run(transcript_to_speech(transcript, voice, rate, pitch, speed_adjustment_factor))
@@ -352,4 +348,4 @@ async def create_demo():
352
 
353
  if __name__ == "__main__":
354
  demo = asyncio.run(create_demo())
355
- demo.launch()
 
 
 
1
  import spaces
2
  import gradio as gr
3
  import edge_tts
 
34
 
35
  filtered_data_int16 = (filtered_data * (2**15 - 1)).astype(np.int16)
36
  filtered_audio = AudioSegment(filtered_data_int16.tobytes(),
37
+ frame_rate=sample_rate,
38
+ sample_width=audio_segment.sample_width,
39
+ channels=audio_segment.channels)
40
  return filtered_audio
41
 
42
 
 
111
  processed_text = processed_text[len(prefix):].strip() #this removes the Prefix and leave only number or text after it.
112
  break
113
  #match = re.search(r'([A-Za-z]+)-?(\d+)', processed_text)
114
+ match = re.search(r"^(-?\d+)\s*(.*)", processed_text)
115
  if match:
116
  #prefix_pitch = match.group(1)
117
  number = match.group(1)
 
121
  #processed_text = re.sub(r'([A-Za-z]+)([-]?\d*)', '', processed_text, count=1).strip()
122
  processed_text = match.group(2)
123
  #elif detect:
124
+ # processed_text = processed_text.lstrip('-0123456789').strip() # Remove potential leftover numbers
125
 
126
  if processed_text:
127
  rate_str = f"{current_rate:+d}%"
 
279
  final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
280
  for segment in timed_audio_segments:
281
  final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
282
+
283
  # Apply the low-pass filter here
284
  cutoff_frequency = 3500 # 3.5 kHz (you can make this a user-configurable parameter later)
 
285
  filtered_final_audio = apply_low_pass_filter(final_audio, cutoff_frequency, final_audio.frame_rate)
286
 
287
  combined_audio_path = tempfile.mktemp(suffix=".mp3")
 
289
  filtered_final_audio.export(combined_audio_path, format="mp3")
290
  return combined_audio_path, None
291
 
 
292
  @spaces.GPU
293
  def tts_interface(transcript, voice, rate, pitch, speed_adjustment_factor):
294
  audio, warning = asyncio.run(transcript_to_speech(transcript, voice, rate, pitch, speed_adjustment_factor))
 
348
 
349
  if __name__ == "__main__":
350
  demo = asyncio.run(create_demo())
351
+ demo.launch()