Update app.py
Browse files — Multi send to Edge to speed up
app.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
## Low pass filtering applied to final Audio
|
2 |
-
|
3 |
import spaces
|
4 |
import gradio as gr
|
5 |
import edge_tts
|
@@ -36,9 +34,9 @@ def apply_low_pass_filter(audio_segment, cutoff_freq, sample_rate, order=5):
|
|
36 |
|
37 |
filtered_data_int16 = (filtered_data * (2**15 - 1)).astype(np.int16)
|
38 |
filtered_audio = AudioSegment(filtered_data_int16.tobytes(),
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
return filtered_audio
|
43 |
|
44 |
|
@@ -113,7 +111,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
113 |
processed_text = processed_text[len(prefix):].strip() #this removes the Prefix and leave only number or text after it.
|
114 |
break
|
115 |
#match = re.search(r'([A-Za-z]+)-?(\d+)', processed_text)
|
116 |
-
match = re.search(r"^(-?\d+)\s*(.*)", processed_text)
|
117 |
if match:
|
118 |
#prefix_pitch = match.group(1)
|
119 |
number = match.group(1)
|
@@ -123,7 +121,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
123 |
#processed_text = re.sub(r'([A-Za-z]+)([-]?\d*)', '', processed_text, count=1).strip()
|
124 |
processed_text = match.group(2)
|
125 |
#elif detect:
|
126 |
-
#
|
127 |
|
128 |
if processed_text:
|
129 |
rate_str = f"{current_rate:+d}%"
|
@@ -281,10 +279,9 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
|
|
281 |
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
282 |
for segment in timed_audio_segments:
|
283 |
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
284 |
-
|
285 |
# Apply the low-pass filter here
|
286 |
cutoff_frequency = 3500 # 3.5 kHz (you can make this a user-configurable parameter later)
|
287 |
-
print(f"Applying Low pass filter, cut off frequency: {cutoff_frequency}")
|
288 |
filtered_final_audio = apply_low_pass_filter(final_audio, cutoff_frequency, final_audio.frame_rate)
|
289 |
|
290 |
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
@@ -292,7 +289,6 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
|
|
292 |
filtered_final_audio.export(combined_audio_path, format="mp3")
|
293 |
return combined_audio_path, None
|
294 |
|
295 |
-
|
296 |
@spaces.GPU
|
297 |
def tts_interface(transcript, voice, rate, pitch, speed_adjustment_factor):
|
298 |
audio, warning = asyncio.run(transcript_to_speech(transcript, voice, rate, pitch, speed_adjustment_factor))
|
@@ -352,4 +348,4 @@ async def create_demo():
|
|
352 |
|
353 |
if __name__ == "__main__":
|
354 |
demo = asyncio.run(create_demo())
|
355 |
-
demo.launch()
|
|
|
|
|
|
|
1 |
import spaces
|
2 |
import gradio as gr
|
3 |
import edge_tts
|
|
|
34 |
|
35 |
filtered_data_int16 = (filtered_data * (2**15 - 1)).astype(np.int16)
|
36 |
filtered_audio = AudioSegment(filtered_data_int16.tobytes(),
|
37 |
+
frame_rate=sample_rate,
|
38 |
+
sample_width=audio_segment.sample_width,
|
39 |
+
channels=audio_segment.channels)
|
40 |
return filtered_audio
|
41 |
|
42 |
|
|
|
111 |
processed_text = processed_text[len(prefix):].strip() #this removes the Prefix and leave only number or text after it.
|
112 |
break
|
113 |
#match = re.search(r'([A-Za-z]+)-?(\d+)', processed_text)
|
114 |
+
match = re.search(r"^(-?\d+)\s*(.*)", processed_text)
|
115 |
if match:
|
116 |
#prefix_pitch = match.group(1)
|
117 |
number = match.group(1)
|
|
|
121 |
#processed_text = re.sub(r'([A-Za-z]+)([-]?\d*)', '', processed_text, count=1).strip()
|
122 |
processed_text = match.group(2)
|
123 |
#elif detect:
|
124 |
+
# processed_text = processed_text.lstrip('-0123456789').strip() # Remove potential leftover numbers
|
125 |
|
126 |
if processed_text:
|
127 |
rate_str = f"{current_rate:+d}%"
|
|
|
279 |
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
280 |
for segment in timed_audio_segments:
|
281 |
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
282 |
+
|
283 |
# Apply the low-pass filter here
|
284 |
cutoff_frequency = 3500 # 3.5 kHz (you can make this a user-configurable parameter later)
|
|
|
285 |
filtered_final_audio = apply_low_pass_filter(final_audio, cutoff_frequency, final_audio.frame_rate)
|
286 |
|
287 |
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
|
|
289 |
filtered_final_audio.export(combined_audio_path, format="mp3")
|
290 |
return combined_audio_path, None
|
291 |
|
|
|
292 |
@spaces.GPU
|
293 |
def tts_interface(transcript, voice, rate, pitch, speed_adjustment_factor):
|
294 |
audio, warning = asyncio.run(transcript_to_speech(transcript, voice, rate, pitch, speed_adjustment_factor))
|
|
|
348 |
|
349 |
if __name__ == "__main__":
|
350 |
demo = asyncio.run(create_demo())
|
351 |
+
demo.launch()
|