Update app.py
Browse files
Restore working version without post filter
app.py
CHANGED
@@ -12,45 +12,6 @@ import soundfile as sf
|
|
12 |
import numpy as np
|
13 |
from pydub import AudioSegment
|
14 |
from pydub.playback import play
|
15 |
-
import math
|
16 |
-
from scipy.signal import butter, sosfiltfilt
|
17 |
-
|
18 |
-
|
19 |
-
def apply_low_pass_filter(audio_segment, cutoff_freq, order=6):
|
20 |
-
"""
|
21 |
-
Applies a low-pass filter to an AudioSegment.
|
22 |
-
|
23 |
-
Args:
|
24 |
-
audio_segment: The AudioSegment to filter.
|
25 |
-
cutoff_freq: The cutoff frequency in Hz.
|
26 |
-
order: The order of the Butterworth filter.
|
27 |
-
|
28 |
-
Returns:
|
29 |
-
A new AudioSegment with the filtered audio.
|
30 |
-
"""
|
31 |
-
segment_array = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
|
32 |
-
frame_rate = audio_segment.frame_rate
|
33 |
-
nyquist_freq = 0.5 * frame_rate
|
34 |
-
normalized_cutoff = cutoff_freq / nyquist_freq
|
35 |
-
sos = butter(order, normalized_cutoff, btype='low', output='sos')
|
36 |
-
filtered_array = sosfiltfilt(sos, segment_array)
|
37 |
-
|
38 |
-
sample_width = audio_segment.sample_width
|
39 |
-
dtype = None
|
40 |
-
if sample_width == 1:
|
41 |
-
dtype = np.int8
|
42 |
-
elif sample_width == 2:
|
43 |
-
dtype = np.int16
|
44 |
-
elif sample_width == 3:
|
45 |
-
dtype = np.int32 # Or potentially a custom type depending on the library
|
46 |
-
elif sample_width == 4:
|
47 |
-
dtype = np.int32
|
48 |
-
|
49 |
-
if dtype is not None:
|
50 |
-
return audio_segment._spawn(filtered_array.astype(dtype))
|
51 |
-
else:
|
52 |
-
raise ValueError(f"Unsupported sample width: {sample_width}")
|
53 |
-
|
54 |
|
55 |
|
56 |
def get_silence(duration_ms=1000):
|
@@ -128,19 +89,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
128 |
match = re.search(r'([A-Za-z]+)([-]?\d*)', processed_text)
|
129 |
if match:
|
130 |
prefix_pitch = match.group(1)
|
131 |
-
|
132 |
-
if number_str: # Check if the second group (number part) is not empty
|
133 |
-
try:
|
134 |
-
number = int(number_str)
|
135 |
-
# Now you can use the 'number' variable
|
136 |
-
print(f"Prefix: {prefix_pitch}, Number: {number}") # Example usage
|
137 |
-
except ValueError as e:
|
138 |
-
print(f"Error converting number string to int: {e}")
|
139 |
-
number = 0 # Or some other default value
|
140 |
-
else:
|
141 |
-
number = 0 # Or some other default value if no number is found
|
142 |
-
print(f"Prefix: {prefix_pitch}, No number found.") # Example handling
|
143 |
-
|
144 |
if prefix_pitch in voice_map:
|
145 |
current_pitch += number
|
146 |
#processed_text = re.sub(r'[A-Za-z]+-?\d+', '', processed_text, count=1).strip()
|
@@ -278,32 +227,18 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
|
|
278 |
os.remove(path)
|
279 |
except FileNotFoundError:
|
280 |
print(f"Warning: Audio file not found: {path}")
|
281 |
-
Rem1='''
|
282 |
-
if combined_line_audio and overall_duration_ms is not None and overall_duration_ms > 0 and total_generated_duration_ms > overall_duration_ms:
|
283 |
-
speed_factor = (total_generated_duration_ms / overall_duration_ms) * speed_adjustment_factor
|
284 |
-
if speed_factor > 0:
|
285 |
-
if speed_factor < 1.0:
|
286 |
-
speed_factor = 1.0
|
287 |
-
combined_line_audio = combined_line_audio.speedup(playback_speed=speed_factor)
|
288 |
|
289 |
-
if combined_line_audio:
|
290 |
-
timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
|
291 |
-
max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))
|
292 |
-
'''
|
293 |
if combined_line_audio and overall_duration_ms is not None and overall_duration_ms > 0 and total_generated_duration_ms > overall_duration_ms:
|
294 |
speed_factor = (total_generated_duration_ms / overall_duration_ms) * speed_adjustment_factor
|
295 |
if speed_factor > 0:
|
296 |
if speed_factor < 1.0:
|
297 |
speed_factor = 1.0
|
298 |
combined_line_audio = combined_line_audio.speedup(playback_speed=speed_factor)
|
299 |
-
# Apply low-pass filter AFTER speed adjustment
|
300 |
-
cutoff_freq = 7000.0 # Adjust as needed
|
301 |
-
combined_line_audio = apply_low_pass_filter(combined_line_audio, cutoff_freq)
|
302 |
|
303 |
if combined_line_audio:
|
304 |
timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
|
305 |
max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))
|
306 |
-
|
307 |
elif audio_paths:
|
308 |
for path in audio_paths:
|
309 |
if path:
|
@@ -311,38 +246,14 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
|
|
311 |
os.remove(path)
|
312 |
except FileNotFoundError:
|
313 |
pass # Clean up even if no timestamp
|
314 |
-
|
315 |
-
|
316 |
|
317 |
if not timed_audio_segments:
|
318 |
return None, "No processable audio segments found."
|
319 |
|
320 |
-
oldx= '''
|
321 |
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
322 |
for segment in timed_audio_segments:
|
323 |
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
324 |
-
'''
|
325 |
-
final_audio = AudioSegment.silent(duration=int(max_end_time_ms * 1000 + 500), frame_rate=24000)
|
326 |
-
|
327 |
-
for segment in timed_audio_segments:
|
328 |
-
start_position_ms = int(segment['start'] * 1000)
|
329 |
-
audio_to_overlay = segment['audio']
|
330 |
-
|
331 |
-
if start_position_ms + len(audio_to_overlay) > len(final_audio):
|
332 |
-
padding_needed = (start_position_ms + len(audio_to_overlay)) - len(final_audio)
|
333 |
-
final_audio += AudioSegment.silent(duration=padding_needed + 100, frame_rate=final_audio.frame_rate)
|
334 |
-
|
335 |
-
try:
|
336 |
-
final_audio = final_audio.overlay(audio_to_overlay, position=start_position_ms)
|
337 |
-
except Exception as e:
|
338 |
-
print(f"Error during overlay: {e}")
|
339 |
-
print(f" - Start position (ms): {start_position_ms}")
|
340 |
-
print(f" - Length of audio to overlay (ms): {len(audio_to_overlay)}")
|
341 |
-
print(f" - Length of final_audio (ms): {len(final_audio)}")
|
342 |
-
# Consider adding logic here to handle the error, e.g., truncating audio_to_overlay
|
343 |
-
# or skipping the overlay if it consistently fails.
|
344 |
|
345 |
-
|
346 |
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
347 |
final_audio.export(combined_audio_path, format="mp3")
|
348 |
return combined_audio_path, None
|
|
|
12 |
import numpy as np
|
13 |
from pydub import AudioSegment
|
14 |
from pydub.playback import play
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
|
17 |
def get_silence(duration_ms=1000):
|
|
|
89 |
match = re.search(r'([A-Za-z]+)([-]?\d*)', processed_text)
|
90 |
if match:
|
91 |
prefix_pitch = match.group(1)
|
92 |
+
# The second capture group is `[-]?\d*`, so it may be an empty string (prefix
# with no number) or a bare "-"; int() raises ValueError on both. Fall back
# to 0 so the pitch offset added below becomes a no-op instead of crashing.
try:
    number = int(match.group(2))
except ValueError:
    number = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
if prefix_pitch in voice_map:
|
94 |
current_pitch += number
|
95 |
#processed_text = re.sub(r'[A-Za-z]+-?\d+', '', processed_text, count=1).strip()
|
|
|
227 |
os.remove(path)
|
228 |
except FileNotFoundError:
|
229 |
print(f"Warning: Audio file not found: {path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
|
|
|
|
|
|
|
|
|
231 |
if combined_line_audio and overall_duration_ms is not None and overall_duration_ms > 0 and total_generated_duration_ms > overall_duration_ms:
|
232 |
speed_factor = (total_generated_duration_ms / overall_duration_ms) * speed_adjustment_factor
|
233 |
if speed_factor > 0:
|
234 |
if speed_factor < 1.0:
|
235 |
speed_factor = 1.0
|
236 |
combined_line_audio = combined_line_audio.speedup(playback_speed=speed_factor)
|
|
|
|
|
|
|
237 |
|
238 |
if combined_line_audio:
|
239 |
timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
|
240 |
max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))
|
241 |
+
|
242 |
elif audio_paths:
|
243 |
for path in audio_paths:
|
244 |
if path:
|
|
|
246 |
os.remove(path)
|
247 |
except FileNotFoundError:
|
248 |
pass # Clean up even if no timestamp
|
|
|
|
|
249 |
|
250 |
if not timed_audio_segments:
|
251 |
return None, "No processable audio segments found."
|
252 |
|
|
|
253 |
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
254 |
for segment in timed_audio_segments:
|
255 |
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
|
|
|
257 |
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
258 |
final_audio.export(combined_audio_path, format="mp3")
|
259 |
return combined_audio_path, None
|