Spaces:
Sleeping
Sleeping
Update app.py
Browse files: fixing silence
app.py
CHANGED
@@ -5,12 +5,29 @@ import asyncio
|
|
5 |
import tempfile
|
6 |
import os
|
7 |
import re # Import the regular expression module
|
8 |
-
|
9 |
-
|
10 |
-
#
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# Text-to-speech function for a single paragraph with SS handling
|
16 |
async def paragraph_to_speech(text, voice, rate, pitch):
|
@@ -27,15 +44,15 @@ async def paragraph_to_speech(text, voice, rate, pitch):
|
|
27 |
return None, [] # Return None for audio path and empty list for silence
|
28 |
|
29 |
audio_segments = []
|
30 |
-
|
31 |
parts = re.split(r'(SS\d+\.?\d*)', text)
|
32 |
|
33 |
for part in parts:
|
34 |
if re.match(r'SS\d+\.?\d*', part):
|
35 |
try:
|
36 |
silence_duration = float(part[2:])
|
37 |
-
|
38 |
-
audio_segments.append(
|
39 |
except ValueError:
|
40 |
print(f"Warning: Invalid silence duration format: {part}")
|
41 |
elif part.strip():
|
@@ -71,8 +88,6 @@ async def paragraph_to_speech(text, voice, rate, pitch):
|
|
71 |
current_pitch = -30
|
72 |
current_rate = -20
|
73 |
else:
|
74 |
-
# Use selected voice, or fallback to default
|
75 |
-
#voice_short_name = (voice or default_voice).split(" - ")[0]
|
76 |
current_voice = (voice or default_voice).split(" - ")[0]
|
77 |
processed_text=part[:]
|
78 |
rate_str = f"{current_rate:+d}%"
|
@@ -83,6 +98,7 @@ async def paragraph_to_speech(text, voice, rate, pitch):
|
|
83 |
await communicate.save(tmp_path)
|
84 |
audio_segments.append(tmp_path)
|
85 |
else:
|
|
|
86 |
audio_segments.append(None) # Empty string
|
87 |
|
88 |
return audio_segments, silence_durations
|
|
|
5 |
import tempfile
|
6 |
import os
|
7 |
import re # Import the regular expression module
|
8 |
+
import struct
|
9 |
+
|
10 |
+
# Function to create a temporary silent MP3 file (basic approximation)
|
11 |
+
def create_silent_mp3(duration, temp_dir):
    """Write a silent MP3 covering ``duration`` seconds and return its path.

    The file is built from identical MPEG-1 Layer III frames whose side
    information and main data are all zero bits, which decode to digital
    silence. No encoder library is needed for this.

    Args:
        duration: Requested length of silence in seconds (int or float).
        temp_dir: Directory in which the file is created.

    Returns:
        Path of the written file, ``<temp_dir>/silent_<duration>.mp3``.

    Notes:
        * Length is rounded up to a whole number of frames (1152 samples,
          about 26 ms each). At least one frame is always written so the
          file is never an empty or truncated stream, even for a zero or
          negative ``duration``.
        * The frame header advertises 128 kbps / 44.1 kHz, matching the
          header bytes this function has always written.
          NOTE(review): if the result is byte-concatenated with speech
          clips that use a different sample rate, confirm the player or
          merger tolerates the format change.
    """
    sample_rate = 44100
    bitrate = 128000
    samples_per_frame = 1152  # fixed for MPEG-1 Layer III

    # 0xFFFB9000: sync word, MPEG-1 Layer III, no CRC, 128 kbps, 44.1 kHz,
    # no padding bit.
    frame_header = b'\xff\xfb\x90\x00'
    # Layer III frame length in bytes is 144 * bitrate / sample_rate,
    # truncated when the padding bit is clear (417 here).
    frame_size = 144 * bitrate // sample_rate
    silent_frame = frame_header + b'\x00' * (frame_size - len(frame_header))

    num_samples = int(duration * sample_rate)
    # Ceiling division without importing math; clamp to one frame minimum.
    num_frames = max(1, -(-num_samples // samples_per_frame))

    temp_mp3_path = os.path.join(temp_dir, f"silent_{duration}.mp3")
    with open(temp_mp3_path, 'wb') as f:
        f.write(silent_frame * num_frames)

    return temp_mp3_path
|
31 |
|
32 |
# Text-to-speech function for a single paragraph with SS handling
|
33 |
async def paragraph_to_speech(text, voice, rate, pitch):
|
|
|
44 |
return None, [] # Return None for audio path and empty list for silence
|
45 |
|
46 |
audio_segments = []
|
47 |
+
temp_dir = tempfile.gettempdir()
|
48 |
parts = re.split(r'(SS\d+\.?\d*)', text)
|
49 |
|
50 |
for part in parts:
|
51 |
if re.match(r'SS\d+\.?\d*', part):
|
52 |
try:
|
53 |
silence_duration = float(part[2:])
|
54 |
+
silent_mp3_path = create_silent_mp3(silence_duration, temp_dir)
|
55 |
+
audio_segments.append(silent_mp3_path)
|
56 |
except ValueError:
|
57 |
print(f"Warning: Invalid silence duration format: {part}")
|
58 |
elif part.strip():
|
|
|
88 |
current_pitch = -30
|
89 |
current_rate = -20
|
90 |
else:
|
|
|
|
|
91 |
current_voice = (voice or default_voice).split(" - ")[0]
|
92 |
processed_text=part[:]
|
93 |
rate_str = f"{current_rate:+d}%"
|
|
|
98 |
await communicate.save(tmp_path)
|
99 |
audio_segments.append(tmp_path)
|
100 |
else:
|
101 |
+
#pass # Ignore empty parts
|
102 |
audio_segments.append(None) # Empty string
|
103 |
|
104 |
return audio_segments, silence_durations
|