cnph001 committed on
Commit
7042e46
·
verified ·
1 Parent(s): ef4c8b8

Update app.py

Browse files

fixing silence

Files changed (1) hide show
  1. app.py +27 -11
app.py CHANGED
@@ -5,12 +5,29 @@ import asyncio
5
  import tempfile
6
  import os
7
  import re # Import the regular expression module
8
-
9
-
10
- # Get all available voices
11
async def get_voices():
    """Return a mapping from a display label to each voice's short name.

    Labels have the form ``"<ShortName> - <Locale> (<Gender>)"`` and are
    built from the entries returned by ``edge_tts.list_voices()``.
    """
    available = await edge_tts.list_voices()
    labelled = {}
    for entry in available:
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Text-to-speech function for a single paragraph with SS handling
16
  async def paragraph_to_speech(text, voice, rate, pitch):
@@ -27,15 +44,15 @@ async def paragraph_to_speech(text, voice, rate, pitch):
27
  return None, [] # Return None for audio path and empty list for silence
28
 
29
  audio_segments = []
30
- silence_durations = []
31
  parts = re.split(r'(SS\d+\.?\d*)', text)
32
 
33
  for part in parts:
34
  if re.match(r'SS\d+\.?\d*', part):
35
  try:
36
  silence_duration = float(part[2:])
37
- silence_durations.append(silence_duration)
38
- audio_segments.append(None) # Placeholder for silence
39
  except ValueError:
40
  print(f"Warning: Invalid silence duration format: {part}")
41
  elif part.strip():
@@ -71,8 +88,6 @@ async def paragraph_to_speech(text, voice, rate, pitch):
71
  current_pitch = -30
72
  current_rate = -20
73
  else:
74
- # Use selected voice, or fallback to default
75
- #voice_short_name = (voice or default_voice).split(" - ")[0]
76
  current_voice = (voice or default_voice).split(" - ")[0]
77
  processed_text=part[:]
78
  rate_str = f"{current_rate:+d}%"
@@ -83,6 +98,7 @@ async def paragraph_to_speech(text, voice, rate, pitch):
83
  await communicate.save(tmp_path)
84
  audio_segments.append(tmp_path)
85
  else:
 
86
  audio_segments.append(None) # Empty string
87
 
88
  return audio_segments, silence_durations
 
5
  import tempfile
6
  import os
7
  import re # Import the regular expression module
8
+ import struct
9
+
10
+ # Function to create a temporary silent MP3 file (basic approximation)
11
def create_silent_mp3(duration, temp_dir):
    """Create a silent MP3 file of about ``duration`` seconds in ``temp_dir``.

    The file is a run of identical MPEG-1 Layer III frames whose payload is
    all zero bytes. Zeroed side information declares no main data, so each
    frame decodes to silence. This needs no encoder library, unlike the
    previous approach of appending raw PCM zeros to a fake header, which did
    not yield a decodable MP3.

    Each call writes to a fresh, uniquely named file so that concurrent calls
    with the same duration never share (or delete) each other's output.

    Args:
        duration: Length of the silence in seconds. The result is rounded up
            to a whole number of frames (1152 samples, ~26 ms each). Values
            <= 0 produce an empty file.
        temp_dir: Existing directory in which the file is created.

    Returns:
        Path of the newly created ``.mp3`` file. The caller owns the file and
        is responsible for removing it.
    """
    frame_rate = 44100  # Hz; must agree with the frame header below
    bit_rate = 128000  # bit/s; must agree with the frame header below
    samples_per_frame = 1152  # fixed for MPEG-1 Layer III

    # 0xFFFB9000: sync word, MPEG-1, Layer III, no CRC, 128 kbit/s, 44.1 kHz,
    # no padding, stereo.
    frame_header = b'\xff\xfb\x90\x00'
    # Length in bytes of an unpadded Layer III frame (417 for these settings).
    frame_size = 144 * bit_rate // frame_rate
    silent_frame = frame_header + b'\x00' * (frame_size - len(frame_header))

    num_samples = max(0, int(duration * frame_rate))
    # Ceiling division: never produce less silence than requested.
    num_frames = -(-num_samples // samples_per_frame)

    fd, temp_mp3_path = tempfile.mkstemp(
        prefix=f"silent_{duration}_", suffix=".mp3", dir=temp_dir
    )
    with os.fdopen(fd, 'wb') as f:
        f.write(silent_frame * num_frames)

    return temp_mp3_path
31
 
32
  # Text-to-speech function for a single paragraph with SS handling
33
  async def paragraph_to_speech(text, voice, rate, pitch):
 
44
  return None, [] # Return None for audio path and empty list for silence
45
 
46
  audio_segments = []
47
+ temp_dir = tempfile.gettempdir()
48
  parts = re.split(r'(SS\d+\.?\d*)', text)
49
 
50
  for part in parts:
51
  if re.match(r'SS\d+\.?\d*', part):
52
  try:
53
  silence_duration = float(part[2:])
54
+ silent_mp3_path = create_silent_mp3(silence_duration, temp_dir)
55
+ audio_segments.append(silent_mp3_path)
56
  except ValueError:
57
  print(f"Warning: Invalid silence duration format: {part}")
58
  elif part.strip():
 
88
  current_pitch = -30
89
  current_rate = -20
90
  else:
 
 
91
  current_voice = (voice or default_voice).split(" - ")[0]
92
  processed_text=part[:]
93
  rate_str = f"{current_rate:+d}%"
 
98
  await communicate.save(tmp_path)
99
  audio_segments.append(tmp_path)
100
  else:
101
+ #pass # Ignore empty parts
102
  audio_segments.append(None) # Empty string
103
 
104
  return audio_segments, silence_durations