Update app.py
Browse files
app.py
CHANGED
@@ -41,25 +41,24 @@ async def get_voices():
|
|
41 |
|
42 |
# Text-to-speech function for a single paragraph with SS handling
|
43 |
async def paragraph_to_speech(text, voice, rate, pitch):
|
44 |
-
|
45 |
-
#voice1F ="en-US-EmmaNeural - en-US (Female)"
|
46 |
voice1F ="en-GB-SoniaNeural - en-GB (Female)"
|
47 |
-
#voice2 = "it-IT-GiuseppeMultilingualNeural - it-IT (Male)"
|
48 |
voice2 = "en-GB-RyanNeural - en-GB (Male)"
|
49 |
voice2F = "en-US-JennyNeural - en-US (Female)"
|
50 |
-
|
51 |
voice3F = "en-HK-YanNeural - en-HK (Female)"
|
52 |
-
voice4 = "en-GB-
|
|
|
53 |
voice5 = "en-GB-RyanNeural - en-GB (Male)" #Old Man
|
|
|
54 |
|
55 |
if not text.strip():
|
56 |
return None, [] # Return None for audio path and empty list for silence
|
57 |
|
58 |
audio_segments = []
|
59 |
silence_durations = []
|
60 |
-
parts = re.split(r'(SS\d+\.?\d*)', text)
|
61 |
-
for part in parts:
|
62 |
-
|
63 |
if re.match(r'SS\d+\.?\d*', part): #Check if there is Silence tag
|
64 |
# At the top of your file:
|
65 |
#SILENCE_PATH = Path(__file__).parent.absolute() / "Silence.mp3"
|
@@ -76,37 +75,44 @@ async def paragraph_to_speech(text, voice, rate, pitch):
|
|
76 |
silence_file_path = get_silence(silence_duration) # Store the returned filename
|
77 |
audio_segments.append(silence_file_path) # Use the stored filename
|
78 |
elif part.strip():
|
|
|
79 |
processed_text = part
|
80 |
current_voice = voice
|
81 |
current_rate = rate
|
82 |
current_pitch = pitch
|
83 |
if part.startswith("1F"):
|
84 |
-
|
85 |
current_voice = voice1F.split(" - ")[0]
|
86 |
current_pitch = 25
|
87 |
elif part.startswith("2F"):
|
88 |
-
|
89 |
current_voice = voice2F.split(" - ")[0]
|
90 |
elif part.startswith("3F"):
|
91 |
-
|
92 |
current_voice = voice3F.split(" - ")[0]
|
|
|
|
|
|
|
93 |
elif part.startswith("1M"):
|
94 |
-
|
95 |
current_voice = voice1.split(" - ")[0]
|
96 |
elif part.startswith("2M"):
|
97 |
-
|
98 |
current_voice = voice2.split(" - ")[0]
|
99 |
elif part.startswith("3M"):
|
100 |
-
|
101 |
current_voice = voice3.split(" - ")[0]
|
102 |
-
elif part.startswith("
|
103 |
-
|
104 |
-
current_voice = voice4.split(" - ")[0]
|
105 |
-
elif part.startswith("1O"):
|
106 |
-
|
107 |
current_voice = voice5.split(" - ")[0]
|
108 |
current_pitch = -20
|
109 |
current_rate = -10
|
|
|
|
|
|
|
110 |
else:
|
111 |
# Use selected voice, or fallback to default
|
112 |
#voice_short_name = (voice or default_voice).split(" - ")[0]
|
@@ -125,7 +131,8 @@ async def paragraph_to_speech(text, voice, rate, pitch):
|
|
125 |
#processed_text = new_text[2:] #cut out the prefix like 1F, 3M etc
|
126 |
processed_text = new_text[len(prefix):] # Dynamically remove the prefix part
|
127 |
else:
|
128 |
-
|
|
|
129 |
rate_str = f"{current_rate:+d}%"
|
130 |
#if part[2:4].isdigit():
|
131 |
# processed_text = part[4:]
|
@@ -192,10 +199,12 @@ async def create_demo():
|
|
192 |
voices = await get_voices()
|
193 |
default_voice = "en-US-AndrewMultilingualNeural - en-US (Male)" # 👈 Pick one of the available voices
|
194 |
description = """
|
195 |
-
Default =
|
196 |
-
|
197 |
-
|
198 |
-
|
|
|
|
|
199 |
"""
|
200 |
|
201 |
demo = gr.Interface(
|
@@ -210,7 +219,7 @@ async def create_demo():
|
|
210 |
gr.Audio(label="Generated Audio", type="filepath"),
|
211 |
gr.Markdown(label="Warning", visible=False)
|
212 |
],
|
213 |
-
title="
|
214 |
description=description,
|
215 |
article="Process text paragraph by paragraph for smoother output and insert silence markers.",
|
216 |
analytics_enabled=False,
|
|
|
41 |
|
42 |
# Text-to-speech function for a single paragraph with SS handling
|
43 |
async def paragraph_to_speech(text, voice, rate, pitch):
|
44 |
+
voice1 = "en-AU-WilliamNeural - en-AU (Male)"
|
|
|
45 |
voice1F ="en-GB-SoniaNeural - en-GB (Female)"
|
|
|
46 |
voice2 = "en-GB-RyanNeural - en-GB (Male)"
|
47 |
voice2F = "en-US-JennyNeural - en-US (Female)"
|
48 |
+
voice3 ="en-US-BrianMultilingualNeural - en-US (Male)" #good for reading
|
49 |
voice3F = "en-HK-YanNeural - en-HK (Female)"
|
50 |
+
voice4 = "en-GB-ThomasNeural - en-GB (Male)"
|
51 |
+
voice4F ="en-US-EmmaNeural - en-US (Female)"
|
52 |
voice5 = "en-GB-RyanNeural - en-GB (Male)" #Old Man
|
53 |
+
voice6 = "en-GB-MaisieNeural - en-GB (Female)" #Child
|
54 |
|
55 |
if not text.strip():
|
56 |
return None, [] # Return None for audio path and empty list for silence
|
57 |
|
58 |
audio_segments = []
|
59 |
silence_durations = []
|
60 |
+
parts = re.split(r'(SS\d+\.?\d*)', text) # split the text around SS## silence tags; the capture group keeps each tag as its own part
|
61 |
+
for part in parts:
|
|
|
62 |
if re.match(r'SS\d+\.?\d*', part): #Check if there is Silence tag
|
63 |
# At the top of your file:
|
64 |
#SILENCE_PATH = Path(__file__).parent.absolute() / "Silence.mp3"
|
|
|
75 |
silence_file_path = get_silence(silence_duration) # Store the returned filename
|
76 |
audio_segments.append(silence_file_path) # Use the stored filename
|
77 |
elif part.strip():
|
78 |
+
detect=0
|
79 |
processed_text = part
|
80 |
current_voice = voice
|
81 |
current_rate = rate
|
82 |
current_pitch = pitch
|
83 |
if part.startswith("1F"):
|
84 |
+
detect=1
|
85 |
current_voice = voice1F.split(" - ")[0]
|
86 |
current_pitch = 25
|
87 |
elif part.startswith("2F"):
|
88 |
+
detect=1
|
89 |
current_voice = voice2F.split(" - ")[0]
|
90 |
elif part.startswith("3F"):
|
91 |
+
detect=1
|
92 |
current_voice = voice3F.split(" - ")[0]
|
93 |
+
elif part.startswith("4F"):
|
94 |
+
#detect=1
|
95 |
+
current_voice = voice4F.split(" - ")[0]
|
96 |
elif part.startswith("1M"):
|
97 |
+
detect=1
|
98 |
current_voice = voice1.split(" - ")[0]
|
99 |
elif part.startswith("2M"):
|
100 |
+
detect=1
|
101 |
current_voice = voice2.split(" - ")[0]
|
102 |
elif part.startswith("3M"):
|
103 |
+
detect=1
|
104 |
current_voice = voice3.split(" - ")[0]
|
105 |
+
elif part.startswith("4M"):
|
106 |
+
detect=1
|
107 |
+
current_voice = voice4.split(" - ")[0]
|
108 |
+
elif part.startswith("1O"): # Old man voice
|
109 |
+
detect=1
|
110 |
current_voice = voice5.split(" - ")[0]
|
111 |
current_pitch = -20
|
112 |
current_rate = -10
|
113 |
+
elif part.startswith("1C"): #Child voice
|
114 |
+
detect=1
|
115 |
+
current_voice = voice6.split(" - ")[0]
|
116 |
else:
|
117 |
# Use selected voice, or fallback to default
|
118 |
#voice_short_name = (voice or default_voice).split(" - ")[0]
|
|
|
131 |
#processed_text = new_text[2:] #cut out the prefix like 1F, 3M etc
|
132 |
processed_text = new_text[len(prefix):] # Dynamically remove the prefix part
|
133 |
else:
|
134 |
+
if detect:
|
135 |
+
processed_text = part[2:]
|
136 |
rate_str = f"{current_rate:+d}%"
|
137 |
#if part[2:4].isdigit():
|
138 |
# processed_text = part[4:]
|
|
|
199 |
voices = await get_voices()
|
200 |
default_voice = "en-US-AndrewMultilingualNeural - en-US (Male)" # 👈 Pick one of the available voices
|
201 |
description = """
|
202 |
+
Default = <b>"en-US-AndrewMultilingualNeural - en-US (Male),
|
203 |
+
other voices 1F:en-GB-SoniaNeural, 2F:en-US-JennyNeural, 3F:en-HK-YanNeural, 4F:en-US-EmmaNeural
|
204 |
+
1M:en-AU-WilliamNeural, 2M:en-GB-RyanNeural, 3M:en-US-BrianMultilingualNeural, 4M:en-GB-ThomasNeural
|
205 |
+
1C: en-GB-MaisieNeural (Childvoice), 1O = en-GB-RyanNeural (OldMan)"</b>
|
206 |
+
You can insert silence using the marker 'SS##' example "SS2.0"
|
207 |
+
Enter your text, select a voice, and adjust the speech rate and pitch. Can also set like 1F-20 or 1M24.
|
208 |
"""
|
209 |
|
210 |
demo = gr.Interface(
|
|
|
219 |
gr.Audio(label="Generated Audio", type="filepath"),
|
220 |
gr.Markdown(label="Warning", visible=False)
|
221 |
],
|
222 |
+
title="TTS using Edge Engine.. ENGLISH!",
|
223 |
description=description,
|
224 |
article="Process text paragraph by paragraph for smoother output and insert silence markers.",
|
225 |
analytics_enabled=False,
|