NGHIA_Test_Edge_TTS_transcript_w_timestamp

Sleeping

App Files Files Community

cnph001 committed on Apr 27

Commit

aa5ea31

verified ·

1 Parent(s): f067030

Add pitch to Voice tag, e.g. 1F20 or 1M-20

Browse files

Files changed (1) hide show

app.py +12 -3

app.py CHANGED Viewed

@@ -45,6 +45,7 @@ async def paragraph_to_speech(text, voice, rate, pitch):
     #voice1F ="en-US-EmmaNeural - en-US (Female)"
     voice1F ="en-GB-SoniaNeural - en-GB (Female)"
     voice2 = "it-IT-GiuseppeMultilingualNeural - it-IT (Male)"
     voice2F = "en-US-JennyNeural - en-US (Female)"
     voice1 = "en-AU-WilliamNeural - en-AU (Male)"
     voice3F = "en-HK-YanNeural - en-HK (Female)"
@@ -58,7 +59,13 @@ async def paragraph_to_speech(text, voice, rate, pitch):
     silence_durations = []
     parts = re.split(r'(SS\d+\.?\d*)', text)
     for part in parts:
-        if re.match(r'SS\d+\.?\d*', part):
             # At the top of your file:
             #SILENCE_PATH = Path(__file__).parent.absolute() / "Silence.mp3"
             # At the top of your file (assuming you uploaded "Silence.mp3" to root)
@@ -73,7 +80,6 @@ async def paragraph_to_speech(text, voice, rate, pitch):
             print(f"Silence.mp3 file NOT FOUND")
             silence_file_path = get_silence(silence_duration)  # Store the returned filename
             audio_segments.append(silence_file_path)  # Use the stored filename
         elif part.strip():
             processed_text = part
             current_voice = voice
@@ -112,6 +118,9 @@ async def paragraph_to_speech(text, voice, rate, pitch):
                 current_voice = (voice or default_voice).split(" - ")[0]
                 processed_text=part[:]
             rate_str = f"{current_rate:+d}%"
             pitch_str = f"{current_pitch:+d}Hz"
             communicate = edge_tts.Communicate(processed_text, current_voice, rate=rate_str, pitch=pitch_str)
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
@@ -176,7 +185,7 @@ async def create_demo():
     description = """
     Default = male, other voices 1F:US_Emma, 2F:US_Jenny, 3F:HK_Yan, 1M:AU_Will, 2M:IT_Guiseppe,3M:US_Brian,  1C: Childvoice, 1O = OldMan
     You can insert silence using the marker 'SS' (This will insert a Silence period from the Silence.mp3 file).
-    Enter your text, select a voice, and adjust the speech rate and pitch.
     The application will process your text paragraph by paragraph (separated by two blank lines).
     """

     #voice1F ="en-US-EmmaNeural - en-US (Female)"
     voice1F ="en-GB-SoniaNeural - en-GB (Female)"
     voice2 = "it-IT-GiuseppeMultilingualNeural - it-IT (Male)"
+    voice2 = "en-GB-RyanNeural - en-GB (Male)"
     voice2F = "en-US-JennyNeural - en-US (Female)"
     voice1 = "en-AU-WilliamNeural - en-AU (Male)"
     voice3F = "en-HK-YanNeural - en-HK (Female)"
     silence_durations = []
     parts = re.split(r'(SS\d+\.?\d*)', text)
     for part in parts:
+        if (re.search(r'-?\d+', part)):   #if there are any digit following the voice tag, eg 1F20 or 1F-20
+            match = re.search(r'-?\d+', part)
+            pitch = match.group()         #Set pitch to set value as noted in the tag
+            # Remove only the first integer found
+            part = re.sub(r'-?\d+', '', part1, count=1).strip()  #cut out the pitch int from text part
+        if re.match(r'SS\d+\.?\d*', part):  #Check if there is Silence tag
             # At the top of your file:
             #SILENCE_PATH = Path(__file__).parent.absolute() / "Silence.mp3"
             # At the top of your file (assuming you uploaded "Silence.mp3" to root)
             print(f"Silence.mp3 file NOT FOUND")
             silence_file_path = get_silence(silence_duration)  # Store the returned filename
             audio_segments.append(silence_file_path)  # Use the stored filename
         elif part.strip():
             processed_text = part
             current_voice = voice
                 current_voice = (voice or default_voice).split(" - ")[0]
                 processed_text=part[:]
             rate_str = f"{current_rate:+d}%"
+            if  part[2:4].isdigit():
+                processed_text = part[4:]
+                pitch = int(part[2:4])
             pitch_str = f"{current_pitch:+d}Hz"
             communicate = edge_tts.Communicate(processed_text, current_voice, rate=rate_str, pitch=pitch_str)
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
     description = """
     Default = male, other voices 1F:US_Emma, 2F:US_Jenny, 3F:HK_Yan, 1M:AU_Will, 2M:IT_Guiseppe,3M:US_Brian,  1C: Childvoice, 1O = OldMan
     You can insert silence using the marker 'SS' (This will insert a Silence period from the Silence.mp3 file).
+    Enter your text, select a voice, and adjust the speech rate and pitch. Can also set like 1F-20  or 1M24
     The application will process your text paragraph by paragraph (separated by two blank lines).
     """