Edge_TTS_NGHIA_transcript

Sleeping

cnph001 committed on May 13

Commit

dd400d4

verified ·

1 Parent(s): 74db9d2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -62,9 +62,17 @@ async def get_voices():
     voices = await edge_tts.list_voices()
     return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
 async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pitch):
-    """Generates audio for a text segment, handling permanent and temporary voice changes with new rules."""
     # Define the voice map for reference
     voice_map = {
         "1F": ("en-GB-SoniaNeural", 25, 0),
@@ -112,6 +120,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
                 current_pitch += pitch_adj
                 current_rate += rate_adj
                 result.append(f"<perm>{prefix}P")  # Mark as permanent change
             elif pitch_modifier:
                 # Temporary pitch adjustment (e.g., "4V-10" or "4V+5")
                 pitch_adjustment = int(pitch_modifier)
@@ -158,6 +167,8 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
     return None
 async def process_transcript_line(line, default_voice, rate, pitch):
     """Processes a single transcript line with HH:MM:SS.milliseconds timestamp and quoted text segments."""
     match = re.match(r'(\d{2}):(\d{2}):(\d{2}),(\d{3})\s+(.*)', line) # Modified timestamp regex

     voices = await edge_tts.list_voices()
     return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
+## EDIT
+import re
+import tempfile
+from pydub import AudioSegment
+import asyncio
 async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pitch):
+    """Generates audio for a text segment, handling permanent and temporary voice changes."""
     # Define the voice map for reference
     voice_map = {
         "1F": ("en-GB-SoniaNeural", 25, 0),
                 current_pitch += pitch_adj
                 current_rate += rate_adj
                 result.append(f"<perm>{prefix}P")  # Mark as permanent change
+                temp_voice = None  # Clear temporary voice changes
             elif pitch_modifier:
                 # Temporary pitch adjustment (e.g., "4V-10" or "4V+5")
                 pitch_adjustment = int(pitch_modifier)
     return None
+### END EDIT
 async def process_transcript_line(line, default_voice, rate, pitch):
     """Processes a single transcript line with HH:MM:SS.milliseconds timestamp and quoted text segments."""
     match = re.match(r'(\d{2}):(\d{2}):(\d{2}),(\d{3})\s+(.*)', line) # Modified timestamp regex