Edge_TTS_NGHIA_transcript

Sleeping

App Files Files Community

cnph001 committed on May 17

Commit

41426a6

verified ·

1 Parent(s): cc5bb17

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -3

app.py CHANGED Viewed

@@ -14,6 +14,7 @@ from pydub.silence import detect_nonsilent
 from pydub import AudioSegment
 default_voice_short= ""
 def strip_silence(audio: AudioSegment, silence_thresh=-40, min_silence_len=100, silence_padding_ms=100):
     from pydub.silence import detect_nonsilent
@@ -66,6 +67,7 @@ async def get_voices():
 async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pitch):
     global default_voice_short  # Use the global variable
     """Generates audio for a text segment, handling voice prefixes, retries, and fallback."""
     print(f"Text: {text_segment}")  #Debug
     voice_map = {
@@ -132,8 +134,10 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
                     await communicate.save(audio_path)
                 audio = AudioSegment.from_mp3(audio_path)
-                audio = strip_silence(audio, silence_thresh=-40, min_silence_len=100)
                 stripped_path = tempfile.mktemp(suffix=".mp3")
                 audio.export(stripped_path, format="mp3")
                 return stripped_path
@@ -153,6 +157,7 @@ async def process_transcript_line(line, default_voice, rate, pitch):
     """Processes a single transcript line with HH:MM:SS.milliseconds timestamp and quoted text segments."""
     match = re.match(r'(\d{2}):(\d{2}):(\d{2}),(\d{3})\s+(.*)', line) # Modified timestamp regex
     if match:
         hours, minutes, seconds, milliseconds, text_parts = match.groups()
         start_time_ms = (
             int(hours) * 3600000 +
@@ -162,13 +167,18 @@ async def process_transcript_line(line, default_voice, rate, pitch):
         )
         audio_segments = []
         split_parts = re.split(r'(")', text_parts)  # Split by quote marks, keeping the quotes
         process_next = False
         for part in split_parts:
             if part == '"':
                 process_next = not process_next
                 continue
             if process_next and part.strip():
                 audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch)
                 if audio_path:
                     audio_segments.append(audio_path)

 from pydub import AudioSegment
 default_voice_short= ""
+check1 = False  # set global variable to check to see if process_text is begin of transcript line or not.
 def strip_silence(audio: AudioSegment, silence_thresh=-40, min_silence_len=100, silence_padding_ms=100):
     from pydub.silence import detect_nonsilent
 async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pitch):
     global default_voice_short  # Use the global variable
+    global check1   # Use the global variable
     """Generates audio for a text segment, handling voice prefixes, retries, and fallback."""
     print(f"Text: {text_segment}")  #Debug
     voice_map = {
                     await communicate.save(audio_path)
                 audio = AudioSegment.from_mp3(audio_path)
+                if check1:
+                    audio = strip_silence(audio, silence_thresh=-40, min_silence_len=100)  ##silence between sentences
+                else:
+                    audio = strip_silence(audio, silence_thresh=-40, min_silence_len=30)  ##less silence for mid-sentence segments
                 stripped_path = tempfile.mktemp(suffix=".mp3")
                 audio.export(stripped_path, format="mp3")
                 return stripped_path
     """Processes a single transcript line with HH:MM:SS.milliseconds timestamp and quoted text segments."""
     match = re.match(r'(\d{2}):(\d{2}):(\d{2}),(\d{3})\s+(.*)', line) # Modified timestamp regex
     if match:
+        count = 0
         hours, minutes, seconds, milliseconds, text_parts = match.groups()
         start_time_ms = (
             int(hours) * 3600000 +
         )
         audio_segments = []
         split_parts = re.split(r'(")', text_parts)  # Split by quote marks, keeping the quotes
+        # Initialize a variable to track if it's the first iteration
+        global check1   # Use the global variable
+        check1 = True
         process_next = False
         for part in split_parts:
             if part == '"':
                 process_next = not process_next
                 continue
             if process_next and part.strip():
+                    if check1:
+                        # Skip the first iteration logic here if needed
+                        check1 = True  # After first iteration, set it to True
                 audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch)
                 if audio_path:
                     audio_segments.append(audio_path)