Update app.py
app.py CHANGED
@@ -17,12 +17,6 @@ default_voice_short= ""
 check1 = False  # global flag: is process_text at the beginning of a transcript line or not?
 
 def strip_silence(audio: AudioSegment, silence_thresh=-40, min_silence_len=100, silence_padding_ms=100):
-    # Debugging: print input arguments
-    print(f"Audio length: {len(audio)} ms")
-    print(f"Silence threshold: {silence_thresh} dB")
-    print(f"Minimum silence length: {min_silence_len} ms")
-    print(f"Silence padding: {silence_padding_ms} ms")
-
     from pydub.silence import detect_nonsilent
     # Detect non-silent regions
     nonsilent = detect_nonsilent(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
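The middle of strip_silence (the lines between this hunk and the next) is not part of the diff; presumably start_trim and end_trim come from the first and last regions returned by detect_nonsilent. A minimal sketch of that step, assuming nonsilent is the list of [start_ms, end_ms] pairs pydub returns; the early-return handling is illustrative, not taken from app.py:

    if not nonsilent:
        # only silence was found: return the audio unchanged
        return audio
    start_trim = nonsilent[0][0]   # start of the first non-silent region, in ms
    end_trim = nonsilent[-1][1]    # end of the last non-silent region, in ms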
@@ -35,10 +29,16 @@ def strip_silence(audio: AudioSegment, silence_thresh=-40, min_silence_len=100,
     # Add padding before and after the trimmed audio
     # Ensure the padding doesn't exceed the trimmed boundaries
     start_trim = max(0, start_trim - silence_padding_ms)  # Ensure no negative start
-
-
+    if not check1:
+        silence_padding_ms = 0
     end_trim = min(len(audio), end_trim + silence_padding_ms)  # Ensure end doesn't go past audio length
     # Return the trimmed and padded audio
+    # Debugging: print input arguments
+    print(f"Audio length: {len(audio)} ms")
+    print(f"Silence threshold: {silence_thresh} dB")
+    print(f"Minimum silence length: {min_silence_len} ms")
+    print(f"Silence padding: {silence_padding_ms} ms")
+    print(f"Check1: {check1}**")
     return audio[start_trim:end_trim]
 
 
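A brief usage sketch of the revised function; the input file and the direct assignment to check1 are illustrative, since in app.py the flag is managed by process_transcript_line. Because silence_padding_ms is reset only after start_trim has been adjusted, the zero padding for a falsy check1 affects just the trailing edge:

    from pydub import AudioSegment

    seg = AudioSegment.from_mp3("part.mp3")   # illustrative input clip
    check1 = False                            # module-level flag from app.py
    trimmed = strip_silence(seg, silence_thresh=-40, min_silence_len=50, silence_padding_ms=50)
    # with check1 False: ~50 ms of padding survives before the speech, none after it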
@@ -142,7 +142,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
         await communicate.save(audio_path)
 
         audio = AudioSegment.from_mp3(audio_path)
-        if check1:
+        if not check1:
             print(f"not last part of sentence - SHORT silence")
             audio = strip_silence(audio, silence_thresh=-40, min_silence_len=50, silence_padding_ms=50)  ## silence between sentences
         else:
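For context, the code that builds communicate sits above this hunk and is not shown. A minimal sketch of the whole step, assuming the standard edge-tts Communicate API; the helper name, the default audio_path, and the body of the else branch are assumptions, not taken from app.py:

    import edge_tts
    from pydub import AudioSegment

    async def synthesize_part(text_segment, voice, rate, pitch, audio_path="segment.mp3"):
        # edge-tts expects rate/pitch as strings such as "+0%" and "+0Hz"
        communicate = edge_tts.Communicate(text_segment, voice, rate=rate, pitch=pitch)
        await communicate.save(audio_path)
        audio = AudioSegment.from_mp3(audio_path)
        if not check1:
            # "not last part of sentence - SHORT silence": tight trim between sentence parts
            audio = strip_silence(audio, silence_thresh=-40, min_silence_len=50, silence_padding_ms=50)
        else:
            # outside this hunk; presumably the longer default padding is kept here
            audio = strip_silence(audio)
        return audio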
@@ -182,12 +182,13 @@ async def process_transcript_line(line, default_voice, rate, pitch):
     check1 = True
     process_next = False
     for part in split_parts:
-        if part == '"':
+        if part == '"':  # process text that is inside quotes
             process_next = not process_next
+            check1 = False  # set it to False
             continue
         if process_next and part.strip():
-            if part == split_parts[-1]:  # check if this is the last iteration
-
+            #if part == split_parts[-1]:  # check if this is the last iteration
+            #    check1 = False  # set it to False
             audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch)
             if audio_path:
                 audio_segments.append(audio_path)
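To make the toggling concrete, here is a self-contained sketch of the loop with the synthesis call replaced by a print; the re.split pattern is an assumption about how split_parts is produced, since that code is not in this diff:

    import re

    line = 'He said "hello there" and left. "Goodbye," she replied.'
    split_parts = re.split(r'(")', line)
    # -> ['He said ', '"', 'hello there', '"', ' and left. ', '"', 'Goodbye,', '"', ' she replied.']

    check1 = True          # reset at the start of each transcript line
    process_next = False   # flips each time a quote character is seen
    for part in split_parts:
        if part == '"':            # process text that is inside quotes
            process_next = not process_next
            check1 = False         # cleared as soon as the first quote is seen
            continue
        if process_next and part.strip():
            print("would synthesize:", part)  # app.py awaits generate_audio_with_voice_prefix here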