moretts

Sleeping

App Files Files Community

Gregniuki committed on Nov 28, 2024

Commit

3f9ece7

verified ·

1 Parent(s): 6c61749

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -5

app.py CHANGED Viewed

@@ -136,33 +136,44 @@ E2TTS_ema_model2 = load_custom(
     "hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
 )
 def chunk_text(text, max_chars=100):
     """
-    Splits the input text at punctuation boundaries and packs the pieces into chunks sized against max_chars.
     Args:
         text (str): The text to be split.
         max_chars (int): The size budget per chunk. The length check below measures UTF-8 encoded bytes, not characters.
     Returns:
         List[str]: A list of text chunks, each stripped of surrounding whitespace. A single piece larger than max_chars is not split further and becomes its own chunk.
     """
     chunks = []
     current_chunk = ""
     # Split after ASCII punctuation followed by whitespace, or directly after fullwidth punctuation (no whitespace required)
     sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[；：，。！？])", text)
     for sentence in sentences:
         # Sizes are compared in UTF-8 bytes; a piece gets a trailing space only when its last character encodes to one byte
-        if len(current_chunk.encode("utf-8")) + len(sentence.encode("utf-8")) <= max_chars:
-            current_chunk += sentence + " " if sentence and len(sentence[-1].encode("utf-8")) == 1 else sentence
         else:
             if current_chunk:
                 chunks.append(current_chunk.strip())
             # The piece that did not fit starts the next chunk, whatever its own size
-            current_chunk = sentence + " " if sentence and len(sentence[-1].encode("utf-8")) == 1 else sentence
     if current_chunk:
         chunks.append(current_chunk.strip())
     return chunks
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":

     "hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
 )
+import re
 def chunk_text(text, max_chars=100):
     """
+    Splits the input text at punctuation boundaries and packs consecutive pieces into chunks
+    of at most max_chars characters. A single piece longer than that is never split further; it becomes its own oversized chunk.
     Args:
         text (str): The text to be split.
         max_chars (int): The maximum number of characters per chunk, measured with len() and counting the separating space.
     Returns:
         List[str]: A list of text chunks, each stripped of surrounding whitespace.
     """
     chunks = []
     current_chunk = ""
     # Split after ASCII punctuation followed by whitespace, or directly after fullwidth punctuation (no whitespace required)
     sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[；：，。！？])", text)
     for sentence in sentences:
+        # Append the piece when it, together with its separating space, still fits within max_chars
+        # NOTE(review): a space is appended after every piece, including ones ending in fullwidth punctuation where the split consumed no whitespace - confirm this is intended
+        if len(current_chunk) + len(sentence) + 1 <= max_chars:  # +1 for the space
+            current_chunk += sentence + " "
         else:
             if current_chunk:
+                # Flush the accumulated chunk before starting a new one
                 chunks.append(current_chunk.strip())
+            # Start a new chunk with the current piece, even if it alone exceeds max_chars
+            current_chunk = sentence + " "
+    # Append any remaining text in current_chunk to chunks
     if current_chunk:
         chunks.append(current_chunk.strip())
     return chunks
 @gpu_decorator
 def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
     if exp_name == "English":