Spaces:

PineSearch
/

generateAudio

Paused

SAUL19 committed on Jun 23, 2023

Commit

1f7e24e

1 Parent(s): 77e4720

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -48,15 +48,26 @@ def generateAudio(text_to_audio, s3_save_as):
     s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
-    def cut_text(text, max_tokens=500):
         # Remove non-alphanumeric characters, except periods and commas
         text = re.sub(r"[^\w\s.,]", "", text)
-        tokens = word_tokenize(text)
         if len(tokens) <= max_tokens:
             return text
-        cut = ' '.join(tokens[:max_tokens])
         return cut
     def save_audio_to_s3(audio):

     s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
+   def cut_text(text, max_tokens=500):
         # Remove non-alphanumeric characters, except periods and commas
         text = re.sub(r"[^\w\s.,]", "", text)
+        # Replace multiple spaces with a single space
+        text = re.sub(r"\s{2,}", " ", text)
+        # Remove line breaks
+        text = re.sub(r"\n", " ", text)
+        sentences = sent_tokenize(text)
+        tokens = []
+        for sentence in sentences:
+            tokens.extend(word_tokenize(sentence))
         if len(tokens) <= max_tokens:
             return text
+        cut_tokens = tokens[:max_tokens]
+        cut = ' '.join(cut_tokens)
         return cut
     def save_audio_to_s3(audio):