Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -48,15 +48,26 @@ def generateAudio(text_to_audio, s3_save_as):
|
|
48 |
|
49 |
s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
|
50 |
|
51 |
-
|
52 |
# Remove non-alphanumeric characters, except periods and commas
|
53 |
text = re.sub(r"[^\w\s.,]", "", text)
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
if len(tokens) <= max_tokens:
|
57 |
return text
|
58 |
-
|
59 |
-
|
|
|
60 |
return cut
|
61 |
|
62 |
def save_audio_to_s3(audio):
|
|
|
48 |
|
49 |
s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
|
50 |
|
51 |
+
def cut_text(text, max_tokens=500):
|
52 |
# Remove non-alphanumeric characters, except periods and commas
|
53 |
text = re.sub(r"[^\w\s.,]", "", text)
|
54 |
+
|
55 |
+
# Replace multiple spaces with a single space
|
56 |
+
text = re.sub(r"\s{2,}", " ", text)
|
57 |
+
|
58 |
+
# Remove line breaks
|
59 |
+
text = re.sub(r"\n", " ", text)
|
60 |
+
|
61 |
+
sentences = sent_tokenize(text)
|
62 |
+
tokens = []
|
63 |
+
for sentence in sentences:
|
64 |
+
tokens.extend(word_tokenize(sentence))
|
65 |
+
|
66 |
if len(tokens) <= max_tokens:
|
67 |
return text
|
68 |
+
|
69 |
+
cut_tokens = tokens[:max_tokens]
|
70 |
+
cut = ' '.join(cut_tokens)
|
71 |
return cut
|
72 |
|
73 |
def save_audio_to_s3(audio):
|