SAUL19 committed on
Commit
1f7e24e
·
1 Parent(s): 77e4720

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -48,15 +48,26 @@ def generateAudio(text_to_audio, s3_save_as):
48
 
49
  s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
50
 
51
- def cut_text(text, max_tokens=500):
52
  # Remove non-alphanumeric characters, except periods and commas
53
  text = re.sub(r"[^\w\s.,]", "", text)
54
-
55
- tokens = word_tokenize(text)
 
 
 
 
 
 
 
 
 
 
56
  if len(tokens) <= max_tokens:
57
  return text
58
-
59
- cut = ' '.join(tokens[:max_tokens])
 
60
  return cut
61
 
62
  def save_audio_to_s3(audio):
 
48
 
49
  s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
50
 
51
+ def cut_text(text, max_tokens=500):
52
  # Remove non-alphanumeric characters, except periods and commas
53
  text = re.sub(r"[^\w\s.,]", "", text)
54
+
55
+ # Replace multiple spaces with a single space
56
+ text = re.sub(r"\s{2,}", " ", text)
57
+
58
+ # Remove line breaks
59
+ text = re.sub(r"\n", " ", text)
60
+
61
+ sentences = sent_tokenize(text)
62
+ tokens = []
63
+ for sentence in sentences:
64
+ tokens.extend(word_tokenize(sentence))
65
+
66
  if len(tokens) <= max_tokens:
67
  return text
68
+
69
+ cut_tokens = tokens[:max_tokens]
70
+ cut = ' '.join(cut_tokens)
71
  return cut
72
 
73
  def save_audio_to_s3(audio):