Gregniuki committed on
Commit
3f9ece7
·
verified ·
1 Parent(s): 6c61749

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -136,33 +136,44 @@ E2TTS_ema_model2 = load_custom(
136
  "hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
137
  )
138
 
 
 
139
  def chunk_text(text, max_chars=100):
140
  """
141
- Splits the input text into chunks, each with a maximum number of characters.
 
 
142
  Args:
143
  text (str): The text to be split.
144
  max_chars (int): The maximum number of characters per chunk.
 
145
  Returns:
146
  List[str]: A list of text chunks.
147
  """
148
  chunks = []
149
  current_chunk = ""
 
150
  # Split the text into sentences based on punctuation followed by whitespace
151
  sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[;:,。!?])", text)
152
 
153
  for sentence in sentences:
154
- if len(current_chunk.encode("utf-8")) + len(sentence.encode("utf-8")) <= max_chars:
155
- current_chunk += sentence + " " if sentence and len(sentence[-1].encode("utf-8")) == 1 else sentence
 
156
  else:
157
  if current_chunk:
 
158
  chunks.append(current_chunk.strip())
159
- current_chunk = sentence + " " if sentence and len(sentence[-1].encode("utf-8")) == 1 else sentence
160
-
 
 
161
  if current_chunk:
162
  chunks.append(current_chunk.strip())
163
 
164
  return chunks
165
 
 
166
  @gpu_decorator
167
  def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
168
  if exp_name == "English":
 
136
  "hf://Gregniuki/F5-tts_English_German_Polish/Polish/model_500000.pt", "", F5TTS_model_cfg
137
  )
138
 
139
+ import re
140
+
141
  def chunk_text(text, max_chars=100):
142
  """
143
+ Splits the input text into chunks, each with a maximum number of characters
144
+ (but splits only after a space if the chunk exceeds the character limit).
145
+
146
  Args:
147
  text (str): The text to be split.
148
  max_chars (int): The maximum number of characters per chunk.
149
+
150
  Returns:
151
  List[str]: A list of text chunks.
152
  """
153
  chunks = []
154
  current_chunk = ""
155
+
156
  # Split the text into sentences based on punctuation followed by whitespace
157
  sentences = re.split(r"(?<=[;:,.!?])\s+|(?<=[;:,。!?])", text)
158
 
159
  for sentence in sentences:
160
+ # Check if adding this sentence to the current chunk will exceed the max_chars
161
+ if len(current_chunk) + len(sentence) + 1 <= max_chars: # +1 for the space
162
+ current_chunk += sentence + " "
163
  else:
164
  if current_chunk:
165
+ # Add the current chunk to the list and reset it
166
  chunks.append(current_chunk.strip())
167
+ # Start a new chunk with the current sentence
168
+ current_chunk = sentence + " "
169
+
170
+ # Append any remaining text in current_chunk to chunks
171
  if current_chunk:
172
  chunks.append(current_chunk.strip())
173
 
174
  return chunks
175
 
176
+
177
  @gpu_decorator
178
  def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
179
  if exp_name == "English":