cnph001 committed on
Commit
f182872
·
verified ·
1 Parent(s): 8a746bb

Add clean_text to remove unwanted characters (underscores, asterisks, etc.) from the text string before it is sent to speech synthesis

Browse files
Files changed (1) hide show
  1. app.py +19 -1
app.py CHANGED
@@ -39,6 +39,23 @@ async def get_voices():
39
  voices = await edge_tts.list_voices()
40
  return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # Text-to-speech function for a single paragraph with SS handling
43
  async def paragraph_to_speech(text, voice, rate, pitch):
44
  voice1 = "en-AU-WilliamNeural - en-AU (Male)"
@@ -138,7 +155,8 @@ async def paragraph_to_speech(text, voice, rate, pitch):
138
  # processed_text = part[4:]
139
  # pitch = int(part[2:4])
140
  pitch_str = f"{current_pitch:+d}Hz"
141
- communicate = edge_tts.Communicate(processed_text, current_voice, rate=rate_str, pitch=pitch_str)
 
142
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
143
  tmp_path = tmp_file.name
144
  await communicate.save(tmp_path)
 
39
  voices = await edge_tts.list_voices()
40
  return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
41
 
42
def clean_text(text: str) -> str:
    """Return *text* stripped of characters that should not be spoken.

    The cleaning happens in three steps:

    1. Every character other than an ASCII letter, a digit, whitespace,
       a comma or a period is removed (so ``_``, ``*``, ``#`` and also
       non-ASCII letters are dropped).
    2. Words broken up by stray periods are rejoined
       (``"w.o.r.d"`` -> ``"word"``).
    3. Runs of whitespace are collapsed to a single space and the result
       is trimmed.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text; an empty string if nothing survives.
    """
    # Drop everything outside the allowed character set.
    text = re.sub(r"[^a-zA-Z0-9\s,.]", "", text)
    # Rejoin words split by periods. Only periods sitting directly between
    # two letters are removed: matching whitespace here as well would glue
    # every pair of neighbouring words (and sentences) together, and
    # matching between digits would destroy decimal points such as "3.14".
    text = re.sub(r"(?<=[a-zA-Z])\.+(?=[a-zA-Z])", "", text)
    # Collapse whitespace runs and trim both ends.
    return re.sub(r"\s+", " ", text).strip()
58
+
59
  # Text-to-speech function for a single paragraph with SS handling
60
  async def paragraph_to_speech(text, voice, rate, pitch):
61
  voice1 = "en-AU-WilliamNeural - en-AU (Male)"
 
155
  # processed_text = part[4:]
156
  # pitch = int(part[2:4])
157
  pitch_str = f"{current_pitch:+d}Hz"
158
+ texttosend = clean_text (processed_text)
159
+ communicate = edge_tts.Communicate(texttosend, current_voice, rate=rate_str, pitch=pitch_str)
160
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
161
  tmp_path = tmp_file.name
162
  await communicate.save(tmp_path)