Spaces:
Running
Running
Yurii Paniv
committed on
Commit
·
349b2ad
1
Parent(s):
1ce668d
Improve number handling
Browse files- app.py +8 -4
- formatter.py +42 -13
app.py
CHANGED
|
@@ -7,7 +7,6 @@ import requests
|
|
| 7 |
from os.path import exists
|
| 8 |
from formatter import preprocess_text
|
| 9 |
from datetime import datetime
|
| 10 |
-
from stress import sentence_to_stress
|
| 11 |
from enum import Enum
|
| 12 |
import torch
|
| 13 |
|
|
@@ -46,11 +45,15 @@ if synthesizer is None:
|
|
| 46 |
raise NameError("model not found")
|
| 47 |
|
| 48 |
def tts(text: str, stress: str):
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
text_limit = 1200
|
| 51 |
text = text if len(text) < text_limit else text[0:text_limit] # mitigate crashes on hf space
|
| 52 |
-
|
| 53 |
-
print(text, stress, datetime.utcnow())
|
| 54 |
|
| 55 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
| 56 |
with torch.no_grad():
|
|
@@ -81,6 +84,7 @@ iface = gr.Interface(
|
|
| 81 |
["Введ+іть, б+удь л+аска, сво+є р+ечення.", StressOption.ManualStress.value],
|
| 82 |
["Введіть, будь ласка, своє речення.", StressOption.ManualStress.value],
|
| 83 |
["Привіт, як тебе звати?", StressOption.AutomaticStress.value],
|
|
|
|
| 84 |
]
|
| 85 |
)
|
| 86 |
iface.launch(enable_queue=True, prevent_thread_lock=True)
|
|
|
|
| 7 |
from os.path import exists
|
| 8 |
from formatter import preprocess_text
|
| 9 |
from datetime import datetime
|
|
|
|
| 10 |
from enum import Enum
|
| 11 |
import torch
|
| 12 |
|
|
|
|
| 45 |
raise NameError("model not found")
|
| 46 |
|
| 47 |
def tts(text: str, stress: str):
|
| 48 |
+
print("============================")
|
| 49 |
+
print("Original text:", text)
|
| 50 |
+
print("Stress:", stress)
|
| 51 |
+
print("Time:", datetime.utcnow())
|
| 52 |
+
autostress = True if stress == StressOption.AutomaticStress.value else False
|
| 53 |
+
text = preprocess_text(text, autostress)
|
| 54 |
text_limit = 1200
|
| 55 |
text = text if len(text) < text_limit else text[0:text_limit] # mitigate crashes on hf space
|
| 56 |
+
print("Converted:", text)
|
|
|
|
| 57 |
|
| 58 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
| 59 |
with torch.no_grad():
|
|
|
|
| 84 |
["Введ+іть, б+удь л+аска, сво+є р+ечення.", StressOption.ManualStress.value],
|
| 85 |
["Введіть, будь ласка, своє речення.", StressOption.ManualStress.value],
|
| 86 |
["Привіт, як тебе звати?", StressOption.AutomaticStress.value],
|
| 87 |
+
["Договір підписано 4 квітня 1949 року.", StressOption.AutomaticStress.value],
|
| 88 |
]
|
| 89 |
)
|
| 90 |
iface.launch(enable_queue=True, prevent_thread_lock=True)
|
formatter.py
CHANGED
|
@@ -1,18 +1,42 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
# replace apostrophe
|
| 3 |
text = text.replace("`", "'")
|
| 4 |
text = text.replace("ʼ", "'")
|
| 5 |
# numbers
|
| 6 |
-
text =
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# speak english alphabet using brute force transliteration
|
| 17 |
english = {
|
| 18 |
"a": "а",
|
|
@@ -47,9 +71,14 @@ def preprocess_text(text):
|
|
| 47 |
text = text.replace(english_char.upper(), english[english_char].upper())
|
| 48 |
text = text.replace(english_char, english[english_char])
|
| 49 |
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
return text
|
| 52 |
|
| 53 |
|
| 54 |
if __name__ == "__main__":
|
| 55 |
-
print(preprocess_text("Quality of life update"))
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import num2words
|
| 2 |
+
import re
|
| 3 |
+
from stress import sentence_to_stress
|
| 4 |
+
|
| 5 |
+
def preprocess_text(text, autostress=False):
|
| 6 |
+
# currencies
|
| 7 |
+
text = text.replace("$", "долар")
|
| 8 |
+
text = text.replace("₴", "гривня")
|
| 9 |
+
text = text.replace("€", "євро")
|
| 10 |
# replace apostrophe
|
| 11 |
text = text.replace("`", "'")
|
| 12 |
text = text.replace("ʼ", "'")
|
| 13 |
# numbers
|
| 14 |
+
text = re.sub(r'(\d)\s+(\d)', r'\1\2', text)
|
| 15 |
+
|
| 16 |
+
def detect_num_and_convert(word):
    """Spell out a numeric token in Ukrainian; return any other token as is.

    A token is treated as a number when it is made up only of ASCII digits,
    commas and dots and contains at least one digit.

    Args:
        word: A single space-separated token of the input text.

    Returns:
        The result of ``num2words.num2words(word, lang="uk")`` for numeric
        tokens, or ``word`` unchanged when the token is not numeric or the
        conversion fails.
    """
    digits = "0123456789"
    allowed = digits + ",."

    # Tokens without a single digit ("", ".", ",", "...") would pass an
    # all()-only check vacuously and reach num2words just to fail there.
    # Empty tokens do occur: splitting on " " yields them for double spaces.
    if not any(ch in digits for ch in word):
        return word
    if not all(ch in allowed for ch in word):
        return word

    try:
        return num2words.num2words(word, lang="uk")
    except Exception:
        # Best effort: tokens the library cannot parse (e.g. "1,5" or
        # "1.2.3") are left untouched for the digit-by-digit fallback that
        # follows. Catching Exception instead of a bare except keeps
        # KeyboardInterrupt and SystemExit propagating.
        return word
|
| 26 |
+
|
| 27 |
+
text = " ".join([detect_num_and_convert(word) for word in text.split(" ")])
|
| 28 |
+
|
| 29 |
+
# fallback numbers
|
| 30 |
+
text = text.replace("1", "один ")
|
| 31 |
+
text = text.replace("2", "два ")
|
| 32 |
+
text = text.replace("3", "три ")
|
| 33 |
+
text = text.replace("4", "чотири ")
|
| 34 |
+
text = text.replace("5", "п'ять ")
|
| 35 |
+
text = text.replace("6", "шість ")
|
| 36 |
+
text = text.replace("7", "сім ")
|
| 37 |
+
text = text.replace("8", "вісім ")
|
| 38 |
+
text = text.replace("9", "дев'ять ")
|
| 39 |
+
text = text.replace("0", "нуль ")
|
| 40 |
# speak english alphabet using brute force transliteration
|
| 41 |
english = {
|
| 42 |
"a": "а",
|
|
|
|
| 71 |
text = text.replace(english_char.upper(), english[english_char].upper())
|
| 72 |
text = text.replace(english_char, english[english_char])
|
| 73 |
|
| 74 |
+
if autostress:
|
| 75 |
+
text = sentence_to_stress(text)
|
| 76 |
+
|
| 77 |
return text
|
| 78 |
|
| 79 |
|
| 80 |
if __name__ == "__main__":
    # Manual smoke test: push each sample sentence through the preprocessor
    # with its default arguments and print the converted text.
    samples = (
        "Quality of life update",
        "Він украв 20000000 $",
        "111 000 000 000 доларів державного боргу.",
        "11100000001 доларів державного боргу.",
    )
    for sample in samples:
        print(preprocess_text(sample))
|