Spaces:
Sleeping
Sleeping
Create TaiwaneseHokkien.py
Browse files- TaiwaneseHokkien.py +62 -0
TaiwaneseHokkien.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from LanguageBase import Language
|
2 |
+
from taibun import Converter
|
3 |
+
# Model identifier handed to the speech-recognition pipeline as `model=` in
# TaiwaneseHokkien.__init__ (presumably a Hugging Face Hub repo id).
model_repo_id = "emlinking/wav2vec2-large-xls-r-300m-tsm-asr-v6"
|
4 |
+
# Module-wide taibun Converter, built with default settings; compare() calls
# its number-tone helper on every syllable before comparing.
util = Converter()
|
5 |
+
def get_initial_length(s):
    """Return how many leading characters of syllable *s* form its initial.

    The syllable is expected in lowercase romanized form (the caller passes
    number-tone syllables such as ``"tshiu2"``).

    Returns:
        3 for the aspirated affricate (``tsh``, or ``chh`` in POJ spelling),
        2 for ``ts``/``ch`` and the aspirated stops ``ph``/``th``/``kh``,
        1 for any other non-empty syllable, and 0 for an empty string.

    The check is done on the actual prefix rather than on fixed character
    positions, so short syllables (``"a1"``) no longer raise ``IndexError``
    and a checked-final ``h`` (``"pah4"``, ``"ah4"``) is no longer mistaken
    for part of the initial.
    """
    if not s:
        return 0
    if s.startswith(('tsh', 'chh')):
        return 3
    if s.startswith(('ts', 'ch', 'ph', 'th', 'kh')):
        return 2
    # Everything else, including vowel-initial syllables, keeps the original
    # fallback of a one-character initial.
    return 1
|
12 |
+
class TaiwaneseHokkien(Language):
    """Taiwanese Hokkien speech recognition and pronunciation comparison."""

    def __init__(self, **kwargs):
        """Build the speech-recognition pipeline for ``model_repo_id``.

        Args:
            **kwargs: Forwarded unchanged to ``pipeline(...)``.
        """
        # `pipeline` was never imported at file level; import it here so the
        # constructor does not fail with NameError.
        from transformers import pipeline

        self.pipe = pipeline(task="automatic-speech-recognition", model=model_repo_id, **kwargs)

    def asr(self, audio):
        """Run the pipeline on *audio* and return the ``'text'`` field of its result."""
        return self.pipe(audio)['text']

    def compare(self, target_pron, user_pron):
        """Compare a user's romanized pronunciation against the target.

        Both strings are lowercased, split into words on whitespace and into
        syllables on ``'-'``. Words and syllables are paired by position.
        Each paired syllable contributes three entries (initial, rime, tone)
        taken from the user's syllable, each labelled with an error name when
        it differs from the target and ``None`` otherwise.

        Args:
            target_pron: Reference pronunciation.
            user_pron: Pronunciation to evaluate.

        Returns:
            A list of ``(text, label)`` tuples. ``label`` is ``None`` or one of
            ``'initial error'``, ``'rime error'``, ``'tone error'``,
            ``'missing syllables'``, ``'extra syllables'``. Separator entries
            ``('-', None)`` and ``(' ', None)`` are interleaved between
            syllables and after words.
        """
        # normalize
        target_words = target_pron.lower().split()
        user_words = user_pron.lower().split()
        result = []

        for target_word, user_word in zip(target_words, user_words):
            target_syls = [x for x in target_word.split('-') if x]
            user_syls = [x for x in user_word.split('-') if x]
            paired = min(len(target_syls), len(user_syls))

            for j in range(paired):
                # NOTE(review): relies on taibun's name-mangled private helper;
                # presumably it rewrites a syllable so the tone is a trailing
                # digit -- confirm against the installed taibun version.
                target_syl = util._Converter__get_number_tone(target_syls[j])
                user_syl = util._Converter__get_number_tone(user_syls[j])
                til = get_initial_length(target_syl)
                uil = get_initial_length(user_syl)

                # Initial, rime (everything between initial and the last
                # character) and tone (last character), in display order.
                segments = (
                    (target_syl[:til], user_syl[:uil], 'initial error'),
                    (target_syl[til:-1], user_syl[uil:-1], 'rime error'),
                    (target_syl[-1], user_syl[-1], 'tone error'),
                )
                for target_part, user_part, label in segments:
                    result.append((user_part, label if target_part != user_part else None))

                # Hyphen only between paired syllables, not after the last one.
                if j < paired - 1:
                    result.append(('-', None))

            if len(target_syls) > len(user_syls):
                for syl in target_syls[len(user_syls):]:
                    result.append(('-' + syl, 'missing syllables'))
            elif len(user_syls) > len(target_syls):
                for syl in user_syls[len(target_syls):]:
                    result.append(('-' + syl, 'extra syllables'))
            result.append((' ', None))

        # Whole words present on only one side.
        if len(target_words) > len(user_words):
            for word in target_words[len(user_words):]:
                result.append((word, 'missing syllables'))
                result.append((' ', None))
        elif len(user_words) > len(target_words):
            for word in user_words[len(target_words):]:
                result.append((word, 'extra syllables'))
                result.append((' ', None))
        return result

    @property
    def compare_colors(self):
        """Map each label produced by ``compare`` to a display color name."""
        return {
            'tone error': 'red',
            'initial error': 'blue',
            'rime error': 'green',
            'missing syllables': 'yellow',
            'extra syllables': 'stone',
        }
|