etchen committed on
Commit
c85be9e
·
verified ·
1 Parent(s): 8406c8a

Create TaiwaneseHokkien.py

Browse files
Files changed (1) hide show
  1. TaiwaneseHokkien.py +62 -0
TaiwaneseHokkien.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from LanguageBase import Language
from taibun import Converter

# Repository id of the speech-recognition checkpoint; handed to the ASR
# pipeline as its ``model`` argument.
model_repo_id = "emlinking/wav2vec2-large-xls-r-300m-tsm-asr-v6"

# Single module-wide taibun converter, reused for every comparison instead of
# being rebuilt per call.
util = Converter()
5
# Initial consonants, ordered longest first so that "tsh" is tried before "ts"
# and "t". Covers the Tai-lo inventory plus the POJ spellings "ch"/"chh".
_INITIALS = (
    "tsh", "chh",
    "ph", "th", "kh", "ng", "ts", "ch",
    "p", "b", "m", "t", "n", "l", "k", "g", "s", "j", "h",
)

# What may directly follow an initial: a vowel, or a syllabic nasal (m / ng).
_NUCLEUS_STARTS = ("a", "e", "i", "o", "u", "m", "ng")


def get_initial_length(s):
    """Return how many leading characters of syllable *s* form its initial.

    The initial is the longest known consonant prefix that is directly
    followed by a nucleus (a vowel or a syllabic ``m``/``ng``).  Syllables
    that begin with a vowel, or that consist only of a syllabic nasal
    (``m7``, ``ng5``), have no initial and yield ``0``.

    Looking at the prefix as a whole, rather than at fixed character
    positions, keeps a coda ``h`` (``kah4``) from being counted as part of
    the initial and makes short or empty input safe.

    Args:
        s: One syllable in lower-case, diacritic-free romanization, optionally
            ending in a tone digit (e.g. ``"tshiu2"``).

    Returns:
        The length of the initial: 0, 1, 2 or 3.

    >>> get_initial_length("tshiu2")
    3
    >>> get_initial_length("kah4")
    1
    >>> get_initial_length("a1")
    0
    """
    for initial in _INITIALS:
        if not s.startswith(initial):
            continue
        # Only accept the prefix when a nucleus follows; this is what keeps
        # the bare syllabic nasals "ng5" / "m7" from being read as initials.
        if s[len(initial):].startswith(_NUCLEUS_STARTS):
            return len(initial)
    return 0
12
class TaiwaneseHokkien(Language):
    """Taiwanese Hokkien support: speech recognition plus pronunciation diffing.

    Audio is transcribed with the checkpoint named by ``model_repo_id``;
    ``compare`` then grades a transcript against a target romanization
    syllable by syllable (initial, rime and tone separately).
    """

    def __init__(self, **kwargs):
        """Create the speech-recognition pipeline.

        Args:
            **kwargs: Forwarded unchanged to ``pipeline``.
        """
        # NOTE(review): `pipeline` is not imported anywhere in this file, so
        # this line raises NameError as written. It looks like
        # `transformers.pipeline` -- confirm and add the import at file level.
        self.pipe = pipeline(
            task="automatic-speech-recognition", model=model_repo_id, **kwargs
        )

    def asr(self, audio):
        """Transcribe *audio* and return the ``'text'`` field of the result."""
        return self.pipe(audio)['text']

    def compare(self, target_pron, user_pron):
        """Grade a user's romanized pronunciation against the target.

        Both strings are lower-cased and split on whitespace into words, and
        each word is split on ``-`` into syllables.  Words and syllables are
        paired by position.  Every paired syllable contributes three pieces
        (initial, rime, tone), each labelled only when it differs from the
        target.  Unpaired syllables and words are reported as missing (present
        only in the target) or extra (present only in the user's attempt).

        Args:
            target_pron: The expected romanization.
            user_pron: The romanization to grade.

        Returns:
            A list of ``(text, label)`` tuples in reading order.  ``label`` is
            ``None`` for correct pieces and for the ``'-'`` / ``' '``
            separators, otherwise one of the keys of ``compare_colors``.
        """
        target_words = target_pron.lower().split()
        user_words = user_pron.lower().split()

        # taibun's private helper, reached through its name-mangled attribute.
        # Presumably it returns the syllable with its tone as a trailing digit;
        # the slicing below relies on the last character being the tone --
        # confirm against the installed taibun version.
        to_number_tone = util._Converter__get_number_tone

        result = []
        for target_word, user_word in zip(target_words, user_words):
            # Drop empty pieces produced by doubled or leading hyphens.
            target_syls = [x for x in target_word.split('-') if x]
            user_syls = [x for x in user_word.split('-') if x]
            shared = min(len(target_syls), len(user_syls))

            for j in range(shared):
                target_syl = to_number_tone(target_syls[j])
                user_syl = to_number_tone(user_syls[j])
                til = get_initial_length(target_syl)
                uil = get_initial_length(user_syl)

                # (expected, actual, label) for initial, rime and tone.
                pieces = (
                    (target_syl[:til], user_syl[:uil], 'initial error'),
                    (target_syl[til:-1], user_syl[uil:-1], 'rime error'),
                    (target_syl[-1:], user_syl[-1:], 'tone error'),
                )
                for expected, actual, label in pieces:
                    result.append((actual, None if expected == actual else label))

                # Hyphen between paired syllables, but not after the last one.
                if j < shared - 1:
                    result.append(('-', None))

            # At most one of these two slices is non-empty.
            for syl in target_syls[shared:]:
                result.append(('-' + syl, 'missing syllables'))
            for syl in user_syls[shared:]:
                result.append(('-' + syl, 'extra syllables'))
            result.append((' ', None))

        # Whole words left over on either side once the shorter input ran out.
        paired = min(len(target_words), len(user_words))
        for word in target_words[paired:]:
            result.append((word, 'missing syllables'))
            result.append((' ', None))
        for word in user_words[paired:]:
            result.append((word, 'extra syllables'))
            result.append((' ', None))
        return result

    @property
    def compare_colors(self):
        """Return the color assigned to each label that ``compare`` can emit."""
        return {
            'tone error': 'red',
            'initial error': 'blue',
            'rime error': 'green',
            'missing syllables': 'yellow',
            'extra syllables': 'stone',
        }