Yiwen Zhao committed on
Commit
810614d
·
1 Parent(s): be8d8e7

update mandarin PER

Browse files
Files changed (1) hide show
  1. test_performance.py +31 -4
test_performance.py CHANGED
@@ -86,15 +86,42 @@ def remove_punctuation_and_replace_with_space(text):
86
  return text
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def on_click_metrics(audio_path, ref):
90
  global predictor
91
  # OWSM ctc + PER
92
  y, sr = librosa.load(audio_path, sr=16000)
93
  asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
94
- hyp_pinin = lazy_pinyin(asr_result)
95
-
96
- ref_pinin = lazy_pinyin(ref)
97
- per = jiwer.wer(" ".join(ref_pinin), " ".join(hyp_pinin))
 
 
98
 
99
  audio = librosa.load(audio_path, sr=22050)[0]
100
  singmos = singmos_evaluation(
 
86
  return text
87
 
88
 
89
+ def pypinyin_g2p_phone_without_prosody(text):
90
+ from pypinyin import Style, pinyin
91
+ from pypinyin.style._utils import get_finals, get_initials
92
+
93
+ phones = []
94
+ for phone in pinyin(text, style=Style.NORMAL, strict=False):
95
+ initial = get_initials(phone[0], strict=False)
96
+ final = get_finals(phone[0], strict=False)
97
+ if len(initial) != 0:
98
+ if initial in ["x", "y", "j", "q"]:
99
+ if final == "un":
100
+ final = "vn"
101
+ elif final == "uan":
102
+ final = "van"
103
+ elif final == "u":
104
+ final = "v"
105
+ if final == "ue":
106
+ final = "ve"
107
+ phones.append(initial)
108
+ phones.append(final)
109
+ else:
110
+ phones.append(final)
111
+ return phones
112
+
113
+
114
  def on_click_metrics(audio_path, ref):
115
  global predictor
116
  # OWSM ctc + PER
117
  y, sr = librosa.load(audio_path, sr=16000)
118
  asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
119
+
120
+ # ESPnet's embedded g2p, but it sometimes mispronounces polyphonic characters
121
+ hyp_pinin = pypinyin_g2p_phone_without_prosody(asr_result)
122
+
123
+ ref_pinin = pypinyin_g2p_phone_without_prosody(ref)
124
+ per = jiwer.wer(ref_pinin, hyp_pinin)
125
 
126
  audio = librosa.load(audio_path, sr=22050)[0]
127
  singmos = singmos_evaluation(