Lakoc
Initial commit
605b3ec
raw
history blame contribute delete
652 Bytes
"""
NOTSOFAR adopts the same text normalizer as the CHiME-8 DASR track.
This code is aligned with the CHiME-8 repo:
https://github.com/chimechallenge/chime-utils/tree/main/chime_utils/text_norm
"""
import json
from .basic import BasicTextNormalizer as BasicTextNormalizer
from .english import EnglishTextNormalizer as EnglishTextNormalizerNSF
def get_text_norm(t_norm: str):
if t_norm == 'whisper':
SPELLING_CORRECTIONS = json.load(open('src/txt_norm/english.json'))
return EnglishTextNormalizer(SPELLING_CORRECTIONS)
elif t_norm == 'whisper_nsf':
return EnglishTextNormalizerNSF()
else:
return lambda x: x