Spaces:
Running
Running
File size: 652 Bytes
605b3ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
"""
NOTSOFAR adopts the same text normalizer as the CHiME-8 DASR track.
This code is aligned with the CHiME-8 repo:
https://github.com/chimechallenge/chime-utils/tree/main/chime_utils/text_norm
"""
import json
from .basic import BasicTextNormalizer as BasicTextNormalizer
from .english import EnglishTextNormalizer as EnglishTextNormalizerNSF
def get_text_norm(t_norm: str):
    """Return the text-normalizer callable selected by ``t_norm``.

    Args:
        t_norm: Name of the normalizer. ``'whisper'`` builds an
            ``EnglishTextNormalizer`` from the table stored in
            ``src/txt_norm/english.json``; ``'whisper_nsf'`` builds an
            ``EnglishTextNormalizerNSF`` with no arguments; any other value
            selects a pass-through.

    Returns:
        The constructed normalizer object, or an identity function that
        returns its argument unchanged when ``t_norm`` is not recognized.

    Raises:
        OSError: If ``src/txt_norm/english.json`` cannot be opened
            (``'whisper'`` only).
        json.JSONDecodeError: If that file does not contain valid JSON
            (``'whisper'`` only).
    """
    if t_norm == 'whisper':
        # The path is relative to the current working directory, not to this
        # module. The context manager closes the handle as soon as the table
        # is parsed instead of leaving it to the garbage collector.
        with open('src/txt_norm/english.json', encoding='utf-8') as spelling_file:
            spelling_corrections = json.load(spelling_file)
        # NOTE(review): only ``EnglishTextNormalizerNSF`` is imported in the
        # visible part of this module; confirm ``EnglishTextNormalizer`` is
        # brought into scope elsewhere, otherwise this branch raises NameError.
        return EnglishTextNormalizer(spelling_corrections)
    if t_norm == 'whisper_nsf':
        return EnglishTextNormalizerNSF()
    # Unknown names deliberately fall back to "no normalization".
    return lambda x: x