File size: 718 Bytes
601d149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""Translate english to chinese via a dict."""
from typing import List, Union

from gradiobee.en2zh import en2zh
from gradiobee.insert_spaces import insert_spaces


# fmt: off
def en2zh_tokens(
        text: Union[str, List[str]],
        dedup: bool = True,
) -> List[List[str]]:
    # fmt: on
    """Translate english to chinese tokens via a dict.

    Args
        text: english text to translate, a str or a list of str; it is
            passed unchanged to ``en2zh``
        dedup: if True, drop repeated tokens within each entry, keeping
            the first occurrence of every token in its original position
    Returns
        res: list of list of str/token/char, one inner list for each
            element returned by ``en2zh``
    """
    translated = en2zh(text)

    # NOTE(review): insert_spaces presumably pads chinese characters with
    # whitespace so that split() yields one token per char -- confirm
    # against gradiobee.insert_spaces.
    tokens = [insert_spaces(elm).split() for elm in translated]

    if dedup:
        # dict.fromkeys removes duplicates while keeping first-seen order
        # (dicts are insertion-ordered); set() would return the tokens in
        # an arbitrary order that can differ between interpreter runs
        # because str hashes are randomized.
        return [list(dict.fromkeys(elm)) for elm in tokens]

    return tokens