Spaces:
Build error
Build error
| """Shuffle sents.""" | |
| # pylint: disable= | |
| from typing import List, Optional, Tuple, Union | |
| from fastlid import fastlid | |
| from logzero import logger # noqa | |
| from radiobee.lists2cmat import lists2cmat | |
| from radiobee.gen_pset import gen_pset | |
| from radiobee.gen_aset import gen_aset | |
| from radiobee.align_texts import align_texts | |
| # fmt: off | |
def shuffle_sents(
    lst1: List[str],
    lst2: List[str],
    eps: float = 6,
    min_samples: int = 4,
    tf_type: str = "linear",
    idf_type: Optional[str] = None,
    dl_type: Optional[str] = None,
    norm: Optional[str] = None,
    lang1: Optional[str] = None,
    lang2: Optional[str] = None,
) -> List[Tuple[str, str, Union[str, float]]]:
    # fmt: on
    """Shuffle sents to the right positions.

    Based on __main__.py.

    Pipeline: detect languages (if not given) -> ``lists2cmat`` ->
    ``gen_pset`` -> ``gen_aset`` -> ``align_texts``.

    Args:
        lst1: first list of sentences.
        lst2: second list of sentences.
        eps: forwarded to ``gen_pset`` as ``eps``.
        min_samples: forwarded to ``gen_pset`` as ``min_samples``.
        tf_type: forwarded to ``lists2cmat``.
        idf_type: forwarded to ``lists2cmat``.
        dl_type: forwarded to ``lists2cmat``.
        norm: forwarded to ``lists2cmat``.
        lang1: language of ``lst1``; when None it is detected by running
            ``fastlid`` on the space-joined sentences, with detection
            temporarily restricted to "en" and "zh".
        lang2: language of ``lst2``; detected the same way when None.

    Returns:
        The result of ``align_texts``; annotated as a list of
        ``(str, str, str | float)`` tuples.
    """
    if lang1 is None or lang2 is None:
        # fastlid.set_languages is process-wide state: override it only for
        # the duration of the detection and always put the previous value
        # back, even if detection raises.
        saved_languages = fastlid.set_languages
        fastlid.set_languages = ["en", "zh"]
        try:
            if lang1 is None:
                lang1, _ = fastlid(" ".join(lst1))
            if lang2 is None:
                lang2, _ = fastlid(" ".join(lst2))
        finally:
            # restore fastlid.set_languages
            fastlid.set_languages = saved_languages

    cmat = lists2cmat(
        lst1,
        lst2,
        tf_type=tf_type,
        idf_type=idf_type,
        dl_type=dl_type,
        norm=norm,
        lang1=lang1,
        lang2=lang2,
    )

    pset = gen_pset(
        cmat,
        eps=eps,
        min_samples=min_samples,
        delta=7,  # fixed here; not exposed through this function's signature
    )

    src_len, tgt_len = cmat.shape
    aset = gen_aset(pset, src_len, tgt_len)

    # NOTE(review): lst2 is passed before lst1 here, unlike the lists2cmat
    # call above -- presumably matches align_texts' (src, tgt) parameter
    # order; confirm against align_texts.
    return align_texts(aset, lst2, lst1)