File size: 2,476 Bytes
bccd6e8
 
 
 
 
 
 
 
 
677f95f
bccd6e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f41e86
b496854
bccd6e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b496854
 
 
 
 
 
 
 
 
bccd6e8
 
 
 
 
 
 
 
 
 
 
 
b496854
 
 
 
bccd6e8
145bd44
677f95f
bccd6e8
 
 
 
 
 
 
46a9657
b496854
bccd6e8
b496854
145bd44
bccd6e8
a943def
bccd6e8
b496854
 
bccd6e8
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""Create entry."""
# pylint: disable=invalid-name
from typing import List, Optional, Tuple, Union

import gradio as gr
import pandas as pd
from about_time import about_time
from aset2pairs import aset2pairs
from cmat2aset import cmat2aset
from logzero import logger
from seg_text import seg_text

from radio_mlbee import __version__
from radio_mlbee.gen_cmat import gen_cmat
from radio_mlbee.utils import text1, text2


def greet(name):
    """Build the placeholder greeting, addressing "world" when name is falsy."""
    who = name if name else "world"
    return "Hello " + who + "!! (coming sooooon...)"


def ml_fn(
    text1: str,
    text2: str,
    split_to_sents: bool = False,
    preview: bool = False,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """Align the paragraphs of text1 with the paragraphs of text2.

    Both inputs are coerced to ``str`` and split into lines; each line is
    stripped and blank lines are dropped. The resulting paragraph lists are
    passed to ``gen_cmat`` and ``cmat2aset``, and the alignment is turned into
    rows with ``aset2pairs``.

    Args:
        text1: First text, one paragraph per line.
        text2: Second text, one paragraph per line.
        split_to_sents: When true, both paragraph lists are passed through
            ``seg_text`` before aligning. A failure there is logged and the
            unsegmented paragraphs are used instead.
        preview: When true, the result table is also rendered to HTML.

    Returns:
        A ``(df, html)`` tuple. ``df`` has the columns ``text1``, ``text2``
        and ``llh``; ``html`` is ``df.to_html()`` when ``preview`` is true,
        otherwise ``None``. If neither text contains a non-blank line, ``df``
        is empty.
    """
    columns = ["text1", "text2", "llh"]

    # The inputs are forced to str first, so splitlines()/strip() cannot
    # raise and need no exception guard.
    paras1 = [line.strip() for line in str(text1).splitlines() if line.strip()]
    paras2 = [line.strip() for line in str(text2).splitlines() if line.strip()]

    # Nothing to align: return an empty table instead of failing later on the
    # per-block average (division by a zero block count).
    if not paras1 and not paras2:
        df = pd.DataFrame([], columns=columns)
        return df, (df.to_html() if preview else None)

    if split_to_sents:  # TODO
        # Best effort: keep the paragraph-level split if segmentation fails.
        try:
            paras1 = seg_text(paras1)
        except Exception as exc:
            logger.error(exc)
        try:
            paras2 = seg_text(paras2)
        except Exception as exc:
            logger.error(exc)

    with about_time() as t:
        cmat = gen_cmat(paras1, paras2)
        aset = cmat2aset(cmat)

    n_blocks = len(paras1) + len(paras2)
    # Guard the average in case segmentation left both lists empty.
    av = t.duration / n_blocks * 1000 if n_blocks else 0.0
    logger.info(
        " %s blocks, took %s, av. %s s/1000 blk", n_blocks, t.duration_human, av
    )

    pairs = aset2pairs(paras1, paras2, aset)
    df = pd.DataFrame(pairs, columns=columns)

    html = df.to_html() if preview else None
    return df, html


# Gradio front end for ml_fn: two free-text areas and two option checkboxes
# in, a dataframe and an HTML pane out.
mlbee = gr.Interface(
    title=f"radio-mlbee {__version__}",
    description="mlbee rest api on dev ",
    fn=ml_fn,
    inputs=[
        "textarea",
        "textarea",
        gr.Checkbox(label="Split to sents?"),
        gr.Checkbox(label="Preview?"),
    ],
    outputs=["dataframe", "html"],
    examples=[
        # Offer only the first fifth of each bundled sample text; the third
        # value fills the "Split to sents?" checkbox.
        [text1[: len(text1) // 5], text2[: len(text2) // 5], False],
    ],
)

# Launched unconditionally when this module is executed or imported.
mlbee.launch(enable_queue=True, show_error=True)