Spaces:
Runtime error
Runtime error
File size: 3,494 Bytes
bccd6e8 4ba22a4 5cb1a08 bccd6e8 677f95f bccd6e8 4ba22a4 bccd6e8 6f41e86 b496854 5cb1a08 bccd6e8 5cb1a08 bccd6e8 b496854 bccd6e8 8ccf616 bccd6e8 fc2ede0 bccd6e8 b496854 5cb1a08 fc2ede0 5cb1a08 bccd6e8 145bd44 5cb1a08 bccd6e8 46a9657 b496854 5cb1a08 bccd6e8 964db97 145bd44 bccd6e8 a943def bccd6e8 b496854 5cb1a08 bccd6e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
"""Create entry."""
# pylint: disable=invalid-name
import os
import time
from pathlib import Path
import gradio as gr
import pandas as pd
from about_time import about_time
from aset2pairs import aset2pairs
from cmat2aset import cmat2aset
from logzero import logger
from seg_text import seg_text
from radio_mlbee import __version__
from radio_mlbee.gen_cmat import gen_cmat
from radio_mlbee.utils import text1, text2
# Pin the process timezone to Asia/Shanghai. time.tzset() re-reads TZ, but it
# is not available on every platform, so a failure is logged and ignored.
os.environ["TZ"] = "Asia/Shanghai"
try:
    time.tzset()  # type: ignore
except Exception as exc:
    logger.warning(
        "time.tzset() error: %s. Probably running Windows, we let it pass.",
        exc,
    )
def greet(name):
    """Return a placeholder greeting for *name*; a falsy name becomes "world"."""
    who = name if name else "world"
    return "Hello " + who + "!! (coming sooooon...)"
def _split_paras(text) -> list:
    """Return the non-blank lines of *text*, each stripped; ``None`` is empty."""
    if text is None:
        return []
    return [line.strip() for line in str(text).splitlines() if line.strip()]


def ml_fn(
    text1: str,
    text2: str,
    split_to_sents: bool = False,
    preview: bool = False,
    download_csv: bool = False,
) -> tuple:
    """Align multilingual (50+ pairs) text1 text2.

    Args:
        text1: First text; coerced to ``str`` and split into non-blank,
            stripped lines (one block per line).
        text2: Second text, treated the same way as ``text1``.
        split_to_sents: When true, both block lists are passed through
            ``seg_text``; if that call fails the error is logged and the
            unsegmented blocks are used.
        preview: When true, also render the result table as HTML.
        download_csv: When true, also write the result table to
            ``aligned-blocks.csv`` in the current working directory.

    Returns:
        A 3-tuple ``(df, html, dl_csv)``:
        ``df`` is a DataFrame with columns ``text1``, ``text2``, ``llh``;
        ``html`` is ``df.to_html()`` or ``None`` when ``preview`` is false;
        ``dl_csv`` is the ``Path`` of the written csv, or ``None`` when
        ``download_csv`` is false or the file could not be written.
    """
    paras1 = _split_paras(text1)
    paras2 = _split_paras(text2)

    if split_to_sents:  # TODO
        try:
            paras1 = seg_text(paras1)
        except Exception as exc:
            logger.error(exc)
        try:
            paras2 = seg_text(paras2)
        except Exception as exc:
            logger.error(exc)

    n_blocks = len(paras1) + len(paras2)
    if n_blocks:
        with about_time() as t:
            try:
                cmat = gen_cmat(paras1, paras2)
            except Exception as exc:
                logger.exception(exc)
                logger.info(paras1)
                logger.info(paras2)
                logger.info(
                    "len(paras1): %s, len(paras2): %s", len(paras1), len(paras2)
                )
                cmat = [[]]
            try:
                aset = cmat2aset(cmat)
            except Exception as exc:
                logger.exception(exc)
                aset = [["", "", ""]]

        # Timing is read after the ``with`` block has closed.
        av = f"{t.duration / n_blocks * 1000:.2f}"
        logger.info(
            " %s blocks, took %s, av. %s s/1000 blk",
            n_blocks,
            t.duration_human,
            av,
        )
        pairs = aset2pairs(paras1, paras2, aset)
    else:
        # Nothing to align; this also avoids dividing by zero blocks above.
        logger.warning("Both inputs are empty, nothing to align.")
        pairs = []

    df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"])

    html = df.to_html() if preview else None

    dl_csv = None
    if download_csv:
        csv_path = Path("aligned-blocks.csv")
        try:
            csv_path.write_text(df.to_csv(index=False), encoding="utf8")
        except Exception as exc:
            logger.exception(exc)
        else:
            # Only hand back the path once the file has actually been written.
            dl_csv = csv_path

    return df, html, dl_csv
# Gradio UI around ml_fn: two text areas and three option checkboxes in;
# the aligned table, an optional HTML preview and an optional csv file out.
mlbee = gr.Interface(
    fn=ml_fn,
    inputs=[
        "textarea",
        "textarea",
        gr.Checkbox(label="Split to sents?"),
        gr.Checkbox(label="Preview?"),
        gr.Checkbox(label="Download csv?"),
    ],
    outputs=[
        "dataframe",
        "html",
        # Top-level component, matching gr.Checkbox above, instead of the
        # deprecated gr.outputs namespace.
        gr.File(label="Click to download csv"),
    ],
    title=f"radio-mlbee {__version__}",
    description="mlbee rest api on dev ",
    examples=[
        # The first fifth of each sample text, with all options switched off.
        [text1[: len(text1) // 5], text2[: len(text2) // 5], False, False, False],
    ],
)
# Start the Gradio app with error display and request queueing switched on.
# NOTE(review): newer gradio releases configure queueing via .queue() — confirm
# against the pinned gradio version before changing this call.
mlbee.launch(show_error=True, enable_queue=True)
|