Spaces:
Runtime error
Runtime error
freemt
committed on
Commit
·
a137bf6
1
Parent(s):
99dd882
Update (dev) fixed download sort of
Browse files- aligned-blocks.csv +12 -0
- app.py +17 -7
- gradio_queue.db +0 -0
- install-sw.sh +3 -17
- install-sw1.sh +25 -0
- poetry.lock +1 -1
- pyproject.toml +1 -0
- radio_mlbee/gen_cmat.py +15 -2
aligned-blocks.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
text1,text2,llh
|
2 |
+
Wuthering Heights,呼啸山庄,0.45
|
3 |
+
--------------------------------------------------------------------------------,--------------------------------------------------------------------------------,1.0
|
4 |
+
Chapter 2,第二章,0.91
|
5 |
+
Chinese,Ӣ��,0.52
|
6 |
+
Yesterday afternoon set in misty and cold.,��������������������,0.74
|
7 |
+
"I had half a mind to spend it by my study fire, instead of wading through heath and mud to Wuthering Heights.",��������鷿¯����ĥһ���磬��������Ӳ����ൽ��Хɽׯ�ˡ�,0.45
|
8 |
+
"On coming up from dinner, however (N.B. I dine between twelve and one o'clock; the housekeeper, a matronly lady, taken as a fixture along with the house, could not, or would not, comprehend my request that I might be served at five), on mounting the stairs with this lazy intention, and stepping into the room, I saw a servant girl on her knees surrounded by brushes and coal-scuttles, and raising an infernal dust as she extinguished the flames with heaps of cinders.",���ǣ��Թ��緹��ע�⡪������ʮ������һ����֮����緹�������Ե����������ӵĸ�����Ĺܼ��ţ�һλ�����̫̫ȴ���ܣ����߲���Ը����������������ӿ��������⣩�����һ������������뷨����¥���������ӵ�ʱ����һ��Ů���ڵ��ϣ�������ɨ���ú����,0.72
|
9 |
+
,��������һ�Ѷ�ú����𣬸���һƬ�����Ļҳ���,
|
10 |
+
,�⾰�����̰��Ҹϻ�ͷ�ˡ�,
|
11 |
+
"This spectacle drove me back immediately; I took my hat, and, after a four-miles' walk, arrived at Heathcliff's garden gate just in time to escape the first feathery flakes of a snow shower.",������ñ�ӣ���������·��������ϣ�̿�����Ļ��ڿڣ��պö����һ����������Ķ�ë��ѩ��,0.68
|
12 |
+
"On that bleak hill top the earth was hard with a black frost, and the air made me shiver through every limb.",���ǻ�����ɽ���ϣ�������,0.43
|
app.py
CHANGED
@@ -87,9 +87,19 @@ def ml_fn(
|
|
87 |
logger.exception(exc)
|
88 |
aset = [["", "", ""]]
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
pairs = aset2pairs(paras1, paras2, aset)
|
95 |
df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"])
|
@@ -104,15 +114,15 @@ def ml_fn(
|
|
104 |
try:
|
105 |
dl_csv = Path("aligned-blocks.csv")
|
106 |
csv_str = df.to_csv(index=False)
|
107 |
-
|
108 |
ic("Saving df.to_csv to dl_csv...")
|
109 |
except Exception as exc:
|
110 |
logger.exception(exc)
|
111 |
|
112 |
-
return df, html,
|
113 |
|
114 |
|
115 |
-
|
116 |
fn=ml_fn,
|
117 |
inputs=[
|
118 |
"textarea",
|
@@ -135,7 +145,7 @@ mlbee = gr.Interface(
|
|
135 |
],
|
136 |
)
|
137 |
|
138 |
-
|
139 |
show_error=True,
|
140 |
enable_queue=True,
|
141 |
)
|
|
|
87 |
logger.exception(exc)
|
88 |
aset = [["", "", ""]]
|
89 |
|
90 |
+
len1 = len(paras1)
|
91 |
+
len2 = len(paras2)
|
92 |
+
ic(len1, len2)
|
93 |
+
|
94 |
+
if not (len1 and len2):
|
95 |
+
_ = "At least one text is empty... nothing to do."
|
96 |
+
return pd.DataFrame([[_]]), None, None
|
97 |
+
|
98 |
+
av = ""
|
99 |
+
len12 = len1 + len2
|
100 |
+
if len12:
|
101 |
+
av = f"{t.duration / len12 * 1000:.2f}"
|
102 |
+
logger.info(" %s blocks, took %s, av. %s s/1000 blk", len12, t.duration_human, av)
|
103 |
|
104 |
pairs = aset2pairs(paras1, paras2, aset)
|
105 |
df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"])
|
|
|
114 |
try:
|
115 |
dl_csv = Path("aligned-blocks.csv")
|
116 |
csv_str = df.to_csv(index=False)
|
117 |
+
dl_csv.write_text(csv_str, encoding="gbk")
|
118 |
ic("Saving df.to_csv to dl_csv...")
|
119 |
except Exception as exc:
|
120 |
logger.exception(exc)
|
121 |
|
122 |
+
return df, html, dl_csv
|
123 |
|
124 |
|
125 |
+
iface = gr.Interface(
|
126 |
fn=ml_fn,
|
127 |
inputs=[
|
128 |
"textarea",
|
|
|
145 |
],
|
146 |
)
|
147 |
|
148 |
+
iface.launch(
|
149 |
show_error=True,
|
150 |
enable_queue=True,
|
151 |
)
|
gradio_queue.db
CHANGED
Binary files a/gradio_queue.db and b/gradio_queue.db differ
|
|
install-sw.sh
CHANGED
@@ -1,24 +1,10 @@
|
|
1 |
-
# pip install pipx
|
2 |
-
# pipx install poetry
|
3 |
-
# pipx ensurepath
|
4 |
-
# source ~/.bashrc
|
5 |
-
|
6 |
-
# curl -sSL https://install.python-poetry.org | python3 -
|
7 |
-
# -C- continue -S show error -o output
|
8 |
curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
|
9 |
python install-poetry.py
|
10 |
rm install-poetry.py
|
11 |
echo export PATH=~/.local/bin:$PATH > ~/.bashrc
|
12 |
source ~/.bashrc
|
13 |
-
# ~/.local/bin/poetry install
|
14 |
-
|
15 |
-
wget -c https://deb.nodesource.com/setup_12.x
|
16 |
-
bash setup_12.x
|
17 |
-
apt-get install -y nodejs
|
18 |
-
npm install -g npm@latest
|
19 |
-
npm install -g nodemon
|
20 |
-
rm setup_12.x
|
21 |
|
22 |
-
|
23 |
apt install byobu -y > /dev/null 2>&1
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
|
2 |
python install-poetry.py
|
3 |
rm install-poetry.py
|
4 |
echo export PATH=~/.local/bin:$PATH > ~/.bashrc
|
5 |
source ~/.bashrc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
apt update > /dev/null 2>&1
|
8 |
apt install byobu -y > /dev/null 2>&1
|
9 |
+
byobu-enable
|
10 |
+
byobu
|
install-sw1.sh
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pip install pipx
|
2 |
+
# pipx install poetry
|
3 |
+
# pipx ensurepath
|
4 |
+
# source ~/.bashrc
|
5 |
+
|
6 |
+
# curl -sSL https://install.python-poetry.org | python3 -
|
7 |
+
# -C- continue -S show error -o output
|
8 |
+
curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
|
9 |
+
python install-poetry.py
|
10 |
+
rm install-poetry.py
|
11 |
+
echo export PATH=~/.local/bin:$PATH > ~/.bashrc
|
12 |
+
source ~/.bashrc
|
13 |
+
# ~/.local/bin/poetry install
|
14 |
+
|
15 |
+
wget -c https://deb.nodesource.com/setup_12.x
|
16 |
+
bash setup_12.x
|
17 |
+
apt-get install -y nodejs
|
18 |
+
npm install -g npm@latest
|
19 |
+
npm install -g nodemon
|
20 |
+
rm setup_12.x
|
21 |
+
|
22 |
+
# apt update # already done in apt-get install -y nodejs
|
23 |
+
apt install byobu -y > /dev/null 2>&1
|
24 |
+
byobu-enable
|
25 |
+
byobu
|
poetry.lock
CHANGED
@@ -1517,7 +1517,7 @@ multidict = ">=4.0"
|
|
1517 |
[metadata]
|
1518 |
lock-version = "1.1"
|
1519 |
python-versions = "^3.8.3"
|
1520 |
-
content-hash = "
|
1521 |
|
1522 |
[metadata.files]
|
1523 |
about-time = [
|
|
|
1517 |
[metadata]
|
1518 |
lock-version = "1.1"
|
1519 |
python-versions = "^3.8.3"
|
1520 |
+
content-hash = "4d19a2267a01e6f89ec45ad178fe2bc0e9c4c36c07c1737be1e965480bcee049"
|
1521 |
|
1522 |
[metadata.files]
|
1523 |
about-time = [
|
pyproject.toml
CHANGED
@@ -26,6 +26,7 @@ more-itertools = "^8.13.0"
|
|
26 |
cchardet = "^2.1.7"
|
27 |
typer = "^0.4.1"
|
28 |
seg-text = "^0.1.2"
|
|
|
29 |
|
30 |
[tool.poe.executor]
|
31 |
type = "poetry"
|
|
|
26 |
cchardet = "^2.1.7"
|
27 |
typer = "^0.4.1"
|
28 |
seg-text = "^0.1.2"
|
29 |
+
joblib = "^1.1.0"
|
30 |
|
31 |
[tool.poe.executor]
|
32 |
type = "poetry"
|
radio_mlbee/gen_cmat.py
CHANGED
@@ -1,25 +1,38 @@
|
|
1 |
"""Gen cmat for de/en text."""
|
2 |
# pylint: disable=invalid-name, too-many-branches
|
3 |
-
|
|
|
4 |
from typing import List, Optional
|
5 |
|
6 |
import more_itertools as mit
|
7 |
import numpy as np
|
|
|
8 |
|
9 |
-
# from model_pool import load_model_s
|
10 |
from hf_model_s_cpu import model_s
|
|
|
11 |
from logzero import logger
|
|
|
12 |
from tqdm import tqdm
|
13 |
|
14 |
from radio_mlbee.cos_matrix2 import cos_matrix2
|
15 |
|
|
|
|
|
|
|
|
|
16 |
try:
|
17 |
model = model_s()
|
18 |
except Exception as _:
|
19 |
logger.exception(_)
|
20 |
raise
|
21 |
|
|
|
|
|
|
|
|
|
|
|
22 |
|
|
|
23 |
def gen_cmat(text1: List[str], text2: List[str], bsize: int = 50) -> np.ndarray:
|
24 |
"""Gen corr matrix for texts.
|
25 |
|
|
|
1 |
"""Gen cmat for de/en text."""
|
2 |
# pylint: disable=invalid-name, too-many-branches
|
3 |
+
import os
|
4 |
+
from pathlib import Path
|
5 |
from typing import List, Optional
|
6 |
|
7 |
import more_itertools as mit
|
8 |
import numpy as np
|
9 |
+
import logzero
|
10 |
|
|
|
11 |
from hf_model_s_cpu import model_s
|
12 |
+
from joblib import Memory
|
13 |
from logzero import logger
|
14 |
+
from set_loglevel import set_loglevel
|
15 |
from tqdm import tqdm
|
16 |
|
17 |
from radio_mlbee.cos_matrix2 import cos_matrix2
|
18 |
|
19 |
+
os.environ["LOGLEVEL"] = "10" # turn debug on
|
20 |
+
logzero.loglevel(set_loglevel())
|
21 |
+
logger.debug(" debug is on ")
|
22 |
+
|
23 |
try:
|
24 |
model = model_s()
|
25 |
except Exception as _:
|
26 |
logger.exception(_)
|
27 |
raise
|
28 |
|
29 |
+
cachedir = Path("~").expanduser() / "cachedir"
|
30 |
+
memory = Memory(cachedir, verbose=0)
|
31 |
+
if set_loglevel() <= 10:
|
32 |
+
memory.clear()
|
33 |
+
|
34 |
|
35 |
+
@memory.cache
|
36 |
def gen_cmat(text1: List[str], text2: List[str], bsize: int = 50) -> np.ndarray:
|
37 |
"""Gen corr matrix for texts.
|
38 |
|