freemt committed on
Commit
a137bf6
·
1 Parent(s): 99dd882

Update (dev) fixed download sort of

Browse files
Files changed (8) hide show
  1. aligned-blocks.csv +12 -0
  2. app.py +17 -7
  3. gradio_queue.db +0 -0
  4. install-sw.sh +3 -17
  5. install-sw1.sh +25 -0
  6. poetry.lock +1 -1
  7. pyproject.toml +1 -0
  8. radio_mlbee/gen_cmat.py +15 -2
aligned-blocks.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text1,text2,llh
2
+ Wuthering Heights,呼啸山庄,0.45
3
+ --------------------------------------------------------------------------------,--------------------------------------------------------------------------------,1.0
4
+ Chapter 2,第二章,0.91
5
+ Chinese,英文,0.52
6
+ Yesterday afternoon set in misty and cold.,昨天下午又冷又有雾。,0.74
7
+ "I had half a mind to spend it by my study fire, instead of wading through heath and mud to Wuthering Heights.",我想就在书房炉边消磨一下午，不想踩着杂草污泥到呼啸山庄了。,0.45
8
+ "On coming up from dinner, however (N.B. I dine between twelve and one o'clock; the housekeeper, a matronly lady, taken as a fixture along with the house, could not, or would not, comprehend my request that I might be served at five), on mounting the stairs with this lazy intention, and stepping into the room, I saw a servant girl on her knees surrounded by brushes and coal-scuttles, and raising an infernal dust as she extinguished the flames with heaps of cinders.",���ǣ��Թ��緹��ע�⡪������ʮ������һ����֮����緹�������Ե����������ӵĸ�����Ĺܼ��ţ�һλ�����̫̫ȴ���ܣ����߲���Ը����������������ӿ��������⣩�����һ������������뷨����¥���������ӵ�ʱ�򣬿���һ��Ů�͹��ڵ��ϣ�������ɨ���ú����,0.72
9
+ ,她正在用一堆堆煤渣封火，搞起一片弥漫的灰尘。,
10
+ ,这景象立刻把我赶回头了。,
11
+ "This spectacle drove me back immediately; I took my hat, and, after a four-miles' walk, arrived at Heathcliff's garden gate just in time to escape the first feathery flakes of a snow shower.",������ñ�ӣ���������·��������ϣ�̿�����Ļ�԰�ڿڣ��պö����һ����������Ķ�ë��ѩ��,0.68
12
+ "On that bleak hill top the earth was hard with a black frost, and the air made me shiver through every limb.",���ǻ�����ɽ���ϣ�������,0.43
app.py CHANGED
@@ -87,9 +87,19 @@ def ml_fn(
87
  logger.exception(exc)
88
  aset = [["", "", ""]]
89
 
90
- _ = len(paras1) + len(paras2)
91
- av = f"{t.duration / _ * 1000:.2f}"
92
- logger.info(" %s blocks, took %s, av. %s s/1000 blk", _, t.duration_human, av)
 
 
 
 
 
 
 
 
 
 
93
 
94
  pairs = aset2pairs(paras1, paras2, aset)
95
  df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"])
@@ -104,15 +114,15 @@ def ml_fn(
104
  try:
105
  dl_csv = Path("aligned-blocks.csv")
106
  csv_str = df.to_csv(index=False)
107
- # dl_csv.write_text(csv_str, encoding="utf8")
108
  ic("Saving df.to_csv to dl_csv...")
109
  except Exception as exc:
110
  logger.exception(exc)
111
 
112
- return df, html, csv_str
113
 
114
 
115
- mlbee = gr.Interface(
116
  fn=ml_fn,
117
  inputs=[
118
  "textarea",
@@ -135,7 +145,7 @@ mlbee = gr.Interface(
135
  ],
136
  )
137
 
138
- mlbee.launch(
139
  show_error=True,
140
  enable_queue=True,
141
  )
 
87
  logger.exception(exc)
88
  aset = [["", "", ""]]
89
 
90
+ len1 = len(paras1)
91
+ len2 = len(paras2)
92
+ ic(len1, len2)
93
+
94
+ if not (len1 and len2):
95
+ _ = "At least one text is empty... nothing to do."
96
+ return pd.DataFrame([[_]]), None, None
97
+
98
+ av = ""
99
+ len12 = len1 + len2
100
+ if len12:
101
+ av = f"{t.duration / len12 * 1000:.2f}"
102
+ logger.info(" %s blocks, took %s, av. %s s/1000 blk", len12, t.duration_human, av)
103
 
104
  pairs = aset2pairs(paras1, paras2, aset)
105
  df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"])
 
114
  try:
115
  dl_csv = Path("aligned-blocks.csv")
116
  csv_str = df.to_csv(index=False)
117
+ dl_csv.write_text(csv_str, encoding="gbk")
118
  ic("Saving df.to_csv to dl_csv...")
119
  except Exception as exc:
120
  logger.exception(exc)
121
 
122
+ return df, html, dl_csv
123
 
124
 
125
+ iface = gr.Interface(
126
  fn=ml_fn,
127
  inputs=[
128
  "textarea",
 
145
  ],
146
  )
147
 
148
+ iface.launch(
149
  show_error=True,
150
  enable_queue=True,
151
  )
gradio_queue.db CHANGED
Binary files a/gradio_queue.db and b/gradio_queue.db differ
 
install-sw.sh CHANGED
@@ -1,24 +1,10 @@
1
- # pip install pipx
2
- # pipx install poetry
3
- # pipx ensurepath
4
- # source ~/.bashrc
5
-
6
- # curl -sSL https://install.python-poetry.org | python3 -
7
- # -C- continue -S show error -o output
8
  curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
9
  python install-poetry.py
10
  rm install-poetry.py
11
  echo export PATH=~/.local/bin:$PATH > ~/.bashrc
12
  source ~/.bashrc
13
- # ~/.local/bin/poetry install
14
-
15
- wget -c https://deb.nodesource.com/setup_12.x
16
- bash setup_12.x
17
- apt-get install -y nodejs
18
- npm install -g npm@latest
19
- npm install -g nodemon
20
- rm setup_12.x
21
 
22
- # apt upate # alerady done in apt-get install -y nodejs
23
  apt install byobu -y > /dev/null 2>&1
24
-
 
 
 
 
 
 
 
 
 
1
  curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
2
  python install-poetry.py
3
  rm install-poetry.py
4
  echo export PATH=~/.local/bin:$PATH > ~/.bashrc
5
  source ~/.bashrc
 
 
 
 
 
 
 
 
6
 
7
+ apt update > /dev/null 2>&1
8
  apt install byobu -y > /dev/null 2>&1
9
+ byobu-enable
10
+ byobu
install-sw1.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pip install pipx
2
+ # pipx install poetry
3
+ # pipx ensurepath
4
+ # source ~/.bashrc
5
+
6
+ # curl -sSL https://install.python-poetry.org | python3 -
7
+ # -C- continue -S show error -o output
8
+ curl -sSL -C- -o install-poetry.py https://install.python-poetry.org
9
+ python install-poetry.py
10
+ rm install-poetry.py
11
+ echo export PATH=~/.local/bin:$PATH > ~/.bashrc
12
+ source ~/.bashrc
13
+ # ~/.local/bin/poetry install
14
+
15
+ wget -c https://deb.nodesource.com/setup_12.x
16
+ bash setup_12.x
17
+ apt-get install -y nodejs
18
+ npm install -g npm@latest
19
+ npm install -g nodemon
20
+ rm setup_12.x
21
+
22
+ # apt update # already done in apt-get install -y nodejs
23
+ apt install byobu -y > /dev/null 2>&1
24
+ byobu-enable
25
+ byobu
poetry.lock CHANGED
@@ -1517,7 +1517,7 @@ multidict = ">=4.0"
1517
  [metadata]
1518
  lock-version = "1.1"
1519
  python-versions = "^3.8.3"
1520
- content-hash = "8a3e87bc76faae5ea5b48c1e0cdb5bf98a1b671e2d90baa30696f228d8a6113b"
1521
 
1522
  [metadata.files]
1523
  about-time = [
 
1517
  [metadata]
1518
  lock-version = "1.1"
1519
  python-versions = "^3.8.3"
1520
+ content-hash = "4d19a2267a01e6f89ec45ad178fe2bc0e9c4c36c07c1737be1e965480bcee049"
1521
 
1522
  [metadata.files]
1523
  about-time = [
pyproject.toml CHANGED
@@ -26,6 +26,7 @@ more-itertools = "^8.13.0"
26
  cchardet = "^2.1.7"
27
  typer = "^0.4.1"
28
  seg-text = "^0.1.2"
 
29
 
30
  [tool.poe.executor]
31
  type = "poetry"
 
26
  cchardet = "^2.1.7"
27
  typer = "^0.4.1"
28
  seg-text = "^0.1.2"
29
+ joblib = "^1.1.0"
30
 
31
  [tool.poe.executor]
32
  type = "poetry"
radio_mlbee/gen_cmat.py CHANGED
@@ -1,25 +1,38 @@
1
  """Gen cmat for de/en text."""
2
  # pylint: disable=invalid-name, too-many-branches
3
-
 
4
  from typing import List, Optional
5
 
6
  import more_itertools as mit
7
  import numpy as np
 
8
 
9
- # from model_pool import load_model_s
10
  from hf_model_s_cpu import model_s
 
11
  from logzero import logger
 
12
  from tqdm import tqdm
13
 
14
  from radio_mlbee.cos_matrix2 import cos_matrix2
15
 
 
 
 
 
16
  try:
17
  model = model_s()
18
  except Exception as _:
19
  logger.exception(_)
20
  raise
21
 
 
 
 
 
 
22
 
 
23
  def gen_cmat(text1: List[str], text2: List[str], bsize: int = 50) -> np.ndarray:
24
  """Gen corr matrix for texts.
25
 
 
1
  """Gen cmat for de/en text."""
2
  # pylint: disable=invalid-name, too-many-branches
3
+ import os
4
+ from pathlib import Path
5
  from typing import List, Optional
6
 
7
  import more_itertools as mit
8
  import numpy as np
9
+ import logzero
10
 
 
11
  from hf_model_s_cpu import model_s
12
+ from joblib import Memory
13
  from logzero import logger
14
+ from set_loglevel import set_loglevel
15
  from tqdm import tqdm
16
 
17
  from radio_mlbee.cos_matrix2 import cos_matrix2
18
 
19
+ os.environ["LOGLEVEL"] = "10" # turn debug on
20
+ logzero.loglevel(set_loglevel())
21
+ logger.debug(" debug is on ")
22
+
23
  try:
24
  model = model_s()
25
  except Exception as _:
26
  logger.exception(_)
27
  raise
28
 
29
+ cachedir = Path("~").expanduser() / "cachedir"
30
+ memory = Memory(cachedir, verbose=0)
31
+ if set_loglevel() <= 10:
32
+ memory.clear()
33
+
34
 
35
+ @memory.cache
36
  def gen_cmat(text1: List[str], text2: List[str], bsize: int = 50) -> np.ndarray:
37
  """Gen corr matrix for texts.
38