freemt committed on
Commit
1319de8
·
1 Parent(s): f11db08

Bump version from 0.1.2-alpha.2 to 0.1.2-alpha.3, styled df, dl links

Browse files
app.py CHANGED
@@ -195,4 +195,5 @@ def main():
195
  state.ns.count += 1
196
  state.ns.updated = False
197
 
 
198
  main()
 
195
  state.ns.count += 1
196
  state.ns.updated = False
197
 
198
+
199
  main()
litbee/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  """Init."""
2
- __version__ = "0.1.2a1"
3
  from .litbee import litbee
4
 
5
  __all__ = ("litbee",)
 
1
  """Init."""
2
+ __version__ = "0.1.2a3"
3
  from .litbee import litbee
4
 
5
  __all__ = ("litbee",)
litbee/app.py CHANGED
@@ -55,13 +55,12 @@ from pathlib import Path
55
  from types import SimpleNamespace
56
  from typing import Optional
57
 
 
 
 
58
  import loguru
59
  import logzero
60
  import pandas as pd
61
- import ezbee
62
- import dzbee
63
- import debee
64
-
65
  import streamlit as st
66
  from loguru import logger as loggu
67
  from logzero import logger
@@ -69,25 +68,25 @@ from set_loglevel import set_loglevel
69
  from streamlit import session_state as state
70
 
71
  from litbee import __version__
72
- # from litbee.options import options
 
 
 
73
 
74
  # from litbee.files2df import files2df
75
  # from litbee.utils import sb_front_cover, instructions, menu_items
76
  # from litbee.ezbee_page import ezbee_page
77
  # from litbee.dzbee_page import dzbee_page
78
  # from litbee.xbee_page import xbee_page
79
- from litbee.utils import menu_items
 
 
80
 
81
- from litbee.multipage import Multipage
82
 
83
  # from litbee.fetch_upload import fetch_upload
84
  # from litbee.fetch_paste import fetch_paste
85
  # from litbee.fetch_urls import fetch_urls
86
 
87
- from litbee.home import home
88
- from litbee.settings import settings
89
- from litbee.info import info
90
- from litbee.utils import style_css
91
 
92
  # from ezbee import ezbee
93
 
@@ -195,4 +194,5 @@ def main():
195
  state.ns.count += 1
196
  state.ns.updated = False
197
 
 
198
  main()
 
55
  from types import SimpleNamespace
56
  from typing import Optional
57
 
58
+ import debee
59
+ import dzbee
60
+ import ezbee
61
  import loguru
62
  import logzero
63
  import pandas as pd
 
 
 
 
64
  import streamlit as st
65
  from loguru import logger as loggu
66
  from logzero import logger
 
68
  from streamlit import session_state as state
69
 
70
  from litbee import __version__
71
+ from litbee.home import home
72
+ from litbee.info import info
73
+ from litbee.multipage import Multipage
74
+ from litbee.settings import settings
75
 
76
  # from litbee.files2df import files2df
77
  # from litbee.utils import sb_front_cover, instructions, menu_items
78
  # from litbee.ezbee_page import ezbee_page
79
  # from litbee.dzbee_page import dzbee_page
80
  # from litbee.xbee_page import xbee_page
81
+ from litbee.utils import menu_items, style_css
82
+
83
+ # from litbee.options import options
84
 
 
85
 
86
  # from litbee.fetch_upload import fetch_upload
87
  # from litbee.fetch_paste import fetch_paste
88
  # from litbee.fetch_urls import fetch_urls
89
 
 
 
 
 
90
 
91
  # from ezbee import ezbee
92
 
 
194
  state.ns.count += 1
195
  state.ns.updated = False
196
 
197
+
198
  main()
litbee/color_map.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Map cell background color for pandas.DataFrame.

Per the original note, the palette was generated with::

    palette = sns.blend_palette(
        # ["pink", "palegreen", 'green'], N_COLORS).as_hex()
        # ["pink", "palegreen"], N_COLORS).as_hex()
        ["red", "palegreen"], N_COLORS).as_hex()

Refer to color_table_applymap.py

Taken from vizbee color_map
"""
# pylint: disable=invalid-name, broad-except
palette = [
    # The two reddest blend steps are intentionally left disabled.
    # "#f00000",
    # "#f02315",
    "#e2482c",
    "#d36b41",
    "#c49057",
    "#b5b36c",
    "#a7d883",
    "#98fb98",
]
ncolors = len(palette)


def color_map(v, min_: float = 0, max_: float = 1) -> str:
    """Map a cell value to a CSS background-color declaration.

    The interval ``[min_, max_]`` is split into ``ncolors`` equal-width
    buckets and ``v`` is assigned the palette entry of the bucket it falls
    into. Values outside the interval are clamped to the first or last
    palette entry.

    e.g. s_df = df.style.applymap(color_map, min_=min_, max_=max_, subset=["B"])
    or s_df = df.style.applymap(color_map, subset=['likelihood'])

    or
        s_df = df.style.applymap(color_map, subset=[2,])

    or
        s_df = df.style.applymap(color_map, subset=[df.columns[2]])

    or
        s_df = df.style.applymap(color_map, subset=[*df.columns[1:3]] + [*df.columns[0:1]])

    Args:
        v: cell value; anything ``float()`` accepts gets a color.
        min_: value mapped to the start of the palette.
        max_: value mapped to the end of the palette.

    Returns:
        ``"background-color: <hex>"`` for values that convert to a number,
        ``"wrap_text: true"`` for everything else (str, None, NaN, inf, or
        when ``max_ == min_`` makes the bucket width zero).
    """
    wd = (max_ - min_) / ncolors
    try:
        idx = int((float(v) - min_) / wd)
    except Exception:  # deliberately broad: a styling callback must not raise
        return "wrap_text: true"

    # Clamp on both sides: without the lower bound a value below min_ gives a
    # negative index, which silently wraps around to the end of the palette.
    idx = max(0, min(ncolors - 1, idx))

    return f"background-color: {palette[idx]}"
litbee/fetch_paste.py CHANGED
@@ -40,5 +40,7 @@ def fetch_paste():
40
 
41
  logger.debug("len(list1): %s, len(list2): %s", len(list1), len(list2))
42
 
43
- state.ns.updated = True
44
  logger.debug("state.ns.updated: %s", state.ns.updated)
 
 
 
 
40
 
41
  logger.debug("len(list1): %s, len(list2): %s", len(list1), len(list2))
42
 
 
43
  logger.debug("state.ns.updated: %s", state.ns.updated)
44
+
45
+ state.ns.src_filename = ""
46
+ state.ns.updated = True
litbee/fetch_upload.py CHANGED
@@ -1,6 +1,6 @@
1
  """Fetch upload and convert to list1/list2."""
2
- from logzero import logger
3
  import streamlit as st
 
4
  from streamlit import session_state as state
5
 
6
 
 
1
  """Fetch upload and convert to list1/list2."""
 
2
  import streamlit as st
3
+ from logzero import logger
4
  from streamlit import session_state as state
5
 
6
 
litbee/fetch_urls.py CHANGED
@@ -4,6 +4,7 @@ import streamlit as st
4
  from icecream import ic
5
  from logzero import logger
6
  from streamlit import session_state as state
 
7
  from litbee.url2txt import url2txt
8
 
9
  ic.configureOutput(
@@ -18,8 +19,12 @@ def fetch_urls():
18
  sourcecount = state.ns.sourcecount
19
  value = ""
20
  if beetype == "ezbee":
21
- url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_en.txt"
22
- url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_zh.txt"
 
 
 
 
23
  value = f"{url1} {url2}"
24
  if beetype == "dzbee":
25
  url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-de.txt"
@@ -144,10 +149,7 @@ def fetch_urls():
144
  value=text2,
145
  )
146
 
147
- submitted = st.form_submit_button(
148
- "Submit",
149
- on_click=text2lists
150
- )
151
 
152
  else: # 1-mix
153
  with st.form(key="fetched_1_text_in_form"):
@@ -167,4 +169,5 @@ def fetch_urls():
167
  return
168
  # """
169
 
 
170
  state.ns.updated = True
 
4
  from icecream import ic
5
  from logzero import logger
6
  from streamlit import session_state as state
7
+
8
  from litbee.url2txt import url2txt
9
 
10
  ic.configureOutput(
 
19
  sourcecount = state.ns.sourcecount
20
  value = ""
21
  if beetype == "ezbee":
22
+ url1 = (
23
+ "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_en.txt"
24
+ )
25
+ url2 = (
26
+ "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_zh.txt"
27
+ )
28
  value = f"{url1} {url2}"
29
  if beetype == "dzbee":
30
  url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-de.txt"
 
149
  value=text2,
150
  )
151
 
152
+ submitted = st.form_submit_button("Submit", on_click=text2lists)
 
 
 
153
 
154
  else: # 1-mix
155
  with st.form(key="fetched_1_text_in_form"):
 
169
  return
170
  # """
171
 
172
+ state.ns.src_filename = ""
173
  state.ns.updated = True
litbee/files2df.py CHANGED
@@ -14,7 +14,9 @@ def files2df(file1, file2):
14
  text1 = [_.strip() for _ in process_upload(file1).splitlines() if _.strip()]
15
 
16
  # if file2 is tempfile._TemporaryFileWrapper:
17
- if isinstance(file2, tempfile._TemporaryFileWrapper): # pylint: disable=protected-access
 
 
18
  try:
19
  filename = file2.name
20
  except AttributeError:
 
14
  text1 = [_.strip() for _ in process_upload(file1).splitlines() if _.strip()]
15
 
16
  # if file2 is tempfile._TemporaryFileWrapper:
17
+ if isinstance(
18
+ file2, tempfile._TemporaryFileWrapper
19
+ ): # pylint: disable=protected-access
20
  try:
21
  filename = file2.name
22
  except AttributeError:
litbee/home.py CHANGED
@@ -4,18 +4,20 @@ org ezbee_page.py.
4
  """
5
  # pylint: disable=invalid-name
6
  # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements
 
7
  import inspect
 
8
 
9
  # pylint: disable=invalid-name
10
  from functools import partial
11
  from itertools import zip_longest
12
- from about_time import about_time
13
 
14
  import hanzidentifier
15
  import logzero
16
  import numpy as np
17
  import pandas as pd
18
  import streamlit as st
 
19
 
20
  # from ezbee.gen_pairs import gen_pairs # aset2pairs?
21
  from aset2pairs import aset2pairs
@@ -32,8 +34,9 @@ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
32
  # from st_aggrid.grid_options_builder import GridOptionsBuilder
33
  from streamlit import session_state as state
34
 
35
- from litbee.fetch_upload import fetch_upload
36
  from litbee.fetch_paste import fetch_paste
 
37
  from litbee.fetch_urls import fetch_urls
38
  from litbee.t2s import t2s
39
 
@@ -156,7 +159,9 @@ def home(): # noqa
156
  # min_samples=min_samples,
157
  )
158
  except Exception as e:
159
- logger.exception("aset = globals()[state.ns.beetype](...) exc: %s", e)
 
 
160
  aset = ""
161
  st.write("Collecting inputs...")
162
  logger.debug("Collecting inputs...")
@@ -197,12 +202,12 @@ def home(): # noqa
197
  aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
198
  )
199
 
200
- # if set_loglevel() <= 10:
201
- _ = st.expander("done aligned")
202
- with _:
203
- st.table(df_a.astype(str))
204
- # st.markdown(df_a.astype(str).to_markdown())
205
- # st.markdown(df_a.astype(str).to_numpy().tolist())
206
 
207
  # insert seq no
208
  df_a.insert(0, "sn", range(len(df_a)))
@@ -234,6 +239,53 @@ def home(): # noqa
234
  update_mode=GridUpdateMode.MODEL_CHANGED,
235
  )
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  # reset
238
  state.ns.updated = False
239
 
 
4
  """
5
  # pylint: disable=invalid-name
6
  # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches, too-many-statements
7
+ import base64
8
  import inspect
9
+ import io
10
 
11
  # pylint: disable=invalid-name
12
  from functools import partial
13
  from itertools import zip_longest
 
14
 
15
  import hanzidentifier
16
  import logzero
17
  import numpy as np
18
  import pandas as pd
19
  import streamlit as st
20
+ from about_time import about_time
21
 
22
  # from ezbee.gen_pairs import gen_pairs # aset2pairs?
23
  from aset2pairs import aset2pairs
 
34
  # from st_aggrid.grid_options_builder import GridOptionsBuilder
35
  from streamlit import session_state as state
36
 
37
+ from litbee.color_map import color_map
38
  from litbee.fetch_paste import fetch_paste
39
+ from litbee.fetch_upload import fetch_upload
40
  from litbee.fetch_urls import fetch_urls
41
  from litbee.t2s import t2s
42
 
 
159
  # min_samples=min_samples,
160
  )
161
  except Exception as e:
162
+ logger.exception(
163
+ "aset = globals()[state.ns.beetype](...) exc: %s", e
164
+ )
165
  aset = ""
166
  st.write("Collecting inputs...")
167
  logger.debug("Collecting inputs...")
 
202
  aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
203
  )
204
 
205
+ if set_loglevel() <= 10:
206
+ _ = st.expander("done aligned")
207
+ with _:
208
+ st.table(df_a.astype(str))
209
+ # st.markdown(df_a.astype(str).to_markdown())
210
+ # st.markdown(df_a.astype(str).to_numpy().tolist())
211
 
212
  # insert seq no
213
  df_a.insert(0, "sn", range(len(df_a)))
 
239
  update_mode=GridUpdateMode.MODEL_CHANGED,
240
  )
241
 
242
+ # ### prep download
243
+
244
+ # taken from vizbee cb_save_xlsx
245
+ # subset = list(df_a.columns[2:3]) # 3rd col
246
+ subset = list(df_a.columns[2:]) # 3rd col
247
+
248
+ # pop("sn"): remove sn column
249
+ df_a.pop("sn")
250
+ s_df = df_a.astype(str).style.applymap(color_map, subset=subset)
251
+
252
+ if set_loglevel() <= 10:
253
+ logger.debug(" showing styled aligned")
254
+ with st.expander("styled aligned"):
255
+ # st.dataframe(s_df) # can't handle styleddf
256
+ st.table(s_df)
257
+
258
+ output = io.BytesIO()
259
+ with pd.ExcelWriter(
260
+ output, engine="xlsxwriter"
261
+ ) as writer: # pylint: disable=abstract-class-instantiated
262
+ s_df.to_excel(writer, index=False, header=False, sheet_name="Sheet1")
263
+ writer.sheets["Sheet1"].set_column("A:A", 70)
264
+ writer.sheets["Sheet1"].set_column("B:B", 70)
265
+ output.seek(0)
266
+
267
+ val = output.getvalue()
268
+ b64 = base64.b64encode(val)
269
+ filename = ""
270
+ if state.ns.src_filename:
271
+ filename = f"{state.ns.src_filename}-"
272
+
273
+ dl_xlsx = f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}aligned_paras.xlsx">Download aligned paras xlsx</a>'
274
+
275
+ output = io.BytesIO()
276
+ df_a.astype(str).to_csv(output, sep="\t", index=False, header=False, encoding="gbk")
277
+ output.seek(0)
278
+
279
+ val = output.getvalue()
280
+ b64 = base64.b64encode(val)
281
+ dl_tsv = f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}aligned_paras.tsv">Download aligned paras tsv</a>'
282
+
283
+ col1_dl, col2_dl = st.columns(2)
284
+ with col1_dl:
285
+ st.markdown(dl_xlsx, unsafe_allow_html=True)
286
+ with col2_dl:
287
+ st.markdown(dl_tsv, unsafe_allow_html=True)
288
+
289
  # reset
290
  state.ns.updated = False
291
 
litbee/pad.txt DELETED
@@ -1,21 +0,0 @@
1
-
2
- if text1:
3
- try:
4
- list1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
5
- state.ns.list1 = list1[:]
6
- except Exception as e:
7
- logger.warning("text1 to list1 errors: %s", e)
8
- st.text_area(
9
- label="Your pasted",
10
- value=text1,
11
- key="pasted_text1",
12
- # help=""
13
- height=500,
14
- )
15
-
16
- if text2:
17
- try:
18
- list1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
19
- state.ns.list1 = list1[:]
20
- except Exception as e:
21
- logger.warning("text1 to list1 errors: %s", e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
litbee/settings.py CHANGED
@@ -68,7 +68,7 @@ def settings():
68
  sourcecount_list,
69
  index=index,
70
  format_func=lambda x: f"{x:<3} |",
71
- help="2: two separate sources (files/paste/urls), each containing one language; 1: one mixed source (file/pate/url) containing both languages",
72
  disabled=True,
73
  )
74
  state.ns.sourcecount = sourcecount
 
68
  sourcecount_list,
69
  index=index,
70
  format_func=lambda x: f"{x:<3} |",
71
+ help="2: two separate sources (files/pastes/urls), each containing one language; 1: one mixed source (file/paste/url) containing both languages",
72
  disabled=True,
73
  )
74
  state.ns.sourcecount = sourcecount
litbee/t2s.py CHANGED
@@ -1,11 +1,12 @@
1
  """Convert list to simplified Chinese for traditional Chinese, do nothing otherwise."""
2
  # pylint: disable=invalid-name
3
  from typing import List
 
4
  import hanzidentifier
5
  from logzero import logger
6
  from opencc import OpenCC
7
 
8
- convert = OpenCC('t2s').convert
9
 
10
 
11
  def t2s(lst: List[str]) -> List[str]:
@@ -21,7 +22,9 @@ def t2s(lst: List[str]) -> List[str]:
21
  # lst[:1000] strim down for extremely large docs
22
  _ = hanzidentifier.identify(" ".join(lst[:1000]))
23
  except Exception as e:
24
- logger.warning("hanzidentifier.is_traditional error: %s, settin to simplified.", e)
 
 
25
  _ = hanzidentifier.SIMP # 2: simplified
26
 
27
  if _ not in [hanzidentifier.TRAD, hanzidentifier.MIXED]:
 
1
  """Convert list to simplified Chinese for traditional Chinese, do nothing otherwise."""
2
  # pylint: disable=invalid-name
3
  from typing import List
4
+
5
  import hanzidentifier
6
  from logzero import logger
7
  from opencc import OpenCC
8
 
9
+ convert = OpenCC("t2s").convert
10
 
11
 
12
  def t2s(lst: List[str]) -> List[str]:
 
22
  # lst[:1000] strim down for extremely large docs
23
  _ = hanzidentifier.identify(" ".join(lst[:1000]))
24
  except Exception as e:
25
+ logger.warning(
26
+ "hanzidentifier.is_traditional error: %s, settin to simplified.", e
27
+ )
28
  _ = hanzidentifier.SIMP # 2: simplified
29
 
30
  if _ not in [hanzidentifier.TRAD, hanzidentifier.MIXED]:
litbee/utils.py CHANGED
@@ -1,7 +1,10 @@
1
  """Prep front cover for sidebar (based on st-bumblebee-st_app.py)."""
 
 
2
  from textwrap import dedent
3
 
4
  import logzero
 
5
  import streamlit as st
6
  from logzero import logger
7
  from set_loglevel import set_loglevel
@@ -267,3 +270,44 @@ hr {
267
  }
268
  }
269
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Prep front cover for sidebar (based on st-bumblebee-st_app.py)."""
2
+ import base64
3
+ from io import BytesIO
4
  from textwrap import dedent
5
 
6
  import logzero
7
+ import pandas as pd
8
  import streamlit as st
9
  from logzero import logger
10
  from set_loglevel import set_loglevel
 
270
  }
271
  }
272
  """
273
+
274
+
def to_excel(df):
    """Convert df to excel.

    ref. st-bumblebee st_app.py

    Args:
        df: object exposing ``to_excel(writer, sheet_name=...)``, e.g. a
            pandas.DataFrame.

    Returns:
        bytes of the xlsx workbook, written to the sheet "Sheet1".
    """
    output = BytesIO()
    # Use the writer as a context manager instead of calling writer.save():
    # save() was removed in pandas 2.0, while leaving the with-block
    # finalizes the workbook into `output` on every pandas version.
    with pd.ExcelWriter(  # pylint: disable=abstract-class-instantiated
        output, engine="xlsxwriter"
    ) as writer:
        df.to_excel(writer, sheet_name="Sheet1")
    return output.getvalue()
286
+
287
+
def get_table_download_link(df):
    """Build an HTML anchor for downloading ``df`` as an xlsx file.

    The dataframe is serialized with ``to_excel`` and embedded in the link
    as a base64 data URI; the suggested filename is ``aligned_paras.xlsx``.

    Args:
        df: pandas.dataframe

    Returns:
        href string
    """
    # b64encode yields bytes (b'...'); decode to plain text for the URI.
    payload = base64.b64encode(to_excel(df)).decode()
    return (
        f'<a href="data:application/octet-stream;base64,{payload}" '
        'download="aligned_paras.xlsx">Download aligned paras xlsx file</a>'
    )
300
+
301
+
def get_table_download_link_sents(df):
    """Build an HTML anchor for downloading aligned sents in ``df`` as xlsx.

    The dataframe is serialized with ``to_excel`` and embedded in the link
    as a base64 data URI; the suggested filename is ``aligned_sents.xlsx``.

    Args:
        df: pandas.dataframe

    Returns:
        href string
    """
    # b64encode yields bytes (b'...'); decode to plain text for the URI.
    payload = base64.b64encode(to_excel(df)).decode()
    return (
        f'<a href="data:application/octet-stream;base64,{payload}" '
        'download="aligned_sents.xlsx">Download aligned sents xlsx file</a>'
    )
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
  [tool.poetry]
2
  name = "litbee"
3
- version = "0.1.2-alpha.1"
4
  description = "align (en, de, zh) texts via streamlit "
5
  authors = ["ffreemt"]
6
  license = "MIT"
@@ -48,7 +48,10 @@ isort = "isort tests litbee"
48
  black = "black tests litbee"
49
  format = ["isort", "black"]
50
  docstyle = "pydocstyle --convention=google tests litbee"
 
 
51
  git = {cmd = "git ac Update && git push && git push github"}
 
52
  tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
53
 
54
  [tool.poetry.dev-dependencies]
 
1
  [tool.poetry]
2
  name = "litbee"
3
+ version = "0.1.2-alpha.3"
4
  description = "align (en, de, zh) texts via streamlit "
5
  authors = ["ffreemt"]
6
  license = "MIT"
 
48
  black = "black tests litbee"
49
  format = ["isort", "black"]
50
  docstyle = "pydocstyle --convention=google tests litbee"
# Copy the top-level app.py into the package ("appy.py" was a typo).
copyapp = "copy app.py litbee/app.py"
52
+ prerelease = {cmd = "poetry version prerelease && sync-version"}
53
  git = {cmd = "git ac Update && git push && git push github"}
54
+ deploy = ["copyapp", "export", "git"]
55
  tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
56
 
57
  [tool.poetry.dev-dependencies]
pyrightconfig.json CHANGED
@@ -4,6 +4,7 @@
4
  "reportTypeshedErrors": false,
5
  "reportMissingImports": true,
6
  "reportMissingTypeStubs": false,
7
- "pythonVersion": "3.7",
 
8
  "ignore": []
9
  }
 
4
  "reportTypeshedErrors": false,
5
  "reportMissingImports": true,
6
  "reportMissingTypeStubs": false,
7
+ "reportOptionalMemberAccess": true,
8
+ "pythonVersion": "3.8",
9
  "ignore": []
10
  }