freemt committed on
Commit
d18f436
·
1 Parent(s): aa4d65d

Bump version from 0.1.2-alpha.0 to 0.1.2-alpha.1

Browse files
app.py CHANGED
@@ -107,7 +107,7 @@ loggu.remove()
107
  _ = (
108
  "<green>{time:YY-MM-DD HH:mm:ss}</green> | "
109
  "<level>{level: <5}</level> | <level>{message}</level> "
110
- "<cyan>{name}</cyan>:<cyan>{line}</cyan>"
111
  )
112
  loggu.add(
113
  sys.stderr,
@@ -131,11 +131,15 @@ st.set_page_config( # type: ignore
131
  pd.set_option("display.precision", 2)
132
  pd.options.display.float_format = "{:,.2f}".format
133
 
 
 
 
 
134
  _ = dict(
135
  beetype="ezbee",
136
- sourcetype="upload",
137
  sourcecount=2,
138
- sent_ali=None,
139
  src_filename="",
140
  tgt_filename="",
141
  src_fileio=b"",
@@ -148,6 +152,7 @@ _ = dict(
148
  df_a=None,
149
  df_s_a=None,
150
  count=1,
 
151
  )
152
  if "ns" not in state:
153
  state.ns = SimpleNamespace(**_)
@@ -186,7 +191,8 @@ def main():
186
  if set_loglevel() <= 10:
187
  st.markdown(state.ns.count)
188
  loggu.debug(f" run: {state.ns.count}")
 
189
  state.ns.count += 1
190
-
191
 
192
  main()
 
107
  _ = (
108
  "<green>{time:YY-MM-DD HH:mm:ss}</green> | "
109
  "<level>{level: <5}</level> | <level>{message}</level> "
110
+ "<cyan>{module}.{name}</cyan>:<cyan>{line}</cyan>"
111
  )
112
  loggu.add(
113
  sys.stderr,
 
131
  pd.set_option("display.precision", 2)
132
  pd.options.display.float_format = "{:,.2f}".format
133
 
134
+ sourcetype = "upload"
135
+ if set_loglevel() <= 10:
136
+ sourcetype = "urls"
137
+
138
  _ = dict(
139
  beetype="ezbee",
140
+ sourcetype=sourcetype,
141
  sourcecount=2,
142
+ sentali=None,
143
  src_filename="",
144
  tgt_filename="",
145
  src_fileio=b"",
 
152
  df_a=None,
153
  df_s_a=None,
154
  count=1,
155
+ updated=False,
156
  )
157
  if "ns" not in state:
158
  state.ns = SimpleNamespace(**_)
 
191
  if set_loglevel() <= 10:
192
  st.markdown(state.ns.count)
193
  loggu.debug(f" run: {state.ns.count}")
194
+ logger.debug(f" run: {state.ns.count}")
195
  state.ns.count += 1
196
+ state.ns.updated = False
197
 
198
  main()
data/test_zh_t.txt CHANGED
@@ -1,74 +1,36 @@
1
  呼嘯山莊
2
-
3
- -------------------------------------------------- ------------------------------
4
-
5
  第二章
6
-
7
- 中文
8
-
9
-
10
- 我想就在書房邊呼磨山霧了,想不想踩著雜草到。
11
-
12
- 注意 注意 注意 吃 吃 飯 午飯 吃 並且 當作 所 房子 午 飯 的 附屬物 管家 , 一 位 的 這 位 的 這 位 的 這 位 的 這 位 並 開 意 的 太太 能 或 願 意 意 我 在 五 點 鐘 時 用 意 意 , 在 懷 懷 的 聯 系上一樓,邁一個進了屋子地上,有掃帚正在她她正在堆堆的煤渣搞一團散佈的立刻回頭了。場場初降鵝毛鵝毛大雪。
13
-
14
- 在荒涼的涼涼裡,而而在山頂的樹叢中,踩著陸地的。到我的手指骨都痛了,狗狂吠起來。
15
-
16
- “倒霉的人至少就沒有禮了,我就直接為我家就這樣了。”我還在搖家裡孤零零地打電話。它。苦臉的約瑟夫從穀倉的一個圓窗裡探出頭來。
17
-
18
- “你幹嗎?”他大叫。 “主人在牛欄裡,你找他說話,就從這條路口繞過去。”
19
-
20
- “屋裡開門嗎?”我也叫起來。
21
-
22
- “除了老婆沒有別人。你就是騰到夜裡,她也不會開。”
23
-
24
- “為什麼?你就不能告訴她我是誰,呃,約瑟夫?”
25
-
26
  “別找我!我才不管這些閒事呢,”這個腦袋咕嚕著,又不見了。
27
-
28
- 雪開始大 握住 門柄 門柄 又 沒有 一個 外衣 外衣 烤 , , 一草 耙 , 在 院子 裡 他 招呼 我 著 走 走 一 穿過 一個 一個 和 一 鋪 一地 的 、 有 抽水機抽水機 抽水機 鴿籠 鴿籠 到 到 我 使 接待 過 的 那 個 的 、 屋子老婆以前從來沒有她的料。這麼一她我的等候,以為我會叫坐下。望望我,往的椅子一靠,不動,也不出。
29
-
30
- “天氣真!壞我我是老婆的老婆,說不定你的僕人大吃大喝,大勁才聽到,聽見了!我的”
31
-
32
- 眼睛一瞪——也不開開。 總是一冷冷瞪她的神眸冷眼瞪住,使,而且不愉快
33
-
34
- “坐下吧,”那粗聲粗氣說,“他來了。”
35
-
36
- 我咳了一聲,叫那惡狗朱諾。臨到第二次會面,它總算賞臉,搖起尾巴人埆礆。服從表柆示。衘礆。
37
-
38
- “好漂亮的狗我又開始說話了。”你是不是反對那些呢!
39
-
40
- “這一些我的女刺客克厲克民的回答,比希希親可愛的回答的主人公更冷冷的說那些人。”
41
-
42
- “,,我所愛的就是在一堆人身上!”
43
-
44
- “誰會愛這些東西那才怪呢!”她輕蔑地說。
45
-
46
- 倒霉,原來又是一堆死兔子。我輕咳一聲,向火爐湊近,又把今晚天氣不好的話評論一通。
47
-
48
- “你本來就不會。。她說,站起來去拿”該台上的兩個茶葉罐。
49
-
50
- 我沒有她的小官現在還沒有全身出現纖細的透明狀態,她的臉還沒有明顯的淡麗,而且她的身體還沒有明顯的淡麗。捲髮還不如說說,垂垂垂垂的她脖子上說說,輕而易舉地在與眼神能拒一些和說,要使人抗拒。一,而那張眼神 眼神 眼神 不是自然的。
51
-
52
- 她夠不到。 動了 一動 ,她 猛 猛 猛 猛 地 向 像 像 守財奴 守財奴 他 他 的 金子 金子
53
-
54
- “我不要你幫忙,”她怒氣沖沖地說,“我自己得到了。”
55
-
56
- “對不起!”我連忙回答。
57
-
58
- “是請你來喫茶的嗎?”她問,把一條圍裙系在那乾淨的���衣服上,就這樣站著,拿一盒茶葉〣邌候茶葉〣邌候茶葉〣邌
59
-
60
- “我很想喝杯茶。”我回答。
61
-
62
- “是請你來的嗎?”她又問。
63
-
64
- “沒有,”我說,勉強笑一笑。 “您請我喝茶。”
65
-
66
-
67
-
68
-
69
  目錄
70
- 上一章
71
- 下一章
72
-
73
-
74
  返回首頁
 
1
  呼嘯山莊
2
+ --------------------------------------------------------------------------------
 
 
3
  第二章
4
+ 英文
5
+ 昨天下午又冷又有霧。 我想就在書房爐邊消磨一下午,不想踩著雜草污泥到呼嘯山莊了。
6
+ 但是,吃過午飯(注意——我在十二點與一點鐘之間吃午飯,而可以當作這所房子的附屬物的管家婆,一比特慈祥的家後卻不能,或者並不願理解我請求在五點鐘開飯的用意),在我懷着這個懶惰的想法上了樓,邁進屋子的時候,看見一個女僕跪在地上,身邊是掃帚和煤鬥。 她正在用一堆堆煤渣封火,搞起一片彌漫的灰塵。 這景象立刻把我趕回頭了。 我拿了帽子,走了四裡路,到達了希刺克厲夫的花園口口,剛好躲過了一場今年初降的鹅毛大雪。
7
+ 在那荒凉的山頂上,土地由於結了一層黑冰而凍得堅硬,冷空氣使我四肢發抖。 我弄不開門鏈,就跳進去,順著兩邊種著蔓延的醋栗樹叢的石路跑去。 我白白地敲了半天門,一直敲到我的手指骨都痛了,狗也狂吠起來。
8
+ “倒楣的人家!” 我心裡直叫,“只為你這樣無禮待客,就該一輩子跟人群隔離。我至少還不會在白天把門閂住。我才不管呢——我要進去!” 如此决定了。 我就抓住門閂,使勁搖它。 苦臉的約瑟夫從穀倉的一個圓窗裏探出頭來。
9
+ “你幹嗎?” 他大叫。 “主人在牛欄裏,你要是找他說話,就從這條路口繞過去。”
10
+ “屋裡沒人開門嗎?” 我也叫起來。
11
+ “除了家後沒有別人。你就是鬧騰到夜裡,她也不會開。”
12
+ “為什麼?你就不能告訴她我是誰嗎,呃,約瑟夫?”
 
 
 
 
 
 
 
 
 
 
 
13
  “別找我!我才不管這些閒事呢,”這個腦袋咕嚕著,又不見了。
14
+ 雪開始下大了。 我握住門柄又試一回。 這時一個沒穿外衣的年輕人,扛著一根草耙,在後面院子裏出現了。 他招呼我跟著他走,穿過了一個洗衣房和一片鋪平的地,那兒有煤棚、抽水機和鴿籠,我們終於到了我上次被接待過的那間溫暖的、熱鬧的大屋子。 煤、炭和木材混合在一起燃起的熊熊爐火,使這屋子放著光彩。 在準備擺上豐盛晚餐的桌旁,我很高興地看到了那位“家後”,以前我從未料想到會有這麼一個人存在的。 我鞠躬等候,以為她會叫我坐下。 她望望我,往她的椅背一靠,不動,也不出聲。
15
+ “天氣真壞!” 我說,“希刺克厲夫家後,恐怕大門因為您的僕人偷懶而大吃苦頭,我費了好大勁才使他們聽見我敲門!”
16
+ 她死不開口。 我瞪眼——她也瞪眼。 反正她總是以一種冷冷的、漠不關心的神氣盯住我,使人十分窘,而且不愉快。
17
+ “坐下吧,”那年輕人粗聲粗氣地說,“他就要來了。”
18
+ 我服從了; 輕輕咳了一下,叫喚那惡狗朱諾。 臨到第二次會面,它總算賞臉,搖起尾巴尖,表示認我是熟人了。
19
+ “好漂亮的狗!” 我又開始說話。 “您是不是打算不要這些小的呢,夫人?”
20
+ “那些不是我的,”這可愛可親的女主人說,比希刺克厲夫本人所能回答的腔調還要更冷淡些。
21
+ “啊,您所心愛的是在這一堆裏啦!” 我轉身指著一個看不清楚的靠墊上那一堆像猫似的東西,接著說下去。
22
+ “誰會愛這些東西那才怪呢!” 她輕蔑地說。
23
+ 倒楣,原來那是堆死兔子。 我又輕咳一聲,向火爐凑近些,又把今晚天氣不好的話評論一通。
24
+ “你本來就不該出來。” 她說,站起來去拿壁爐臺上的兩個彩色茶葉罐。
25
+ 她原先坐在光線被遮住的地方,現在我把她的全身和面貌都看得清清楚楚。 她苗條,顯然還沒有過青春期。 挺好看的體態,還有一張我生平從未有幸見過的絕妙的小臉蛋。 五官纖麗,非常漂亮。 淡黃色的卷髮,或者不如說是金黃色的,松松地垂在她那細嫩的頸上。 至於眼睛,要是眼神能顯得和悅些,就要使人無法抗拒了。 對我這容易動情的心說來倒是常事,因為它們所表現的只是在輕蔑與近似絕望之間的一種情緒,而在那張臉上看見那樣的眼神是特別不自然的。
26
+ 她簡直够不到茶葉罐。 我動了一動,想幫她一下。 她猛地扭轉身向我,像守財奴看見別人打算幫他數他的金子一樣。
27
+ “我不要你幫忙,”她怒氣衝衝地說,“我自己拿得到。”
28
+ “對不起!” 我連忙回答。
29
+ “是請你來吃茶的嗎?” 她問,把一條圍裙系在她那乾淨的黑衣服上,就這樣站著,拿一匙茶葉正要往茶壺裏倒。
30
+ “我很想喝杯茶。” 我回答。
31
+ “是請你來的嗎?” 她又問。
32
+ “沒有,”我說,勉强笑一笑。 “您正好請我喝茶。”
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  目錄
34
+ 上一章
35
+ 下一章
 
 
36
  返回首頁
litbee/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  """Init."""
2
- __version__ = "0.1.2a0"
3
  from .litbee import litbee
4
 
5
  __all__ = ("litbee",)
 
1
  """Init."""
2
+ __version__ = "0.1.2a1"
3
  from .litbee import litbee
4
 
5
  __all__ = ("litbee",)
litbee/app.py CHANGED
@@ -107,7 +107,7 @@ loggu.remove()
107
  _ = (
108
  "<green>{time:YY-MM-DD HH:mm:ss}</green> | "
109
  "<level>{level: <5}</level> | <level>{message}</level> "
110
- "<cyan>{name}</cyan>:<cyan>{line}</cyan>"
111
  )
112
  loggu.add(
113
  sys.stderr,
@@ -131,11 +131,15 @@ st.set_page_config( # type: ignore
131
  pd.set_option("display.precision", 2)
132
  pd.options.display.float_format = "{:,.2f}".format
133
 
 
 
 
 
134
  _ = dict(
135
  beetype="ezbee",
136
- sourcetype="upload",
137
  sourcecount=2,
138
- sent_ali=None,
139
  src_filename="",
140
  tgt_filename="",
141
  src_fileio=b"",
@@ -148,6 +152,7 @@ _ = dict(
148
  df_a=None,
149
  df_s_a=None,
150
  count=1,
 
151
  )
152
  if "ns" not in state:
153
  state.ns = SimpleNamespace(**_)
@@ -186,7 +191,8 @@ def main():
186
  if set_loglevel() <= 10:
187
  st.markdown(state.ns.count)
188
  loggu.debug(f" run: {state.ns.count}")
 
189
  state.ns.count += 1
190
-
191
 
192
  main()
 
107
  _ = (
108
  "<green>{time:YY-MM-DD HH:mm:ss}</green> | "
109
  "<level>{level: <5}</level> | <level>{message}</level> "
110
+ "<cyan>{module}.{name}</cyan>:<cyan>{line}</cyan>"
111
  )
112
  loggu.add(
113
  sys.stderr,
 
131
  pd.set_option("display.precision", 2)
132
  pd.options.display.float_format = "{:,.2f}".format
133
 
134
+ sourcetype = "upload"
135
+ if set_loglevel() <= 10:
136
+ sourcetype = "urls"
137
+
138
  _ = dict(
139
  beetype="ezbee",
140
+ sourcetype=sourcetype,
141
  sourcecount=2,
142
+ sentali=None,
143
  src_filename="",
144
  tgt_filename="",
145
  src_fileio=b"",
 
152
  df_a=None,
153
  df_s_a=None,
154
  count=1,
155
+ updated=False,
156
  )
157
  if "ns" not in state:
158
  state.ns = SimpleNamespace(**_)
 
191
  if set_loglevel() <= 10:
192
  st.markdown(state.ns.count)
193
  loggu.debug(f" run: {state.ns.count}")
194
+ logger.debug(f" run: {state.ns.count}")
195
  state.ns.count += 1
196
+ state.ns.updated = False
197
 
198
  main()
litbee/fetch_paste.py CHANGED
@@ -1,9 +1,44 @@
1
- """Display from paste."""
2
- import pandas as pd
3
  import streamlit as st
4
  from logzero import logger
 
5
 
6
 
7
  def fetch_paste():
8
- """Display from paste."""
9
- st.write("Coming soon")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fetch pasted text and convert to state.ns.list1/list2."""
2
+ # pylint: disable=invalid-name
3
  import streamlit as st
4
  from logzero import logger
5
+ from streamlit import session_state as state
6
 
7
 
8
  def fetch_paste():
9
+ """Fetch from clipboard."""
10
+ # st.write("Coming soon")
11
+ with st.form(key="paste_in_form"):
12
+ _ = st.expander(f"{state.ns.beetype}: Paste text", expanded=True)
13
+ with _:
14
+ col1, col2 = st.columns(2)
15
+ with col1:
16
+ text1 = st.text_area(
17
+ label="Paste your stuff here",
18
+ key="paste_text1",
19
+ # help=""
20
+ height=500,
21
+ )
22
+
23
+ with col2:
24
+ text2 = st.text_area(
25
+ label="Paste your stuff here",
26
+ # help=""
27
+ key="paste_text2",
28
+ height=500,
29
+ )
30
+
31
+ submitted = st.form_submit_button("Submit")
32
+
33
+ logger.debug("text1[:10]: %s, text2[:10]: %s", text1[:10], text2[:10])
34
+
35
+ list1 = [_.strip() for _ in text1.splitlines() if _.strip()]
36
+ list2 = [_.strip() for _ in text2.splitlines() if _.strip()]
37
+
38
+ state.ns.list1 = list1[:]
39
+ state.ns.list2 = list2[:]
40
+
41
+ logger.debug("len(list1): %s, len(list2): %s", len(list1), len(list2))
42
+
43
+ state.ns.updated = True
44
+ logger.debug("state.ns.updated: %s", state.ns.updated)
litbee/fetch_upload.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fetch upload and convert to list1/list2."""
2
+ from logzero import logger
3
+ import streamlit as st
4
+ from streamlit import session_state as state
5
+
6
+
7
+ def fetch_upload():
8
+ """Fetch upload and convert to list1/list2."""
9
+ # src_fileio tgt_fileio
10
+ with st.form(key="upload_in_form"):
11
+ _ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
12
+ with _:
13
+ col1, col2 = st.columns(2)
14
+ with col1:
15
+ src_fileio = st.file_uploader(
16
+ "Choose source file (utf8 txt)",
17
+ type=[
18
+ "txt",
19
+ ],
20
+ key="src_text",
21
+ # accept_multiple_files=True,
22
+ # accept_multiple_files=False,
23
+ )
24
+
25
+ with col2:
26
+ tgt_fileio = st.file_uploader(
27
+ "Choose target file (utf8 txt)",
28
+ type=[
29
+ "txt",
30
+ ],
31
+ key="tgt_text",
32
+ # accept_multiple_files=True,
33
+ )
34
+ submitted = st.form_submit_button("Submit")
35
+
36
+ # logger.debug(" len(src_fileio): %s", len(src_fileio))
37
+ # logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
38
+
39
+ filename1 = ""
40
+ if src_fileio:
41
+ logger.debug(" type(src_fileio): %s", type(src_fileio))
42
+
43
+ # for st.file_uploader with accept_multiple_files=True
44
+ if isinstance(src_fileio, list):
45
+ logger.debug(" len(src_fileio): %s", len(src_fileio))
46
+ filenames = []
47
+ try:
48
+ filenames = [elm.name for elm in src_fileio] # type: ignore
49
+ except Exception as exc:
50
+ logger.error(exc)
51
+ logger.debug("src_fileio names: *%s*", filenames)
52
+
53
+ # state.ns.src_fileio = src_fileio
54
+ state.ns.src_file = src_fileio[-1].getvalue().decode()
55
+ state.ns.src_filename = src_fileio[-1].name
56
+ else:
57
+ logger.debug("src_fileio.name: [%s]", src_fileio.name)
58
+ filenames = [src_fileio.name]
59
+ logger.debug("src_fileio names: %s", filenames)
60
+
61
+ # state.ns.src_fileio = src_fileio
62
+ state.ns.src_file = src_fileio.getvalue().decode()
63
+ state.ns.src_filename = src_fileio.name
64
+ filename1 = state.ns.src_filename
65
+
66
+ filename2 = ""
67
+ if tgt_fileio:
68
+ if isinstance(tgt_fileio, list):
69
+ logger.warning("not set to handle multiple files")
70
+ logger.warning("set accept_multiple_files=False in the meantime")
71
+ else:
72
+ state.ns.tgt_file = tgt_fileio.getvalue().decode()
73
+ state.ns.tgt_filename = tgt_fileio.name
74
+ filename2 = tgt_fileio.name
75
+
76
+ # proceed when Submit is clicked
77
+ msg1 = ""
78
+ if filename1:
79
+ msg1 += f" file1 {filename1}"
80
+ msg2 = ""
81
+ if filename2:
82
+ msg2 += f" file2 {filename2}"
83
+ glue = ""
84
+ if filename1 and filename2:
85
+ glue = ", "
86
+
87
+ upload_placeholder = st.empty()
88
+ prefix = f" Upload submitted: {msg1}{glue}{msg2}"
89
+ upload_placeholder.write(prefix)
90
+
91
+ # st.write(f" Submitted upload: {msg1}{glue}{msg2}")
92
+ if not submitted:
93
+ return None
94
+
95
+ if not (filename1 or filename2):
96
+ # st.write("| no file uploaded")
97
+ upload_placeholder.write(f"{prefix} no file uploaded")
98
+ return None
99
+
100
+ if not filename1:
101
+ # st.write("| file1 not ready")
102
+ upload_placeholder.write(f"{prefix}, file1 not ready")
103
+ return None
104
+
105
+ if not filename2:
106
+ # st.write("| file2 not ready")
107
+ upload_placeholder.write(f"{prefix}, file2 not ready")
108
+ return None
109
+
110
+ try:
111
+ _ = state.ns.src_file.splitlines()
112
+ list1 = [elm.strip() for elm in _ if elm.strip()]
113
+ _ = state.ns.tgt_file.splitlines()
114
+ list2 = [elm.strip() for elm in _ if elm.strip()]
115
+ except Exception as exc:
116
+ logger.error(exc)
117
+ list1 = [""]
118
+ list2 = [""]
119
+
120
+ logger.debug("len(list1): %s, len(list2): %s", len(list1), len(list2))
121
+
122
+ state.ns.list1 = list1[:]
123
+ state.ns.list2 = list2[:]
124
+
125
+ state.ns.updated = True
126
+ logger.debug("state.ns.updated: %s", state.ns.updated)
127
+
128
+ return None
litbee/fetch_urls.py CHANGED
@@ -1,9 +1,170 @@
1
- """Fetch from urls."""
2
- import pandas as pd
3
  import streamlit as st
 
4
  from logzero import logger
 
 
 
 
 
 
 
5
 
6
 
7
  def fetch_urls():
8
- """Display from urls."""
9
- st.write("Coming soon")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fetch text from urls and convert to state.ns.list1/list2."""
2
+ # pylint: disable=invalid-name
3
  import streamlit as st
4
+ from icecream import ic
5
  from logzero import logger
6
+ from streamlit import session_state as state
7
+ from litbee.url2txt import url2txt
8
+
9
+ ic.configureOutput(
10
+ includeContext=True,
11
+ outputFunction=logger.debug, # outputFunction=logger.info,
12
+ )
13
 
14
 
15
  def fetch_urls():
16
+ """Fetch text from urls and convert to state.ns.list1/list2."""
17
+ beetype = state.ns.beetype
18
+ sourcecount = state.ns.sourcecount
19
+ value = ""
20
+ if beetype == "ezbee":
21
+ url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_en.txt"
22
+ url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_zh.txt"
23
+ value = f"{url1} {url2}"
24
+ if beetype == "dzbee":
25
+ url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-de.txt"
26
+ url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-zh.txt"
27
+ value = f"{url1} {url2}"
28
+ if beetype == "debee":
29
+ url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-de.txt"
30
+ url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-en.txt"
31
+ value = f"{url1} {url2}"
32
+
33
+ dict_ = dict(text1="", text2="")
34
+
35
+ def fetch_cb():
36
+ """Fetch text (dict_["text1"|"text2"]) from urls."""
37
+ ic("fetch_cb")
38
+ urls = [elm.strip() for elm in text_inp.split(" ") if elm.strip()]
39
+
40
+ # supply http:// if not startswith http
41
+ urls = [elm if elm.startswith("http") else "http://" + elm for elm in urls]
42
+
43
+ _ = "\n\t"
44
+ # st.markdown(f" urls submitted: \n{_.join(urls)}")
45
+ ic(f" urls submitted: \n{_.join(urls)}")
46
+
47
+ # st.write(" TODO: fetch text from urls.")
48
+
49
+ if state.ns.sourcecount == 2: # 2-sep
50
+ for idx, url in enumerate(urls[:2]):
51
+ try:
52
+ _ = url2txt(url)
53
+ except Exception as e:
54
+ logger.error(e)
55
+ _ = str(e)
56
+ dict_[f"text{idx + 1}"] = _
57
+ ic(f"{idx + 1}: [{url}] {dict_['text' + str(idx + 1)][:100]}")
58
+
59
+ ic(dict_["text1"][:10])
60
+ ic(dict_["text2"][:10])
61
+ else: # 1-mix
62
+ text1 = ""
63
+ for url in urls:
64
+ try:
65
+ _ = url2txt(url)
66
+ except Exception as e:
67
+ logger.error(e)
68
+ _ = str(e)
69
+ text1 += _
70
+ ic(text1[:10])
71
+ dict_["text1"] = text1[:]
72
+
73
+ _ = [elm.strip() for elm in dict_["text1"].splitlines() if elm.strip()]
74
+ state.ns.list1 = _
75
+ _ = [elm.strip() for elm in dict_["text2"].splitlines() if elm.strip()]
76
+ state.ns.list2 = _
77
+
78
+ list1 = state.ns.list1
79
+ list2 = state.ns.list2
80
+ ic(len(list1), len(list2))
81
+
82
+ state.fetched_text1 = dict_["text1"]
83
+ state.fetched_text2 = dict_["text2"]
84
+
85
+ # streamlit complains if an initial value of
86
+ # a widget with this key is set
87
+ # state.text_area_urls = text_inp
88
+
89
+ # with st.form(key="urls_in_form"):
90
+ # _ = st.expander(f"{beetype}: Paste urls below and press Ctl+Enter or Space Ctl+Enter to testdrive", expanded=True)
91
+ # with _:
92
+ label = f"{beetype}: Paste urls below and press Ctl+Enter or Space Ctl+Enter to testdrive"
93
+ text_inp = st.text_area(
94
+ label=label,
95
+ value=value,
96
+ key="text_area_urls",
97
+ height=25,
98
+ help=" URLs separated by at least a space or a newline(贴网址,空格分开或另起一行, Ctrl+回车提交)",
99
+ on_change=fetch_cb,
100
+ # args=(text_inp,),
101
+ )
102
+
103
+ # st.button("Fetch", on_click=fetch_cb, args=(text_inp,))
104
+
105
+ def text2lists():
106
+ """Convert text(s) to list(s)."""
107
+ if text1:
108
+ try:
109
+ list1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
110
+ state.ns.list1 = list1[:]
111
+ except Exception as e:
112
+ logger.warning("text1 to list1 errors: %s", e)
113
+
114
+ if text2:
115
+ try:
116
+ list2 = [elm.strip() for elm in text2.splitlines() if elm.strip()]
117
+ state.ns.list2 = list2[:]
118
+ except Exception as e:
119
+ logger.warning("text2 to list2 errors: %s", e)
120
+
121
+ # show fetch text(s)
122
+ text1 = dict_["text1"]
123
+ text2 = dict_["text2"]
124
+ if state.ns.sourcecount == 2: # 2-sep
125
+ with st.form(key="fetched_2texts_in_form"):
126
+ _ = st.expander(f"{state.ns.beetype}: fetched text", expanded=True)
127
+ with _:
128
+ col1, col2 = st.columns(2)
129
+ with col1:
130
+ text1 = st.text_area(
131
+ label="Edit when necessary, click Submit when ready",
132
+ key="fetched_text1",
133
+ # help=""
134
+ height=500,
135
+ value=text1,
136
+ )
137
+
138
+ with col2:
139
+ text2 = st.text_area(
140
+ label="Edit when necessary, click Submit when ready",
141
+ # help=""
142
+ key="fetched_text2",
143
+ height=500,
144
+ value=text2,
145
+ )
146
+
147
+ submitted = st.form_submit_button(
148
+ "Submit",
149
+ on_click=text2lists
150
+ )
151
+
152
+ else: # 1-mix
153
+ with st.form(key="fetched_1_text_in_form"):
154
+ _ = st.expander(f"{state.ns.beetype}: fetched mixed text", expanded=True)
155
+ with _:
156
+ text1 = st.text_area(
157
+ label="Edit when necessary, click Submit when ready",
158
+ key="fetched_mixed_text1",
159
+ height=500,
160
+ value=text1,
161
+ )
162
+ submitted = st.form_submit_button("Submit", on_click=text2lists)
163
+
164
+ # _ = """
165
+ if not submitted:
166
+ ic("Submit not yet clicked")
167
+ return
168
+ # """
169
+
170
+ state.ns.updated = True
litbee/home.py CHANGED
@@ -32,14 +32,26 @@ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
32
  # from st_aggrid.grid_options_builder import GridOptionsBuilder
33
  from streamlit import session_state as state
34
 
 
 
 
35
  from litbee.t2s import t2s
36
 
37
 
38
  def home(): # noqa
39
- """Fetch content from upload."""
40
- # st.write(state.ns.beetype)
41
 
42
- if state.ns.sourcetype not in ["upload"]:
 
 
 
 
 
 
 
 
 
 
43
  st.write("Coming soooooooon...")
44
  return None
45
 
@@ -47,122 +59,37 @@ def home(): # noqa
47
  st.write("Coming soon...")
48
  return None
49
 
50
- # src_fileio tgt_fileio
51
- with st.form(key="upload_in_form"):
52
- _ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
53
- with _:
54
- col1, col2 = st.columns(2)
55
- with col1:
56
- src_fileio = st.file_uploader(
57
- "Choose source file (utf8 txt)",
58
- type=[
59
- "txt",
60
- ],
61
- key="src_text",
62
- # accept_multiple_files=True,
63
- # accept_multiple_files=False,
64
- )
65
-
66
- with col2:
67
- tgt_fileio = st.file_uploader(
68
- "Choose target file (utf8 txt)",
69
- type=[
70
- "txt",
71
- ],
72
- key="tgt_text",
73
- # accept_multiple_files=True,
74
- )
75
- submitted = st.form_submit_button("Submit")
76
-
77
- # logger.debug(" len(src_fileio): %s", len(src_fileio))
78
- # logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
79
-
80
- filename1 = ""
81
- if src_fileio:
82
- logger.debug(" type(src_fileio): %s", type(src_fileio))
83
-
84
- # for st.file_uploade accept_multiple_files=True
85
- if isinstance(src_fileio, list):
86
- logger.debug(" len(src_fileio): %s", len(src_fileio))
87
- filenames = []
88
- try:
89
- filenames = [elm.name for elm in src_fileio] # type: ignore
90
- except Exception as exc:
91
- logger.error(exc)
92
- logger.debug("src_fileio names: *%s*", filenames)
93
-
94
- # state.ns.src_fileio = src_fileio
95
- state.ns.src_file = src_fileio[-1].getvalue().decode()
96
- state.ns.src_filename = src_fileio[-1].name
97
- else:
98
- logger.debug("src_fileio.name: [%s]", src_fileio.name)
99
- filenames = [src_fileio.name]
100
- logger.debug("src_fileio names: %s", filenames)
101
-
102
- # state.ns.src_fileio = src_fileio
103
- state.ns.src_file = src_fileio.getvalue().decode()
104
- state.ns.src_filename = src_fileio.name
105
- filename1 = state.ns.src_filename
106
-
107
- filename2 = ""
108
- if tgt_fileio:
109
- if isinstance(tgt_fileio, list):
110
- logger.warning("not set to handle multiple files")
111
- logger.warning("set accept_multiple_files=False in the meantime")
112
- else:
113
- state.ns.tgt_file = tgt_fileio.getvalue().decode()
114
- state.ns.tgt_filename = tgt_fileio.name
115
- filename2 = tgt_fileio.name
116
-
117
- # proceed when Submit is clicked
118
- msg1 = ""
119
- if filename1:
120
- msg1 += f" file1 {filename1}"
121
- msg2 = ""
122
- if filename2:
123
- msg2 += f" file2 {filename2}"
124
- glue = ""
125
- if filename1 and filename2:
126
- glue = ", "
127
-
128
- upload_placeholder = st.empty()
129
- prefix = f" Submitted upload: {msg1}{glue}{msg2}"
130
- upload_placeholder.write(prefix)
131
-
132
- # st.write(f" Submitted upload: {msg1}{glue}{msg2}")
133
- if not submitted:
134
  return None
135
 
136
- if not (filename1 or filename2):
137
- # st.write("| no file uploaded")
138
- upload_placeholder.write(f"{prefix} no file uploaded")
139
- return None
140
 
141
- if not filename1:
142
- # st.write("| file1 not ready")
143
- upload_placeholder.write(f"{prefix}, file1 not ready")
 
144
  return None
145
 
146
- if not filename2:
147
- # st.write("| file2 not ready")
148
- upload_placeholder.write(f"{prefix}, file2 not ready")
149
- return None
150
 
151
- try:
152
- _ = state.ns.src_file.splitlines()
153
- list1 = [elm.strip() for elm in _ if elm.strip()]
154
- _ = state.ns.tgt_file.splitlines()
155
- list2 = [elm.strip() for elm in _ if elm.strip()]
156
- except Exception as exc:
157
- logger.error(exc)
158
- list1 = [""]
159
- list2 = [""]
160
- state.ns.list1 = list1[:]
161
- state.ns.list2 = list2[:]
162
 
163
  df = pd.DataFrame(zip_longest(list1, list2, fillvalue=""))
164
  try:
165
- df.columns = ["text1", "text2"]
 
166
  except Exception as exc:
167
  logger.debug("df: \n%s", df)
168
  logger.error("%s", exc)
@@ -171,6 +98,7 @@ def home(): # noqa
171
  logger.debug("df: %s", df)
172
 
173
  # st.table(df) # looks alright
 
174
 
175
  # stlyed pd dataframe?
176
  # bigger, no pagination
@@ -186,28 +114,11 @@ def home(): # noqa
186
  logger.debug(" df.empty, return")
187
  return None
188
 
189
- # df = pd.DataFrame([["", "", ""]], columns=["text1", "text2", "llh"])
190
-
191
- df_exp = st.expander("to be aligned", expanded=False)
192
- with df_exp:
193
- st.write(df) # too small
194
-
195
- _ = """
196
- ag_exp = st.expander("done aligned") # , expanded=False
197
- with ag_exp:
198
- agdf = AgGrid(
199
- df,
200
- # fit_columns_on_grid_load=True,
201
- editable=True,
202
- gridOptions=gridOptions,
203
- key="ag_exp",
204
- )
205
- # """
206
-
207
- list1 = [elm.strip() for elm in df.text1 if elm.strip()]
208
- list2 = [elm.strip() for elm in df.text2 if elm.strip()]
209
- logger.debug("list1[:3]: %s", list1[:3])
210
- logger.debug("list2[:3]: %s", list2[:3])
211
 
212
  logger.info("Processing data... %s", state.ns.beetype)
213
  # if state.ns.beetype in ["ezbee", "dzbee"]:
@@ -245,12 +156,12 @@ def home(): # noqa
245
  # min_samples=min_samples,
246
  )
247
  except Exception as e:
248
- # logger.error("aset = ezbee(...) exc: %s", e)
249
  logger.exception("aset = globals()[state.ns.beetype](...) exc: %s", e)
250
  aset = ""
251
- # st.write(e)
252
  st.write("Collecting inputs...")
 
253
  return None
 
254
  st.success(f"Done, took {t.duration_human}")
255
 
256
  else:
@@ -279,8 +190,8 @@ def home(): # noqa
279
  # aligned_pairs = gen_pairs(list1, list2, aset)
280
  aligned_pairs = aset2pairs(list1, list2, aset)
281
  if aligned_pairs:
282
- logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
283
- # logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
284
 
285
  df_a = pd.DataFrame(
286
  aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
@@ -323,4 +234,7 @@ def home(): # noqa
323
  update_mode=GridUpdateMode.MODEL_CHANGED,
324
  )
325
 
 
 
 
326
  return None
 
32
  # from st_aggrid.grid_options_builder import GridOptionsBuilder
33
  from streamlit import session_state as state
34
 
35
+ from litbee.fetch_upload import fetch_upload
36
+ from litbee.fetch_paste import fetch_paste
37
+ from litbee.fetch_urls import fetch_urls
38
  from litbee.t2s import t2s
39
 
40
 
41
  def home(): # noqa
42
+ """Run tasks.
 
43
 
44
+ beetype
45
+
46
+ sourcetype
47
+ fetch_upload/fetch_paste, fetch_url
48
+ sourcecount
49
+
50
+ align: para-align/sent-align
51
+
52
+ save xlsx/tsv
53
+ """
54
+ if state.ns.sourcetype not in ["upload", "paste", "urls"]:
55
  st.write("Coming soooooooon...")
56
  return None
57
 
 
59
  st.write("Coming soon...")
60
  return None
61
 
62
+ # process sourcetype and fetch list1/list2
63
+ list1 = list2 = []
64
+ # fetch_upload/fetch_paste
65
+ if state.ns.sourcetype in ["upload"]:
66
+ fetch_upload()
67
+ elif state.ns.sourcetype in ["paste"]:
68
+ fetch_paste()
69
+ elif state.ns.sourcetype in ["urls"]:
70
+ fetch_urls()
71
+ else:
72
+ st.warning(f"{state.ns.sourcetype}: Not implemented")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  return None
74
 
75
+ logger.debug("state.ns.updated: %s", state.ns.updated)
 
 
 
76
 
77
+ # if not updated, quit: this does not quite work
78
+ # only prevents the first run/missing upload
79
+ if not state.ns.updated:
80
+ logger.debug(" not updated, early exit.")
81
  return None
82
 
83
+ list1 = state.ns.list1[:]
84
+ list2 = state.ns.list2[:]
 
 
85
 
86
+ logger.debug("list1[:3]: %s", list1[:3])
87
+ logger.debug("list2[:3]: %s", list2[:3])
 
 
 
 
 
 
 
 
 
88
 
89
  df = pd.DataFrame(zip_longest(list1, list2, fillvalue=""))
90
  try:
91
+ # df.columns = ["text1", "text2"]
92
+ df.columns = [f"text{i + 1}" for i in range(len(df.columns))]
93
  except Exception as exc:
94
  logger.debug("df: \n%s", df)
95
  logger.error("%s", exc)
 
98
  logger.debug("df: %s", df)
99
 
100
  # st.table(df) # looks alright
101
+ # equiv to st.markdown(df.to_markdown())?
102
 
103
  # stlyed pd dataframe?
104
  # bigger, no pagination
 
114
  logger.debug(" df.empty, return")
115
  return None
116
 
117
+ # only show this for upload
118
+ if state.ns.sourcetype in ["upload"]:
119
+ _ = st.expander("to be aligned", expanded=False)
120
+ with _:
121
+ st.write(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  logger.info("Processing data... %s", state.ns.beetype)
124
  # if state.ns.beetype in ["ezbee", "dzbee"]:
 
156
  # min_samples=min_samples,
157
  )
158
  except Exception as e:
 
159
  logger.exception("aset = globals()[state.ns.beetype](...) exc: %s", e)
160
  aset = ""
 
161
  st.write("Collecting inputs...")
162
+ logger.debug("Collecting inputs...")
163
  return None
164
+
165
  st.success(f"Done, took {t.duration_human}")
166
 
167
  else:
 
190
  # aligned_pairs = gen_pairs(list1, list2, aset)
191
  aligned_pairs = aset2pairs(list1, list2, aset)
192
  if aligned_pairs:
193
+ # logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
194
+ logger.debug("%s...s", aligned_pairs[:1])
195
 
196
  df_a = pd.DataFrame(
197
  aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
 
234
  update_mode=GridUpdateMode.MODEL_CHANGED,
235
  )
236
 
237
+ # reset
238
+ state.ns.updated = False
239
+
240
  return None
litbee/pad.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ if text1:
3
+ try:
4
+ list1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
5
+ state.ns.list1 = list1[:]
6
+ except Exception as e:
7
+ logger.warning("text1 to list1 errors: %s", e)
8
+ st.text_area(
9
+ label="Your pasted",
10
+ value=text1,
11
+ key="pasted_text1",
12
+ # help=""
13
+ height=500,
14
+ )
15
+
16
+ if text2:
17
+ try:
18
+ list1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
19
+ state.ns.list1 = list1[:]
20
+ except Exception as e:
21
+ logger.warning("text1 to list1 errors: %s", e)
litbee/settings.py CHANGED
@@ -9,7 +9,7 @@ from streamlit import session_state as state
9
 
10
 
11
  def settings():
12
- """Prep Settings/Options page.
13
 
14
  Refer to options.py
15
  """
@@ -53,7 +53,7 @@ def settings():
53
  index=index,
54
  format_func=lambda x: f"{x:<8} |",
55
  help="upload: one or two files; paste: from clipboard; urls: from the net",
56
- disabled=True,
57
  )
58
  state.ns.sourcetype = sourcetype
59
 
 
9
 
10
 
11
  def settings():
12
+ """Prep Settings/Options page.
13
 
14
  Refer to options.py
15
  """
 
53
  index=index,
54
  format_func=lambda x: f"{x:<8} |",
55
  help="upload: one or two files; paste: from clipboard; urls: from the net",
56
+ # disabled=True,
57
  )
58
  state.ns.sourcetype = sourcetype
59
 
litbee/url2txt.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fetch text from url."""
2
+ from typing import Optional
3
+ from urllib.parse import urlparse
4
+
5
+ import html2text
6
+ import httpx
7
+ import streamlit as st
8
+ from logzero import logger
9
+ from readability import Document
10
+
11
+
12
@st.cache
def url2txt(
    url: str,
    bodywidth: Optional[int] = 5000,
    remove: bool = False,
    show_url: bool = True,
    ignore_links: bool = True,
) -> str:
    """Fetch text from url.

    Args:
        url: address to fetch; ``http://`` is prepended when the value
            does not already start with ``http://`` or ``https://``
        bodywidth: passed to html2text.HTML2Text; if set to None, fall
            back to the default bodywidth of html2text.HTML2Text
        remove: remove blank lines if set to True
        show_url: prepend url if set to True
        ignore_links: assigned to the html2text handler's ``ignore_links``
            attribute (drops ``[text](url)`` link markup)

    Returns:
        The response text unchanged when the server reports
        ``text/plain``; otherwise the readability summary converted to
        text, preceded by ``# <title>`` (and by the url when show_url).

    Raises:
        ValueError: the url has no scheme or no network location.
        RuntimeError: readability extracted an empty summary.
        Exception: errors from httpx, readability or html2text are
            logged and re-raised unchanged.
    """
    url = url.strip()

    # Test for a real scheme prefix, not just the letters "http":
    # a bare host such as "httpbin.org" must still get "http://" prepended.
    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url

    logger.info("url: %s", url)

    parsed = urlparse(url)
    if not parsed.scheme or not parsed.netloc:  # no scheme or netloc present
        raise ValueError(f"Invalid url: {url}")

    try:
        resp = httpx.get(url, timeout=30)
        resp.raise_for_status()
    except Exception as exc:
        logger.error(exc)
        raise

    # a missing content-type header is treated as "not plain text"
    content_type = resp.headers.get("content-type", "")

    # output text as is if text/plain
    if "text/plain" in content_type:
        return resp.text

    # handle html and the rest
    try:
        doc = Document(resp.text)
        summary = doc.summary()  # computed once, reused below
    except Exception as exc:
        logger.error(exc)
        raise

    if not summary.strip():
        raise RuntimeError("No content for some reason...")

    if bodywidth is not None:
        handle = html2text.HTML2Text(bodywidth=bodywidth)
    else:
        handle = html2text.HTML2Text()

    handle.ignore_links = ignore_links

    try:
        res = handle.handle(summary)
    except Exception as exc:
        logger.error(exc)
        raise

    # drop every blank (whitespace-only) line
    if remove:
        res = "\n".join(elm for elm in res.splitlines() if elm.strip())

    if not res.strip():  # warn if empty output
        logger.warning("Output seems to be empty...")

    if show_url:
        return f"{url}\n# {doc.title()}\n{res}"

    return f"# {doc.title()}\n{res}"
poetry.lock CHANGED
@@ -37,6 +37,23 @@ toolz = "*"
37
  [package.extras]
38
  dev = ["black", "docutils", "ipython", "flake8", "pytest", "sphinx", "mistune (<2.0.0)", "m2r", "vega-datasets", "recommonmark"]
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  [[package]]
41
  name = "appnope"
42
  version = "0.1.3"
@@ -216,6 +233,14 @@ python-versions = "*"
216
  [package.dependencies]
217
  pycparser = "*"
218
 
 
 
 
 
 
 
 
 
219
  [[package]]
220
  name = "charset-normalizer"
221
  version = "2.0.12"
@@ -273,6 +298,14 @@ python-versions = "*"
273
  [package.extras]
274
  test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
275
 
 
 
 
 
 
 
 
 
276
  [[package]]
277
  name = "de2en"
278
  version = "0.1.1"
@@ -519,6 +552,14 @@ python-versions = "*"
519
  [package.extras]
520
  test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
521
 
 
 
 
 
 
 
 
 
522
  [[package]]
523
  name = "hanzidentifier"
524
  version = "1.0.2"
@@ -530,6 +571,52 @@ python-versions = "*"
530
  [package.dependencies]
531
  zhon = ">=1.1.3"
532
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  [[package]]
534
  name = "icecream"
535
  version = "2.1.2"
@@ -842,6 +929,20 @@ python-versions = "*"
842
  [package.dependencies]
843
  colorama = {version = "*", markers = "sys_platform == \"win32\""}
844
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
845
  [[package]]
846
  name = "markupsafe"
847
  version = "2.1.1"
@@ -1463,6 +1564,22 @@ python-versions = ">=3.6"
1463
  cffi = {version = "*", markers = "implementation_name == \"pypy\""}
1464
  py = {version = "*", markers = "implementation_name == \"pypy\""}
1465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1466
  [[package]]
1467
  name = "regex"
1468
  version = "2022.4.24"
@@ -1489,6 +1606,20 @@ urllib3 = ">=1.21.1,<1.27"
1489
  socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
1490
  use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
1491
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1492
  [[package]]
1493
  name = "rich"
1494
  version = "12.4.4"
@@ -1612,6 +1743,14 @@ category = "main"
1612
  optional = false
1613
  python-versions = ">=3.6"
1614
 
 
 
 
 
 
 
 
 
1615
  [[package]]
1616
  name = "snowballstemmer"
1617
  version = "2.2.0"
@@ -2027,7 +2166,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
2027
  [metadata]
2028
  lock-version = "1.1"
2029
  python-versions = "^3.8.3"
2030
- content-hash = "671c809c9707c753d7aab881b8319beb76c2311edd270ca629f40cfd10227faa"
2031
 
2032
  [metadata.files]
2033
  about-time = [
@@ -2042,6 +2181,10 @@ altair = [
2042
  {file = "altair-4.2.0-py3-none-any.whl", hash = "sha256:0c724848ae53410c13fa28be2b3b9a9dcb7b5caa1a70f7f217bd663bb419935a"},
2043
  {file = "altair-4.2.0.tar.gz", hash = "sha256:d87d9372e63b48cd96b2a6415f0cf9457f50162ab79dc7a31cd7e024dd840026"},
2044
  ]
 
 
 
 
2045
  appnope = [
2046
  {file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"},
2047
  {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"},
@@ -2213,6 +2356,10 @@ cffi = [
2213
  {file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"},
2214
  {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"},
2215
  ]
 
 
 
 
2216
  charset-normalizer = [
2217
  {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
2218
  {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
@@ -2233,6 +2380,10 @@ commonmark = [
2233
  {file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
2234
  {file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
2235
  ]
 
 
 
 
2236
  de2en = [
2237
  {file = "de2en-0.1.1-py3-none-any.whl", hash = "sha256:6a04c77a04d3a8d426012b29a4561701416aaede64b650b4a1161c3b803fa928"},
2238
  {file = "de2en-0.1.1.tar.gz", hash = "sha256:830dfc7a168cc1dcaea582655fc738db9add98ded30f56ddaca86b7b23dd6eff"},
@@ -2319,9 +2470,25 @@ gitpython = [
2319
  grapheme = [
2320
  {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
2321
  ]
 
 
 
 
2322
  hanzidentifier = [
2323
  {file = "hanzidentifier-1.0.2.tar.gz", hash = "sha256:793a298430aa9a9d6ab344dc0ca0ab4bd1161d88c7da941d6554571093003cba"},
2324
  ]
 
 
 
 
 
 
 
 
 
 
 
 
2325
  icecream = [
2326
  {file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
2327
  {file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
@@ -2406,6 +2573,71 @@ logzero = [
2406
  {file = "logzero-1.7.0-py2.py3-none-any.whl", hash = "sha256:23eb1f717a2736f9ab91ca0d43160fd2c996ad49ae6bad34652d47aba908769d"},
2407
  {file = "logzero-1.7.0.tar.gz", hash = "sha256:7f73ddd3ae393457236f081ffebd044a3aa2e423a47ae6ddb5179ab90d0ad082"},
2408
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2409
  markupsafe = [
2410
  {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
2411
  {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
@@ -2931,6 +3163,10 @@ pyzmq = [
2931
  {file = "pyzmq-23.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4d861ae20040afc17adef33053c328667da78d4d3676b2936788fd031665e3a8"},
2932
  {file = "pyzmq-23.0.0.tar.gz", hash = "sha256:a45f5c0477d12df05ef2e2922b49b7c0ae9d0f4ff9b6bb0d666558df0ef37122"},
2933
  ]
 
 
 
 
2934
  regex = [
2935
  {file = "regex-2022.4.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f86aef546add4ff1202e1f31e9bb54f9268f17d996b2428877283146bf9bc013"},
2936
  {file = "regex-2022.4.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e944268445b5694f5d41292c9228f0ca46d5a32a67f195d5f8547c1f1d91f4bc"},
@@ -3011,6 +3247,10 @@ requests = [
3011
  {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
3012
  {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
3013
  ]
 
 
 
 
3014
  rich = [
3015
  {file = "rich-12.4.4-py3-none-any.whl", hash = "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"},
3016
  {file = "rich-12.4.4.tar.gz", hash = "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2"},
@@ -3146,6 +3386,10 @@ smmap = [
3146
  {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
3147
  {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
3148
  ]
 
 
 
 
3149
  snowballstemmer = [
3150
  {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
3151
  {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
 
37
  [package.extras]
38
  dev = ["black", "docutils", "ipython", "flake8", "pytest", "sphinx", "mistune (<2.0.0)", "m2r", "vega-datasets", "recommonmark"]
39
 
40
+ [[package]]
41
+ name = "anyio"
42
+ version = "3.6.1"
43
+ description = "High level compatibility layer for multiple asynchronous event loop implementations"
44
+ category = "main"
45
+ optional = false
46
+ python-versions = ">=3.6.2"
47
+
48
+ [package.dependencies]
49
+ idna = ">=2.8"
50
+ sniffio = ">=1.1"
51
+
52
+ [package.extras]
53
+ doc = ["packaging", "sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"]
54
+ test = ["coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "contextlib2", "uvloop (<0.15)", "mock (>=4)", "uvloop (>=0.15)"]
55
+ trio = ["trio (>=0.16)"]
56
+
57
  [[package]]
58
  name = "appnope"
59
  version = "0.1.3"
 
233
  [package.dependencies]
234
  pycparser = "*"
235
 
236
+ [[package]]
237
+ name = "chardet"
238
+ version = "4.0.0"
239
+ description = "Universal encoding detector for Python 2 and 3"
240
+ category = "main"
241
+ optional = false
242
+ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
243
+
244
  [[package]]
245
  name = "charset-normalizer"
246
  version = "2.0.12"
 
298
  [package.extras]
299
  test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
300
 
301
+ [[package]]
302
+ name = "cssselect"
303
+ version = "1.1.0"
304
+ description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
305
+ category = "main"
306
+ optional = false
307
+ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
308
+
309
  [[package]]
310
  name = "de2en"
311
  version = "0.1.1"
 
552
  [package.extras]
553
  test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
554
 
555
+ [[package]]
556
+ name = "h11"
557
+ version = "0.12.0"
558
+ description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
559
+ category = "main"
560
+ optional = false
561
+ python-versions = ">=3.6"
562
+
563
  [[package]]
564
  name = "hanzidentifier"
565
  version = "1.0.2"
 
571
  [package.dependencies]
572
  zhon = ">=1.1.3"
573
 
574
+ [[package]]
575
+ name = "html2text"
576
+ version = "2020.1.16"
577
+ description = "Turn HTML into equivalent Markdown-structured text."
578
+ category = "main"
579
+ optional = false
580
+ python-versions = ">=3.5"
581
+
582
+ [[package]]
583
+ name = "httpcore"
584
+ version = "0.15.0"
585
+ description = "A minimal low-level HTTP client."
586
+ category = "main"
587
+ optional = false
588
+ python-versions = ">=3.7"
589
+
590
+ [package.dependencies]
591
+ anyio = ">=3.0.0,<4.0.0"
592
+ certifi = "*"
593
+ h11 = ">=0.11,<0.13"
594
+ sniffio = ">=1.0.0,<2.0.0"
595
+
596
+ [package.extras]
597
+ http2 = ["h2 (>=3,<5)"]
598
+ socks = ["socksio (>=1.0.0,<2.0.0)"]
599
+
600
+ [[package]]
601
+ name = "httpx"
602
+ version = "0.23.0"
603
+ description = "The next generation HTTP client."
604
+ category = "main"
605
+ optional = false
606
+ python-versions = ">=3.7"
607
+
608
+ [package.dependencies]
609
+ certifi = "*"
610
+ httpcore = ">=0.15.0,<0.16.0"
611
+ rfc3986 = {version = ">=1.3,<2", extras = ["idna2008"]}
612
+ sniffio = "*"
613
+
614
+ [package.extras]
615
+ brotli = ["brotlicffi", "brotli"]
616
+ cli = ["click (>=8.0.0,<9.0.0)", "rich (>=10,<13)", "pygments (>=2.0.0,<3.0.0)"]
617
+ http2 = ["h2 (>=3,<5)"]
618
+ socks = ["socksio (>=1.0.0,<2.0.0)"]
619
+
620
  [[package]]
621
  name = "icecream"
622
  version = "2.1.2"
 
929
  [package.dependencies]
930
  colorama = {version = "*", markers = "sys_platform == \"win32\""}
931
 
932
+ [[package]]
933
+ name = "lxml"
934
+ version = "4.9.0"
935
+ description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
936
+ category = "main"
937
+ optional = false
938
+ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
939
+
940
+ [package.extras]
941
+ cssselect = ["cssselect (>=0.7)"]
942
+ html5 = ["html5lib"]
943
+ htmlsoup = ["beautifulsoup4"]
944
+ source = ["Cython (>=0.29.7)"]
945
+
946
  [[package]]
947
  name = "markupsafe"
948
  version = "2.1.1"
 
1564
  cffi = {version = "*", markers = "implementation_name == \"pypy\""}
1565
  py = {version = "*", markers = "implementation_name == \"pypy\""}
1566
 
1567
+ [[package]]
1568
+ name = "readability-lxml"
1569
+ version = "0.8.1"
1570
+ description = "fast html to text parser (article readability tool) with python 3 support"
1571
+ category = "main"
1572
+ optional = false
1573
+ python-versions = "*"
1574
+
1575
+ [package.dependencies]
1576
+ chardet = "*"
1577
+ cssselect = "*"
1578
+ lxml = "*"
1579
+
1580
+ [package.extras]
1581
+ test = ["timeout-decorator"]
1582
+
1583
  [[package]]
1584
  name = "regex"
1585
  version = "2022.4.24"
 
1606
  socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
1607
  use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
1608
 
1609
+ [[package]]
1610
+ name = "rfc3986"
1611
+ version = "1.5.0"
1612
+ description = "Validating URI References per RFC 3986"
1613
+ category = "main"
1614
+ optional = false
1615
+ python-versions = "*"
1616
+
1617
+ [package.dependencies]
1618
+ idna = {version = "*", optional = true, markers = "extra == \"idna2008\""}
1619
+
1620
+ [package.extras]
1621
+ idna2008 = ["idna"]
1622
+
1623
  [[package]]
1624
  name = "rich"
1625
  version = "12.4.4"
 
1743
  optional = false
1744
  python-versions = ">=3.6"
1745
 
1746
+ [[package]]
1747
+ name = "sniffio"
1748
+ version = "1.2.0"
1749
+ description = "Sniff out which async library your code is running under"
1750
+ category = "main"
1751
+ optional = false
1752
+ python-versions = ">=3.5"
1753
+
1754
  [[package]]
1755
  name = "snowballstemmer"
1756
  version = "2.2.0"
 
2166
  [metadata]
2167
  lock-version = "1.1"
2168
  python-versions = "^3.8.3"
2169
+ content-hash = "f594ea8ffe579f3c01e737f1d41548f25e8d16bf5a3dab9344aad0376fb456d9"
2170
 
2171
  [metadata.files]
2172
  about-time = [
 
2181
  {file = "altair-4.2.0-py3-none-any.whl", hash = "sha256:0c724848ae53410c13fa28be2b3b9a9dcb7b5caa1a70f7f217bd663bb419935a"},
2182
  {file = "altair-4.2.0.tar.gz", hash = "sha256:d87d9372e63b48cd96b2a6415f0cf9457f50162ab79dc7a31cd7e024dd840026"},
2183
  ]
2184
+ anyio = [
2185
+ {file = "anyio-3.6.1-py3-none-any.whl", hash = "sha256:cb29b9c70620506a9a8f87a309591713446953302d7d995344d0d7c6c0c9a7be"},
2186
+ {file = "anyio-3.6.1.tar.gz", hash = "sha256:413adf95f93886e442aea925f3ee43baa5a765a64a0f52c6081894f9992fdd0b"},
2187
+ ]
2188
  appnope = [
2189
  {file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"},
2190
  {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"},
 
2356
  {file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"},
2357
  {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"},
2358
  ]
2359
+ chardet = [
2360
+ {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"},
2361
+ {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"},
2362
+ ]
2363
  charset-normalizer = [
2364
  {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
2365
  {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
 
2380
  {file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
2381
  {file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
2382
  ]
2383
+ cssselect = [
2384
+ {file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"},
2385
+ {file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"},
2386
+ ]
2387
  de2en = [
2388
  {file = "de2en-0.1.1-py3-none-any.whl", hash = "sha256:6a04c77a04d3a8d426012b29a4561701416aaede64b650b4a1161c3b803fa928"},
2389
  {file = "de2en-0.1.1.tar.gz", hash = "sha256:830dfc7a168cc1dcaea582655fc738db9add98ded30f56ddaca86b7b23dd6eff"},
 
2470
  grapheme = [
2471
  {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
2472
  ]
2473
+ h11 = [
2474
+ {file = "h11-0.12.0-py3-none-any.whl", hash = "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6"},
2475
+ {file = "h11-0.12.0.tar.gz", hash = "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"},
2476
+ ]
2477
  hanzidentifier = [
2478
  {file = "hanzidentifier-1.0.2.tar.gz", hash = "sha256:793a298430aa9a9d6ab344dc0ca0ab4bd1161d88c7da941d6554571093003cba"},
2479
  ]
2480
+ html2text = [
2481
+ {file = "html2text-2020.1.16-py3-none-any.whl", hash = "sha256:c7c629882da0cf377d66f073329ccf34a12ed2adf0169b9285ae4e63ef54c82b"},
2482
+ {file = "html2text-2020.1.16.tar.gz", hash = "sha256:e296318e16b059ddb97f7a8a1d6a5c1d7af4544049a01e261731d2d5cc277bbb"},
2483
+ ]
2484
+ httpcore = [
2485
+ {file = "httpcore-0.15.0-py3-none-any.whl", hash = "sha256:1105b8b73c025f23ff7c36468e4432226cbb959176eab66864b8e31c4ee27fa6"},
2486
+ {file = "httpcore-0.15.0.tar.gz", hash = "sha256:18b68ab86a3ccf3e7dc0f43598eaddcf472b602aba29f9aa6ab85fe2ada3980b"},
2487
+ ]
2488
+ httpx = [
2489
+ {file = "httpx-0.23.0-py3-none-any.whl", hash = "sha256:42974f577483e1e932c3cdc3cd2303e883cbfba17fe228b0f63589764d7b9c4b"},
2490
+ {file = "httpx-0.23.0.tar.gz", hash = "sha256:f28eac771ec9eb4866d3fb4ab65abd42d38c424739e80c08d8d20570de60b0ef"},
2491
+ ]
2492
  icecream = [
2493
  {file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
2494
  {file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
 
2573
  {file = "logzero-1.7.0-py2.py3-none-any.whl", hash = "sha256:23eb1f717a2736f9ab91ca0d43160fd2c996ad49ae6bad34652d47aba908769d"},
2574
  {file = "logzero-1.7.0.tar.gz", hash = "sha256:7f73ddd3ae393457236f081ffebd044a3aa2e423a47ae6ddb5179ab90d0ad082"},
2575
  ]
2576
+ lxml = [
2577
+ {file = "lxml-4.9.0-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:b5031d151d6147eac53366d6ec87da84cd4d8c5e80b1d9948a667a7164116e39"},
2578
+ {file = "lxml-4.9.0-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5d52e1173f52020392f593f87a6af2d4055dd800574a5cb0af4ea3878801d307"},
2579
+ {file = "lxml-4.9.0-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3af00ee88376022589ceeb8170eb67dacf5f7cd625ea59fa0977d719777d4ae8"},
2580
+ {file = "lxml-4.9.0-cp27-cp27m-win32.whl", hash = "sha256:1057356b808d149bc14eb8f37bb89129f237df488661c1e0fc0376ca90e1d2c3"},
2581
+ {file = "lxml-4.9.0-cp27-cp27m-win_amd64.whl", hash = "sha256:f6d23a01921b741774f35e924d418a43cf03eca1444f3fdfd7978d35a5aaab8b"},
2582
+ {file = "lxml-4.9.0-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56e19fb6e4b8bd07fb20028d03d3bc67bcc0621347fbde64f248e44839771756"},
2583
+ {file = "lxml-4.9.0-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:4cd69bca464e892ea4ed544ba6a7850aaff6f8d792f8055a10638db60acbac18"},
2584
+ {file = "lxml-4.9.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:94b181dd2777890139e49a5336bf3a9a3378ce66132c665fe8db4e8b7683cde2"},
2585
+ {file = "lxml-4.9.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:607224ffae9a0cf0a2f6e14f5f6bce43e83a6fbdaa647891729c103bdd6a5593"},
2586
+ {file = "lxml-4.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:11d62c97ceff9bab94b6b29c010ea5fb6831743459bb759c917f49ba75601cd0"},
2587
+ {file = "lxml-4.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:70a198030d26f5e569367f0f04509b63256faa76a22886280eea69a4f535dd40"},
2588
+ {file = "lxml-4.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3cf816aed8125cfc9e6e5c6c31ff94278320d591bd7970c4a0233bee0d1c8790"},
2589
+ {file = "lxml-4.9.0-cp310-cp310-win32.whl", hash = "sha256:65b3b5f12c6fb5611e79157214f3cd533083f9b058bf2fc8a1c5cc5ee40fdc5a"},
2590
+ {file = "lxml-4.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:0aa4cce579512c33373ca4c5e23c21e40c1aa1a33533a75e51b654834fd0e4f2"},
2591
+ {file = "lxml-4.9.0-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63419db39df8dc5564f6f103102c4665f7e4d9cb64030e98cf7a74eae5d5760d"},
2592
+ {file = "lxml-4.9.0-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d8e5021e770b0a3084c30dda5901d5fce6d4474feaf0ced8f8e5a82702502fbb"},
2593
+ {file = "lxml-4.9.0-cp35-cp35m-win32.whl", hash = "sha256:f17b9df97c5ecdfb56c5e85b3c9df9831246df698f8581c6e111ac664c7c656e"},
2594
+ {file = "lxml-4.9.0-cp35-cp35m-win_amd64.whl", hash = "sha256:75da29a0752c8f2395df0115ac1681cefbdd4418676015be8178b733704cbff2"},
2595
+ {file = "lxml-4.9.0-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:e4d020ecf3740b7312bacab2cb966bb720fd4d3490562d373b4ad91dd1857c0d"},
2596
+ {file = "lxml-4.9.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b71c52d69b91af7d18c13aef1b0cc3baee36b78607c711eb14a52bf3aa7c815e"},
2597
+ {file = "lxml-4.9.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28cf04a1a38e961d4a764d2940af9b941b66263ed5584392ef875ee9c1e360a3"},
2598
+ {file = "lxml-4.9.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:915ecf7d486df17cc65aeefdb680d5ad4390cc8c857cf8db3fe241ed234f856a"},
2599
+ {file = "lxml-4.9.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e564d5a771b4015f34166a05ea2165b7e283635c41b1347696117f780084b46d"},
2600
+ {file = "lxml-4.9.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c2a57755e366e0ac7ebdb3e9207f159c3bf1afed02392ab18453ce81f5ee92ee"},
2601
+ {file = "lxml-4.9.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:00f3a6f88fd5f4357844dd91a1abac5f466c6799f1b7f1da2df6665253845b11"},
2602
+ {file = "lxml-4.9.0-cp36-cp36m-win32.whl", hash = "sha256:9093a359a86650a3dbd6532c3e4d21a6f58ba2cb60d0e72db0848115d24c10ba"},
2603
+ {file = "lxml-4.9.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d1690c4d37674a5f0cdafbc5ed7e360800afcf06928c2a024c779c046891bf09"},
2604
+ {file = "lxml-4.9.0-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:6af7f51a6010748fc1bb71917318d953c9673e4ae3f6d285aaf93ef5b2eb11c1"},
2605
+ {file = "lxml-4.9.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:eabdbe04ee0a7e760fa6cd9e799d2b020d098c580ba99107d52e1e5e538b1ecb"},
2606
+ {file = "lxml-4.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:b1e22f3ee4d75ca261b6bffbf64f6f178cb194b1be3191065a09f8d98828daa9"},
2607
+ {file = "lxml-4.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:53b0410b220766321759f7f9066da67b1d0d4a7f6636a477984cbb1d98483955"},
2608
+ {file = "lxml-4.9.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d76da27f5e3e9bc40eba6ed7a9e985f57547e98cf20521d91215707f2fb57e0f"},
2609
+ {file = "lxml-4.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:686565ac77ff94a8965c11829af253d9e2ce3bf0d9225b1d2eb5c4d4666d0dca"},
2610
+ {file = "lxml-4.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b62d1431b4c40cda43cc986f19b8c86b1d2ae8918cfc00f4776fdf070b65c0c4"},
2611
+ {file = "lxml-4.9.0-cp37-cp37m-win32.whl", hash = "sha256:4becd16750ca5c2a1b1588269322b2cebd10c07738f336c922b658dbab96a61c"},
2612
+ {file = "lxml-4.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e35a298691b9e10e5a5631f8f0ba605b30ebe19208dc8f58b670462f53753641"},
2613
+ {file = "lxml-4.9.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:aa7447bf7c1a15ef24e2b86a277b585dd3f055e8890ac7f97374d170187daa97"},
2614
+ {file = "lxml-4.9.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:612ef8f2795a89ba3a1d4c8c1af84d8453fd53ee611aa5ad460fdd2cab426fc2"},
2615
+ {file = "lxml-4.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:1bfb791a8fcdbf55d1d41b8be940393687bec0e9b12733f0796668086d1a23ff"},
2616
+ {file = "lxml-4.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:024684e0c5cfa121c22140d3a0898a3a9b2ea0f0fd2c229b6658af4bdf1155e5"},
2617
+ {file = "lxml-4.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81c29c8741fa07ecec8ec7417c3d8d1e2f18cf5a10a280f4e1c3f8c3590228b2"},
2618
+ {file = "lxml-4.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6467626fa74f96f4d80fc6ec2555799e97fff8f36e0bfc7f67769f83e59cff40"},
2619
+ {file = "lxml-4.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9cae837b988f44925d14d048fa6a8c54f197c8b1223fd9ee9c27084f84606143"},
2620
+ {file = "lxml-4.9.0-cp38-cp38-win32.whl", hash = "sha256:5a49ad78543925e1a4196e20c9c54492afa4f1502c2a563f73097e2044c75190"},
2621
+ {file = "lxml-4.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:bb7c1b029e54e26e01b1d1d912fc21abb65650d16ea9a191d026def4ed0859ed"},
2622
+ {file = "lxml-4.9.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:d0d03b9636f1326772e6854459728676354d4c7731dae9902b180e2065ba3da6"},
2623
+ {file = "lxml-4.9.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:9af19eb789d674b59a9bee5005779757aab857c40bf9cc313cb01eafac55ce55"},
2624
+ {file = "lxml-4.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:dd00d28d1ab5fa7627f5abc957f29a6338a7395b724571a8cbff8fbed83aaa82"},
2625
+ {file = "lxml-4.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:754a1dd04bff8a509a31146bd8f3a5dc8191a8694d582dd5fb71ff09f0722c22"},
2626
+ {file = "lxml-4.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b7679344f2270840dc5babc9ccbedbc04f7473c1f66d4676bb01680c0db85bcc"},
2627
+ {file = "lxml-4.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d882c2f3345261e898b9f604be76b61c901fbfa4ac32e3f51d5dc1edc89da3cb"},
2628
+ {file = "lxml-4.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4e97c8fc761ad63909198acc892f34c20f37f3baa2c50a62d5ec5d7f1efc68a1"},
2629
+ {file = "lxml-4.9.0-cp39-cp39-win32.whl", hash = "sha256:cf9ec915857d260511399ab87e1e70fa13d6b2972258f8e620a3959468edfc32"},
2630
+ {file = "lxml-4.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:1254a79f8a67a3908de725caf59eae62d86738f6387b0a34b32e02abd6ae73db"},
2631
+ {file = "lxml-4.9.0-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:03370ec37fe562238d385e2c53089076dee53aabf8325cab964fdb04a9130fa0"},
2632
+ {file = "lxml-4.9.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f386def57742aacc3d864169dfce644a8c396f95aa35b41b69df53f558d56dd0"},
2633
+ {file = "lxml-4.9.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ea3f2e9eb41f973f73619e88bf7bd950b16b4c2ce73d15f24a11800ce1eaf276"},
2634
+ {file = "lxml-4.9.0-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:2d10659e6e5c53298e6d718fd126e793285bff904bb71d7239a17218f6a197b7"},
2635
+ {file = "lxml-4.9.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:fcdf70191f0d1761d190a436db06a46f05af60e1410e1507935f0332280c9268"},
2636
+ {file = "lxml-4.9.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:2b9c2341d96926b0d0e132e5c49ef85eb53fa92ae1c3a70f9072f3db0d32bc07"},
2637
+ {file = "lxml-4.9.0-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:615886ee84b6f42f1bdf1852a9669b5fe3b96b6ff27f1a7a330b67ad9911200a"},
2638
+ {file = "lxml-4.9.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:94f2e45b054dd759bed137b6e14ae8625495f7d90ddd23cf62c7a68f72b62656"},
2639
+ {file = "lxml-4.9.0.tar.gz", hash = "sha256:520461c36727268a989790aef08884347cd41f2d8ae855489ccf40b50321d8d7"},
2640
+ ]
2641
  markupsafe = [
2642
  {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
2643
  {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
 
3163
  {file = "pyzmq-23.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4d861ae20040afc17adef33053c328667da78d4d3676b2936788fd031665e3a8"},
3164
  {file = "pyzmq-23.0.0.tar.gz", hash = "sha256:a45f5c0477d12df05ef2e2922b49b7c0ae9d0f4ff9b6bb0d666558df0ef37122"},
3165
  ]
3166
+ readability-lxml = [
3167
+ {file = "readability-lxml-0.8.1.tar.gz", hash = "sha256:e51fea56b5909aaf886d307d48e79e096293255afa567b7d08bca94d25b1a4e1"},
3168
+ {file = "readability_lxml-0.8.1-py3-none-any.whl", hash = "sha256:e0d366a21b1bd6cca17de71a4e6ea16fcfaa8b0a5b4004e39e2c7eff884e6305"},
3169
+ ]
3170
  regex = [
3171
  {file = "regex-2022.4.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f86aef546add4ff1202e1f31e9bb54f9268f17d996b2428877283146bf9bc013"},
3172
  {file = "regex-2022.4.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e944268445b5694f5d41292c9228f0ca46d5a32a67f195d5f8547c1f1d91f4bc"},
 
3247
  {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
3248
  {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
3249
  ]
3250
+ rfc3986 = [
3251
+ {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"},
3252
+ {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"},
3253
+ ]
3254
  rich = [
3255
  {file = "rich-12.4.4-py3-none-any.whl", hash = "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"},
3256
  {file = "rich-12.4.4.tar.gz", hash = "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2"},
 
3386
  {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
3387
  {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
3388
  ]
3389
+ sniffio = [
3390
+ {file = "sniffio-1.2.0-py3-none-any.whl", hash = "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663"},
3391
+ {file = "sniffio-1.2.0.tar.gz", hash = "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"},
3392
+ ]
3393
  snowballstemmer = [
3394
  {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
3395
  {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
  [tool.poetry]
2
  name = "litbee"
3
- version = "0.1.2-alpha.0"
4
  description = "align (en, de, zh) texts via streamlit "
5
  authors = ["ffreemt"]
6
  license = "MIT"
@@ -26,6 +26,9 @@ plotly = "^5.8.0"
26
  hanzidentifier = "^1.0.2"
27
  opencc-python-reimplemented = "^0.1.6"
28
  tabulate = "^0.8.9"
 
 
 
29
 
30
  [tool.poe.executor]
31
  type = "poetry"
 
1
  [tool.poetry]
2
  name = "litbee"
3
+ version = "0.1.2-alpha.1"
4
  description = "align (en, de, zh) texts via streamlit "
5
  authors = ["ffreemt"]
6
  license = "MIT"
 
26
  hanzidentifier = "^1.0.2"
27
  opencc-python-reimplemented = "^0.1.6"
28
  tabulate = "^0.8.9"
29
+ httpx = "^0.23.0"
30
+ html2text = "^2020.1.16"
31
+ readability-lxml = "^0.8.1"
32
 
33
  [tool.poe.executor]
34
  type = "poetry"