freemt
committed on
Commit
·
d18f436
1
Parent(s):
aa4d65d
Bump version from 0.1.2-alpha.0 to 0.1.2-alpha.1
Browse files
- app.py +10 -4
- data/test_zh_t.txt +31 -69
- litbee/__init__.py +1 -1
- litbee/app.py +10 -4
- litbee/fetch_paste.py +39 -4
- litbee/fetch_upload.py +128 -0
- litbee/fetch_urls.py +165 -4
- litbee/home.py +50 -136
- litbee/pad.txt +21 -0
- litbee/settings.py +2 -2
- litbee/url2txt.py +97 -0
- poetry.lock +245 -1
- pyproject.toml +4 -1
app.py
CHANGED
@@ -107,7 +107,7 @@ loggu.remove()
|
|
107 |
_ = (
|
108 |
"<green>{time:YY-MM-DD HH:mm:ss}</green> | "
|
109 |
"<level>{level: <5}</level> | <level>{message}</level> "
|
110 |
-
"<cyan>{name}</cyan>:<cyan>{line}</cyan>"
|
111 |
)
|
112 |
loggu.add(
|
113 |
sys.stderr,
|
@@ -131,11 +131,15 @@ st.set_page_config( # type: ignore
|
|
131 |
pd.set_option("display.precision", 2)
|
132 |
pd.options.display.float_format = "{:,.2f}".format
|
133 |
|
|
|
|
|
|
|
|
|
134 |
_ = dict(
|
135 |
beetype="ezbee",
|
136 |
-
sourcetype=
|
137 |
sourcecount=2,
|
138 |
-
|
139 |
src_filename="",
|
140 |
tgt_filename="",
|
141 |
src_fileio=b"",
|
@@ -148,6 +152,7 @@ _ = dict(
|
|
148 |
df_a=None,
|
149 |
df_s_a=None,
|
150 |
count=1,
|
|
|
151 |
)
|
152 |
if "ns" not in state:
|
153 |
state.ns = SimpleNamespace(**_)
|
@@ -186,7 +191,8 @@ def main():
|
|
186 |
if set_loglevel() <= 10:
|
187 |
st.markdown(state.ns.count)
|
188 |
loggu.debug(f" run: {state.ns.count}")
|
|
|
189 |
state.ns.count += 1
|
190 |
-
|
191 |
|
192 |
main()
|
|
|
107 |
_ = (
|
108 |
"<green>{time:YY-MM-DD HH:mm:ss}</green> | "
|
109 |
"<level>{level: <5}</level> | <level>{message}</level> "
|
110 |
+
"<cyan>{module}.{name}</cyan>:<cyan>{line}</cyan>"
|
111 |
)
|
112 |
loggu.add(
|
113 |
sys.stderr,
|
|
|
131 |
pd.set_option("display.precision", 2)
|
132 |
pd.options.display.float_format = "{:,.2f}".format
|
133 |
|
134 |
+
sourcetype = "upload"
|
135 |
+
if set_loglevel() <= 10:
|
136 |
+
sourcetype = "urls"
|
137 |
+
|
138 |
_ = dict(
|
139 |
beetype="ezbee",
|
140 |
+
sourcetype=sourcetype,
|
141 |
sourcecount=2,
|
142 |
+
sentali=None,
|
143 |
src_filename="",
|
144 |
tgt_filename="",
|
145 |
src_fileio=b"",
|
|
|
152 |
df_a=None,
|
153 |
df_s_a=None,
|
154 |
count=1,
|
155 |
+
updated=False,
|
156 |
)
|
157 |
if "ns" not in state:
|
158 |
state.ns = SimpleNamespace(**_)
|
|
|
191 |
if set_loglevel() <= 10:
|
192 |
st.markdown(state.ns.count)
|
193 |
loggu.debug(f" run: {state.ns.count}")
|
194 |
+
logger.debug(f" run: {state.ns.count}")
|
195 |
state.ns.count += 1
|
196 |
+
state.ns.updated = False
|
197 |
|
198 |
main()
|
data/test_zh_t.txt
CHANGED
@@ -1,74 +1,36 @@
|
|
1 |
呼嘯山莊
|
2 |
-
|
3 |
-
-------------------------------------------------- ------------------------------
|
4 |
-
|
5 |
第二章
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
“倒霉的人至少就沒有禮了,我就直接為我家就這樣了。”我還在搖家裡孤零零地打電話。它。苦臉的約瑟夫從穀倉的一個圓窗裡探出頭來。
|
17 |
-
|
18 |
-
“你幹嗎?”他大叫。 “主人在牛欄裡,你找他說話,就從這條路口繞過去。”
|
19 |
-
|
20 |
-
“屋裡開門嗎?”我也叫起來。
|
21 |
-
|
22 |
-
“除了老婆沒有別人。你就是騰到夜裡,她也不會開。”
|
23 |
-
|
24 |
-
“為什麼?你就不能告訴她我是誰,呃,約瑟夫?”
|
25 |
-
|
26 |
“別找我!我才不管這些閒事呢,”這個腦袋咕嚕著,又不見了。
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
倒霉,原來又是一堆死兔子。我輕咳一聲,向火爐湊近,又把今晚天氣不好的話評論一通。
|
47 |
-
|
48 |
-
“你本來就不會。。她說,站起來去拿”該台上的兩個茶葉罐。
|
49 |
-
|
50 |
-
我沒有她的小官現在還沒有全身出現纖細的透明狀態,她的臉還沒有明顯的淡麗,而且她的身體還沒有明顯的淡麗。捲髮還不如說說,垂垂垂垂的她脖子上說說,輕而易舉地在與眼神能拒一些和說,要使人抗拒。一,而那張眼神 眼神 眼神 不是自然的。
|
51 |
-
|
52 |
-
她夠不到。 動了 一動 ,她 猛 猛 猛 猛 地 向 像 像 守財奴 守財奴 他 他 的 金子 金子
|
53 |
-
|
54 |
-
“我不要你幫忙,”她怒氣沖沖地說,“我自己得到了。”
|
55 |
-
|
56 |
-
“對不起!”我連忙回答。
|
57 |
-
|
58 |
-
“是請你來喫茶的嗎?”她問,把一條圍裙系在那乾淨的黑衣服上,就這樣站著,拿一盒茶葉〣邌候茶葉〣邌候茶葉〣邌
|
59 |
-
|
60 |
-
“我很想喝杯茶。”我回答。
|
61 |
-
|
62 |
-
“是請你來的嗎?”她又問。
|
63 |
-
|
64 |
-
“沒有,”我說,勉強笑一笑。 “您請我喝茶。”
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
目錄
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
返回首頁
|
|
|
1 |
呼嘯山莊
|
2 |
+
--------------------------------------------------------------------------------
|
|
|
|
|
3 |
第二章
|
4 |
+
英文
|
5 |
+
昨天下午又冷又有霧。 我想就在書房爐邊消磨一下午,不想踩著雜草污泥到呼嘯山莊了。
|
6 |
+
但是,吃過午飯(注意——我在十二點與一點鐘之間吃午飯,而可以當作這所房子的附屬物的管家婆,一比特慈祥的家後卻不能,或者並不願理解我請求在五點鐘開飯的用意),在我懷着這個懶惰的想法上了樓,邁進屋子的時候,看見一個女僕跪在地上,身邊是掃帚和煤鬥。 她正在用一堆堆煤渣封火,搞起一片彌漫的灰塵。 這景象立刻把我趕回頭了。 我拿了帽子,走了四裡路,到達了希刺克厲夫的花園口口,剛好躲過了一場今年初降的鹅毛大雪。
|
7 |
+
在那荒凉的山頂上,土地由於結了一層黑冰而凍得堅硬,冷空氣使我四肢發抖。 我弄不開門鏈,就跳進去,順著兩邊種著蔓延的醋栗樹叢的石路跑去。 我白白地敲了半天門,一直敲到我的手指骨都痛了,狗也狂吠起來。
|
8 |
+
“倒楣的人家!” 我心裡直叫,“只為你這樣無禮待客,就該一輩子跟人群隔離。我至少還不會在白天把門閂住。我才不管呢——我要進去!” 如此决定了。 我就抓住門閂,使勁搖它。 苦臉的約瑟夫從穀倉的一個圓窗裏探出頭來。
|
9 |
+
“你幹嗎?” 他大叫。 “主人在牛欄裏,你要是找他說話,就從這條路口繞過去。”
|
10 |
+
“屋裡沒人開門嗎?” 我也叫起來。
|
11 |
+
“除了家後沒有別人。你就是鬧騰到夜裡,她也不會開。”
|
12 |
+
“為什麼?你就不能告訴她我是誰嗎,呃,約瑟夫?”
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
“別找我!我才不管這些閒事呢,”這個腦袋咕嚕著,又不見了。
|
14 |
+
雪開始下大了。 我握住門柄又試一回。 這時一個沒穿外衣的年輕人,扛著一根草耙,在後面院子裏出現了。 他招呼我跟著他走,穿過了一個洗衣房和一片鋪平的地,那兒有煤棚、抽水機和鴿籠,我們終於到了我上次被接待過的那間溫暖的、熱鬧的大屋子。 煤、炭和木材混合在一起燃起的熊熊爐火,使這屋子放著光彩。 在準備擺上豐盛晚餐的桌旁,我很高興地看到了那位“家後”,以前我從未料想到會有這麼一個人存在的。 我鞠躬等候,以為她會叫我坐下。 她望望我,往她的椅背一靠,不動,也不出聲。
|
15 |
+
“天氣真壞!” 我說,“希刺克厲夫家後,恐怕大門因為您的僕人偷懶而大吃苦頭,我費了好大勁才使他們聽見我敲門!”
|
16 |
+
她死不開口。 我瞪眼——她也瞪眼。 反正她總是以一種冷冷的、漠不關心的神氣盯住我,使人十分窘,而且不愉快。
|
17 |
+
“坐下吧,”那年輕人粗聲粗氣地說,“他就要來了。”
|
18 |
+
我服從了; 輕輕咳了一下,叫喚那惡狗朱諾。 臨到第二次會面,它總算賞臉,搖起尾巴尖,表示認我是熟人了。
|
19 |
+
“好漂亮的狗!” 我又開始說話。 “您是不是打算不要這些小的呢,夫人?”
|
20 |
+
“那些不是我的,”這可愛可親的女主人說,比希刺克厲夫本人所能回答的腔調還要更冷淡些。
|
21 |
+
“啊,您所心愛的是在這一堆裏啦!” 我轉身指著一個看不清楚的靠墊上那一堆像猫似的東西,接著說下去。
|
22 |
+
“誰會愛這些東西那才怪呢!” 她輕蔑地說。
|
23 |
+
倒楣,原來那是堆死兔子。 我又輕咳一聲,向火爐凑近些,又把今晚天氣不好的話評論一通。
|
24 |
+
“你本來就不該出來。” 她說,站起來去拿壁爐臺上的兩個彩色茶葉罐。
|
25 |
+
她原先坐在光線被遮住的地方,現在我把她的全身和面貌都看得清清楚楚。 她苗條,顯然還沒有過青春期。 挺好看的體態,還有一張我生平從未有幸見過的絕妙的小臉蛋。 五官纖麗,非常漂亮。 淡黃色的卷髮,或者不如說是金黃色的,松松地垂在她那細嫩的頸上。 至於眼睛,要是眼神能顯得和悅些,就要使人無法抗拒了。 對我這容易動情的心說來倒是常事,因為它們所表現的只是在輕蔑與近似絕望之間的一種情緒,而在那張臉上看見那樣的眼神是特別不自然的。
|
26 |
+
她簡直够不到茶葉罐。 我動了一動,想幫她一下。 她猛地扭轉身向我,像守財奴看見別人打算幫他數他的金子一樣。
|
27 |
+
“我不要你幫忙,”她怒氣衝衝地說,“我自己拿得到。”
|
28 |
+
“對不起!” 我連忙回答。
|
29 |
+
“是請你來吃茶的嗎?” 她問,把一條圍裙系在她那乾淨的黑衣服上,就這樣站著,拿一匙茶葉正要往茶壺裏倒。
|
30 |
+
“我很想喝杯茶。” 我回答。
|
31 |
+
“是請你來的嗎?” 她又問。
|
32 |
+
“沒有,”我說,勉强笑一笑。 “您正好請我喝茶。”
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
目錄
|
34 |
+
上一章
|
35 |
+
下一章
|
|
|
|
|
36 |
返回首頁
|
litbee/__init__.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
"""Init."""
|
2 |
-
__version__ = "0.1.
|
3 |
from .litbee import litbee
|
4 |
|
5 |
__all__ = ("litbee",)
|
|
|
1 |
"""Init."""
|
2 |
+
__version__ = "0.1.2a1"
|
3 |
from .litbee import litbee
|
4 |
|
5 |
__all__ = ("litbee",)
|
litbee/app.py
CHANGED
@@ -107,7 +107,7 @@ loggu.remove()
|
|
107 |
_ = (
|
108 |
"<green>{time:YY-MM-DD HH:mm:ss}</green> | "
|
109 |
"<level>{level: <5}</level> | <level>{message}</level> "
|
110 |
-
"<cyan>{name}</cyan>:<cyan>{line}</cyan>"
|
111 |
)
|
112 |
loggu.add(
|
113 |
sys.stderr,
|
@@ -131,11 +131,15 @@ st.set_page_config( # type: ignore
|
|
131 |
pd.set_option("display.precision", 2)
|
132 |
pd.options.display.float_format = "{:,.2f}".format
|
133 |
|
|
|
|
|
|
|
|
|
134 |
_ = dict(
|
135 |
beetype="ezbee",
|
136 |
-
sourcetype=
|
137 |
sourcecount=2,
|
138 |
-
|
139 |
src_filename="",
|
140 |
tgt_filename="",
|
141 |
src_fileio=b"",
|
@@ -148,6 +152,7 @@ _ = dict(
|
|
148 |
df_a=None,
|
149 |
df_s_a=None,
|
150 |
count=1,
|
|
|
151 |
)
|
152 |
if "ns" not in state:
|
153 |
state.ns = SimpleNamespace(**_)
|
@@ -186,7 +191,8 @@ def main():
|
|
186 |
if set_loglevel() <= 10:
|
187 |
st.markdown(state.ns.count)
|
188 |
loggu.debug(f" run: {state.ns.count}")
|
|
|
189 |
state.ns.count += 1
|
190 |
-
|
191 |
|
192 |
main()
|
|
|
107 |
_ = (
|
108 |
"<green>{time:YY-MM-DD HH:mm:ss}</green> | "
|
109 |
"<level>{level: <5}</level> | <level>{message}</level> "
|
110 |
+
"<cyan>{module}.{name}</cyan>:<cyan>{line}</cyan>"
|
111 |
)
|
112 |
loggu.add(
|
113 |
sys.stderr,
|
|
|
131 |
pd.set_option("display.precision", 2)
|
132 |
pd.options.display.float_format = "{:,.2f}".format
|
133 |
|
134 |
+
sourcetype = "upload"
|
135 |
+
if set_loglevel() <= 10:
|
136 |
+
sourcetype = "urls"
|
137 |
+
|
138 |
_ = dict(
|
139 |
beetype="ezbee",
|
140 |
+
sourcetype=sourcetype,
|
141 |
sourcecount=2,
|
142 |
+
sentali=None,
|
143 |
src_filename="",
|
144 |
tgt_filename="",
|
145 |
src_fileio=b"",
|
|
|
152 |
df_a=None,
|
153 |
df_s_a=None,
|
154 |
count=1,
|
155 |
+
updated=False,
|
156 |
)
|
157 |
if "ns" not in state:
|
158 |
state.ns = SimpleNamespace(**_)
|
|
|
191 |
if set_loglevel() <= 10:
|
192 |
st.markdown(state.ns.count)
|
193 |
loggu.debug(f" run: {state.ns.count}")
|
194 |
+
logger.debug(f" run: {state.ns.count}")
|
195 |
state.ns.count += 1
|
196 |
+
state.ns.updated = False
|
197 |
|
198 |
main()
|
litbee/fetch_paste.py
CHANGED
@@ -1,9 +1,44 @@
|
|
1 |
-
"""
|
2 |
-
|
3 |
import streamlit as st
|
4 |
from logzero import logger
|
|
|
5 |
|
6 |
|
7 |
def fetch_paste():
|
8 |
-
"""
|
9 |
-
st.write("Coming soon")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Fetch pasted text and convert to state.ns.list1/list2."""
|
2 |
+
# pylint: disable=invalid-name
|
3 |
import streamlit as st
|
4 |
from logzero import logger
|
5 |
+
from streamlit import session_state as state
|
6 |
|
7 |
|
8 |
def fetch_paste():
|
9 |
+
"""Fetch from clipboard."""
|
10 |
+
# st.write("Coming soon")
|
11 |
+
with st.form(key="paste_in_form"):
|
12 |
+
_ = st.expander(f"{state.ns.beetype}: Paste text", expanded=True)
|
13 |
+
with _:
|
14 |
+
col1, col2 = st.columns(2)
|
15 |
+
with col1:
|
16 |
+
text1 = st.text_area(
|
17 |
+
label="Paste your stuff here",
|
18 |
+
key="paste_text1",
|
19 |
+
# help=""
|
20 |
+
height=500,
|
21 |
+
)
|
22 |
+
|
23 |
+
with col2:
|
24 |
+
text2 = st.text_area(
|
25 |
+
label="Paste your stuff here",
|
26 |
+
# help=""
|
27 |
+
key="paste_text2",
|
28 |
+
height=500,
|
29 |
+
)
|
30 |
+
|
31 |
+
submitted = st.form_submit_button("Submit")
|
32 |
+
|
33 |
+
logger.debug("text1[:10]: %s, text2[:10]: %s", text1[:10], text2[:10])
|
34 |
+
|
35 |
+
list1 = [_.strip() for _ in text1.splitlines() if _.strip()]
|
36 |
+
list2 = [_.strip() for _ in text2.splitlines() if _.strip()]
|
37 |
+
|
38 |
+
state.ns.list1 = list1[:]
|
39 |
+
state.ns.list2 = list2[:]
|
40 |
+
|
41 |
+
logger.debug("len(list1): %s, len(list2): %s", len(list1), len(list2))
|
42 |
+
|
43 |
+
state.ns.updated = True
|
44 |
+
logger.debug("state.ns.updated: %s", state.ns.updated)
|
litbee/fetch_upload.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Fetch upload and convert to list1/list2."""
|
2 |
+
from logzero import logger
|
3 |
+
import streamlit as st
|
4 |
+
from streamlit import session_state as state
|
5 |
+
|
6 |
+
|
7 |
+
def fetch_upload():
|
8 |
+
"""Fetch upload and convert to list1/list2."""
|
9 |
+
# src_fileio tgt_fileio
|
10 |
+
with st.form(key="upload_in_form"):
|
11 |
+
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
12 |
+
with _:
|
13 |
+
col1, col2 = st.columns(2)
|
14 |
+
with col1:
|
15 |
+
src_fileio = st.file_uploader(
|
16 |
+
"Choose source file (utf8 txt)",
|
17 |
+
type=[
|
18 |
+
"txt",
|
19 |
+
],
|
20 |
+
key="src_text",
|
21 |
+
# accept_multiple_files=True,
|
22 |
+
# accept_multiple_files=False,
|
23 |
+
)
|
24 |
+
|
25 |
+
with col2:
|
26 |
+
tgt_fileio = st.file_uploader(
|
27 |
+
"Choose target file (utf8 txt)",
|
28 |
+
type=[
|
29 |
+
"txt",
|
30 |
+
],
|
31 |
+
key="tgt_text",
|
32 |
+
# accept_multiple_files=True,
|
33 |
+
)
|
34 |
+
submitted = st.form_submit_button("Submit")
|
35 |
+
|
36 |
+
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
37 |
+
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
38 |
+
|
39 |
+
filename1 = ""
|
40 |
+
if src_fileio:
|
41 |
+
logger.debug(" type(src_fileio): %s", type(src_fileio))
|
42 |
+
|
43 |
+
# for st.file_uploader with accept_multiple_files=True
|
44 |
+
if isinstance(src_fileio, list):
|
45 |
+
logger.debug(" len(src_fileio): %s", len(src_fileio))
|
46 |
+
filenames = []
|
47 |
+
try:
|
48 |
+
filenames = [elm.name for elm in src_fileio] # type: ignore
|
49 |
+
except Exception as exc:
|
50 |
+
logger.error(exc)
|
51 |
+
logger.debug("src_fileio names: *%s*", filenames)
|
52 |
+
|
53 |
+
# state.ns.src_fileio = src_fileio
|
54 |
+
state.ns.src_file = src_fileio[-1].getvalue().decode()
|
55 |
+
state.ns.src_filename = src_fileio[-1].name
|
56 |
+
else:
|
57 |
+
logger.debug("src_fileio.name: [%s]", src_fileio.name)
|
58 |
+
filenames = [src_fileio.name]
|
59 |
+
logger.debug("src_fileio names: %s", filenames)
|
60 |
+
|
61 |
+
# state.ns.src_fileio = src_fileio
|
62 |
+
state.ns.src_file = src_fileio.getvalue().decode()
|
63 |
+
state.ns.src_filename = src_fileio.name
|
64 |
+
filename1 = state.ns.src_filename
|
65 |
+
|
66 |
+
filename2 = ""
|
67 |
+
if tgt_fileio:
|
68 |
+
if isinstance(tgt_fileio, list):
|
69 |
+
logger.warning("not set to handle multiple files")
|
70 |
+
logger.warning("set accept_multiple_files=False in the meantime")
|
71 |
+
else:
|
72 |
+
state.ns.tgt_file = tgt_fileio.getvalue().decode()
|
73 |
+
state.ns.tgt_filename = tgt_fileio.name
|
74 |
+
filename2 = tgt_fileio.name
|
75 |
+
|
76 |
+
# proceed when Submit is clicked
|
77 |
+
msg1 = ""
|
78 |
+
if filename1:
|
79 |
+
msg1 += f" file1 {filename1}"
|
80 |
+
msg2 = ""
|
81 |
+
if filename2:
|
82 |
+
msg2 += f" file2 {filename2}"
|
83 |
+
glue = ""
|
84 |
+
if filename1 and filename2:
|
85 |
+
glue = ", "
|
86 |
+
|
87 |
+
upload_placeholder = st.empty()
|
88 |
+
prefix = f" Upload submitted: {msg1}{glue}{msg2}"
|
89 |
+
upload_placeholder.write(prefix)
|
90 |
+
|
91 |
+
# st.write(f" Submitted upload: {msg1}{glue}{msg2}")
|
92 |
+
if not submitted:
|
93 |
+
return None
|
94 |
+
|
95 |
+
if not (filename1 or filename2):
|
96 |
+
# st.write("| no file uploaded")
|
97 |
+
upload_placeholder.write(f"{prefix} no file uploaded")
|
98 |
+
return None
|
99 |
+
|
100 |
+
if not filename1:
|
101 |
+
# st.write("| file1 not ready")
|
102 |
+
upload_placeholder.write(f"{prefix}, file1 not ready")
|
103 |
+
return None
|
104 |
+
|
105 |
+
if not filename2:
|
106 |
+
# st.write("| file2 not ready")
|
107 |
+
upload_placeholder.write(f"{prefix}, file2 not ready")
|
108 |
+
return None
|
109 |
+
|
110 |
+
try:
|
111 |
+
_ = state.ns.src_file.splitlines()
|
112 |
+
list1 = [elm.strip() for elm in _ if elm.strip()]
|
113 |
+
_ = state.ns.tgt_file.splitlines()
|
114 |
+
list2 = [elm.strip() for elm in _ if elm.strip()]
|
115 |
+
except Exception as exc:
|
116 |
+
logger.error(exc)
|
117 |
+
list1 = [""]
|
118 |
+
list2 = [""]
|
119 |
+
|
120 |
+
logger.debug("len(list1): %s, len(list2): %s", len(list1), len(list2))
|
121 |
+
|
122 |
+
state.ns.list1 = list1[:]
|
123 |
+
state.ns.list2 = list2[:]
|
124 |
+
|
125 |
+
state.ns.updated = True
|
126 |
+
logger.debug("state.ns.updated: %s", state.ns.updated)
|
127 |
+
|
128 |
+
return None
|
litbee/fetch_urls.py
CHANGED
@@ -1,9 +1,170 @@
|
|
1 |
-
"""Fetch from urls."""
|
2 |
-
|
3 |
import streamlit as st
|
|
|
4 |
from logzero import logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
def fetch_urls():
|
8 |
-
"""
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Fetch text from urls and convert to state.ns.list1/list2."""
|
2 |
+
# pylint: disable=invalid-name
|
3 |
import streamlit as st
|
4 |
+
from icecream import ic
|
5 |
from logzero import logger
|
6 |
+
from streamlit import session_state as state
|
7 |
+
from litbee.url2txt import url2txt
|
8 |
+
|
9 |
+
ic.configureOutput(
|
10 |
+
includeContext=True,
|
11 |
+
outputFunction=logger.debug, # outputFunction=logger.info,
|
12 |
+
)
|
13 |
|
14 |
|
15 |
def fetch_urls():
|
16 |
+
"""Fetch text from urls and convert to state.ns.list1/list2."""
|
17 |
+
beetype = state.ns.beetype
|
18 |
+
sourcecount = state.ns.sourcecount
|
19 |
+
value = ""
|
20 |
+
if beetype == "ezbee":
|
21 |
+
url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_en.txt"
|
22 |
+
url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/test_zh.txt"
|
23 |
+
value = f"{url1} {url2}"
|
24 |
+
if beetype == "dzbee":
|
25 |
+
url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-de.txt"
|
26 |
+
url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-zh.txt"
|
27 |
+
value = f"{url1} {url2}"
|
28 |
+
if beetype == "debee":
|
29 |
+
url1 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-de.txt"
|
30 |
+
url2 = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/sternstunden04-en.txt"
|
31 |
+
value = f"{url1} {url2}"
|
32 |
+
|
33 |
+
dict_ = dict(text1="", text2="")
|
34 |
+
|
35 |
+
def fetch_cb():
|
36 |
+
"""Fetch text (dict_["text1"|"text2"]) from urls."""
|
37 |
+
ic("fetch_cb")
|
38 |
+
urls = [elm.strip() for elm in text_inp.split(" ") if elm.strip()]
|
39 |
+
|
40 |
+
# supply http:// if not startswith http
|
41 |
+
urls = [elm if elm.startswith("http") else "http://" + elm for elm in urls]
|
42 |
+
|
43 |
+
_ = "\n\t"
|
44 |
+
# st.markdown(f" urls submitted: \n{_.join(urls)}")
|
45 |
+
ic(f" urls submitted: \n{_.join(urls)}")
|
46 |
+
|
47 |
+
# st.write(" TODO: fetch text from urls.")
|
48 |
+
|
49 |
+
if state.ns.sourcecount == 2: # 2-sep
|
50 |
+
for idx, url in enumerate(urls[:2]):
|
51 |
+
try:
|
52 |
+
_ = url2txt(url)
|
53 |
+
except Exception as e:
|
54 |
+
logger.error(e)
|
55 |
+
_ = str(e)
|
56 |
+
dict_[f"text{idx + 1}"] = _
|
57 |
+
ic(f"{idx + 1}: [{url}] {dict_['text' + str(idx + 1)][:100]}")
|
58 |
+
|
59 |
+
ic(dict_["text1"][:10])
|
60 |
+
ic(dict_["text2"][:10])
|
61 |
+
else: # 1-mix
|
62 |
+
text1 = ""
|
63 |
+
for url in urls:
|
64 |
+
try:
|
65 |
+
_ = url2txt(url)
|
66 |
+
except Exception as e:
|
67 |
+
logger.error(e)
|
68 |
+
_ = str(e)
|
69 |
+
text1 += _
|
70 |
+
ic(text1[:10])
|
71 |
+
dict_["text1"] = text1[:]
|
72 |
+
|
73 |
+
_ = [elm.strip() for elm in dict_["text1"].splitlines() if elm.strip()]
|
74 |
+
state.ns.list1 = _
|
75 |
+
_ = [elm.strip() for elm in dict_["text2"].splitlines() if elm.strip()]
|
76 |
+
state.ns.list2 = _
|
77 |
+
|
78 |
+
list1 = state.ns.list1
|
79 |
+
list2 = state.ns.list2
|
80 |
+
ic(len(list1), len(list2))
|
81 |
+
|
82 |
+
state.fetched_text1 = dict_["text1"]
|
83 |
+
state.fetched_text2 = dict_["text2"]
|
84 |
+
|
85 |
+
# streamlit complains if an initial value of
|
86 |
+
# a widget with this key is set
|
87 |
+
# state.text_area_urls = text_inp
|
88 |
+
|
89 |
+
# with st.form(key="urls_in_form"):
|
90 |
+
# _ = st.expander(f"{beetype}: Paste urls below and press Ctl+Enter or Space Ctl+Enter to testdrive", expanded=True)
|
91 |
+
# with _:
|
92 |
+
label = f"{beetype}: Paste urls below and press Ctl+Enter or Space Ctl+Enter to testdrive"
|
93 |
+
text_inp = st.text_area(
|
94 |
+
label=label,
|
95 |
+
value=value,
|
96 |
+
key="text_area_urls",
|
97 |
+
height=25,
|
98 |
+
help=" URLs separated by at least a space or a newline(贴网址,空格分开或另起一行, Ctrl+回车提交)",
|
99 |
+
on_change=fetch_cb,
|
100 |
+
# args=(text_inp,),
|
101 |
+
)
|
102 |
+
|
103 |
+
# st.button("Fetch", on_click=fetch_cb, args=(text_inp,))
|
104 |
+
|
105 |
+
def text2lists():
|
106 |
+
"""Convert text(s) to list(s)."""
|
107 |
+
if text1:
|
108 |
+
try:
|
109 |
+
list1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
|
110 |
+
state.ns.list1 = list1[:]
|
111 |
+
except Exception as e:
|
112 |
+
logger.warning("text1 to list1 errors: %s", e)
|
113 |
+
|
114 |
+
if text2:
|
115 |
+
try:
|
116 |
+
list2 = [elm.strip() for elm in text2.splitlines() if elm.strip()]
|
117 |
+
state.ns.list2 = list2[:]
|
118 |
+
except Exception as e:
|
119 |
+
logger.warning("text2 to list2 errors: %s", e)
|
120 |
+
|
121 |
+
# show fetch text(s)
|
122 |
+
text1 = dict_["text1"]
|
123 |
+
text2 = dict_["text2"]
|
124 |
+
if state.ns.sourcecount == 2: # 2-sep
|
125 |
+
with st.form(key="fetched_2texts_in_form"):
|
126 |
+
_ = st.expander(f"{state.ns.beetype}: fetched text", expanded=True)
|
127 |
+
with _:
|
128 |
+
col1, col2 = st.columns(2)
|
129 |
+
with col1:
|
130 |
+
text1 = st.text_area(
|
131 |
+
label="Edit when necessary, click Submit when ready",
|
132 |
+
key="fetched_text1",
|
133 |
+
# help=""
|
134 |
+
height=500,
|
135 |
+
value=text1,
|
136 |
+
)
|
137 |
+
|
138 |
+
with col2:
|
139 |
+
text2 = st.text_area(
|
140 |
+
label="Edit when necessary, click Submit when ready",
|
141 |
+
# help=""
|
142 |
+
key="fetched_text2",
|
143 |
+
height=500,
|
144 |
+
value=text2,
|
145 |
+
)
|
146 |
+
|
147 |
+
submitted = st.form_submit_button(
|
148 |
+
"Submit",
|
149 |
+
on_click=text2lists
|
150 |
+
)
|
151 |
+
|
152 |
+
else: # 1-mix
|
153 |
+
with st.form(key="fetched_1_text_in_form"):
|
154 |
+
_ = st.expander(f"{state.ns.beetype}: fetched mixed text", expanded=True)
|
155 |
+
with _:
|
156 |
+
text1 = st.text_area(
|
157 |
+
label="Edit when necessary, click Submit when ready",
|
158 |
+
key="fetched_mixed_text1",
|
159 |
+
height=500,
|
160 |
+
value=text1,
|
161 |
+
)
|
162 |
+
submitted = st.form_submit_button("Submit", on_click=text2lists)
|
163 |
+
|
164 |
+
# _ = """
|
165 |
+
if not submitted:
|
166 |
+
ic("Submit not yet clicked")
|
167 |
+
return
|
168 |
+
# """
|
169 |
+
|
170 |
+
state.ns.updated = True
|
litbee/home.py
CHANGED
@@ -32,14 +32,26 @@ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
|
|
32 |
# from st_aggrid.grid_options_builder import GridOptionsBuilder
|
33 |
from streamlit import session_state as state
|
34 |
|
|
|
|
|
|
|
35 |
from litbee.t2s import t2s
|
36 |
|
37 |
|
38 |
def home(): # noqa
|
39 |
-
"""
|
40 |
-
# st.write(state.ns.beetype)
|
41 |
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
st.write("Coming soooooooon...")
|
44 |
return None
|
45 |
|
@@ -47,122 +59,37 @@ def home(): # noqa
|
|
47 |
st.write("Coming soon...")
|
48 |
return None
|
49 |
|
50 |
-
#
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
key="src_text",
|
62 |
-
# accept_multiple_files=True,
|
63 |
-
# accept_multiple_files=False,
|
64 |
-
)
|
65 |
-
|
66 |
-
with col2:
|
67 |
-
tgt_fileio = st.file_uploader(
|
68 |
-
"Choose target file (utf8 txt)",
|
69 |
-
type=[
|
70 |
-
"txt",
|
71 |
-
],
|
72 |
-
key="tgt_text",
|
73 |
-
# accept_multiple_files=True,
|
74 |
-
)
|
75 |
-
submitted = st.form_submit_button("Submit")
|
76 |
-
|
77 |
-
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
78 |
-
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
79 |
-
|
80 |
-
filename1 = ""
|
81 |
-
if src_fileio:
|
82 |
-
logger.debug(" type(src_fileio): %s", type(src_fileio))
|
83 |
-
|
84 |
-
# for st.file_uploade accept_multiple_files=True
|
85 |
-
if isinstance(src_fileio, list):
|
86 |
-
logger.debug(" len(src_fileio): %s", len(src_fileio))
|
87 |
-
filenames = []
|
88 |
-
try:
|
89 |
-
filenames = [elm.name for elm in src_fileio] # type: ignore
|
90 |
-
except Exception as exc:
|
91 |
-
logger.error(exc)
|
92 |
-
logger.debug("src_fileio names: *%s*", filenames)
|
93 |
-
|
94 |
-
# state.ns.src_fileio = src_fileio
|
95 |
-
state.ns.src_file = src_fileio[-1].getvalue().decode()
|
96 |
-
state.ns.src_filename = src_fileio[-1].name
|
97 |
-
else:
|
98 |
-
logger.debug("src_fileio.name: [%s]", src_fileio.name)
|
99 |
-
filenames = [src_fileio.name]
|
100 |
-
logger.debug("src_fileio names: %s", filenames)
|
101 |
-
|
102 |
-
# state.ns.src_fileio = src_fileio
|
103 |
-
state.ns.src_file = src_fileio.getvalue().decode()
|
104 |
-
state.ns.src_filename = src_fileio.name
|
105 |
-
filename1 = state.ns.src_filename
|
106 |
-
|
107 |
-
filename2 = ""
|
108 |
-
if tgt_fileio:
|
109 |
-
if isinstance(tgt_fileio, list):
|
110 |
-
logger.warning("not set to handle multiple files")
|
111 |
-
logger.warning("set accept_multiple_files=False in the meantime")
|
112 |
-
else:
|
113 |
-
state.ns.tgt_file = tgt_fileio.getvalue().decode()
|
114 |
-
state.ns.tgt_filename = tgt_fileio.name
|
115 |
-
filename2 = tgt_fileio.name
|
116 |
-
|
117 |
-
# proceed when Submit is clicked
|
118 |
-
msg1 = ""
|
119 |
-
if filename1:
|
120 |
-
msg1 += f" file1 {filename1}"
|
121 |
-
msg2 = ""
|
122 |
-
if filename2:
|
123 |
-
msg2 += f" file2 {filename2}"
|
124 |
-
glue = ""
|
125 |
-
if filename1 and filename2:
|
126 |
-
glue = ", "
|
127 |
-
|
128 |
-
upload_placeholder = st.empty()
|
129 |
-
prefix = f" Submitted upload: {msg1}{glue}{msg2}"
|
130 |
-
upload_placeholder.write(prefix)
|
131 |
-
|
132 |
-
# st.write(f" Submitted upload: {msg1}{glue}{msg2}")
|
133 |
-
if not submitted:
|
134 |
return None
|
135 |
|
136 |
-
|
137 |
-
# st.write("| no file uploaded")
|
138 |
-
upload_placeholder.write(f"{prefix} no file uploaded")
|
139 |
-
return None
|
140 |
|
141 |
-
if not
|
142 |
-
|
143 |
-
|
|
|
144 |
return None
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
upload_placeholder.write(f"{prefix}, file2 not ready")
|
149 |
-
return None
|
150 |
|
151 |
-
|
152 |
-
|
153 |
-
list1 = [elm.strip() for elm in _ if elm.strip()]
|
154 |
-
_ = state.ns.tgt_file.splitlines()
|
155 |
-
list2 = [elm.strip() for elm in _ if elm.strip()]
|
156 |
-
except Exception as exc:
|
157 |
-
logger.error(exc)
|
158 |
-
list1 = [""]
|
159 |
-
list2 = [""]
|
160 |
-
state.ns.list1 = list1[:]
|
161 |
-
state.ns.list2 = list2[:]
|
162 |
|
163 |
df = pd.DataFrame(zip_longest(list1, list2, fillvalue=""))
|
164 |
try:
|
165 |
-
df.columns = ["text1", "text2"]
|
|
|
166 |
except Exception as exc:
|
167 |
logger.debug("df: \n%s", df)
|
168 |
logger.error("%s", exc)
|
@@ -171,6 +98,7 @@ def home(): # noqa
|
|
171 |
logger.debug("df: %s", df)
|
172 |
|
173 |
# st.table(df) # looks alright
|
|
|
174 |
|
175 |
# styled pd dataframe?
|
176 |
# bigger, no pagination
|
@@ -186,28 +114,11 @@ def home(): # noqa
|
|
186 |
logger.debug(" df.empty, return")
|
187 |
return None
|
188 |
|
189 |
-
#
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
_ = """
|
196 |
-
ag_exp = st.expander("done aligned") # , expanded=False
|
197 |
-
with ag_exp:
|
198 |
-
agdf = AgGrid(
|
199 |
-
df,
|
200 |
-
# fit_columns_on_grid_load=True,
|
201 |
-
editable=True,
|
202 |
-
gridOptions=gridOptions,
|
203 |
-
key="ag_exp",
|
204 |
-
)
|
205 |
-
# """
|
206 |
-
|
207 |
-
list1 = [elm.strip() for elm in df.text1 if elm.strip()]
|
208 |
-
list2 = [elm.strip() for elm in df.text2 if elm.strip()]
|
209 |
-
logger.debug("list1[:3]: %s", list1[:3])
|
210 |
-
logger.debug("list2[:3]: %s", list2[:3])
|
211 |
|
212 |
logger.info("Processing data... %s", state.ns.beetype)
|
213 |
# if state.ns.beetype in ["ezbee", "dzbee"]:
|
@@ -245,12 +156,12 @@ def home(): # noqa
|
|
245 |
# min_samples=min_samples,
|
246 |
)
|
247 |
except Exception as e:
|
248 |
-
# logger.error("aset = ezbee(...) exc: %s", e)
|
249 |
logger.exception("aset = globals()[state.ns.beetype](...) exc: %s", e)
|
250 |
aset = ""
|
251 |
-
# st.write(e)
|
252 |
st.write("Collecting inputs...")
|
|
|
253 |
return None
|
|
|
254 |
st.success(f"Done, took {t.duration_human}")
|
255 |
|
256 |
else:
|
@@ -279,8 +190,8 @@ def home(): # noqa
|
|
279 |
# aligned_pairs = gen_pairs(list1, list2, aset)
|
280 |
aligned_pairs = aset2pairs(list1, list2, aset)
|
281 |
if aligned_pairs:
|
282 |
-
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
283 |
-
|
284 |
|
285 |
df_a = pd.DataFrame(
|
286 |
aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
|
@@ -323,4 +234,7 @@ def home(): # noqa
|
|
323 |
update_mode=GridUpdateMode.MODEL_CHANGED,
|
324 |
)
|
325 |
|
|
|
|
|
|
|
326 |
return None
|
|
|
32 |
# from st_aggrid.grid_options_builder import GridOptionsBuilder
|
33 |
from streamlit import session_state as state
|
34 |
|
35 |
+
from litbee.fetch_upload import fetch_upload
|
36 |
+
from litbee.fetch_paste import fetch_paste
|
37 |
+
from litbee.fetch_urls import fetch_urls
|
38 |
from litbee.t2s import t2s
|
39 |
|
40 |
|
41 |
def home(): # noqa
|
42 |
+
"""Run tasks.
|
|
|
43 |
|
44 |
+
beetype
|
45 |
+
|
46 |
+
sourcetype
|
47 |
+
fetch_upload/fetch_paste, fetch_url
|
48 |
+
sourcecount
|
49 |
+
|
50 |
+
align: para-align/sent-align
|
51 |
+
|
52 |
+
save xlsx/tsv
|
53 |
+
"""
|
54 |
+
if state.ns.sourcetype not in ["upload", "paste", "urls"]:
|
55 |
st.write("Coming soooooooon...")
|
56 |
return None
|
57 |
|
|
|
59 |
st.write("Coming soon...")
|
60 |
return None
|
61 |
|
62 |
+
# process sourcetype and fetch list1/list2
|
63 |
+
list1 = list2 = []
|
64 |
+
# fetch_upload/fetch_paste
|
65 |
+
if state.ns.sourcetype in ["upload"]:
|
66 |
+
fetch_upload()
|
67 |
+
elif state.ns.sourcetype in ["paste"]:
|
68 |
+
fetch_paste()
|
69 |
+
elif state.ns.sourcetype in ["urls"]:
|
70 |
+
fetch_urls()
|
71 |
+
else:
|
72 |
+
st.warning(f"{state.ns.sourcetype}: Not implemented")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
return None
|
74 |
|
75 |
+
logger.debug("state.ns.updated: %s", state.ns.updated)
|
|
|
|
|
|
|
76 |
|
77 |
+
# if not updated, quit: this does not quite work
|
78 |
+
# only prevents the first run/missing upload
|
79 |
+
if not state.ns.updated:
|
80 |
+
logger.debug(" not updated, early exit.")
|
81 |
return None
|
82 |
|
83 |
+
list1 = state.ns.list1[:]
|
84 |
+
list2 = state.ns.list2[:]
|
|
|
|
|
85 |
|
86 |
+
logger.debug("list1[:3]: %s", list1[:3])
|
87 |
+
logger.debug("list2[:3]: %s", list2[:3])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
df = pd.DataFrame(zip_longest(list1, list2, fillvalue=""))
|
90 |
try:
|
91 |
+
# df.columns = ["text1", "text2"]
|
92 |
+
df.columns = [f"text{i + 1}" for i in range(len(df.columns))]
|
93 |
except Exception as exc:
|
94 |
logger.debug("df: \n%s", df)
|
95 |
logger.error("%s", exc)
|
|
|
98 |
logger.debug("df: %s", df)
|
99 |
|
100 |
# st.table(df) # looks alright
|
101 |
+
# equiv to st.markdown(df.to_markdown())?
|
102 |
|
103 |
# styled pd dataframe?
|
104 |
# bigger, no pagination
|
|
|
114 |
logger.debug(" df.empty, return")
|
115 |
return None
|
116 |
|
117 |
+
# only show this for upload
|
118 |
+
if state.ns.sourcetype in ["upload"]:
|
119 |
+
_ = st.expander("to be aligned", expanded=False)
|
120 |
+
with _:
|
121 |
+
st.write(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
logger.info("Processing data... %s", state.ns.beetype)
|
124 |
# if state.ns.beetype in ["ezbee", "dzbee"]:
|
|
|
156 |
# min_samples=min_samples,
|
157 |
)
|
158 |
except Exception as e:
|
|
|
159 |
logger.exception("aset = globals()[state.ns.beetype](...) exc: %s", e)
|
160 |
aset = ""
|
|
|
161 |
st.write("Collecting inputs...")
|
162 |
+
logger.debug("Collecting inputs...")
|
163 |
return None
|
164 |
+
|
165 |
st.success(f"Done, took {t.duration_human}")
|
166 |
|
167 |
else:
|
|
|
190 |
# aligned_pairs = gen_pairs(list1, list2, aset)
|
191 |
aligned_pairs = aset2pairs(list1, list2, aset)
|
192 |
if aligned_pairs:
|
193 |
+
# logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
194 |
+
logger.debug("%s...s", aligned_pairs[:1])
|
195 |
|
196 |
df_a = pd.DataFrame(
|
197 |
aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
|
|
|
234 |
update_mode=GridUpdateMode.MODEL_CHANGED,
|
235 |
)
|
236 |
|
237 |
+
# reset
|
238 |
+
state.ns.updated = False
|
239 |
+
|
240 |
return None
|
litbee/pad.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
if text1:
|
3 |
+
try:
|
4 |
+
list1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
|
5 |
+
state.ns.list1 = list1[:]
|
6 |
+
except Exception as e:
|
7 |
+
logger.warning("text1 to list1 errors: %s", e)
|
8 |
+
st.text_area(
|
9 |
+
label="Your pasted",
|
10 |
+
value=text1,
|
11 |
+
key="pasted_text1",
|
12 |
+
# help=""
|
13 |
+
height=500,
|
14 |
+
)
|
15 |
+
|
16 |
+
# Split the second pasted text into non-empty, stripped lines and keep a
# copy in the session namespace. The original branch re-parsed text1 and
# overwrote state.ns.list1 (copy-paste slip), so text2 was never used.
if text2:
    try:
        list2 = [elm.strip() for elm in text2.splitlines() if elm.strip()]
        # NOTE(review): assumes state.ns carries a list2 slot alongside
        # list1 -- confirm against the namespace set up in app.py.
        state.ns.list2 = list2[:]
    except Exception as e:
        logger.warning("text2 to list2 errors: %s", e)
|
litbee/settings.py
CHANGED
@@ -9,7 +9,7 @@ from streamlit import session_state as state
|
|
9 |
|
10 |
|
11 |
def settings():
|
12 |
-
"""Prep
|
13 |
|
14 |
Refer to options.py
|
15 |
"""
|
@@ -53,7 +53,7 @@ def settings():
|
|
53 |
index=index,
|
54 |
format_func=lambda x: f"{x:<8} |",
|
55 |
help="upload: one or two files; paste: from clipboard; urls: from the net",
|
56 |
-
disabled=True,
|
57 |
)
|
58 |
state.ns.sourcetype = sourcetype
|
59 |
|
|
|
9 |
|
10 |
|
11 |
def settings():
|
12 |
+
"""Prep Settings/Options page.
|
13 |
|
14 |
Refer to options.py
|
15 |
"""
|
|
|
53 |
index=index,
|
54 |
format_func=lambda x: f"{x:<8} |",
|
55 |
help="upload: one or two files; paste: from clipboard; urls: from the net",
|
56 |
+
# disabled=True,
|
57 |
)
|
58 |
state.ns.sourcetype = sourcetype
|
59 |
|
litbee/url2txt.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Fetch text from url."""
|
2 |
+
from typing import Optional
|
3 |
+
from urllib.parse import urlparse
|
4 |
+
|
5 |
+
import html2text
|
6 |
+
import httpx
|
7 |
+
import streamlit as st
|
8 |
+
from logzero import logger
|
9 |
+
from readability import Document
|
10 |
+
|
11 |
+
|
12 |
+
@st.cache
def url2txt(
    url: str,
    bodywidth: Optional[int] = 5000,
    remove: bool = False,
    show_url: bool = True,
    ignore_links: bool = True,
) -> str:
    """Fetch a url and return its main content as text.

    Args:
        url: address to fetch; ``http://`` is prepended when the value
            does not already start with ``http://`` or ``https://``.
        bodywidth: wrap width passed to html2text.HTML2Text; if None,
            fall back to the default bodywidth of html2text.HTML2Text.
        remove: remove blank lines if set to True.
        show_url: prepend the url as the first output line if set to True.
        ignore_links: drop ``[text](url)`` links from the output.

    Returns:
        The response text unchanged when the server reports
        ``text/plain``; otherwise ``# <title>`` followed by the
        readability summary converted by html2text.

    Raises:
        ValueError: if the url has no scheme/netloc, or the extracted
            summary is empty.
        Exception: errors from httpx, readability or html2text are
            logged and re-raised unchanged.
    """
    url = url.strip()
    # Test the full scheme prefix: a bare startswith("http") would leave
    # hosts such as "httpbin.org" without a scheme and mishandle "HTTP://".
    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url

    logger.info("url: %s", url)

    parsed = urlparse(url)
    if not parsed.scheme or not parsed.netloc:  # no scheme or netloc present
        raise ValueError(f"Invalid url: {url}")

    try:
        # httpx does not follow redirects by default, and raise_for_status()
        # treats a 3xx as an error; the http:// default above is routinely
        # redirected to https, so redirects must be followed.
        resp = httpx.get(url, timeout=30, follow_redirects=True)
        resp.raise_for_status()
    except Exception as exc:
        logger.error(exc)
        raise

    # A missing header simply means "not text/plain".
    content_type = resp.headers.get("content-type", "")

    # output text if text/plain
    if "text/plain" in content_type:
        return resp.text

    # handle html and the rest
    try:
        doc = Document(resp.text)
        # Computed once and reused below.
        summary = doc.summary()
        title = doc.title()
    except Exception as exc:
        logger.error(exc)
        raise

    if not summary.strip():
        raise ValueError("No content for some reason...")

    if bodywidth is not None:
        handle = html2text.HTML2Text(bodywidth=bodywidth)
    else:
        handle = html2text.HTML2Text()

    handle.ignore_links = ignore_links

    try:
        res = handle.handle(summary)
    except Exception as exc:
        logger.error(exc)
        raise

    # remove blank lines
    if remove:
        res = "\n".join(elm for elm in res.splitlines() if elm.strip())

    if not res.strip():  # warn if empty output
        logger.warning("Output seems to be empty...")

    if show_url:
        return f"{url}\n# {title}\n{res}"

    return f"# {title}\n{res}"
|
poetry.lock
CHANGED
@@ -37,6 +37,23 @@ toolz = "*"
|
|
37 |
[package.extras]
|
38 |
dev = ["black", "docutils", "ipython", "flake8", "pytest", "sphinx", "mistune (<2.0.0)", "m2r", "vega-datasets", "recommonmark"]
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
[[package]]
|
41 |
name = "appnope"
|
42 |
version = "0.1.3"
|
@@ -216,6 +233,14 @@ python-versions = "*"
|
|
216 |
[package.dependencies]
|
217 |
pycparser = "*"
|
218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
[[package]]
|
220 |
name = "charset-normalizer"
|
221 |
version = "2.0.12"
|
@@ -273,6 +298,14 @@ python-versions = "*"
|
|
273 |
[package.extras]
|
274 |
test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
[[package]]
|
277 |
name = "de2en"
|
278 |
version = "0.1.1"
|
@@ -519,6 +552,14 @@ python-versions = "*"
|
|
519 |
[package.extras]
|
520 |
test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
|
521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
522 |
[[package]]
|
523 |
name = "hanzidentifier"
|
524 |
version = "1.0.2"
|
@@ -530,6 +571,52 @@ python-versions = "*"
|
|
530 |
[package.dependencies]
|
531 |
zhon = ">=1.1.3"
|
532 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
[[package]]
|
534 |
name = "icecream"
|
535 |
version = "2.1.2"
|
@@ -842,6 +929,20 @@ python-versions = "*"
|
|
842 |
[package.dependencies]
|
843 |
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
844 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
845 |
[[package]]
|
846 |
name = "markupsafe"
|
847 |
version = "2.1.1"
|
@@ -1463,6 +1564,22 @@ python-versions = ">=3.6"
|
|
1463 |
cffi = {version = "*", markers = "implementation_name == \"pypy\""}
|
1464 |
py = {version = "*", markers = "implementation_name == \"pypy\""}
|
1465 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1466 |
[[package]]
|
1467 |
name = "regex"
|
1468 |
version = "2022.4.24"
|
@@ -1489,6 +1606,20 @@ urllib3 = ">=1.21.1,<1.27"
|
|
1489 |
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
|
1490 |
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
|
1491 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1492 |
[[package]]
|
1493 |
name = "rich"
|
1494 |
version = "12.4.4"
|
@@ -1612,6 +1743,14 @@ category = "main"
|
|
1612 |
optional = false
|
1613 |
python-versions = ">=3.6"
|
1614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1615 |
[[package]]
|
1616 |
name = "snowballstemmer"
|
1617 |
version = "2.2.0"
|
@@ -2027,7 +2166,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
|
|
2027 |
[metadata]
|
2028 |
lock-version = "1.1"
|
2029 |
python-versions = "^3.8.3"
|
2030 |
-
content-hash = "
|
2031 |
|
2032 |
[metadata.files]
|
2033 |
about-time = [
|
@@ -2042,6 +2181,10 @@ altair = [
|
|
2042 |
{file = "altair-4.2.0-py3-none-any.whl", hash = "sha256:0c724848ae53410c13fa28be2b3b9a9dcb7b5caa1a70f7f217bd663bb419935a"},
|
2043 |
{file = "altair-4.2.0.tar.gz", hash = "sha256:d87d9372e63b48cd96b2a6415f0cf9457f50162ab79dc7a31cd7e024dd840026"},
|
2044 |
]
|
|
|
|
|
|
|
|
|
2045 |
appnope = [
|
2046 |
{file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"},
|
2047 |
{file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"},
|
@@ -2213,6 +2356,10 @@ cffi = [
|
|
2213 |
{file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"},
|
2214 |
{file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"},
|
2215 |
]
|
|
|
|
|
|
|
|
|
2216 |
charset-normalizer = [
|
2217 |
{file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
|
2218 |
{file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
|
@@ -2233,6 +2380,10 @@ commonmark = [
|
|
2233 |
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
|
2234 |
{file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
|
2235 |
]
|
|
|
|
|
|
|
|
|
2236 |
de2en = [
|
2237 |
{file = "de2en-0.1.1-py3-none-any.whl", hash = "sha256:6a04c77a04d3a8d426012b29a4561701416aaede64b650b4a1161c3b803fa928"},
|
2238 |
{file = "de2en-0.1.1.tar.gz", hash = "sha256:830dfc7a168cc1dcaea582655fc738db9add98ded30f56ddaca86b7b23dd6eff"},
|
@@ -2319,9 +2470,25 @@ gitpython = [
|
|
2319 |
grapheme = [
|
2320 |
{file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
|
2321 |
]
|
|
|
|
|
|
|
|
|
2322 |
hanzidentifier = [
|
2323 |
{file = "hanzidentifier-1.0.2.tar.gz", hash = "sha256:793a298430aa9a9d6ab344dc0ca0ab4bd1161d88c7da941d6554571093003cba"},
|
2324 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2325 |
icecream = [
|
2326 |
{file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
|
2327 |
{file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
|
@@ -2406,6 +2573,71 @@ logzero = [
|
|
2406 |
{file = "logzero-1.7.0-py2.py3-none-any.whl", hash = "sha256:23eb1f717a2736f9ab91ca0d43160fd2c996ad49ae6bad34652d47aba908769d"},
|
2407 |
{file = "logzero-1.7.0.tar.gz", hash = "sha256:7f73ddd3ae393457236f081ffebd044a3aa2e423a47ae6ddb5179ab90d0ad082"},
|
2408 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2409 |
markupsafe = [
|
2410 |
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
|
2411 |
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
|
@@ -2931,6 +3163,10 @@ pyzmq = [
|
|
2931 |
{file = "pyzmq-23.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4d861ae20040afc17adef33053c328667da78d4d3676b2936788fd031665e3a8"},
|
2932 |
{file = "pyzmq-23.0.0.tar.gz", hash = "sha256:a45f5c0477d12df05ef2e2922b49b7c0ae9d0f4ff9b6bb0d666558df0ef37122"},
|
2933 |
]
|
|
|
|
|
|
|
|
|
2934 |
regex = [
|
2935 |
{file = "regex-2022.4.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f86aef546add4ff1202e1f31e9bb54f9268f17d996b2428877283146bf9bc013"},
|
2936 |
{file = "regex-2022.4.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e944268445b5694f5d41292c9228f0ca46d5a32a67f195d5f8547c1f1d91f4bc"},
|
@@ -3011,6 +3247,10 @@ requests = [
|
|
3011 |
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
|
3012 |
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
|
3013 |
]
|
|
|
|
|
|
|
|
|
3014 |
rich = [
|
3015 |
{file = "rich-12.4.4-py3-none-any.whl", hash = "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"},
|
3016 |
{file = "rich-12.4.4.tar.gz", hash = "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2"},
|
@@ -3146,6 +3386,10 @@ smmap = [
|
|
3146 |
{file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
|
3147 |
{file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
|
3148 |
]
|
|
|
|
|
|
|
|
|
3149 |
snowballstemmer = [
|
3150 |
{file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
|
3151 |
{file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
|
|
|
37 |
[package.extras]
|
38 |
dev = ["black", "docutils", "ipython", "flake8", "pytest", "sphinx", "mistune (<2.0.0)", "m2r", "vega-datasets", "recommonmark"]
|
39 |
|
40 |
+
[[package]]
|
41 |
+
name = "anyio"
|
42 |
+
version = "3.6.1"
|
43 |
+
description = "High level compatibility layer for multiple asynchronous event loop implementations"
|
44 |
+
category = "main"
|
45 |
+
optional = false
|
46 |
+
python-versions = ">=3.6.2"
|
47 |
+
|
48 |
+
[package.dependencies]
|
49 |
+
idna = ">=2.8"
|
50 |
+
sniffio = ">=1.1"
|
51 |
+
|
52 |
+
[package.extras]
|
53 |
+
doc = ["packaging", "sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"]
|
54 |
+
test = ["coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "contextlib2", "uvloop (<0.15)", "mock (>=4)", "uvloop (>=0.15)"]
|
55 |
+
trio = ["trio (>=0.16)"]
|
56 |
+
|
57 |
[[package]]
|
58 |
name = "appnope"
|
59 |
version = "0.1.3"
|
|
|
233 |
[package.dependencies]
|
234 |
pycparser = "*"
|
235 |
|
236 |
+
[[package]]
|
237 |
+
name = "chardet"
|
238 |
+
version = "4.0.0"
|
239 |
+
description = "Universal encoding detector for Python 2 and 3"
|
240 |
+
category = "main"
|
241 |
+
optional = false
|
242 |
+
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
243 |
+
|
244 |
[[package]]
|
245 |
name = "charset-normalizer"
|
246 |
version = "2.0.12"
|
|
|
298 |
[package.extras]
|
299 |
test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
|
300 |
|
301 |
+
[[package]]
|
302 |
+
name = "cssselect"
|
303 |
+
version = "1.1.0"
|
304 |
+
description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
|
305 |
+
category = "main"
|
306 |
+
optional = false
|
307 |
+
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
308 |
+
|
309 |
[[package]]
|
310 |
name = "de2en"
|
311 |
version = "0.1.1"
|
|
|
552 |
[package.extras]
|
553 |
test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
|
554 |
|
555 |
+
[[package]]
|
556 |
+
name = "h11"
|
557 |
+
version = "0.12.0"
|
558 |
+
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
|
559 |
+
category = "main"
|
560 |
+
optional = false
|
561 |
+
python-versions = ">=3.6"
|
562 |
+
|
563 |
[[package]]
|
564 |
name = "hanzidentifier"
|
565 |
version = "1.0.2"
|
|
|
571 |
[package.dependencies]
|
572 |
zhon = ">=1.1.3"
|
573 |
|
574 |
+
[[package]]
|
575 |
+
name = "html2text"
|
576 |
+
version = "2020.1.16"
|
577 |
+
description = "Turn HTML into equivalent Markdown-structured text."
|
578 |
+
category = "main"
|
579 |
+
optional = false
|
580 |
+
python-versions = ">=3.5"
|
581 |
+
|
582 |
+
[[package]]
|
583 |
+
name = "httpcore"
|
584 |
+
version = "0.15.0"
|
585 |
+
description = "A minimal low-level HTTP client."
|
586 |
+
category = "main"
|
587 |
+
optional = false
|
588 |
+
python-versions = ">=3.7"
|
589 |
+
|
590 |
+
[package.dependencies]
|
591 |
+
anyio = ">=3.0.0,<4.0.0"
|
592 |
+
certifi = "*"
|
593 |
+
h11 = ">=0.11,<0.13"
|
594 |
+
sniffio = ">=1.0.0,<2.0.0"
|
595 |
+
|
596 |
+
[package.extras]
|
597 |
+
http2 = ["h2 (>=3,<5)"]
|
598 |
+
socks = ["socksio (>=1.0.0,<2.0.0)"]
|
599 |
+
|
600 |
+
[[package]]
|
601 |
+
name = "httpx"
|
602 |
+
version = "0.23.0"
|
603 |
+
description = "The next generation HTTP client."
|
604 |
+
category = "main"
|
605 |
+
optional = false
|
606 |
+
python-versions = ">=3.7"
|
607 |
+
|
608 |
+
[package.dependencies]
|
609 |
+
certifi = "*"
|
610 |
+
httpcore = ">=0.15.0,<0.16.0"
|
611 |
+
rfc3986 = {version = ">=1.3,<2", extras = ["idna2008"]}
|
612 |
+
sniffio = "*"
|
613 |
+
|
614 |
+
[package.extras]
|
615 |
+
brotli = ["brotlicffi", "brotli"]
|
616 |
+
cli = ["click (>=8.0.0,<9.0.0)", "rich (>=10,<13)", "pygments (>=2.0.0,<3.0.0)"]
|
617 |
+
http2 = ["h2 (>=3,<5)"]
|
618 |
+
socks = ["socksio (>=1.0.0,<2.0.0)"]
|
619 |
+
|
620 |
[[package]]
|
621 |
name = "icecream"
|
622 |
version = "2.1.2"
|
|
|
929 |
[package.dependencies]
|
930 |
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
931 |
|
932 |
+
[[package]]
|
933 |
+
name = "lxml"
|
934 |
+
version = "4.9.0"
|
935 |
+
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
|
936 |
+
category = "main"
|
937 |
+
optional = false
|
938 |
+
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
|
939 |
+
|
940 |
+
[package.extras]
|
941 |
+
cssselect = ["cssselect (>=0.7)"]
|
942 |
+
html5 = ["html5lib"]
|
943 |
+
htmlsoup = ["beautifulsoup4"]
|
944 |
+
source = ["Cython (>=0.29.7)"]
|
945 |
+
|
946 |
[[package]]
|
947 |
name = "markupsafe"
|
948 |
version = "2.1.1"
|
|
|
1564 |
cffi = {version = "*", markers = "implementation_name == \"pypy\""}
|
1565 |
py = {version = "*", markers = "implementation_name == \"pypy\""}
|
1566 |
|
1567 |
+
[[package]]
|
1568 |
+
name = "readability-lxml"
|
1569 |
+
version = "0.8.1"
|
1570 |
+
description = "fast html to text parser (article readability tool) with python 3 support"
|
1571 |
+
category = "main"
|
1572 |
+
optional = false
|
1573 |
+
python-versions = "*"
|
1574 |
+
|
1575 |
+
[package.dependencies]
|
1576 |
+
chardet = "*"
|
1577 |
+
cssselect = "*"
|
1578 |
+
lxml = "*"
|
1579 |
+
|
1580 |
+
[package.extras]
|
1581 |
+
test = ["timeout-decorator"]
|
1582 |
+
|
1583 |
[[package]]
|
1584 |
name = "regex"
|
1585 |
version = "2022.4.24"
|
|
|
1606 |
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
|
1607 |
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
|
1608 |
|
1609 |
+
[[package]]
|
1610 |
+
name = "rfc3986"
|
1611 |
+
version = "1.5.0"
|
1612 |
+
description = "Validating URI References per RFC 3986"
|
1613 |
+
category = "main"
|
1614 |
+
optional = false
|
1615 |
+
python-versions = "*"
|
1616 |
+
|
1617 |
+
[package.dependencies]
|
1618 |
+
idna = {version = "*", optional = true, markers = "extra == \"idna2008\""}
|
1619 |
+
|
1620 |
+
[package.extras]
|
1621 |
+
idna2008 = ["idna"]
|
1622 |
+
|
1623 |
[[package]]
|
1624 |
name = "rich"
|
1625 |
version = "12.4.4"
|
|
|
1743 |
optional = false
|
1744 |
python-versions = ">=3.6"
|
1745 |
|
1746 |
+
[[package]]
|
1747 |
+
name = "sniffio"
|
1748 |
+
version = "1.2.0"
|
1749 |
+
description = "Sniff out which async library your code is running under"
|
1750 |
+
category = "main"
|
1751 |
+
optional = false
|
1752 |
+
python-versions = ">=3.5"
|
1753 |
+
|
1754 |
[[package]]
|
1755 |
name = "snowballstemmer"
|
1756 |
version = "2.2.0"
|
|
|
2166 |
[metadata]
|
2167 |
lock-version = "1.1"
|
2168 |
python-versions = "^3.8.3"
|
2169 |
+
content-hash = "f594ea8ffe579f3c01e737f1d41548f25e8d16bf5a3dab9344aad0376fb456d9"
|
2170 |
|
2171 |
[metadata.files]
|
2172 |
about-time = [
|
|
|
2181 |
{file = "altair-4.2.0-py3-none-any.whl", hash = "sha256:0c724848ae53410c13fa28be2b3b9a9dcb7b5caa1a70f7f217bd663bb419935a"},
|
2182 |
{file = "altair-4.2.0.tar.gz", hash = "sha256:d87d9372e63b48cd96b2a6415f0cf9457f50162ab79dc7a31cd7e024dd840026"},
|
2183 |
]
|
2184 |
+
anyio = [
|
2185 |
+
{file = "anyio-3.6.1-py3-none-any.whl", hash = "sha256:cb29b9c70620506a9a8f87a309591713446953302d7d995344d0d7c6c0c9a7be"},
|
2186 |
+
{file = "anyio-3.6.1.tar.gz", hash = "sha256:413adf95f93886e442aea925f3ee43baa5a765a64a0f52c6081894f9992fdd0b"},
|
2187 |
+
]
|
2188 |
appnope = [
|
2189 |
{file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"},
|
2190 |
{file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"},
|
|
|
2356 |
{file = "cffi-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139"},
|
2357 |
{file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"},
|
2358 |
]
|
2359 |
+
chardet = [
|
2360 |
+
{file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"},
|
2361 |
+
{file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"},
|
2362 |
+
]
|
2363 |
charset-normalizer = [
|
2364 |
{file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
|
2365 |
{file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
|
|
|
2380 |
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
|
2381 |
{file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
|
2382 |
]
|
2383 |
+
cssselect = [
|
2384 |
+
{file = "cssselect-1.1.0-py2.py3-none-any.whl", hash = "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf"},
|
2385 |
+
{file = "cssselect-1.1.0.tar.gz", hash = "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"},
|
2386 |
+
]
|
2387 |
de2en = [
|
2388 |
{file = "de2en-0.1.1-py3-none-any.whl", hash = "sha256:6a04c77a04d3a8d426012b29a4561701416aaede64b650b4a1161c3b803fa928"},
|
2389 |
{file = "de2en-0.1.1.tar.gz", hash = "sha256:830dfc7a168cc1dcaea582655fc738db9add98ded30f56ddaca86b7b23dd6eff"},
|
|
|
2470 |
grapheme = [
|
2471 |
{file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
|
2472 |
]
|
2473 |
+
h11 = [
|
2474 |
+
{file = "h11-0.12.0-py3-none-any.whl", hash = "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6"},
|
2475 |
+
{file = "h11-0.12.0.tar.gz", hash = "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"},
|
2476 |
+
]
|
2477 |
hanzidentifier = [
|
2478 |
{file = "hanzidentifier-1.0.2.tar.gz", hash = "sha256:793a298430aa9a9d6ab344dc0ca0ab4bd1161d88c7da941d6554571093003cba"},
|
2479 |
]
|
2480 |
+
html2text = [
|
2481 |
+
{file = "html2text-2020.1.16-py3-none-any.whl", hash = "sha256:c7c629882da0cf377d66f073329ccf34a12ed2adf0169b9285ae4e63ef54c82b"},
|
2482 |
+
{file = "html2text-2020.1.16.tar.gz", hash = "sha256:e296318e16b059ddb97f7a8a1d6a5c1d7af4544049a01e261731d2d5cc277bbb"},
|
2483 |
+
]
|
2484 |
+
httpcore = [
|
2485 |
+
{file = "httpcore-0.15.0-py3-none-any.whl", hash = "sha256:1105b8b73c025f23ff7c36468e4432226cbb959176eab66864b8e31c4ee27fa6"},
|
2486 |
+
{file = "httpcore-0.15.0.tar.gz", hash = "sha256:18b68ab86a3ccf3e7dc0f43598eaddcf472b602aba29f9aa6ab85fe2ada3980b"},
|
2487 |
+
]
|
2488 |
+
httpx = [
|
2489 |
+
{file = "httpx-0.23.0-py3-none-any.whl", hash = "sha256:42974f577483e1e932c3cdc3cd2303e883cbfba17fe228b0f63589764d7b9c4b"},
|
2490 |
+
{file = "httpx-0.23.0.tar.gz", hash = "sha256:f28eac771ec9eb4866d3fb4ab65abd42d38c424739e80c08d8d20570de60b0ef"},
|
2491 |
+
]
|
2492 |
icecream = [
|
2493 |
{file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
|
2494 |
{file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
|
|
|
2573 |
{file = "logzero-1.7.0-py2.py3-none-any.whl", hash = "sha256:23eb1f717a2736f9ab91ca0d43160fd2c996ad49ae6bad34652d47aba908769d"},
|
2574 |
{file = "logzero-1.7.0.tar.gz", hash = "sha256:7f73ddd3ae393457236f081ffebd044a3aa2e423a47ae6ddb5179ab90d0ad082"},
|
2575 |
]
|
2576 |
+
lxml = [
|
2577 |
+
{file = "lxml-4.9.0-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:b5031d151d6147eac53366d6ec87da84cd4d8c5e80b1d9948a667a7164116e39"},
|
2578 |
+
{file = "lxml-4.9.0-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5d52e1173f52020392f593f87a6af2d4055dd800574a5cb0af4ea3878801d307"},
|
2579 |
+
{file = "lxml-4.9.0-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3af00ee88376022589ceeb8170eb67dacf5f7cd625ea59fa0977d719777d4ae8"},
|
2580 |
+
{file = "lxml-4.9.0-cp27-cp27m-win32.whl", hash = "sha256:1057356b808d149bc14eb8f37bb89129f237df488661c1e0fc0376ca90e1d2c3"},
|
2581 |
+
{file = "lxml-4.9.0-cp27-cp27m-win_amd64.whl", hash = "sha256:f6d23a01921b741774f35e924d418a43cf03eca1444f3fdfd7978d35a5aaab8b"},
|
2582 |
+
{file = "lxml-4.9.0-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56e19fb6e4b8bd07fb20028d03d3bc67bcc0621347fbde64f248e44839771756"},
|
2583 |
+
{file = "lxml-4.9.0-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:4cd69bca464e892ea4ed544ba6a7850aaff6f8d792f8055a10638db60acbac18"},
|
2584 |
+
{file = "lxml-4.9.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:94b181dd2777890139e49a5336bf3a9a3378ce66132c665fe8db4e8b7683cde2"},
|
2585 |
+
{file = "lxml-4.9.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:607224ffae9a0cf0a2f6e14f5f6bce43e83a6fbdaa647891729c103bdd6a5593"},
|
2586 |
+
{file = "lxml-4.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:11d62c97ceff9bab94b6b29c010ea5fb6831743459bb759c917f49ba75601cd0"},
|
2587 |
+
{file = "lxml-4.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:70a198030d26f5e569367f0f04509b63256faa76a22886280eea69a4f535dd40"},
|
2588 |
+
{file = "lxml-4.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3cf816aed8125cfc9e6e5c6c31ff94278320d591bd7970c4a0233bee0d1c8790"},
|
2589 |
+
{file = "lxml-4.9.0-cp310-cp310-win32.whl", hash = "sha256:65b3b5f12c6fb5611e79157214f3cd533083f9b058bf2fc8a1c5cc5ee40fdc5a"},
|
2590 |
+
{file = "lxml-4.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:0aa4cce579512c33373ca4c5e23c21e40c1aa1a33533a75e51b654834fd0e4f2"},
|
2591 |
+
{file = "lxml-4.9.0-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63419db39df8dc5564f6f103102c4665f7e4d9cb64030e98cf7a74eae5d5760d"},
|
2592 |
+
{file = "lxml-4.9.0-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d8e5021e770b0a3084c30dda5901d5fce6d4474feaf0ced8f8e5a82702502fbb"},
|
2593 |
+
{file = "lxml-4.9.0-cp35-cp35m-win32.whl", hash = "sha256:f17b9df97c5ecdfb56c5e85b3c9df9831246df698f8581c6e111ac664c7c656e"},
|
2594 |
+
{file = "lxml-4.9.0-cp35-cp35m-win_amd64.whl", hash = "sha256:75da29a0752c8f2395df0115ac1681cefbdd4418676015be8178b733704cbff2"},
|
2595 |
+
{file = "lxml-4.9.0-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:e4d020ecf3740b7312bacab2cb966bb720fd4d3490562d373b4ad91dd1857c0d"},
|
2596 |
+
{file = "lxml-4.9.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b71c52d69b91af7d18c13aef1b0cc3baee36b78607c711eb14a52bf3aa7c815e"},
|
2597 |
+
{file = "lxml-4.9.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28cf04a1a38e961d4a764d2940af9b941b66263ed5584392ef875ee9c1e360a3"},
|
2598 |
+
{file = "lxml-4.9.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:915ecf7d486df17cc65aeefdb680d5ad4390cc8c857cf8db3fe241ed234f856a"},
|
2599 |
+
{file = "lxml-4.9.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e564d5a771b4015f34166a05ea2165b7e283635c41b1347696117f780084b46d"},
|
2600 |
+
{file = "lxml-4.9.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c2a57755e366e0ac7ebdb3e9207f159c3bf1afed02392ab18453ce81f5ee92ee"},
|
2601 |
+
{file = "lxml-4.9.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:00f3a6f88fd5f4357844dd91a1abac5f466c6799f1b7f1da2df6665253845b11"},
|
2602 |
+
{file = "lxml-4.9.0-cp36-cp36m-win32.whl", hash = "sha256:9093a359a86650a3dbd6532c3e4d21a6f58ba2cb60d0e72db0848115d24c10ba"},
|
2603 |
+
{file = "lxml-4.9.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d1690c4d37674a5f0cdafbc5ed7e360800afcf06928c2a024c779c046891bf09"},
|
2604 |
+
{file = "lxml-4.9.0-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:6af7f51a6010748fc1bb71917318d953c9673e4ae3f6d285aaf93ef5b2eb11c1"},
|
2605 |
+
{file = "lxml-4.9.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:eabdbe04ee0a7e760fa6cd9e799d2b020d098c580ba99107d52e1e5e538b1ecb"},
|
2606 |
+
{file = "lxml-4.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:b1e22f3ee4d75ca261b6bffbf64f6f178cb194b1be3191065a09f8d98828daa9"},
|
2607 |
+
{file = "lxml-4.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:53b0410b220766321759f7f9066da67b1d0d4a7f6636a477984cbb1d98483955"},
|
2608 |
+
{file = "lxml-4.9.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d76da27f5e3e9bc40eba6ed7a9e985f57547e98cf20521d91215707f2fb57e0f"},
|
2609 |
+
{file = "lxml-4.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:686565ac77ff94a8965c11829af253d9e2ce3bf0d9225b1d2eb5c4d4666d0dca"},
|
2610 |
+
{file = "lxml-4.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b62d1431b4c40cda43cc986f19b8c86b1d2ae8918cfc00f4776fdf070b65c0c4"},
|
2611 |
+
{file = "lxml-4.9.0-cp37-cp37m-win32.whl", hash = "sha256:4becd16750ca5c2a1b1588269322b2cebd10c07738f336c922b658dbab96a61c"},
|
2612 |
+
{file = "lxml-4.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e35a298691b9e10e5a5631f8f0ba605b30ebe19208dc8f58b670462f53753641"},
|
2613 |
+
{file = "lxml-4.9.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:aa7447bf7c1a15ef24e2b86a277b585dd3f055e8890ac7f97374d170187daa97"},
|
2614 |
+
{file = "lxml-4.9.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:612ef8f2795a89ba3a1d4c8c1af84d8453fd53ee611aa5ad460fdd2cab426fc2"},
|
2615 |
+
{file = "lxml-4.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:1bfb791a8fcdbf55d1d41b8be940393687bec0e9b12733f0796668086d1a23ff"},
|
2616 |
+
{file = "lxml-4.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:024684e0c5cfa121c22140d3a0898a3a9b2ea0f0fd2c229b6658af4bdf1155e5"},
|
2617 |
+
{file = "lxml-4.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81c29c8741fa07ecec8ec7417c3d8d1e2f18cf5a10a280f4e1c3f8c3590228b2"},
|
2618 |
+
{file = "lxml-4.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6467626fa74f96f4d80fc6ec2555799e97fff8f36e0bfc7f67769f83e59cff40"},
|
2619 |
+
{file = "lxml-4.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9cae837b988f44925d14d048fa6a8c54f197c8b1223fd9ee9c27084f84606143"},
|
2620 |
+
{file = "lxml-4.9.0-cp38-cp38-win32.whl", hash = "sha256:5a49ad78543925e1a4196e20c9c54492afa4f1502c2a563f73097e2044c75190"},
|
2621 |
+
{file = "lxml-4.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:bb7c1b029e54e26e01b1d1d912fc21abb65650d16ea9a191d026def4ed0859ed"},
|
2622 |
+
{file = "lxml-4.9.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:d0d03b9636f1326772e6854459728676354d4c7731dae9902b180e2065ba3da6"},
|
2623 |
+
{file = "lxml-4.9.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:9af19eb789d674b59a9bee5005779757aab857c40bf9cc313cb01eafac55ce55"},
|
2624 |
+
{file = "lxml-4.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:dd00d28d1ab5fa7627f5abc957f29a6338a7395b724571a8cbff8fbed83aaa82"},
|
2625 |
+
{file = "lxml-4.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:754a1dd04bff8a509a31146bd8f3a5dc8191a8694d582dd5fb71ff09f0722c22"},
|
2626 |
+
{file = "lxml-4.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b7679344f2270840dc5babc9ccbedbc04f7473c1f66d4676bb01680c0db85bcc"},
|
2627 |
+
{file = "lxml-4.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d882c2f3345261e898b9f604be76b61c901fbfa4ac32e3f51d5dc1edc89da3cb"},
|
2628 |
+
{file = "lxml-4.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4e97c8fc761ad63909198acc892f34c20f37f3baa2c50a62d5ec5d7f1efc68a1"},
|
2629 |
+
{file = "lxml-4.9.0-cp39-cp39-win32.whl", hash = "sha256:cf9ec915857d260511399ab87e1e70fa13d6b2972258f8e620a3959468edfc32"},
|
2630 |
+
{file = "lxml-4.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:1254a79f8a67a3908de725caf59eae62d86738f6387b0a34b32e02abd6ae73db"},
|
2631 |
+
{file = "lxml-4.9.0-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:03370ec37fe562238d385e2c53089076dee53aabf8325cab964fdb04a9130fa0"},
|
2632 |
+
{file = "lxml-4.9.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f386def57742aacc3d864169dfce644a8c396f95aa35b41b69df53f558d56dd0"},
|
2633 |
+
{file = "lxml-4.9.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ea3f2e9eb41f973f73619e88bf7bd950b16b4c2ce73d15f24a11800ce1eaf276"},
|
2634 |
+
{file = "lxml-4.9.0-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:2d10659e6e5c53298e6d718fd126e793285bff904bb71d7239a17218f6a197b7"},
|
2635 |
+
{file = "lxml-4.9.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:fcdf70191f0d1761d190a436db06a46f05af60e1410e1507935f0332280c9268"},
|
2636 |
+
{file = "lxml-4.9.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:2b9c2341d96926b0d0e132e5c49ef85eb53fa92ae1c3a70f9072f3db0d32bc07"},
|
2637 |
+
{file = "lxml-4.9.0-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:615886ee84b6f42f1bdf1852a9669b5fe3b96b6ff27f1a7a330b67ad9911200a"},
|
2638 |
+
{file = "lxml-4.9.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:94f2e45b054dd759bed137b6e14ae8625495f7d90ddd23cf62c7a68f72b62656"},
|
2639 |
+
{file = "lxml-4.9.0.tar.gz", hash = "sha256:520461c36727268a989790aef08884347cd41f2d8ae855489ccf40b50321d8d7"},
|
2640 |
+
]
|
2641 |
markupsafe = [
|
2642 |
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
|
2643 |
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
|
|
|
3163 |
{file = "pyzmq-23.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4d861ae20040afc17adef33053c328667da78d4d3676b2936788fd031665e3a8"},
|
3164 |
{file = "pyzmq-23.0.0.tar.gz", hash = "sha256:a45f5c0477d12df05ef2e2922b49b7c0ae9d0f4ff9b6bb0d666558df0ef37122"},
|
3165 |
]
|
3166 |
+
readability-lxml = [
|
3167 |
+
{file = "readability-lxml-0.8.1.tar.gz", hash = "sha256:e51fea56b5909aaf886d307d48e79e096293255afa567b7d08bca94d25b1a4e1"},
|
3168 |
+
{file = "readability_lxml-0.8.1-py3-none-any.whl", hash = "sha256:e0d366a21b1bd6cca17de71a4e6ea16fcfaa8b0a5b4004e39e2c7eff884e6305"},
|
3169 |
+
]
|
3170 |
regex = [
|
3171 |
{file = "regex-2022.4.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f86aef546add4ff1202e1f31e9bb54f9268f17d996b2428877283146bf9bc013"},
|
3172 |
{file = "regex-2022.4.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e944268445b5694f5d41292c9228f0ca46d5a32a67f195d5f8547c1f1d91f4bc"},
|
|
|
3247 |
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
|
3248 |
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
|
3249 |
]
|
3250 |
+
rfc3986 = [
|
3251 |
+
{file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"},
|
3252 |
+
{file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"},
|
3253 |
+
]
|
3254 |
rich = [
|
3255 |
{file = "rich-12.4.4-py3-none-any.whl", hash = "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"},
|
3256 |
{file = "rich-12.4.4.tar.gz", hash = "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2"},
|
|
|
3386 |
{file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
|
3387 |
{file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
|
3388 |
]
|
3389 |
+
sniffio = [
|
3390 |
+
{file = "sniffio-1.2.0-py3-none-any.whl", hash = "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663"},
|
3391 |
+
{file = "sniffio-1.2.0.tar.gz", hash = "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"},
|
3392 |
+
]
|
3393 |
snowballstemmer = [
|
3394 |
{file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
|
3395 |
{file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
|
pyproject.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
[tool.poetry]
|
2 |
name = "litbee"
|
3 |
-
version = "0.1.2-alpha.0"
|
4 |
description = "align (en, de, zh) texts via streamlit "
|
5 |
authors = ["ffreemt"]
|
6 |
license = "MIT"
|
@@ -26,6 +26,9 @@ plotly = "^5.8.0"
|
|
26 |
hanzidentifier = "^1.0.2"
|
27 |
opencc-python-reimplemented = "^0.1.6"
|
28 |
tabulate = "^0.8.9"
|
|
|
|
|
|
|
29 |
|
30 |
[tool.poe.executor]
|
31 |
type = "poetry"
|
|
|
1 |
[tool.poetry]
|
2 |
name = "litbee"
|
3 |
+
version = "0.1.2-alpha.1"
|
4 |
description = "align (en, de, zh) texts via streamlit "
|
5 |
authors = ["ffreemt"]
|
6 |
license = "MIT"
|
|
|
26 |
hanzidentifier = "^1.0.2"
|
27 |
opencc-python-reimplemented = "^0.1.6"
|
28 |
tabulate = "^0.8.9"
|
29 |
+
httpx = "^0.23.0"
|
30 |
+
html2text = "^2020.1.16"
|
31 |
+
readability-lxml = "^0.8.1"
|
32 |
|
33 |
[tool.poe.executor]
|
34 |
type = "poetry"
|