freemt committed on
Commit
b496854
·
1 Parent(s): 46a9657

Update dev added seg-text

Browse files
Files changed (3) hide show
  1. app.py +19 -4
  2. poetry.lock +158 -1
  3. pyproject.toml +1 -0
app.py CHANGED
@@ -7,6 +7,7 @@ from about_time import about_time
7
  from aset2pairs import aset2pairs
8
  from cmat2aset import cmat2aset
9
  from logzero import logger
 
10
  from typing import List, Optional, Union
11
 
12
  from radio_mlbee import __version__
@@ -25,6 +26,7 @@ def ml_fn(
25
  text1: str,
26
  text2: str,
27
  split_to_sents: bool = False,
 
28
  ) -> pd.DataFrame:
29
  """Align text1 text2"""
30
  text1 = str(text1)
@@ -42,7 +44,15 @@ def ml_fn(
42
  logger.error(" praras slpitlines erros: %s, setting to ['']", exc)
43
  paras2 = [""]
44
 
45
- # if split_to_sents: ... # TODO
 
 
 
 
 
 
 
 
46
 
47
  with about_time() as t:
48
  cmat = gen_cmat(paras1, paras2)
@@ -55,6 +65,10 @@ def ml_fn(
55
  pairs = aset2pairs(paras1, paras2, aset)
56
  df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"])
57
 
 
 
 
 
58
  # return pd.DataFrame([["", "", ""]])
59
  # return df.to_html()
60
  return df
@@ -66,14 +80,15 @@ mlbee = gr.Interface(
66
  "textarea",
67
  "textarea",
68
  gr.Checkbox(label="Split to sents?"),
 
69
  ],
70
- outputs="dataframe",
71
  # outputs="html",
72
  title=f"radio-mlbee {__version__}",
73
  description="mlbee rest api on dev ",
74
  examples=[
75
- [text1, text2, False],
76
- # [text1[:len(text1) // 2], text2[:len(text2) // 2], False],
77
  ]
78
  )
79
 
 
7
  from aset2pairs import aset2pairs
8
  from cmat2aset import cmat2aset
9
  from logzero import logger
10
+ from seg_text import seg_text
11
  from typing import List, Optional, Union
12
 
13
  from radio_mlbee import __version__
 
26
  text1: str,
27
  text2: str,
28
  split_to_sents: bool = False,
29
+ preview: bool = False,
30
  ) -> pd.DataFrame:
31
  """Align text1 text2"""
32
  text1 = str(text1)
 
44
  logger.error(" praras slpitlines erros: %s, setting to ['']", exc)
45
  paras2 = [""]
46
 
47
+ if split_to_sents: # TODO
48
+ try:
49
+ paras1 = seg_text(paras1)
50
+ except Exception as exc:
51
+ logger.error(exc)
52
+ try:
53
+ paras2 = seg_text(paras2)
54
+ except Exception as exc:
55
+ logger.error(exc)
56
 
57
  with about_time() as t:
58
  cmat = gen_cmat(paras1, paras2)
 
65
  pairs = aset2pairs(paras1, paras2, aset)
66
  df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"])
67
 
68
+ html = None
69
+ if preview:
70
+ html = df.to_html()
71
+
72
  # return pd.DataFrame([["", "", ""]])
73
  # return df.to_html()
74
  return df
 
80
  "textarea",
81
  "textarea",
82
  gr.Checkbox(label="Split to sents?"),
83
+ gr.Checkbox(label="Preview?"),
84
  ],
85
+ outputs=["dataframe", "html"],
86
  # outputs="html",
87
  title=f"radio-mlbee {__version__}",
88
  description="mlbee rest api on dev ",
89
  examples=[
90
+ # [text1, text2, False],
91
+ [text1[:len(text1) // 5], text2[:len(text2) // 5], False],
92
  ]
93
  )
94
 
poetry.lock CHANGED
@@ -317,6 +317,31 @@ dev = ["python-jose[cryptography] (>=3.3.0,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<
317
  doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "typer (>=0.4.1,<0.5.0)", "pyyaml (>=5.3.1,<7.0.0)"]
318
  test = ["pytest (>=6.2.4,<7.0.0)", "pytest-cov (>=2.12.0,<4.0.0)", "mypy (==0.910)", "flake8 (>=3.8.3,<4.0.0)", "black (==22.3.0)", "isort (>=5.0.6,<6.0.0)", "requests (>=2.24.0,<3.0.0)", "httpx (>=0.14.0,<0.19.0)", "email_validator (>=1.1.1,<2.0.0)", "sqlalchemy (>=1.3.18,<1.5.0)", "peewee (>=3.13.3,<4.0.0)", "databases[sqlite] (>=0.3.2,<0.6.0)", "orjson (>=3.2.1,<4.0.0)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "flask (>=1.1.2,<3.0.0)", "anyio[trio] (>=3.2.1,<4.0.0)", "types-ujson (==4.2.1)", "types-orjson (==3.6.2)", "types-dataclasses (==0.6.5)"]
319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  [[package]]
321
  name = "ffmpy"
322
  version = "0.3.0"
@@ -683,6 +708,17 @@ category = "main"
683
  optional = false
684
  python-versions = ">=3.5"
685
 
 
 
 
 
 
 
 
 
 
 
 
686
  [[package]]
687
  name = "multidict"
688
  version = "6.0.2"
@@ -833,6 +869,17 @@ category = "dev"
833
  optional = false
834
  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
835
 
 
 
 
 
 
 
 
 
 
 
 
836
  [[package]]
837
  name = "pycodestyle"
838
  version = "2.7.0"
@@ -1063,6 +1110,48 @@ python-versions = ">=3.7"
1063
  [package.dependencies]
1064
  numpy = ">=1.16.5"
1065
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1066
  [[package]]
1067
  name = "sentence-transformers"
1068
  version = "2.2.0"
@@ -1103,6 +1192,18 @@ python-versions = ">=3.8.3,<4.0.0"
1103
  environs = ">=9.5.0,<10.0.0"
1104
  logzero = ">=1.7.0,<2.0.0"
1105
 
 
 
 
 
 
 
 
 
 
 
 
 
1106
  [[package]]
1107
  name = "setuptools-scm"
1108
  version = "6.4.2"
@@ -1388,6 +1489,19 @@ h11 = ">=0.8"
1388
  [package.extras]
1389
  standard = ["websockets (>=10.0)", "httptools (>=0.4.0)", "watchgod (>=0.6)", "python-dotenv (>=0.13)", "PyYAML (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "colorama (>=0.4)"]
1390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1391
  [[package]]
1392
  name = "yarl"
1393
  version = "1.7.2"
@@ -1403,7 +1517,7 @@ multidict = ">=4.0"
1403
  [metadata]
1404
  lock-version = "1.1"
1405
  python-versions = "^3.8.3"
1406
- content-hash = "2d3903058f606b1b8e7ef9af655113bd876da2a06d6a5bb9119cda5e2a2c65ba"
1407
 
1408
  [metadata.files]
1409
  about-time = [
@@ -1684,6 +1798,13 @@ fastapi = [
1684
  {file = "fastapi-0.78.0-py3-none-any.whl", hash = "sha256:15fcabd5c78c266fa7ae7d8de9b384bfc2375ee0503463a6febbe3bab69d6f65"},
1685
  {file = "fastapi-0.78.0.tar.gz", hash = "sha256:3233d4a789ba018578658e2af1a4bb5e38bdd122ff722b313666a9b2c6786a83"},
1686
  ]
 
 
 
 
 
 
 
1687
  ffmpy = [
1688
  {file = "ffmpy-0.3.0.tar.gz", hash = "sha256:757591581eee25b4a50ac9ffb9b58035a2794533db47e0512f53fb2d7b6f9adc"},
1689
  ]
@@ -1963,6 +2084,10 @@ more-itertools = [
1963
  {file = "more-itertools-8.13.0.tar.gz", hash = "sha256:a42901a0a5b169d925f6f217cd5a190e32ef54360905b9c39ee7db5313bfec0f"},
1964
  {file = "more_itertools-8.13.0-py3-none-any.whl", hash = "sha256:c5122bffc5f104d37c1626b8615b511f3427aa5389b94d61e5ef8236bfbc3ddb"},
1965
  ]
 
 
 
 
1966
  multidict = [
1967
  {file = "multidict-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b9e95a740109c6047602f4db4da9949e6c5945cefbad34a1299775ddc9a62e2"},
1968
  {file = "multidict-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac0e27844758d7177989ce406acc6a83c16ed4524ebc363c1f748cba184d89d3"},
@@ -2174,6 +2299,10 @@ py = [
2174
  {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
2175
  {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
2176
  ]
 
 
 
 
2177
  pycodestyle = [
2178
  {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"},
2179
  {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"},
@@ -2458,6 +2587,18 @@ scipy = [
2458
  {file = "scipy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f"},
2459
  {file = "scipy-1.6.1.tar.gz", hash = "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11"},
2460
  ]
 
 
 
 
 
 
 
 
 
 
 
 
2461
  sentence-transformers = [
2462
  {file = "sentence-transformers-2.2.0.tar.gz", hash = "sha256:c7365228b6d24e4d73cf49d409421820887ca43c86d49f264b68cfa00d761f96"},
2463
  ]
@@ -2511,6 +2652,10 @@ set-loglevel = [
2511
  {file = "set_loglevel-0.1.2-py3-none-any.whl", hash = "sha256:fcfe76ccd3791511e2cb51ec11ded6cc2186a727e441a934c547a211d0fdf773"},
2512
  {file = "set_loglevel-0.1.2.tar.gz", hash = "sha256:4da23414a798cea918801b113e17af092f13ccda96345203b14042b6d6d3a896"},
2513
  ]
 
 
 
 
2514
  setuptools-scm = [
2515
  {file = "setuptools_scm-6.4.2-py3-none-any.whl", hash = "sha256:acea13255093849de7ccb11af9e1fb8bde7067783450cee9ef7a93139bddf6d4"},
2516
  {file = "setuptools_scm-6.4.2.tar.gz", hash = "sha256:6833ac65c6ed9711a4d5d2266f8024cfa07c533a0e55f4c12f6eff280a5a9e30"},
@@ -2635,6 +2780,18 @@ uvicorn = [
2635
  {file = "uvicorn-0.17.6-py3-none-any.whl", hash = "sha256:19e2a0e96c9ac5581c01eb1a79a7d2f72bb479691acd2b8921fce48ed5b961a6"},
2636
  {file = "uvicorn-0.17.6.tar.gz", hash = "sha256:5180f9d059611747d841a4a4c4ab675edf54c8489e97f96d0583ee90ac3bfc23"},
2637
  ]
 
 
 
 
 
 
 
 
 
 
 
 
2638
  yarl = [
2639
  {file = "yarl-1.7.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f2a8508f7350512434e41065684076f640ecce176d262a7d54f0da41d99c5a95"},
2640
  {file = "yarl-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da6df107b9ccfe52d3a48165e48d72db0eca3e3029b5b8cb4fe6ee3cb870ba8b"},
 
317
  doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "typer (>=0.4.1,<0.5.0)", "pyyaml (>=5.3.1,<7.0.0)"]
318
  test = ["pytest (>=6.2.4,<7.0.0)", "pytest-cov (>=2.12.0,<4.0.0)", "mypy (==0.910)", "flake8 (>=3.8.3,<4.0.0)", "black (==22.3.0)", "isort (>=5.0.6,<6.0.0)", "requests (>=2.24.0,<3.0.0)", "httpx (>=0.14.0,<0.19.0)", "email_validator (>=1.1.1,<2.0.0)", "sqlalchemy (>=1.3.18,<1.5.0)", "peewee (>=3.13.3,<4.0.0)", "databases[sqlite] (>=0.3.2,<0.6.0)", "orjson (>=3.2.1,<4.0.0)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "flask (>=1.1.2,<3.0.0)", "anyio[trio] (>=3.2.1,<4.0.0)", "types-ujson (==4.2.1)", "types-orjson (==3.6.2)", "types-dataclasses (==0.6.5)"]
319
 
320
+ [[package]]
321
+ name = "fastlid"
322
+ version = "0.1.7"
323
+ description = "Detect languages via a fasttext model"
324
+ category = "main"
325
+ optional = false
326
+ python-versions = ">=3.6,<4.0"
327
+
328
+ [package.dependencies]
329
+ fasttext = ">=0.9.2,<0.10.0"
330
+ logzero = ">=1.7.0,<2.0.0"
331
+ numpy = ">=1.20.3,<2.0.0"
332
+
333
+ [[package]]
334
+ name = "fasttext"
335
+ version = "0.9.2"
336
+ description = "fasttext Python bindings"
337
+ category = "main"
338
+ optional = false
339
+ python-versions = "*"
340
+
341
+ [package.dependencies]
342
+ numpy = "*"
343
+ pybind11 = ">=2.2"
344
+
345
  [[package]]
346
  name = "ffmpy"
347
  version = "0.3.0"
 
708
  optional = false
709
  python-versions = ">=3.5"
710
 
711
+ [[package]]
712
+ name = "morfessor"
713
+ version = "2.0.6"
714
+ description = "Morfessor"
715
+ category = "main"
716
+ optional = false
717
+ python-versions = "*"
718
+
719
+ [package.extras]
720
+ docs = ["sphinx", "sphinxcontrib-napoleon"]
721
+
722
  [[package]]
723
  name = "multidict"
724
  version = "6.0.2"
 
869
  optional = false
870
  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
871
 
872
+ [[package]]
873
+ name = "pybind11"
874
+ version = "2.9.2"
875
+ description = "Seamless operability between C++11 and Python"
876
+ category = "main"
877
+ optional = false
878
+ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
879
+
880
+ [package.extras]
881
+ global = ["pybind11-global (==2.9.2)"]
882
+
883
  [[package]]
884
  name = "pycodestyle"
885
  version = "2.7.0"
 
1110
  [package.dependencies]
1111
  numpy = ">=1.16.5"
1112
 
1113
+ [[package]]
1114
+ name = "seg-text"
1115
+ version = "0.1.2"
1116
+ description = "pack_name descr "
1117
+ category = "main"
1118
+ optional = false
1119
+ python-versions = ">=3.8,<4.0"
1120
+
1121
+ [package.dependencies]
1122
+ fastlid = ">=0.1.7,<0.2.0"
1123
+ icecream = ">=2.1.1,<3.0.0"
1124
+ install = ">=1.3.5,<2.0.0"
1125
+ logzero = ">=1.7.0,<2.0.0"
1126
+ Morfessor = "2.0.6"
1127
+ numpy = ">=1.22.2,<2.0.0"
1128
+ sentence-splitter = ">=1.4,<2.0"
1129
+ tqdm = ">=4.62.3,<5.0.0"
1130
+ vtext = ">=0.2.0,<0.3.0"
1131
+
1132
+ [[package]]
1133
+ name = "semantic-version"
1134
+ version = "2.10.0"
1135
+ description = "A library implementing the 'SemVer' scheme."
1136
+ category = "main"
1137
+ optional = false
1138
+ python-versions = ">=2.7"
1139
+
1140
+ [package.extras]
1141
+ dev = ["Django (>=1.11)", "nose2", "tox", "check-manifest", "coverage", "flake8", "wheel", "zest.releaser", "readme-renderer (<25.0)", "colorama (<=0.4.1)"]
1142
+ doc = ["sphinx", "sphinx-rtd-theme"]
1143
+
1144
+ [[package]]
1145
+ name = "sentence-splitter"
1146
+ version = "1.4"
1147
+ description = "Text to sentence splitter using heuristic algorithm by Philipp Koehn and Josh Schroeder"
1148
+ category = "main"
1149
+ optional = false
1150
+ python-versions = ">=3.5"
1151
+
1152
+ [package.dependencies]
1153
+ regex = ">=2017.12.12"
1154
+
1155
  [[package]]
1156
  name = "sentence-transformers"
1157
  version = "2.2.0"
 
1192
  environs = ">=9.5.0,<10.0.0"
1193
  logzero = ">=1.7.0,<2.0.0"
1194
 
1195
+ [[package]]
1196
+ name = "setuptools-rust"
1197
+ version = "1.3.0"
1198
+ description = "Setuptools Rust extension plugin"
1199
+ category = "main"
1200
+ optional = false
1201
+ python-versions = ">=3.7"
1202
+
1203
+ [package.dependencies]
1204
+ semantic-version = ">=2.8.2,<3"
1205
+ typing-extensions = ">=3.7.4.3"
1206
+
1207
  [[package]]
1208
  name = "setuptools-scm"
1209
  version = "6.4.2"
 
1489
  [package.extras]
1490
  standard = ["websockets (>=10.0)", "httptools (>=0.4.0)", "watchgod (>=0.6)", "python-dotenv (>=0.13)", "PyYAML (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "colorama (>=0.4)"]
1491
 
1492
+ [[package]]
1493
+ name = "vtext"
1494
+ version = "0.2.0"
1495
+ description = "Natural Language Processing in Rust with Python bidings"
1496
+ category = "main"
1497
+ optional = false
1498
+ python-versions = ">=3.6"
1499
+
1500
+ [package.dependencies]
1501
+ numpy = ">=1.15.0"
1502
+ scipy = ">=1.1.0"
1503
+ setuptools-rust = ">=0.10.2"
1504
+
1505
  [[package]]
1506
  name = "yarl"
1507
  version = "1.7.2"
 
1517
  [metadata]
1518
  lock-version = "1.1"
1519
  python-versions = "^3.8.3"
1520
+ content-hash = "8a3e87bc76faae5ea5b48c1e0cdb5bf98a1b671e2d90baa30696f228d8a6113b"
1521
 
1522
  [metadata.files]
1523
  about-time = [
 
1798
  {file = "fastapi-0.78.0-py3-none-any.whl", hash = "sha256:15fcabd5c78c266fa7ae7d8de9b384bfc2375ee0503463a6febbe3bab69d6f65"},
1799
  {file = "fastapi-0.78.0.tar.gz", hash = "sha256:3233d4a789ba018578658e2af1a4bb5e38bdd122ff722b313666a9b2c6786a83"},
1800
  ]
1801
+ fastlid = [
1802
+ {file = "fastlid-0.1.7-py3-none-any.whl", hash = "sha256:591dbee44ac501c9aa89abb97a13b11cf964c3b8c4add1bdf02b44d30463e18f"},
1803
+ {file = "fastlid-0.1.7.tar.gz", hash = "sha256:a6693ea05b9e070b4656ce9320704688c0c0c6f09bf873d0add5184e96bdb055"},
1804
+ ]
1805
+ fasttext = [
1806
+ {file = "fasttext-0.9.2.tar.gz", hash = "sha256:665556f1f6dcb4fcbe25fa8ebcd4f71b18fa96a090de09d88d97a60cbd29dcb5"},
1807
+ ]
1808
  ffmpy = [
1809
  {file = "ffmpy-0.3.0.tar.gz", hash = "sha256:757591581eee25b4a50ac9ffb9b58035a2794533db47e0512f53fb2d7b6f9adc"},
1810
  ]
 
2084
  {file = "more-itertools-8.13.0.tar.gz", hash = "sha256:a42901a0a5b169d925f6f217cd5a190e32ef54360905b9c39ee7db5313bfec0f"},
2085
  {file = "more_itertools-8.13.0-py3-none-any.whl", hash = "sha256:c5122bffc5f104d37c1626b8615b511f3427aa5389b94d61e5ef8236bfbc3ddb"},
2086
  ]
2087
+ morfessor = [
2088
+ {file = "Morfessor-2.0.6-py3-none-any.whl", hash = "sha256:7215e37909ebd2bafeeec5fdf4e339a25e61aee4895ff99317b9fb44eddab562"},
2089
+ {file = "Morfessor-2.0.6.tar.gz", hash = "sha256:bb3beac234341724c5f640f65803071f62373a50dba854d5a398567f9aefbab2"},
2090
+ ]
2091
  multidict = [
2092
  {file = "multidict-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b9e95a740109c6047602f4db4da9949e6c5945cefbad34a1299775ddc9a62e2"},
2093
  {file = "multidict-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac0e27844758d7177989ce406acc6a83c16ed4524ebc363c1f748cba184d89d3"},
 
2299
  {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
2300
  {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
2301
  ]
2302
+ pybind11 = [
2303
+ {file = "pybind11-2.9.2-py2.py3-none-any.whl", hash = "sha256:20f56674da31c96bca7569b91e60f2bd30d693f0728541412ec927574f7bc9df"},
2304
+ {file = "pybind11-2.9.2.tar.gz", hash = "sha256:e5541f8bccf9111d1a94f7897593b55c4cf1a28d5e8cfc8225a855651f011071"},
2305
+ ]
2306
  pycodestyle = [
2307
  {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"},
2308
  {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"},
 
2587
  {file = "scipy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f"},
2588
  {file = "scipy-1.6.1.tar.gz", hash = "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11"},
2589
  ]
2590
+ seg-text = [
2591
+ {file = "seg_text-0.1.2-py3-none-any.whl", hash = "sha256:9e67af219b81259d916a11708799ef52bb7d765f9d8010028dd8d48e053eda17"},
2592
+ {file = "seg_text-0.1.2.tar.gz", hash = "sha256:37332d6fa755659aba3d93faa1248242c56a5a84e1f9332990802226ad9c4ca8"},
2593
+ ]
2594
+ semantic-version = [
2595
+ {file = "semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177"},
2596
+ {file = "semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c"},
2597
+ ]
2598
+ sentence-splitter = [
2599
+ {file = "sentence_splitter-1.4-py2.py3-none-any.whl", hash = "sha256:5645a3ad9c348e4287f4bc73bd573d92dccd4139042fddd51fff0591f1376763"},
2600
+ {file = "sentence_splitter-1.4.tar.gz", hash = "sha256:3d1d773d07cc733ca2955aa87d0fa1c0a7274c6bdeec1daac5c5e92efb512f63"},
2601
+ ]
2602
  sentence-transformers = [
2603
  {file = "sentence-transformers-2.2.0.tar.gz", hash = "sha256:c7365228b6d24e4d73cf49d409421820887ca43c86d49f264b68cfa00d761f96"},
2604
  ]
 
2652
  {file = "set_loglevel-0.1.2-py3-none-any.whl", hash = "sha256:fcfe76ccd3791511e2cb51ec11ded6cc2186a727e441a934c547a211d0fdf773"},
2653
  {file = "set_loglevel-0.1.2.tar.gz", hash = "sha256:4da23414a798cea918801b113e17af092f13ccda96345203b14042b6d6d3a896"},
2654
  ]
2655
+ setuptools-rust = [
2656
+ {file = "setuptools-rust-1.3.0.tar.gz", hash = "sha256:958c5bf4ab6483d59dab888538121871cc5006354a42fb0fbd50acf03caad1de"},
2657
+ {file = "setuptools_rust-1.3.0-py3-none-any.whl", hash = "sha256:7ead7398d6b6fe70a7743408dc2f7257dbcb8ca9b2d7a9f8b281c09bd86f36a5"},
2658
+ ]
2659
  setuptools-scm = [
2660
  {file = "setuptools_scm-6.4.2-py3-none-any.whl", hash = "sha256:acea13255093849de7ccb11af9e1fb8bde7067783450cee9ef7a93139bddf6d4"},
2661
  {file = "setuptools_scm-6.4.2.tar.gz", hash = "sha256:6833ac65c6ed9711a4d5d2266f8024cfa07c533a0e55f4c12f6eff280a5a9e30"},
 
2780
  {file = "uvicorn-0.17.6-py3-none-any.whl", hash = "sha256:19e2a0e96c9ac5581c01eb1a79a7d2f72bb479691acd2b8921fce48ed5b961a6"},
2781
  {file = "uvicorn-0.17.6.tar.gz", hash = "sha256:5180f9d059611747d841a4a4c4ab675edf54c8489e97f96d0583ee90ac3bfc23"},
2782
  ]
2783
+ vtext = [
2784
+ {file = "vtext-0.2.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:1791aad4a999525a7c19ae25ffdeb491839e81e958995567151a3bf8012c32ff"},
2785
+ {file = "vtext-0.2.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:7ddde99b3153f7bf439b06f69f221c59945b1ce103368ce3a4957e7112ab904b"},
2786
+ {file = "vtext-0.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:c54d2b4496afa0d8687345b2b89bed7e9aa03b223f0dc58ac923348d0f879a2c"},
2787
+ {file = "vtext-0.2.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:397823cda22d04de43312e27cbe74be4318c20ec2ef38df9c66493580be06ec8"},
2788
+ {file = "vtext-0.2.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:be3d75845af06d92af9fb65dde8c37ea890f8ed00bb236884fe3b8e2c4b08e32"},
2789
+ {file = "vtext-0.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1fa5b18b31637ce012fdfddb1c6a207989320bcf246d5f131695c9fc92b2a32c"},
2790
+ {file = "vtext-0.2.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:c7a7826a44b81e9d1779bc800a5ee133647c7943c52b434ae8415df18933f77f"},
2791
+ {file = "vtext-0.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:eb37f4b72cf754ff20323f11519da9d3864c7f0a428be847da2ed55a3665cc44"},
2792
+ {file = "vtext-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:02c1dbefd2b6fd3522a96a9bd8f8e85ae4722ee088e2d952bbec830b0e88727c"},
2793
+ {file = "vtext-0.2.0.tar.gz", hash = "sha256:0ce1b0bb7e1cc0adcf5c8064757adaa6ea7bf52e366a3d30d2eac0588145f0e6"},
2794
+ ]
2795
  yarl = [
2796
  {file = "yarl-1.7.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f2a8508f7350512434e41065684076f640ecce176d262a7d54f0da41d99c5a95"},
2797
  {file = "yarl-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da6df107b9ccfe52d3a48165e48d72db0eca3e3029b5b8cb4fe6ee3cb870ba8b"},
pyproject.toml CHANGED
@@ -25,6 +25,7 @@ cmat2aset = "^0.1.0-alpha.7"
25
  more-itertools = "^8.13.0"
26
  cchardet = "^2.1.7"
27
  typer = "^0.4.1"
 
28
 
29
  [tool.poe.executor]
30
  type = "poetry"
 
25
  more-itertools = "^8.13.0"
26
  cchardet = "^2.1.7"
27
  typer = "^0.4.1"
28
+ seg-text = "^0.1.2"
29
 
30
  [tool.poe.executor]
31
  type = "poetry"