Abhinav Gavireddi committed on
Commit
ba6dae6
·
1 Parent(s): fdfb25d

[fix]: fixed pdf parsing

Browse files
Files changed (2) hide show
  1. requirements.in +48 -0
  2. requirements.txt +789 -47
requirements.in ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core
2
+ streamlit>=1.25.0
3
+ sentence-transformers>=2.2.2 # Re-enabled for local embeddings
4
+ # rank-bm25>=0.2.2 - Replaced by ChromaDB
5
+ # hnswlib>=0.7.0 - Replaced by ChromaDB
6
+ chromadb>=0.4.18
7
+ huggingface-hub>=0.16.4
8
+ langchain>=0.1.9
9
+ langchain-openai>=0.1.9
10
+ python-dotenv>=1.0.0
11
+ structlog>=23.1.0
12
+ bleach>=6.0.0
13
+ werkzeug>=2.0.0
14
+ boto3>=1.28.43
15
+ Brotli>=1.1.0
16
+ click>=8.1.7
17
+ PyMuPDF>=1.24.9,<1.25.0
18
+ loguru>=0.6.0
19
+ numpy>=1.21.6,<2.0.0
20
+ fast-langdetect>=0.2.3,<0.3.0
21
+ scikit-learn>=1.0.2
22
+ pdfminer.six>=20231228
23
+ torch>=2.6.0
24
+ torchvision
25
+ # matplotlib>=3.10 - Removed, not used in the app
26
+ ultralytics>=8.3.48
27
+ rapid-table>=1.0.3,<2.0.0
28
+ doclayout-yolo==0.0.2b1
29
+ dill>=0.3.9,<1
30
+ PyYAML>=6.0.2,<7
31
+ ftfy>=6.3.1,<7
32
+ openai>=1.70.0,<2
33
+ pydantic>=2.7.2,<2.11
34
+ # transformers>=4.49.0,<5.0.0 - Removed as reranker is disabled
35
+ gradio-pdf>=0.0.21
36
+ shapely>=2.0.7,<3
37
+ pyclipper>=1.3.0,<2
38
+ omegaconf>=2.3.0,<3
39
+ tqdm>=4.67.1
40
+ # MinerU
41
+ git+https://github.com/opendatalab/MinerU.git@dev
42
+ chroma-hnswlib>=0.7.3
43
+ chromadb>=0.4.24
44
+ PyMuPDF>=1.23.26
45
+ tiktoken>=0.6.0
46
+ loguru>=0.7.2
47
+ unstructured>=0.12.6
48
+ magic_pdf>=0.9.12
requirements.txt CHANGED
@@ -1,48 +1,790 @@
1
- # Core
2
- streamlit>=1.25.0
3
- sentence-transformers>=2.2.2 # Re-enabled for local embeddings
4
- # rank-bm25>=0.2.2 - Replaced by ChromaDB
5
- # hnswlib>=0.7.0 - Replaced by ChromaDB
6
- chromadb>=0.4.18
7
- huggingface-hub>=0.16.4
8
- langchain>=0.1.9
9
- langchain-openai>=0.1.9
10
- python-dotenv>=1.0.0
11
- structlog>=23.1.0
12
- bleach>=6.0.0
13
- werkzeug>=2.0.0
14
- boto3>=1.28.43
15
- Brotli>=1.1.0
16
- click>=8.1.7
17
- PyMuPDF>=1.24.9,<1.25.0
18
- loguru>=0.6.0
19
- numpy>=1.21.6,<2.0.0
20
- fast-langdetect>=0.2.3,<0.3.0
21
- scikit-learn>=1.0.2
22
- pdfminer.six>=20231228
23
- torch>=2.6.0
24
- torchvision
25
- # matplotlib>=3.10 - Removed, not used in the app
26
- ultralytics>=8.3.48
27
- rapid-table>=1.0.3,<2.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  doclayout-yolo==0.0.2b1
29
- dill>=0.3.9,<1
30
- PyYAML>=6.0.2,<7
31
- ftfy>=6.3.1,<7
32
- openai>=1.70.0,<2
33
- pydantic>=2.7.2,<2.11
34
- # transformers>=4.49.0,<5.0.0 - Removed as reranker is disabled
35
- gradio-pdf>=0.0.21
36
- shapely>=2.0.7,<3
37
- pyclipper>=1.3.0,<2
38
- omegaconf>=2.3.0,<3
39
- tqdm>=4.67.1
40
- # MinerU
41
- git+https://github.com/opendatalab/MinerU.git@dev
42
- chroma-hnswlib>=0.7.3
43
- chromadb>=0.4.24
44
- PyMuPDF>=1.23.26
45
- tiktoken>=0.6.0
46
- loguru>=0.7.2
47
- unstructured>=0.12.6
48
- magic_pdf>=0.9.12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile requirements.in -o requirements.txt
3
+ aiofiles==24.1.0
4
+ # via
5
+ # gradio
6
+ # unstructured-client
7
+ albucore==0.0.24
8
+ # via albumentations
9
+ albumentations==2.0.8
10
+ # via doclayout-yolo
11
+ altair==5.5.0
12
+ # via streamlit
13
+ annotated-types==0.7.0
14
+ # via pydantic
15
+ antlr4-python3-runtime==4.9.3
16
+ # via omegaconf
17
+ anyio==4.9.0
18
+ # via
19
+ # gradio
20
+ # httpx
21
+ # openai
22
+ # starlette
23
+ # watchfiles
24
+ attrs==25.3.0
25
+ # via
26
+ # jsonschema
27
+ # referencing
28
+ backoff==1.11.1
29
+ # via
30
+ # opentelemetry-exporter-otlp-proto-grpc
31
+ # posthog
32
+ # unstructured
33
+ bcrypt==4.3.0
34
+ # via chromadb
35
+ beautifulsoup4==4.13.4
36
+ # via unstructured
37
+ bleach==6.2.0
38
+ # via -r requirements.in
39
+ blinker==1.9.0
40
+ # via streamlit
41
+ boto3==1.38.40
42
+ # via
43
+ # -r requirements.in
44
+ # magic-pdf
45
+ # mineru
46
+ botocore==1.38.40
47
+ # via
48
+ # boto3
49
+ # s3transfer
50
+ brotli==1.1.0
51
+ # via
52
+ # -r requirements.in
53
+ # magic-pdf
54
+ build==1.2.2.post1
55
+ # via chromadb
56
+ cachetools==6.1.0
57
+ # via
58
+ # google-auth
59
+ # streamlit
60
+ certifi==2025.6.15
61
+ # via
62
+ # httpcore
63
+ # httpx
64
+ # kubernetes
65
+ # requests
66
+ cffi==1.17.1
67
+ # via cryptography
68
+ chardet==5.2.0
69
+ # via unstructured
70
+ charset-normalizer==3.4.2
71
+ # via
72
+ # pdfminer-six
73
+ # reportlab
74
+ # requests
75
+ chroma-hnswlib==0.7.6
76
+ # via -r requirements.in
77
+ chromadb==1.0.13
78
+ # via -r requirements.in
79
+ click==8.2.1
80
+ # via
81
+ # -r requirements.in
82
+ # magic-pdf
83
+ # mineru
84
+ # nltk
85
+ # pdftext
86
+ # python-oxmsg
87
+ # streamlit
88
+ # typer
89
+ # uvicorn
90
+ coloredlogs==15.0.1
91
+ # via onnxruntime
92
+ colorlog==6.9.0
93
+ # via
94
+ # rapid-table
95
+ # robust-downloader
96
+ contourpy==1.3.2
97
+ # via matplotlib
98
+ cryptography==45.0.4
99
+ # via
100
+ # pdfminer-six
101
+ # unstructured-client
102
+ cycler==0.12.1
103
+ # via matplotlib
104
+ dataclasses-json==0.6.7
105
+ # via unstructured
106
+ dill==0.4.0
107
+ # via -r requirements.in
108
+ distro==1.9.0
109
+ # via
110
+ # openai
111
+ # posthog
112
  doclayout-yolo==0.0.2b1
113
+ # via -r requirements.in
114
+ durationpy==0.10
115
+ # via kubernetes
116
+ emoji==2.14.1
117
+ # via unstructured
118
+ eval-type-backport==0.2.2
119
+ # via unstructured-client
120
+ fast-langdetect==0.2.5
121
+ # via
122
+ # -r requirements.in
123
+ # magic-pdf
124
+ fastapi==0.115.13
125
+ # via gradio
126
+ fasttext-predict==0.9.2.4
127
+ # via fast-langdetect
128
+ ffmpy==0.6.0
129
+ # via gradio
130
+ filelock==3.18.0
131
+ # via
132
+ # huggingface-hub
133
+ # torch
134
+ # transformers
135
+ filetype==1.2.0
136
+ # via unstructured
137
+ flatbuffers==25.2.10
138
+ # via onnxruntime
139
+ fonttools==4.58.4
140
+ # via matplotlib
141
+ fsspec==2025.5.1
142
+ # via
143
+ # gradio-client
144
+ # huggingface-hub
145
+ # torch
146
+ ftfy==6.3.1
147
+ # via -r requirements.in
148
+ gitdb==4.0.12
149
+ # via gitpython
150
+ gitpython==3.1.44
151
+ # via streamlit
152
+ google-auth==1.6.3
153
+ # via kubernetes
154
+ googleapis-common-protos==1.70.0
155
+ # via opentelemetry-exporter-otlp-proto-grpc
156
+ gradio==5.34.2
157
+ # via gradio-pdf
158
+ gradio-client==1.10.3
159
+ # via gradio
160
+ gradio-pdf==0.0.22
161
+ # via -r requirements.in
162
+ groovy==0.1.2
163
+ # via gradio
164
+ grpcio==1.73.0
165
+ # via
166
+ # chromadb
167
+ # opentelemetry-exporter-otlp-proto-grpc
168
+ h11==0.16.0
169
+ # via
170
+ # httpcore
171
+ # uvicorn
172
+ hf-xet==1.1.4
173
+ # via huggingface-hub
174
+ html5lib==1.1
175
+ # via unstructured
176
+ httpcore==1.0.9
177
+ # via httpx
178
+ httptools==0.6.4
179
+ # via uvicorn
180
+ httpx==0.28.1
181
+ # via
182
+ # chromadb
183
+ # gradio
184
+ # gradio-client
185
+ # langsmith
186
+ # mineru
187
+ # openai
188
+ # safehttpx
189
+ # unstructured-client
190
+ huggingface-hub==0.33.0
191
+ # via
192
+ # -r requirements.in
193
+ # gradio
194
+ # gradio-client
195
+ # mineru
196
+ # sentence-transformers
197
+ # tokenizers
198
+ # transformers
199
+ humanfriendly==10.0
200
+ # via coloredlogs
201
+ idna==3.10
202
+ # via
203
+ # anyio
204
+ # httpx
205
+ # requests
206
+ importlib-metadata==8.7.0
207
+ # via opentelemetry-api
208
+ importlib-resources==6.5.2
209
+ # via chromadb
210
+ jinja2==3.1.6
211
+ # via
212
+ # altair
213
+ # gradio
214
+ # pydeck
215
+ # torch
216
+ jiter==0.10.0
217
+ # via openai
218
+ jmespath==1.0.1
219
+ # via
220
+ # boto3
221
+ # botocore
222
+ joblib==1.5.1
223
+ # via
224
+ # nltk
225
+ # scikit-learn
226
+ json-repair==0.47.1
227
+ # via mineru
228
+ jsonpatch==1.33
229
+ # via langchain-core
230
+ jsonpointer==3.0.0
231
+ # via jsonpatch
232
+ jsonschema==4.24.0
233
+ # via
234
+ # altair
235
+ # chromadb
236
+ jsonschema-specifications==2025.4.1
237
+ # via jsonschema
238
+ kiwisolver==1.4.8
239
+ # via matplotlib
240
+ kubernetes==33.1.0
241
+ # via chromadb
242
+ langchain==0.3.25
243
+ # via -r requirements.in
244
+ langchain-core==0.3.65
245
+ # via
246
+ # langchain
247
+ # langchain-openai
248
+ # langchain-text-splitters
249
+ langchain-openai==0.3.24
250
+ # via -r requirements.in
251
+ langchain-text-splitters==0.3.8
252
+ # via langchain
253
+ langdetect==1.0.9
254
+ # via unstructured
255
+ langsmith==0.3.45
256
+ # via
257
+ # langchain
258
+ # langchain-core
259
+ loguru==0.7.3
260
+ # via
261
+ # -r requirements.in
262
+ # magic-pdf
263
+ # mineru
264
+ lxml==5.4.0
265
+ # via unstructured
266
+ magic-pdf==1.3.12
267
+ # via -r requirements.in
268
+ markdown-it-py==3.0.0
269
+ # via rich
270
+ markupsafe==3.0.2
271
+ # via
272
+ # gradio
273
+ # jinja2
274
+ # werkzeug
275
+ marshmallow==3.26.1
276
+ # via dataclasses-json
277
+ matplotlib==3.10.3
278
+ # via
279
+ # doclayout-yolo
280
+ # seaborn
281
+ # ultralytics
282
+ mdurl==0.1.2
283
+ # via markdown-it-py
284
+ mineru @ git+https://github.com/opendatalab/MinerU.git@6162ae2be150b53ea755fbc06c67f815f38e2ea6
285
+ # via -r requirements.in
286
+ mmh3==5.1.0
287
+ # via chromadb
288
+ modelscope==1.27.0
289
+ # via mineru
290
+ mpmath==1.3.0
291
+ # via sympy
292
+ mypy-extensions==1.1.0
293
+ # via typing-inspect
294
+ narwhals==1.43.1
295
+ # via altair
296
+ nest-asyncio==1.6.0
297
+ # via unstructured-client
298
+ networkx==3.5
299
+ # via torch
300
+ nltk==3.9.1
301
+ # via unstructured
302
+ numpy==1.26.4
303
+ # via
304
+ # -r requirements.in
305
+ # albucore
306
+ # albumentations
307
+ # chroma-hnswlib
308
+ # chromadb
309
+ # contourpy
310
+ # gradio
311
+ # magic-pdf
312
+ # matplotlib
313
+ # mineru
314
+ # onnxruntime
315
+ # opencv-python
316
+ # opencv-python-headless
317
+ # pandas
318
+ # pydeck
319
+ # rapid-table
320
+ # scikit-learn
321
+ # scipy
322
+ # seaborn
323
+ # shapely
324
+ # streamlit
325
+ # torchvision
326
+ # transformers
327
+ # ultralytics
328
+ # ultralytics-thop
329
+ # unstructured
330
+ oauthlib==3.3.1
331
+ # via
332
+ # kubernetes
333
+ # requests-oauthlib
334
+ olefile==0.47
335
+ # via python-oxmsg
336
+ omegaconf==2.3.0
337
+ # via -r requirements.in
338
+ onnxruntime==1.22.0
339
+ # via
340
+ # chromadb
341
+ # rapid-table
342
+ openai==1.88.0
343
+ # via
344
+ # -r requirements.in
345
+ # langchain-openai
346
+ opencv-python==4.11.0.86
347
+ # via
348
+ # doclayout-yolo
349
+ # rapid-table
350
+ # ultralytics
351
+ opencv-python-headless==4.11.0.86
352
+ # via
353
+ # albucore
354
+ # albumentations
355
+ opentelemetry-api==1.34.1
356
+ # via
357
+ # chromadb
358
+ # opentelemetry-exporter-otlp-proto-grpc
359
+ # opentelemetry-sdk
360
+ # opentelemetry-semantic-conventions
361
+ opentelemetry-exporter-otlp-proto-grpc==1.11.1
362
+ # via chromadb
363
+ opentelemetry-proto==1.11.1
364
+ # via opentelemetry-exporter-otlp-proto-grpc
365
+ opentelemetry-sdk==1.34.1
366
+ # via
367
+ # chromadb
368
+ # opentelemetry-exporter-otlp-proto-grpc
369
+ opentelemetry-semantic-conventions==0.55b1
370
+ # via opentelemetry-sdk
371
+ orjson==3.10.18
372
+ # via
373
+ # chromadb
374
+ # gradio
375
+ # langsmith
376
+ overrides==7.7.0
377
+ # via chromadb
378
+ packaging==24.2
379
+ # via
380
+ # altair
381
+ # build
382
+ # gradio
383
+ # gradio-client
384
+ # huggingface-hub
385
+ # langchain-core
386
+ # langsmith
387
+ # marshmallow
388
+ # matplotlib
389
+ # onnxruntime
390
+ # streamlit
391
+ # transformers
392
+ pandas==2.3.0
393
+ # via
394
+ # doclayout-yolo
395
+ # gradio
396
+ # seaborn
397
+ # streamlit
398
+ # ultralytics
399
+ pdfminer-six==20250506
400
+ # via
401
+ # -r requirements.in
402
+ # magic-pdf
403
+ # mineru
404
+ pdftext==0.6.3
405
+ # via mineru
406
+ pillow==11.2.1
407
+ # via
408
+ # doclayout-yolo
409
+ # gradio
410
+ # matplotlib
411
+ # mineru
412
+ # rapid-table
413
+ # reportlab
414
+ # sentence-transformers
415
+ # streamlit
416
+ # torchvision
417
+ # ultralytics
418
+ posthog==5.3.0
419
+ # via chromadb
420
+ protobuf==6.31.1
421
+ # via
422
+ # googleapis-common-protos
423
+ # onnxruntime
424
+ # opentelemetry-proto
425
+ # streamlit
426
+ psutil==7.0.0
427
+ # via
428
+ # doclayout-yolo
429
+ # ultralytics
430
+ # unstructured
431
+ py-cpuinfo==9.0.0
432
+ # via
433
+ # doclayout-yolo
434
+ # ultralytics
435
+ pyarrow==20.0.0
436
+ # via streamlit
437
+ pyasn1==0.6.1
438
+ # via
439
+ # pyasn1-modules
440
+ # rsa
441
+ pyasn1-modules==0.4.2
442
+ # via google-auth
443
+ pybase64==1.4.1
444
+ # via chromadb
445
+ pyclipper==1.3.0.post6
446
+ # via -r requirements.in
447
+ pycparser==2.22
448
+ # via cffi
449
+ pydantic==2.10.6
450
+ # via
451
+ # -r requirements.in
452
+ # albumentations
453
+ # chromadb
454
+ # fastapi
455
+ # gradio
456
+ # langchain
457
+ # langchain-core
458
+ # langsmith
459
+ # magic-pdf
460
+ # openai
461
+ # pdftext
462
+ # pydantic-settings
463
+ # unstructured-client
464
+ pydantic-core==2.27.2
465
+ # via pydantic
466
+ pydantic-settings==2.9.1
467
+ # via pdftext
468
+ pydeck==0.9.1
469
+ # via streamlit
470
+ pydub==0.25.1
471
+ # via gradio
472
+ pygments==2.19.1
473
+ # via rich
474
+ pymupdf==1.24.14
475
+ # via
476
+ # -r requirements.in
477
+ # magic-pdf
478
+ pyparsing==3.2.3
479
+ # via matplotlib
480
+ pypdf==5.6.0
481
+ # via
482
+ # mineru
483
+ # unstructured-client
484
+ pypdfium2==4.30.0
485
+ # via
486
+ # mineru
487
+ # pdftext
488
+ pypika==0.48.9
489
+ # via chromadb
490
+ pyproject-hooks==1.2.0
491
+ # via build
492
+ python-dateutil==2.9.0.post0
493
+ # via
494
+ # botocore
495
+ # kubernetes
496
+ # matplotlib
497
+ # pandas
498
+ # posthog
499
+ # unstructured-client
500
+ python-dotenv==1.1.0
501
+ # via
502
+ # -r requirements.in
503
+ # pydantic-settings
504
+ # uvicorn
505
+ python-iso639==2025.2.18
506
+ # via unstructured
507
+ python-magic==0.4.27
508
+ # via unstructured
509
+ python-multipart==0.0.20
510
+ # via gradio
511
+ python-oxmsg==0.0.2
512
+ # via unstructured
513
+ pytz==2025.2
514
+ # via pandas
515
+ pyyaml==6.0.2
516
+ # via
517
+ # -r requirements.in
518
+ # albumentations
519
+ # chromadb
520
+ # doclayout-yolo
521
+ # gradio
522
+ # huggingface-hub
523
+ # kubernetes
524
+ # langchain
525
+ # langchain-core
526
+ # omegaconf
527
+ # transformers
528
+ # ultralytics
529
+ # uvicorn
530
+ rapid-table==1.0.5
531
+ # via -r requirements.in
532
+ rapidfuzz==3.13.0
533
+ # via unstructured
534
+ referencing==0.36.2
535
+ # via
536
+ # jsonschema
537
+ # jsonschema-specifications
538
+ regex==2024.11.6
539
+ # via
540
+ # nltk
541
+ # tiktoken
542
+ # transformers
543
+ reportlab==4.4.2
544
+ # via mineru
545
+ requests==2.32.4
546
+ # via
547
+ # doclayout-yolo
548
+ # fast-langdetect
549
+ # huggingface-hub
550
+ # kubernetes
551
+ # langchain
552
+ # langsmith
553
+ # mineru
554
+ # modelscope
555
+ # posthog
556
+ # rapid-table
557
+ # requests-oauthlib
558
+ # requests-toolbelt
559
+ # robust-downloader
560
+ # streamlit
561
+ # tiktoken
562
+ # transformers
563
+ # ultralytics
564
+ # unstructured
565
+ requests-oauthlib==2.0.0
566
+ # via kubernetes
567
+ requests-toolbelt==1.0.0
568
+ # via
569
+ # langsmith
570
+ # unstructured-client
571
+ rich==14.0.0
572
+ # via
573
+ # chromadb
574
+ # typer
575
+ robust-downloader==0.0.2
576
+ # via fast-langdetect
577
+ rpds-py==0.25.1
578
+ # via
579
+ # jsonschema
580
+ # referencing
581
+ rsa==4.9.1
582
+ # via google-auth
583
+ ruff==0.12.0
584
+ # via gradio
585
+ s3transfer==0.13.0
586
+ # via boto3
587
+ safehttpx==0.1.6
588
+ # via gradio
589
+ safetensors==0.5.3
590
+ # via transformers
591
+ scikit-learn==1.7.0
592
+ # via
593
+ # -r requirements.in
594
+ # magic-pdf
595
+ # sentence-transformers
596
+ scipy==1.15.3
597
+ # via
598
+ # albumentations
599
+ # doclayout-yolo
600
+ # scikit-learn
601
+ # sentence-transformers
602
+ # ultralytics
603
+ seaborn==0.13.2
604
+ # via doclayout-yolo
605
+ semantic-version==2.10.0
606
+ # via gradio
607
+ sentence-transformers==4.1.0
608
+ # via -r requirements.in
609
+ setuptools==80.9.0
610
+ # via
611
+ # modelscope
612
+ # torch
613
+ shapely==2.1.1
614
+ # via -r requirements.in
615
+ shellingham==1.5.4
616
+ # via typer
617
+ simsimd==6.4.9
618
+ # via albucore
619
+ six==1.17.0
620
+ # via
621
+ # google-auth
622
+ # html5lib
623
+ # kubernetes
624
+ # langdetect
625
+ # posthog
626
+ # python-dateutil
627
+ smmap==5.0.2
628
+ # via gitdb
629
+ sniffio==1.3.1
630
+ # via
631
+ # anyio
632
+ # openai
633
+ soupsieve==2.7
634
+ # via beautifulsoup4
635
+ sqlalchemy==2.0.41
636
+ # via langchain
637
+ starlette==0.46.2
638
+ # via
639
+ # fastapi
640
+ # gradio
641
+ streamlit==1.46.0
642
+ # via -r requirements.in
643
+ stringzilla==3.12.5
644
+ # via albucore
645
+ structlog==25.4.0
646
+ # via -r requirements.in
647
+ sympy==1.14.0
648
+ # via
649
+ # onnxruntime
650
+ # torch
651
+ tenacity==9.1.2
652
+ # via
653
+ # chromadb
654
+ # langchain-core
655
+ # streamlit
656
+ thop==0.1.1.post2209072238
657
+ # via doclayout-yolo
658
+ threadpoolctl==3.6.0
659
+ # via scikit-learn
660
+ tiktoken==0.9.0
661
+ # via
662
+ # -r requirements.in
663
+ # langchain-openai
664
+ tokenizers==0.21.1
665
+ # via
666
+ # chromadb
667
+ # transformers
668
+ toml==0.10.2
669
+ # via streamlit
670
+ tomlkit==0.13.3
671
+ # via gradio
672
+ torch==2.7.1
673
+ # via
674
+ # -r requirements.in
675
+ # doclayout-yolo
676
+ # magic-pdf
677
+ # sentence-transformers
678
+ # thop
679
+ # torchvision
680
+ # ultralytics
681
+ # ultralytics-thop
682
+ torchvision==0.22.1
683
+ # via
684
+ # -r requirements.in
685
+ # doclayout-yolo
686
+ # magic-pdf
687
+ # ultralytics
688
+ tornado==6.5.1
689
+ # via streamlit
690
+ tqdm==4.67.1
691
+ # via
692
+ # -r requirements.in
693
+ # chromadb
694
+ # doclayout-yolo
695
+ # huggingface-hub
696
+ # magic-pdf
697
+ # mineru
698
+ # modelscope
699
+ # nltk
700
+ # openai
701
+ # robust-downloader
702
+ # sentence-transformers
703
+ # transformers
704
+ # ultralytics
705
+ # unstructured
706
+ transformers==4.52.4
707
+ # via
708
+ # magic-pdf
709
+ # sentence-transformers
710
+ typer==0.16.0
711
+ # via
712
+ # chromadb
713
+ # gradio
714
+ typing-extensions==4.14.0
715
+ # via
716
+ # altair
717
+ # anyio
718
+ # beautifulsoup4
719
+ # chromadb
720
+ # fastapi
721
+ # gradio
722
+ # gradio-client
723
+ # huggingface-hub
724
+ # langchain-core
725
+ # openai
726
+ # opentelemetry-api
727
+ # opentelemetry-sdk
728
+ # opentelemetry-semantic-conventions
729
+ # pydantic
730
+ # pydantic-core
731
+ # python-oxmsg
732
+ # referencing
733
+ # sentence-transformers
734
+ # sqlalchemy
735
+ # streamlit
736
+ # torch
737
+ # typer
738
+ # typing-inspect
739
+ # typing-inspection
740
+ # unstructured
741
+ typing-inspect==0.9.0
742
+ # via dataclasses-json
743
+ typing-inspection==0.4.1
744
+ # via
745
+ # pydantic-settings
746
+ # unstructured-client
747
+ tzdata==2025.2
748
+ # via pandas
749
+ ultralytics==8.3.156
750
+ # via -r requirements.in
751
+ ultralytics-thop==2.0.14
752
+ # via ultralytics
753
+ unstructured==0.17.2
754
+ # via -r requirements.in
755
+ unstructured-client==0.32.3
756
+ # via unstructured
757
+ urllib3==2.5.0
758
+ # via
759
+ # botocore
760
+ # kubernetes
761
+ # modelscope
762
+ # requests
763
+ uvicorn==0.34.3
764
+ # via
765
+ # chromadb
766
+ # gradio
767
+ uvloop==0.21.0
768
+ # via uvicorn
769
+ watchfiles==1.1.0
770
+ # via uvicorn
771
+ wcwidth==0.2.13
772
+ # via ftfy
773
+ webencodings==0.5.1
774
+ # via
775
+ # bleach
776
+ # html5lib
777
+ websocket-client==1.8.0
778
+ # via kubernetes
779
+ websockets==15.0.1
780
+ # via
781
+ # gradio-client
782
+ # uvicorn
783
+ werkzeug==3.1.3
784
+ # via -r requirements.in
785
+ wrapt==1.17.2
786
+ # via unstructured
787
+ zipp==3.23.0
788
+ # via importlib-metadata
789
+ zstandard==0.23.0
790
+ # via langsmith