Update mdr_pdf_parser.py
Browse files- mdr_pdf_parser.py +0 -20
mdr_pdf_parser.py
CHANGED
@@ -18,26 +18,6 @@
|
|
18 |
|
19 |
|
20 |
|
21 |
-
# -*- coding: utf-8 -*-
|
22 |
-
# /=====================================================================\ #
|
23 |
-
# | MagicDataReadiness - MAGIC PDF Parser | #
|
24 |
-
# |---------------------------------------------------------------------| #
|
25 |
-
# | Description: | #
|
26 |
-
# | Extracts structured content (text, tables, figures, formulas) | #
|
27 |
-
# | from PDF documents using layout analysis and OCR. | #
|
28 |
-
# | Combines logic from various internal components. | #
|
29 |
-
# |---------------------------------------------------------------------| #
|
30 |
-
# | Dependencies: | #
|
31 |
-
# | - Python 3.11+ | #
|
32 |
-
# | - External Libraries (See imports below and installation notes) | #
|
33 |
-
# | - Pre-trained CV Models (Downloaded automatically to model dir) | #
|
34 |
-
# |---------------------------------------------------------------------| #
|
35 |
-
# | Usage: | #
|
36 |
-
# | See the __main__ block at the end of the script for an example. | #
|
37 |
-
# \=====================================================================/ #
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
# --- External Library Imports ---
|
42 |
import os
|
43 |
import re
|
|
|
18 |
|
19 |
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
# --- External Library Imports ---
|
22 |
import os
|
23 |
import re
|