Spaces:
Configuration error
Configuration error
Commit
·
44657b5
1
Parent(s):
e2d718e
docling_serve
Browse filesSigned-off-by: Michele Dolfi <[email protected]>
- .flake8 +7 -0
- .pre-commit-config.yaml +41 -0
- CODE_OF_CONDUCT.md +129 -0
- CONTRIBUTING.md +164 -0
- Containerfile +1 -0
- LICENSE +21 -0
- MAINTAINERS.md +11 -0
- README.md +33 -0
- docling_serve/__init__.py +0 -0
- docling_serve/app.py +96 -0
- docling_serve/py.typed +0 -0
- docling_serve/settings.py +8 -0
- poetry.lock +0 -0
- pyproject.toml +77 -0
- tests/__init__.py +0 -0
.flake8
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[flake8]
|
2 |
+
max-line-length = 88
|
3 |
+
exclude = test/*
|
4 |
+
max-complexity = 18
|
5 |
+
docstring-convention = google
|
6 |
+
ignore = W503,E203
|
7 |
+
classmethod-decorators = classmethod,validator
|
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fail_fast: true
|
2 |
+
repos:
|
3 |
+
- repo: local
|
4 |
+
hooks:
|
5 |
+
- id: system
|
6 |
+
name: Black
|
7 |
+
entry: poetry run black docling_serve tests
|
8 |
+
pass_filenames: false
|
9 |
+
language: system
|
10 |
+
files: '\.py$'
|
11 |
+
- repo: local
|
12 |
+
hooks:
|
13 |
+
- id: system
|
14 |
+
name: isort
|
15 |
+
entry: poetry run isort docling_serve tests
|
16 |
+
pass_filenames: false
|
17 |
+
language: system
|
18 |
+
files: '\.py$'
|
19 |
+
- repo: local
|
20 |
+
hooks:
|
21 |
+
- id: system
|
22 |
+
name: flake8
|
23 |
+
entry: poetry run flake8 docling_serve
|
24 |
+
pass_filenames: false
|
25 |
+
language: system
|
26 |
+
files: '\.py$'
|
27 |
+
- repo: local
|
28 |
+
hooks:
|
29 |
+
- id: system
|
30 |
+
name: MyPy
|
31 |
+
entry: poetry run mypy docling_serve
|
32 |
+
pass_filenames: false
|
33 |
+
language: system
|
34 |
+
files: '\.py$'
|
35 |
+
- repo: local
|
36 |
+
hooks:
|
37 |
+
- id: system
|
38 |
+
name: Poetry check
|
39 |
+
entry: poetry check --lock
|
40 |
+
pass_filenames: false
|
41 |
+
language: system
|
CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Contributor Covenant Code of Conduct
|
2 |
+
|
3 |
+
## Our Pledge
|
4 |
+
|
5 |
+
We as members, contributors, and leaders pledge to make participation in our
|
6 |
+
community a harassment-free experience for everyone, regardless of age, body
|
7 |
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
8 |
+
identity and expression, level of experience, education, socio-economic status,
|
9 |
+
nationality, personal appearance, race, religion, or sexual identity
|
10 |
+
and orientation.
|
11 |
+
|
12 |
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
13 |
+
diverse, inclusive, and healthy community.
|
14 |
+
|
15 |
+
## Our Standards
|
16 |
+
|
17 |
+
Examples of behavior that contributes to a positive environment for our
|
18 |
+
community include:
|
19 |
+
|
20 |
+
* Demonstrating empathy and kindness toward other people
|
21 |
+
* Being respectful of differing opinions, viewpoints, and experiences
|
22 |
+
* Giving and gracefully accepting constructive feedback
|
23 |
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
24 |
+
and learning from the experience
|
25 |
+
* Focusing on what is best not just for us as individuals, but for the
|
26 |
+
overall community
|
27 |
+
|
28 |
+
Examples of unacceptable behavior include:
|
29 |
+
|
30 |
+
* The use of sexualized language or imagery, and sexual attention or
|
31 |
+
advances of any kind
|
32 |
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
33 |
+
* Public or private harassment
|
34 |
+
* Publishing others' private information, such as a physical or email
|
35 |
+
address, without their explicit permission
|
36 |
+
* Other conduct which could reasonably be considered inappropriate in a
|
37 |
+
professional setting
|
38 |
+
|
39 |
+
## Enforcement Responsibilities
|
40 |
+
|
41 |
+
Community leaders are responsible for clarifying and enforcing our standards of
|
42 |
+
acceptable behavior and will take appropriate and fair corrective action in
|
43 |
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
44 |
+
or harmful.
|
45 |
+
|
46 |
+
Community leaders have the right and responsibility to remove, edit, or reject
|
47 |
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
48 |
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
49 |
+
decisions when appropriate.
|
50 |
+
|
51 |
+
## Scope
|
52 |
+
|
53 |
+
This Code of Conduct applies within all community spaces, and also applies when
|
54 |
+
an individual is officially representing the community in public spaces.
|
55 |
+
Examples of representing our community include using an official e-mail address,
|
56 |
+
posting via an official social media account, or acting as an appointed
|
57 |
+
representative at an online or offline event.
|
58 |
+
|
59 |
+
## Enforcement
|
60 |
+
|
61 |
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
62 |
+
reported to the community leaders responsible for enforcement using
|
63 |
+
[[email protected]](mailto:[email protected]).
|
64 |
+
|
65 |
+
All complaints will be reviewed and investigated promptly and fairly.
|
66 |
+
|
67 |
+
All community leaders are obligated to respect the privacy and security of the
|
68 |
+
reporter of any incident.
|
69 |
+
|
70 |
+
## Enforcement Guidelines
|
71 |
+
|
72 |
+
Community leaders will follow these Community Impact Guidelines in determining
|
73 |
+
the consequences for any action they deem in violation of this Code of Conduct:
|
74 |
+
|
75 |
+
### 1. Correction
|
76 |
+
|
77 |
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
78 |
+
unprofessional or unwelcome in the community.
|
79 |
+
|
80 |
+
**Consequence**: A private, written warning from community leaders, providing
|
81 |
+
clarity around the nature of the violation and an explanation of why the
|
82 |
+
behavior was inappropriate. A public apology may be requested.
|
83 |
+
|
84 |
+
### 2. Warning
|
85 |
+
|
86 |
+
**Community Impact**: A violation through a single incident or series
|
87 |
+
of actions.
|
88 |
+
|
89 |
+
**Consequence**: A warning with consequences for continued behavior. No
|
90 |
+
interaction with the people involved, including unsolicited interaction with
|
91 |
+
those enforcing the Code of Conduct, for a specified period of time. This
|
92 |
+
includes avoiding interactions in community spaces as well as external channels
|
93 |
+
like social media. Violating these terms may lead to a temporary or
|
94 |
+
permanent ban.
|
95 |
+
|
96 |
+
### 3. Temporary Ban
|
97 |
+
|
98 |
+
**Community Impact**: A serious violation of community standards, including
|
99 |
+
sustained inappropriate behavior.
|
100 |
+
|
101 |
+
**Consequence**: A temporary ban from any sort of interaction or public
|
102 |
+
communication with the community for a specified period of time. No public or
|
103 |
+
private interaction with the people involved, including unsolicited interaction
|
104 |
+
with those enforcing the Code of Conduct, is allowed during this period.
|
105 |
+
Violating these terms may lead to a permanent ban.
|
106 |
+
|
107 |
+
### 4. Permanent Ban
|
108 |
+
|
109 |
+
**Community Impact**: Demonstrating a pattern of violation of community
|
110 |
+
standards, including sustained inappropriate behavior, harassment of an
|
111 |
+
individual, or aggression toward or disparagement of classes of individuals.
|
112 |
+
|
113 |
+
**Consequence**: A permanent ban from any sort of public interaction within
|
114 |
+
the community.
|
115 |
+
|
116 |
+
## Attribution
|
117 |
+
|
118 |
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
119 |
+
version 2.0, available at
|
120 |
+
[https://www.contributor-covenant.org/version/2/0/code_of_conduct.html](https://www.contributor-covenant.org/version/2/0/code_of_conduct.html).
|
121 |
+
|
122 |
+
Community Impact Guidelines were inspired by [Mozilla's code of conduct
|
123 |
+
enforcement ladder](https://github.com/mozilla/diversity).
|
124 |
+
|
125 |
+
Homepage: [https://www.contributor-covenant.org](https://www.contributor-covenant.org)
|
126 |
+
|
127 |
+
For answers to common questions about this code of conduct, see the FAQ at
|
128 |
+
[https://www.contributor-covenant.org/faq](https://www.contributor-covenant.org/faq). Translations are available at
|
129 |
+
[https://www.contributor-covenant.org/translations](https://www.contributor-covenant.org/translations).
|
CONTRIBUTING.md
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Contributing In General
|
2 |
+
Our project welcomes external contributions. If you have an itch, please feel
|
3 |
+
free to scratch it.
|
4 |
+
|
5 |
+
To contribute code or documentation, please submit a [pull request](https://github.com/DS4SD/docling-serve/pulls).
|
6 |
+
|
7 |
+
A good way to familiarize yourself with the codebase and contribution process is
|
8 |
+
to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/DS4SD/docling-serve/issues).
|
9 |
+
Before embarking on a more ambitious contribution, please quickly [get in touch](#communication) with us.
|
10 |
+
|
11 |
+
For general questions or support requests, please refer to the [discussion section](https://github.com/DS4SD/docling-serve/discussions).
|
12 |
+
|
13 |
+
**Note: We appreciate your effort, and want to avoid a situation where a contribution
|
14 |
+
requires extensive rework (by you or by us), sits in backlog for a long time, or
|
15 |
+
cannot be accepted at all!**
|
16 |
+
|
17 |
+
### Proposing new features
|
18 |
+
|
19 |
+
If you would like to implement a new feature, please [raise an issue](https://github.com/DS4SD/docling-serve/issues)
|
20 |
+
before sending a pull request so the feature can be discussed. This is to avoid
|
21 |
+
you wasting your valuable time working on a feature that the project developers
|
22 |
+
are not interested in accepting into the code base.
|
23 |
+
|
24 |
+
### Fixing bugs
|
25 |
+
|
26 |
+
If you would like to fix a bug, please [raise an issue](https://github.com/DS4SD/docling-serve/issues) before sending a
|
27 |
+
pull request so it can be tracked.
|
28 |
+
|
29 |
+
### Merge approval
|
30 |
+
|
31 |
+
The project maintainers use LGTM (Looks Good To Me) in comments on the code
|
32 |
+
review to indicate acceptance. A change requires LGTMs from two of the
|
33 |
+
maintainers of each component affected.
|
34 |
+
|
35 |
+
For a list of the maintainers, see the [MAINTAINERS.md](MAINTAINERS.md) page.
|
36 |
+
|
37 |
+
|
38 |
+
## Legal
|
39 |
+
|
40 |
+
Each source file must include a license header for the MIT
|
41 |
+
License. Using the SPDX format is the simplest approach.
|
42 |
+
e.g.
|
43 |
+
|
44 |
+
```
|
45 |
+
/*
|
46 |
+
Copyright IBM Inc. All rights reserved.
|
47 |
+
|
48 |
+
SPDX-License-Identifier: MIT
|
49 |
+
*/
|
50 |
+
```
|
51 |
+
|
52 |
+
We have tried to make it as easy as possible to make contributions. This
|
53 |
+
applies to how we handle the legal aspects of contribution. We use the
|
54 |
+
same approach - the [Developer's Certificate of Origin 1.1 (DCO)](https://github.com/hyperledger/fabric/blob/master/docs/source/DCO1.1.txt) - that the Linux® Kernel [community](https://elinux.org/Developer_Certificate_Of_Origin)
|
55 |
+
uses to manage code contributions.
|
56 |
+
|
57 |
+
We simply ask that when submitting a patch for review, the developer
|
58 |
+
must include a sign-off statement in the commit message.
|
59 |
+
|
60 |
+
Here is an example Signed-off-by line, which indicates that the
|
61 |
+
submitter accepts the DCO:
|
62 |
+
|
63 |
+
```
|
64 |
+
Signed-off-by: John Doe <[email protected]>
|
65 |
+
```
|
66 |
+
|
67 |
+
You can include this automatically when you commit a change to your
|
68 |
+
local git repository using the following command:
|
69 |
+
|
70 |
+
```
|
71 |
+
git commit -s
|
72 |
+
```
|
73 |
+
|
74 |
+
|
75 |
+
## Communication
|
76 |
+
|
77 |
+
Please feel free to connect with us using the [discussion section](https://github.com/DS4SD/docling-serve/discussions).
|
78 |
+
|
79 |
+
|
80 |
+
|
81 |
+
## Developing
|
82 |
+
|
83 |
+
### Usage of Poetry
|
84 |
+
|
85 |
+
We use Poetry to manage dependencies.
|
86 |
+
|
87 |
+
|
88 |
+
#### Install
|
89 |
+
|
90 |
+
To install, see the documentation here: https://python-poetry.org/docs/master/#installing-with-the-official-installer
|
91 |
+
|
92 |
+
1. Install Poetry globally on your machine
|
93 |
+
```bash
|
94 |
+
curl -sSL https://install.python-poetry.org | python3 -
|
95 |
+
```
|
96 |
+
The installation script will print the installation bin folder `POETRY_BIN` which you need in the next steps.
|
97 |
+
|
98 |
+
2. Make sure Poetry is in your `$PATH`
|
99 |
+
- for `zsh`
|
100 |
+
```sh
|
101 |
+
echo 'export PATH="POETRY_BIN:$PATH"' >> ~/.zshrc
|
102 |
+
```
|
103 |
+
- for `bash`
|
104 |
+
```sh
|
105 |
+
echo 'export PATH="POETRY_BIN:$PATH"' >> ~/.bashrc
|
106 |
+
```
|
107 |
+
|
108 |
+
3. The official guidelines linked above include useful details on the configuration of autocomplete for most shell environments, e.g. Bash and Zsh.
|
109 |
+
|
110 |
+
|
111 |
+
#### Create a Virtual Environment and Install Dependencies
|
112 |
+
|
113 |
+
To activate the Virtual Environment, run:
|
114 |
+
|
115 |
+
```bash
|
116 |
+
poetry shell
|
117 |
+
```
|
118 |
+
|
119 |
+
This spawns a shell with the Virtual Environment activated. If the Virtual Environment doesn't exist, Poetry will create one for you. Then, to install dependencies, run:
|
120 |
+
|
121 |
+
```bash
|
122 |
+
poetry install
|
123 |
+
```
|
124 |
+
|
125 |
+
**(Advanced) Use a Specific Python Version**
|
126 |
+
|
127 |
+
If for whatever reason you need to work in a specific (older) version of Python, run:
|
128 |
+
|
129 |
+
```bash
|
130 |
+
poetry env use $(which python3.10)
|
131 |
+
```
|
132 |
+
|
133 |
+
This creates a Virtual Environment with Python 3.10. For other versions, replace `$(which python3.10)` with the path to the interpreter (e.g., `/usr/bin/python3.8`) or use `$(which pythonX.Y)`.
|
134 |
+
|
135 |
+
|
136 |
+
#### Add a new dependency
|
137 |
+
|
138 |
+
```bash
|
139 |
+
poetry add NAME
|
140 |
+
```
|
141 |
+
|
142 |
+
## Coding style guidelines
|
143 |
+
|
144 |
+
We use the following tools to enforce code style:
|
145 |
+
|
146 |
+
- isort, to sort imports
|
147 |
+
- Black, to format code
|
148 |
+
|
149 |
+
|
150 |
+
We run a series of checks on the code base on every commit, using `pre-commit`. To install the hooks, run:
|
151 |
+
|
152 |
+
```bash
|
153 |
+
pre-commit install
|
154 |
+
```
|
155 |
+
|
156 |
+
To run the checks on-demand, run:
|
157 |
+
|
158 |
+
```
|
159 |
+
pre-commit run --all-files
|
160 |
+
```
|
161 |
+
|
162 |
+
Note: Checks like `Black` and `isort` will "fail" if they modify files. This is because `pre-commit` treats any file modification made by its hooks as a failure. In these cases, `git add` the modified files and `git commit` again.
|
163 |
+
|
164 |
+
|
Containerfile
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# TODO
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2024 International Business Machines
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
MAINTAINERS.md
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# MAINTAINERS
|
2 |
+
|
3 |
+
- Christoph Auer - [@cau-git](https://github.com/cau-git)
|
4 |
+
- Michele Dolfi - [@dolfim-ibm](https://github.com/dolfim-ibm)
|
5 |
+
- Maxim Lysak - [@maxmnemonic](https://github.com/maxmnemonic)
|
6 |
+
- Nikos Livathinos - [@nikos-livathinos](https://github.com/nikos-livathinos)
|
7 |
+
- Ahmed Nassar - [@nassarofficial](https://github.com/nassarofficial)
|
8 |
+
- Panos Vagenas - [@vagenas](https://github.com/vagenas)
|
9 |
+
- Peter Staar - [@PeterStaar-IBM](https://github.com/PeterStaar-IBM)
|
10 |
+
|
11 |
+
Maintainers can be contacted at [[email protected]](mailto:[email protected]).
|
README.md
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Docling Serve
|
2 |
+
|
3 |
+
Running [Docling](https://github.com/DS4SD/docling) as an API service.
|
4 |
+
|
5 |
+
> [!NOTE]
|
6 |
+
> This is an unstable draft implementation which will quickly evolve.
|
7 |
+
|
8 |
+
|
9 |
+
## Development
|
10 |
+
|
11 |
+
Install the dependencies and run the server:
|
12 |
+
|
13 |
+
```sh
|
14 |
+
# Install dependencies
|
15 |
+
poetry install
|
16 |
+
|
17 |
+
# Run the server
|
18 |
+
poetry run uvicorn docling_serve.app:app --reload
|
19 |
+
```
|
20 |
+
|
21 |
+
Example payload (http source):
|
22 |
+
|
23 |
+
```sh
|
24 |
+
curl -X 'POST' \
|
25 |
+
'http://127.0.0.1:8000/convert' \
|
26 |
+
-H 'accept: application/json' \
|
27 |
+
-H 'Content-Type: application/json' \
|
28 |
+
-d '{
|
29 |
+
"http_source": {
|
30 |
+
"url": "https://arxiv.org/pdf/2206.01062"
|
31 |
+
}
|
32 |
+
}'
|
33 |
+
```
|
docling_serve/__init__.py
ADDED
File without changes
|
docling_serve/app.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
from contextlib import asynccontextmanager
|
3 |
+
from io import BytesIO
|
4 |
+
from pathlib import Path
|
5 |
+
from typing import Any, Dict, Union
|
6 |
+
|
7 |
+
import httpx
|
8 |
+
from docling.datamodel.base_models import (
|
9 |
+
ConversionStatus,
|
10 |
+
DocumentStream,
|
11 |
+
PipelineOptions,
|
12 |
+
)
|
13 |
+
from docling.datamodel.document import ConversionResult, DocumentConversionInput
|
14 |
+
from docling.document_converter import DocumentConverter
|
15 |
+
from fastapi import FastAPI, HTTPException
|
16 |
+
from pydantic import BaseModel
|
17 |
+
|
18 |
+
from docling_serve.settings import Settings
|
19 |
+
|
20 |
+
|
21 |
+
class HttpSource(BaseModel):
    """Remote document location, fetched with an HTTP GET request."""

    # Address of the document to download.
    url: str
    # Extra request headers sent along with the download; none by default.
    headers: Dict[str, Any] = {}
|
24 |
+
|
25 |
+
|
26 |
+
class FileSource(BaseModel):
    """Document content supplied inline in the request body."""

    # The document bytes, encoded as a base64 string.
    base64_string: str
    # Name under which the decoded bytes are handed to the converter.
    filename: str
|
29 |
+
|
30 |
+
|
31 |
+
class ConvertDocumentHttpSourceRequest(BaseModel):
    """Conversion request whose document is downloaded from a URL."""

    # Where to fetch the document from.
    http_source: HttpSource
|
33 |
+
|
34 |
+
|
35 |
+
class ConvertDocumentFileSourceRequest(BaseModel):
    """Conversion request whose document is embedded in the payload."""

    # The base64-encoded document and its filename.
    file_source: FileSource
|
37 |
+
|
38 |
+
|
39 |
+
class ConvertDocumentResponse(BaseModel):
    """Successful conversion result returned by the ``/convert`` endpoint."""

    # Markdown rendering of the converted document.
    content_md: str
|
41 |
+
|
42 |
+
|
43 |
+
# Request body of the conversion endpoint: either an inline (base64) file
# or a remote HTTP source. The member order is kept as originally declared.
ConvertDocumentRequest = Union[
    ConvertDocumentFileSourceRequest,
    ConvertDocumentHttpSourceRequest,
]
|
46 |
+
|
47 |
+
|
48 |
+
# Process-wide registry of loaded objects, filled by the lifespan handler
# (key "converter") and emptied again on shutdown. The explicit annotation is
# required because mypy cannot infer the type of an empty dict literal.
models: Dict[str, Any] = {}
|
49 |
+
|
50 |
+
|
51 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the document converter for the lifetime of the application.

    The converter is built once from the service settings and stored in the
    module-level ``models`` registry under the ``"converter"`` key, where the
    request handlers look it up.

    Args:
        app: The FastAPI application this lifespan handler is attached to.

    Yields:
        Nothing; control returns to the application while it is running.
    """
    settings = Settings()

    # Mirror the service settings onto the conversion pipeline options.
    pipeline_options = PipelineOptions()
    pipeline_options.do_ocr = settings.do_ocr
    pipeline_options.do_table_structure = settings.do_table_structure

    models["converter"] = DocumentConverter(pipeline_options=pipeline_options)
    try:
        yield
    finally:
        # Always drop the converter, even when an exception is propagated
        # into the context manager while the application is running.
        models.clear()
|
62 |
+
|
63 |
+
|
64 |
+
# The ASGI application; the lifespan handler manages the converter instance.
app = FastAPI(title="Docling Serve", lifespan=lifespan)
|
68 |
+
|
69 |
+
|
70 |
+
@app.post("/convert")
def convert_pdf_document(
    body: ConvertDocumentRequest,
) -> ConvertDocumentResponse:
    """Convert a single document to Markdown.

    The document is taken either from an inline base64-encoded payload
    (``file_source``) or downloaded from a URL (``http_source``).

    Args:
        body: The request payload describing where the document comes from.

    Returns:
        The Markdown rendering of the converted document.

    Raises:
        HTTPException: With status 400 if the base64 payload cannot be
            decoded, 502 if the remote document cannot be downloaded, and
            500 if the conversion yields no result or does not succeed.
    """
    filename: str
    buf: BytesIO

    if isinstance(body, ConvertDocumentFileSourceRequest):
        try:
            raw_bytes = base64.b64decode(body.file_source.base64_string)
        except ValueError as exc:  # binascii.Error is a ValueError subclass
            raise HTTPException(
                status_code=400,
                detail={"errors": [f"Invalid base64 content: {exc}"]},
            ) from exc
        buf = BytesIO(raw_bytes)
        filename = body.file_source.filename
    elif isinstance(body, ConvertDocumentHttpSourceRequest):
        try:
            http_res = httpx.get(
                body.http_source.url, headers=body.http_source.headers
            )
            # Do not feed the body of an error response to the converter.
            http_res.raise_for_status()
        except httpx.HTTPError as exc:
            raise HTTPException(
                status_code=502,
                detail={"errors": [f"Failed to download the document: {exc}"]},
            ) from exc
        buf = BytesIO(http_res.content)
        filename = Path(
            body.http_source.url
        ).name  # TODO: use better way to detect filename, e.g. from Content-Disposition

    docs_input = DocumentConversionInput.from_streams(
        [DocumentStream(filename=filename, stream=buf)]
    )
    result: Union[ConversionResult, None] = next(
        models["converter"].convert(docs_input), None
    )

    # Handle the "no result" case separately: there is no ``errors``
    # attribute to report when the converter yields nothing.
    if result is None:
        raise HTTPException(
            status_code=500,
            detail={"errors": ["The converter did not return any result."]},
        )
    if result.status != ConversionStatus.SUCCESS:
        raise HTTPException(status_code=500, detail={"errors": result.errors})

    return ConvertDocumentResponse(content_md=result.render_as_markdown())
|
docling_serve/py.typed
ADDED
File without changes
|
docling_serve/settings.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
2 |
+
|
3 |
+
|
4 |
+
class Settings(BaseSettings):
    """Service configuration, overridable through the environment."""

    # Environment variables are matched using the ``DOCLING_`` prefix.
    model_config = SettingsConfigDict(env_prefix="DOCLING_")

    # Whether OCR is enabled in the conversion pipeline.
    do_ocr: bool = True
    # Whether table-structure recognition is enabled in the pipeline.
    do_table_structure: bool = True
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "docling-serve"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Running Docling as a service"
|
5 |
+
license = "MIT"
|
6 |
+
authors = [
|
7 |
+
"Michele Dolfi <[email protected]>",
|
8 |
+
"Christoph Auer <[email protected]>",
|
9 |
+
"Panos Vagenas <[email protected]>",
|
10 |
+
"Cesar Berrospi Ramis <[email protected]>",
|
11 |
+
"Peter Staar <[email protected]>",
|
12 |
+
]
|
13 |
+
maintainers = [
|
14 |
+
"Peter Staar <[email protected]>",
|
15 |
+
"Christoph Auer <[email protected]>",
|
16 |
+
"Michele Dolfi <[email protected]>",
|
17 |
+
"Cesar Berrospi Ramis <[email protected]>",
|
18 |
+
"Panos Vagenas <[email protected]>",
|
19 |
+
]
|
20 |
+
readme = "README.md"
|
21 |
+
repository = "https://github.com/DS4SD/docling-serve"
|
22 |
+
homepage = "https://github.com/DS4SD/docling-serve"
|
23 |
+
classifiers = [
|
24 |
+
"License :: OSI Approved :: MIT License",
|
25 |
+
"Operating System :: OS Independent",
|
26 |
+
# "Development Status :: 5 - Production/Stable",
|
27 |
+
"Intended Audience :: Developers",
|
28 |
+
"Typing :: Typed",
|
29 |
+
"Programming Language :: Python :: 3"
|
30 |
+
]
|
31 |
+
|
32 |
+
|
33 |
+
[tool.poetry.dependencies]
|
34 |
+
python = "^3.10"
|
35 |
+
docling = "^1.9.0"
|
36 |
+
fastapi = {version = "^0.110.2", extras = ["standard"]}
|
37 |
+
uvicorn = "^0.30.6"
|
38 |
+
pydantic-settings = "^2.4.0"
|
39 |
+
httpx = "^0.27.2"
|
40 |
+
|
41 |
+
|
42 |
+
[tool.poetry.group.dev.dependencies]
|
43 |
+
black = "^24.8.0"
|
44 |
+
isort = "^5.13.2"
|
45 |
+
pre-commit = "^3.8.0"
|
46 |
+
autoflake = "^2.3.1"
|
47 |
+
flake8 = "^7.1.1"
|
48 |
+
pytest = "^8.3.2"
|
49 |
+
mypy = "^1.11.2"
|
50 |
+
|
51 |
+
[build-system]
|
52 |
+
requires = ["poetry-core"]
|
53 |
+
build-backend = "poetry.core.masonry.api"
|
54 |
+
|
55 |
+
|
56 |
+
[tool.black]
|
57 |
+
line-length = 88
|
58 |
+
target-version = ["py310"]
|
59 |
+
include = '\.pyi?$'
|
60 |
+
|
61 |
+
[tool.isort]
|
62 |
+
profile = "black"
|
63 |
+
line_length = 88
|
64 |
+
py_version=311
|
65 |
+
|
66 |
+
[tool.autoflake]
|
67 |
+
in-place = true
|
68 |
+
remove-all-unused-imports = true
|
69 |
+
remove-unused-variables = true
|
70 |
+
expand-star-imports = true
|
71 |
+
recursive = true
|
72 |
+
|
73 |
+
[[tool.mypy.overrides]]
|
74 |
+
module = [
|
75 |
+
"docling.*",
|
76 |
+
]
|
77 |
+
ignore_missing_imports = true
|
tests/__init__.py
ADDED
File without changes
|