Spaces:
Configuration error
Configuration error
Michele Dolfi
committed on
fix: set DOCLING_SERVE_ARTIFACTS_PATH in images (#53)
Browse files
Signed-off-by: Michele Dolfi <[email protected]>
- Containerfile +3 -1
- README.md +4 -1
- docling_serve/__main__.py +22 -1
- docling_serve/docling_conversion.py +32 -1
- docling_serve/settings.py +7 -2
Containerfile
CHANGED
@@ -41,13 +41,15 @@ ENV PYTHONIOENCODING=utf-8
|
|
41 |
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
|
42 |
ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
|
43 |
|
|
|
|
|
44 |
COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
|
45 |
|
46 |
RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
47 |
uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
|
48 |
|
49 |
RUN echo "Downloading models..." && \
|
50 |
-
docling-tools models download ${MODELS_LIST} && \
|
51 |
chown -R 1001:0 /opt/app-root/src/.cache && \
|
52 |
chmod -R g=u /opt/app-root/src/.cache
|
53 |
|
|
|
41 |
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
|
42 |
ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
|
43 |
|
44 |
+
ENV DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
|
45 |
+
|
46 |
COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
|
47 |
|
48 |
RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
49 |
uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
|
50 |
|
51 |
RUN echo "Downloading models..." && \
|
52 |
+
docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
|
53 |
chown -R 1001:0 /opt/app-root/src/.cache && \
|
54 |
chmod -R g=u /opt/app-root/src/.cache
|
55 |
|
README.md
CHANGED
@@ -381,6 +381,9 @@ $ docling-serve dev --help
|
|
381 |
│ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
|
382 |
│ X-Forwarded-Port to populate remote address info. │
|
383 |
│ [default: proxy-headers] │
|
|
|
|
|
|
|
384 |
│ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
|
385 |
│ --help Show this message and exit. │
|
386 |
β°βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ―
|
@@ -398,11 +401,11 @@ The environment variables controlling the `uvicorn` execution can be specified w
|
|
398 |
The environment variables controlling specifics of the Docling Serve app can be specified with the
|
399 |
`DOCLING_SERVE_` prefix:
|
400 |
|
|
|
401 |
- `DOCLING_SERVE_ENABLE_UI`: If `True`, the Gradio UI will be available at `/ui`.
|
402 |
|
403 |
Others:
|
404 |
|
405 |
-
- `DOCLING_ARTIFACTS_PATH`: if set Docling will use only the local weights of models, for example `/opt/app-root/.cache/docling/cache`.
|
406 |
- `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
|
407 |
|
408 |
## Get help and support
|
|
|
381 |
│ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
|
382 |
│ X-Forwarded-Port to populate remote address info. │
|
383 |
│ [default: proxy-headers] │
|
384 |
+
│ --artifacts-path PATH If set to a valid directory, the model weights will be │
|
385 |
+
│ loaded from this path. │
|
386 |
+
│ [default: None] │
|
387 |
│ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
|
388 |
│ --help Show this message and exit. │
|
389 |
β°βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ―
|
|
|
401 |
The environment variables controlling specifics of the Docling Serve app can be specified with the
|
402 |
`DOCLING_SERVE_` prefix:
|
403 |
|
404 |
+
- `DOCLING_SERVE_ARTIFACTS_PATH`: if set, Docling will use only the local model weights found at this path, for example `/opt/app-root/src/.cache/docling/models`.
|
405 |
- `DOCLING_SERVE_ENABLE_UI`: If `True`, the Gradio UI will be available at `/ui`.
|
406 |
|
407 |
Others:
|
408 |
|
|
|
409 |
- `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
|
410 |
|
411 |
## Get help and support
|
docling_serve/__main__.py
CHANGED
@@ -3,7 +3,8 @@ import logging
|
|
3 |
import platform
|
4 |
import sys
|
5 |
import warnings
|
6 |
-
from
|
|
|
7 |
|
8 |
import typer
|
9 |
import uvicorn
|
@@ -161,6 +162,15 @@ def dev(
|
|
161 |
),
|
162 |
] = uvicorn_settings.proxy_headers,
|
163 |
# docling options
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
|
165 |
) -> Any:
|
166 |
"""
|
@@ -179,6 +189,7 @@ def dev(
|
|
179 |
uvicorn_settings.root_path = root_path
|
180 |
uvicorn_settings.proxy_headers = proxy_headers
|
181 |
|
|
|
182 |
docling_serve_settings.enable_ui = enable_ui
|
183 |
|
184 |
_run(
|
@@ -243,6 +254,15 @@ def run(
|
|
243 |
),
|
244 |
] = uvicorn_settings.proxy_headers,
|
245 |
# docling options
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
enable_ui: Annotated[
|
247 |
bool, typer.Option(help="Enable the development UI.")
|
248 |
] = docling_serve_settings.enable_ui,
|
@@ -264,6 +284,7 @@ def run(
|
|
264 |
uvicorn_settings.root_path = root_path
|
265 |
uvicorn_settings.proxy_headers = proxy_headers
|
266 |
|
|
|
267 |
docling_serve_settings.enable_ui = enable_ui
|
268 |
|
269 |
_run(
|
|
|
3 |
import platform
|
4 |
import sys
|
5 |
import warnings
|
6 |
+
from pathlib import Path
|
7 |
+
from typing import Annotated, Any, Optional, Union
|
8 |
|
9 |
import typer
|
10 |
import uvicorn
|
|
|
162 |
),
|
163 |
] = uvicorn_settings.proxy_headers,
|
164 |
# docling options
|
165 |
+
artifacts_path: Annotated[
|
166 |
+
Optional[Path],
|
167 |
+
typer.Option(
|
168 |
+
help=(
|
169 |
+
"If set to a valid directory, "
|
170 |
+
"the model weights will be loaded from this path."
|
171 |
+
)
|
172 |
+
),
|
173 |
+
] = docling_serve_settings.artifacts_path,
|
174 |
enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
|
175 |
) -> Any:
|
176 |
"""
|
|
|
189 |
uvicorn_settings.root_path = root_path
|
190 |
uvicorn_settings.proxy_headers = proxy_headers
|
191 |
|
192 |
+
docling_serve_settings.artifacts_path = artifacts_path
|
193 |
docling_serve_settings.enable_ui = enable_ui
|
194 |
|
195 |
_run(
|
|
|
254 |
),
|
255 |
] = uvicorn_settings.proxy_headers,
|
256 |
# docling options
|
257 |
+
artifacts_path: Annotated[
|
258 |
+
Optional[Path],
|
259 |
+
typer.Option(
|
260 |
+
help=(
|
261 |
+
"If set to a valid directory, "
|
262 |
+
"the model weights will be loaded from this path."
|
263 |
+
)
|
264 |
+
),
|
265 |
+
] = docling_serve_settings.artifacts_path,
|
266 |
enable_ui: Annotated[
|
267 |
bool, typer.Option(help="Enable the development UI.")
|
268 |
] = docling_serve_settings.enable_ui,
|
|
|
284 |
uvicorn_settings.root_path = root_path
|
285 |
uvicorn_settings.proxy_headers = proxy_headers
|
286 |
|
287 |
+
docling_serve_settings.artifacts_path = artifacts_path
|
288 |
docling_serve_settings.enable_ui = enable_ui
|
289 |
|
290 |
_run(
|
docling_serve/docling_conversion.py
CHANGED
@@ -39,6 +39,7 @@ from fastapi import HTTPException
|
|
39 |
from pydantic import BaseModel, Field
|
40 |
|
41 |
from docling_serve.helper_functions import _to_list_of_strings
|
|
|
42 |
|
43 |
_log = logging.getLogger(__name__)
|
44 |
|
@@ -276,6 +277,11 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
|
|
276 |
if pdf_format_option.pipeline_options:
|
277 |
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
|
278 |
|
|
|
|
|
|
|
|
|
|
|
279 |
# Replace `pipeline_cls` with a string representation
|
280 |
data["pipeline_cls"] = repr(data["pipeline_cls"])
|
281 |
|
@@ -293,7 +299,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
|
|
293 |
|
294 |
|
295 |
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
|
296 |
-
def get_pdf_pipeline_opts(
|
297 |
request: ConvertDocumentsOptions,
|
298 |
) -> Tuple[PdfFormatOption, str]:
|
299 |
|
@@ -364,6 +370,31 @@ def get_pdf_pipeline_opts(
|
|
364 |
else:
|
365 |
raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
|
366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
pdf_format_option = PdfFormatOption(
|
368 |
pipeline_options=pipeline_options,
|
369 |
backend=backend,
|
|
|
39 |
from pydantic import BaseModel, Field
|
40 |
|
41 |
from docling_serve.helper_functions import _to_list_of_strings
|
42 |
+
from docling_serve.settings import docling_serve_settings
|
43 |
|
44 |
_log = logging.getLogger(__name__)
|
45 |
|
|
|
277 |
if pdf_format_option.pipeline_options:
|
278 |
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
|
279 |
|
280 |
+
# Replace `artifacts_path` with a string representation
|
281 |
+
data["pipeline_options"]["artifacts_path"] = repr(
|
282 |
+
data["pipeline_options"]["artifacts_path"]
|
283 |
+
)
|
284 |
+
|
285 |
# Replace `pipeline_cls` with a string representation
|
286 |
data["pipeline_cls"] = repr(data["pipeline_cls"])
|
287 |
|
|
|
299 |
|
300 |
|
301 |
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
|
302 |
+
def get_pdf_pipeline_opts( # noqa: C901
|
303 |
request: ConvertDocumentsOptions,
|
304 |
) -> Tuple[PdfFormatOption, str]:
|
305 |
|
|
|
370 |
else:
|
371 |
raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
|
372 |
|
373 |
+
if docling_serve_settings.artifacts_path is not None:
|
374 |
+
if str(docling_serve_settings.artifacts_path.absolute()) == "":
|
375 |
+
_log.info(
|
376 |
+
"artifacts_path is an empty path, model weights will be dowloaded "
|
377 |
+
"at runtime."
|
378 |
+
)
|
379 |
+
pipeline_options.artifacts_path = None
|
380 |
+
elif docling_serve_settings.artifacts_path.is_dir():
|
381 |
+
_log.info(
|
382 |
+
"artifacts_path is set to a valid directory. "
|
383 |
+
"No model weights will be downloaded at runtime."
|
384 |
+
)
|
385 |
+
pipeline_options.artifacts_path = docling_serve_settings.artifacts_path
|
386 |
+
else:
|
387 |
+
_log.warning(
|
388 |
+
"artifacts_path is set to an invalid directory. "
|
389 |
+
"The system will download the model weights at runtime."
|
390 |
+
)
|
391 |
+
pipeline_options.artifacts_path = None
|
392 |
+
else:
|
393 |
+
_log.info(
|
394 |
+
"artifacts_path is unset. "
|
395 |
+
"The system will download the model weights at runtime."
|
396 |
+
)
|
397 |
+
|
398 |
pdf_format_option = PdfFormatOption(
|
399 |
pipeline_options=pipeline_options,
|
400 |
backend=backend,
|
docling_serve/settings.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
-
from
|
|
|
2 |
|
3 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
4 |
|
@@ -18,10 +19,14 @@ class UvicornSettings(BaseSettings):
|
|
18 |
|
19 |
class DoclingServeSettings(BaseSettings):
|
20 |
model_config = SettingsConfigDict(
|
21 |
-
env_prefix="DOCLING_SERVE_",
|
|
|
|
|
|
|
22 |
)
|
23 |
|
24 |
enable_ui: bool = False
|
|
|
25 |
|
26 |
|
27 |
uvicorn_settings = UvicornSettings()
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from typing import Optional, Union
|
3 |
|
4 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
5 |
|
|
|
19 |
|
20 |
class DoclingServeSettings(BaseSettings):
|
21 |
model_config = SettingsConfigDict(
|
22 |
+
env_prefix="DOCLING_SERVE_",
|
23 |
+
env_file=".env",
|
24 |
+
env_parse_none_str="",
|
25 |
+
extra="allow",
|
26 |
)
|
27 |
|
28 |
enable_ui: bool = False
|
29 |
+
artifacts_path: Optional[Path] = None
|
30 |
|
31 |
|
32 |
uvicorn_settings = UvicornSettings()
|