Michele Dolfi committed on
Commit
72320a1
·
unverified ·
1 Parent(s): e6a25a6

fix: set DOCLING_SERVE_ARTIFACTS_PATH in images (#53)

Browse files

Signed-off-by: Michele Dolfi <[email protected]>

Containerfile CHANGED
@@ -41,13 +41,15 @@ ENV PYTHONIOENCODING=utf-8
41
  ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
42
  ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
43
 
 
 
44
  COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
45
 
46
  RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
47
  uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
48
 
49
  RUN echo "Downloading models..." && \
50
- docling-tools models download ${MODELS_LIST} && \
51
  chown -R 1001:0 /opt/app-root/src/.cache && \
52
  chmod -R g=u /opt/app-root/src/.cache
53
 
 
41
  ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
42
  ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
43
 
44
+ ENV DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
45
+
46
  COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
47
 
48
  RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
49
  uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
50
 
51
  RUN echo "Downloading models..." && \
52
+ docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
53
  chown -R 1001:0 /opt/app-root/src/.cache && \
54
  chmod -R g=u /opt/app-root/src/.cache
55
 
README.md CHANGED
@@ -381,6 +381,9 @@ $ docling-serve dev --help
381
  │ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
382
  │ X-Forwarded-Port to populate remote address info. │
383
  │ [default: proxy-headers] │
 
 
 
384
  │ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
385
  │ --help Show this message and exit. │
386
  ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
@@ -398,11 +401,11 @@ The environment variables controlling the `uvicorn` execution can be specified w
398
  The environment variables controlling specifics of the Docling Serve app can be specified with the
399
  `DOCLING_SERVE_` prefix:
400
 
 
401
  - `DOCLING_SERVE_ENABLE_UI`: If `True`, The Gradio UI will be available at `/ui`.
402
 
403
  Others:
404
 
405
- - `DOCLING_ARTIFACTS_PATH`: if set Docling will use only the local weights of models, for example `/opt/app-root/.cache/docling/cache`.
406
  - `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
407
 
408
  ## Get help and support
 
381
  │ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
382
  │ X-Forwarded-Port to populate remote address info. │
383
  │ [default: proxy-headers] │
384
+ │ --artifacts-path PATH If set to a valid directory, the model weights will be │
385
+ │ loaded from this path. │
386
+ │ [default: None] │
387
  │ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
388
  │ --help Show this message and exit. │
389
  ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 
401
  The environment variables controlling specifics of the Docling Serve app can be specified with the
402
  `DOCLING_SERVE_` prefix:
403
 
404
+ - `DOCLING_SERVE_ARTIFACTS_PATH`: if set Docling will use only the local weights of models, for example `/opt/app-root/src/.cache/docling/models`.
405
  - `DOCLING_SERVE_ENABLE_UI`: If `True`, The Gradio UI will be available at `/ui`.
406
 
407
  Others:
408
 
 
409
  - `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
410
 
411
  ## Get help and support
docling_serve/__main__.py CHANGED
@@ -3,7 +3,8 @@ import logging
3
  import platform
4
  import sys
5
  import warnings
6
- from typing import Annotated, Any, Union
 
7
 
8
  import typer
9
  import uvicorn
@@ -161,6 +162,15 @@ def dev(
161
  ),
162
  ] = uvicorn_settings.proxy_headers,
163
  # docling options
 
 
 
 
 
 
 
 
 
164
  enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
165
  ) -> Any:
166
  """
@@ -179,6 +189,7 @@ def dev(
179
  uvicorn_settings.root_path = root_path
180
  uvicorn_settings.proxy_headers = proxy_headers
181
 
 
182
  docling_serve_settings.enable_ui = enable_ui
183
 
184
  _run(
@@ -243,6 +254,15 @@ def run(
243
  ),
244
  ] = uvicorn_settings.proxy_headers,
245
  # docling options
 
 
 
 
 
 
 
 
 
246
  enable_ui: Annotated[
247
  bool, typer.Option(help="Enable the development UI.")
248
  ] = docling_serve_settings.enable_ui,
@@ -264,6 +284,7 @@ def run(
264
  uvicorn_settings.root_path = root_path
265
  uvicorn_settings.proxy_headers = proxy_headers
266
 
 
267
  docling_serve_settings.enable_ui = enable_ui
268
 
269
  _run(
 
3
  import platform
4
  import sys
5
  import warnings
6
+ from pathlib import Path
7
+ from typing import Annotated, Any, Optional, Union
8
 
9
  import typer
10
  import uvicorn
 
162
  ),
163
  ] = uvicorn_settings.proxy_headers,
164
  # docling options
165
+ artifacts_path: Annotated[
166
+ Optional[Path],
167
+ typer.Option(
168
+ help=(
169
+ "If set to a valid directory, "
170
+ "the model weights will be loaded from this path."
171
+ )
172
+ ),
173
+ ] = docling_serve_settings.artifacts_path,
174
  enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
175
  ) -> Any:
176
  """
 
189
  uvicorn_settings.root_path = root_path
190
  uvicorn_settings.proxy_headers = proxy_headers
191
 
192
+ docling_serve_settings.artifacts_path = artifacts_path
193
  docling_serve_settings.enable_ui = enable_ui
194
 
195
  _run(
 
254
  ),
255
  ] = uvicorn_settings.proxy_headers,
256
  # docling options
257
+ artifacts_path: Annotated[
258
+ Optional[Path],
259
+ typer.Option(
260
+ help=(
261
+ "If set to a valid directory, "
262
+ "the model weights will be loaded from this path."
263
+ )
264
+ ),
265
+ ] = docling_serve_settings.artifacts_path,
266
  enable_ui: Annotated[
267
  bool, typer.Option(help="Enable the development UI.")
268
  ] = docling_serve_settings.enable_ui,
 
284
  uvicorn_settings.root_path = root_path
285
  uvicorn_settings.proxy_headers = proxy_headers
286
 
287
+ docling_serve_settings.artifacts_path = artifacts_path
288
  docling_serve_settings.enable_ui = enable_ui
289
 
290
  _run(
docling_serve/docling_conversion.py CHANGED
@@ -39,6 +39,7 @@ from fastapi import HTTPException
39
  from pydantic import BaseModel, Field
40
 
41
  from docling_serve.helper_functions import _to_list_of_strings
 
42
 
43
  _log = logging.getLogger(__name__)
44
 
@@ -276,6 +277,11 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
276
  if pdf_format_option.pipeline_options:
277
  data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
278
 
 
 
 
 
 
279
  # Replace `pipeline_cls` with a string representation
280
  data["pipeline_cls"] = repr(data["pipeline_cls"])
281
 
@@ -293,7 +299,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
293
 
294
 
295
  # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
296
- def get_pdf_pipeline_opts(
297
  request: ConvertDocumentsOptions,
298
  ) -> Tuple[PdfFormatOption, str]:
299
 
@@ -364,6 +370,31 @@ def get_pdf_pipeline_opts(
364
  else:
365
  raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  pdf_format_option = PdfFormatOption(
368
  pipeline_options=pipeline_options,
369
  backend=backend,
 
39
  from pydantic import BaseModel, Field
40
 
41
  from docling_serve.helper_functions import _to_list_of_strings
42
+ from docling_serve.settings import docling_serve_settings
43
 
44
  _log = logging.getLogger(__name__)
45
 
 
277
  if pdf_format_option.pipeline_options:
278
  data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
279
 
280
+ # Replace `artifacts_path` with a string representation
281
+ data["pipeline_options"]["artifacts_path"] = repr(
282
+ data["pipeline_options"]["artifacts_path"]
283
+ )
284
+
285
  # Replace `pipeline_cls` with a string representation
286
  data["pipeline_cls"] = repr(data["pipeline_cls"])
287
 
 
299
 
300
 
301
  # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
302
+ def get_pdf_pipeline_opts( # noqa: C901
303
  request: ConvertDocumentsOptions,
304
  ) -> Tuple[PdfFormatOption, str]:
305
 
 
370
  else:
371
  raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
372
 
373
+ if docling_serve_settings.artifacts_path is not None:
374
+ if str(docling_serve_settings.artifacts_path.absolute()) == "":
375
+ _log.info(
376
+ "artifacts_path is an empty path, model weights will be dowloaded "
377
+ "at runtime."
378
+ )
379
+ pipeline_options.artifacts_path = None
380
+ elif docling_serve_settings.artifacts_path.is_dir():
381
+ _log.info(
382
+ "artifacts_path is set to a valid directory. "
383
+ "No model weights will be downloaded at runtime."
384
+ )
385
+ pipeline_options.artifacts_path = docling_serve_settings.artifacts_path
386
+ else:
387
+ _log.warning(
388
+ "artifacts_path is set to an invalid directory. "
389
+ "The system will download the model weights at runtime."
390
+ )
391
+ pipeline_options.artifacts_path = None
392
+ else:
393
+ _log.info(
394
+ "artifacts_path is unset. "
395
+ "The system will download the model weights at runtime."
396
+ )
397
+
398
  pdf_format_option = PdfFormatOption(
399
  pipeline_options=pipeline_options,
400
  backend=backend,
docling_serve/settings.py CHANGED
@@ -1,4 +1,5 @@
1
- from typing import Union
 
2
 
3
  from pydantic_settings import BaseSettings, SettingsConfigDict
4
 
@@ -18,10 +19,14 @@ class UvicornSettings(BaseSettings):
18
 
19
  class DoclingServeSettings(BaseSettings):
20
  model_config = SettingsConfigDict(
21
- env_prefix="DOCLING_SERVE_", env_file=".env", extra="allow"
 
 
 
22
  )
23
 
24
  enable_ui: bool = False
 
25
 
26
 
27
  uvicorn_settings = UvicornSettings()
 
1
+ from pathlib import Path
2
+ from typing import Optional, Union
3
 
4
  from pydantic_settings import BaseSettings, SettingsConfigDict
5
 
 
19
 
20
  class DoclingServeSettings(BaseSettings):
21
  model_config = SettingsConfigDict(
22
+ env_prefix="DOCLING_SERVE_",
23
+ env_file=".env",
24
+ env_parse_none_str="",
25
+ extra="allow",
26
  )
27
 
28
  enable_ui: bool = False
29
+ artifacts_path: Optional[Path] = None
30
 
31
 
32
  uvicorn_settings = UvicornSettings()