Michele Dolfi
committed on
feat: Async api (#60)
Signed-off-by: Michele Dolfi <[email protected]>
- .pre-commit-config.yaml +9 -9
- Containerfile +1 -1
- docling_serve/app.py +170 -12
- docling_serve/datamodel/__init__.py +0 -0
- docling_serve/datamodel/convert.py +174 -0
- docling_serve/datamodel/engines.py +30 -0
- docling_serve/datamodel/requests.py +62 -0
- docling_serve/datamodel/responses.py +52 -0
- docling_serve/docling_conversion.py +5 -237
- docling_serve/engines/__init__.py +8 -0
- docling_serve/engines/async_local/__init__.py +0 -0
- docling_serve/engines/async_local/orchestrator.py +101 -0
- docling_serve/engines/async_local/worker.py +116 -0
- docling_serve/engines/base_orchestrator.py +21 -0
- docling_serve/engines/block_local/__init__.py +0 -0
- docling_serve/gradio_ui.py +2 -7
- docling_serve/response_preparation.py +7 -31
- docling_serve/settings.py +5 -0
- pyproject.toml +13 -6
- tests/test_1-file-all-outputs.py +1 -1
- tests/test_1-url-all-outputs.py +1 -1
- tests/test_1-url-async-ws.py +48 -0
- tests/test_1-url-async.py +60 -0
- tests/test_2-files-all-outputs.py +9 -9
- tests/test_2-urls-all-outputs.py +9 -9
- uv.lock +15 -44
.pre-commit-config.yaml
CHANGED
@@ -1,5 +1,14 @@
 fail_fast: true
 repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.6
+    hooks:
+      # Run the Ruff formatter.
+      - id: ruff-format
+        args: [--config=pyproject.toml]
+      # Run the Ruff linter.
+      - id: ruff
+        args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
   - repo: local
     hooks:
       - id: system
@@ -13,12 +22,3 @@ repos:
     rev: 0.6.1
     hooks:
       - id: uv-lock
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.6
-    hooks:
-      # Run the Ruff linter.
-      - id: ruff
-        args: [--exit-non-zero-on-fix, --config=pyproject.toml]
-      # Run the Ruff formatter.
-      # - id: ruff-format
-      #   args: [--config=pyproject.toml]
Containerfile
CHANGED
@@ -53,7 +53,7 @@ RUN echo "Downloading models..." && \
     chown -R 1001:0 /opt/app-root/src/.cache && \
     chmod -R g=u /opt/app-root/src/.cache

-COPY --chown=1001:0
+COPY --chown=1001:0 ./docling_serve ./docling_serve
 RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
     uv sync --frozen --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}

docling_serve/app.py
CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 import importlib.metadata
 import logging
 import tempfile
@@ -6,23 +7,46 @@ from io import BytesIO
 from pathlib import Path
 from typing import Annotated, Any, Dict, List, Optional, Union

-from
-
-
+from fastapi import (
+    BackgroundTasks,
+    Depends,
+    FastAPI,
+    HTTPException,
+    Query,
+    UploadFile,
+    WebSocket,
+    WebSocketDisconnect,
+)
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import RedirectResponse
-from pydantic import BaseModel

-from
+from docling.datamodel.base_models import DocumentStream, InputFormat
+from docling.document_converter import DocumentConverter
+
+from docling_serve.datamodel.convert import ConvertDocumentsOptions
+from docling_serve.datamodel.requests import (
     ConvertDocumentFileSourcesRequest,
-    ConvertDocumentsOptions,
     ConvertDocumentsRequest,
+)
+from docling_serve.datamodel.responses import (
+    ConvertDocumentResponse,
+    HealthCheckResponse,
+    MessageKind,
+    TaskStatusResponse,
+    WebsocketMessage,
+)
+from docling_serve.docling_conversion import (
     convert_documents,
     converters,
     get_pdf_pipeline_opts,
 )
+from docling_serve.engines import get_orchestrator
+from docling_serve.engines.async_local.orchestrator import (
+    AsyncLocalOrchestrator,
+    TaskNotFoundError,
+)
 from docling_serve.helper_functions import FormDepends
-from docling_serve.response_preparation import
+from docling_serve.response_preparation import process_results
 from docling_serve.settings import docling_serve_settings


@@ -72,9 +96,22 @@ async def lifespan(app: FastAPI):

     converters[options_hash].initialize_pipeline(InputFormat.PDF)

+    orchestrator = get_orchestrator()
+
+    # Start the background queue processor
+    queue_task = asyncio.create_task(orchestrator.process_queue())
+
     yield

+    # Cancel the background queue processor on shutdown
+    queue_task.cancel()
+    try:
+        await queue_task
+    except asyncio.CancelledError:
+        _log.info("Queue processor cancelled.")
+
     converters.clear()
+
     # if WITH_UI:
     #     gradio_ui.close()

@@ -84,7 +121,7 @@ async def lifespan(app: FastAPI):
 ##################################


-def create_app():
+def create_app():  # noqa: C901
     try:
         version = importlib.metadata.version("docling_serve")
     except importlib.metadata.PackageNotFoundError:
@@ -145,10 +182,6 @@ def create_app():
         )
         return response

-    # Status
-    class HealthCheckResponse(BaseModel):
-        status: str = "ok"
-
     @app.get("/health")
     def health() -> HealthCheckResponse:
         return HealthCheckResponse()
@@ -233,4 +266,129 @@ def create_app():

         return response

+    # Convert a document from URL(s) using the async api
+    @app.post(
+        "/v1alpha/convert/source/async",
+        response_model=TaskStatusResponse,
+    )
+    async def process_url_async(
+        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        conversion_request: ConvertDocumentsRequest,
+    ):
+        task = await orchestrator.enqueue(request=conversion_request)
+        task_queue_position = await orchestrator.get_queue_position(
+            task_id=task.task_id
+        )
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+        )
+
+    # Task status poll
+    @app.get(
+        "/v1alpha/status/poll/{task_id}",
+        response_model=TaskStatusResponse,
+    )
+    async def task_status_poll(
+        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        task_id: str,
+        wait: Annotated[
+            float, Query(help="Number of seconds to wait for a completed status.")
+        ] = 0.0,
+    ):
+        try:
+            task = await orchestrator.task_status(task_id=task_id, wait=wait)
+            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+        )
+
+    # Task status websocket
+    @app.websocket(
+        "/v1alpha/status/ws/{task_id}",
+    )
+    async def task_status_ws(
+        websocket: WebSocket,
+        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        task_id: str,
+    ):
+        await websocket.accept()
+
+        if task_id not in orchestrator.tasks:
+            await websocket.send_text(
+                WebsocketMessage(
+                    message=MessageKind.ERROR, error="Task not found."
+                ).model_dump_json()
+            )
+            await websocket.close()
+            return
+
+        task = orchestrator.tasks[task_id]
+
+        # Track active WebSocket connections for this job
+        orchestrator.task_subscribers[task_id].add(websocket)
+
+        try:
+            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
+            task_response = TaskStatusResponse(
+                task_id=task.task_id,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+            )
+            await websocket.send_text(
+                WebsocketMessage(
+                    message=MessageKind.CONNECTION, task=task_response
+                ).model_dump_json()
+            )
+            while True:
+                task_queue_position = await orchestrator.get_queue_position(
+                    task_id=task_id
+                )
+                task_response = TaskStatusResponse(
+                    task_id=task.task_id,
+                    task_status=task.task_status,
+                    task_position=task_queue_position,
+                )
+                await websocket.send_text(
+                    WebsocketMessage(
+                        message=MessageKind.UPDATE, task=task_response
+                    ).model_dump_json()
+                )
+                # each client message will be interpreted as a request for update
+                msg = await websocket.receive_text()
+                _log.debug(f"Received message: {msg}")
+
+        except WebSocketDisconnect:
+            _log.info(f"WebSocket disconnected for job {task_id}")
+
+        finally:
+            orchestrator.task_subscribers[task_id].remove(websocket)
+
+    # Task result
+    @app.get(
+        "/v1alpha/result/{task_id}",
+        response_model=ConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+            }
+        },
+    )
+    async def task_result(
+        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        task_id: str,
+    ):
+        result = await orchestrator.task_result(task_id=task_id)
+        if result is None:
+            raise HTTPException(
+                status_code=404,
+                detail="Task result not found. Please wait for a completion status.",
+            )
+        return result
+
     return app
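For reference, a minimal client sketch of the new async flow (illustrative only, not part of this commit): submit sources to /v1alpha/convert/source/async, poll /v1alpha/status/poll/{task_id}, then fetch /v1alpha/result/{task_id}. The base URL/port and the example arXiv PDF are assumptions; httpx is already a project dependency.

# Illustrative polling client for the async API added in this commit.
# Assumes docling-serve is reachable at the URL below (adjust as needed).
import time

import httpx

BASE = "http://localhost:5001"  # assumed host/port

payload = {
    "options": {"to_formats": ["md"]},  # "md" is the Markdown output format value
    "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}],
}

task = httpx.post(f"{BASE}/v1alpha/convert/source/async", json=payload).json()
task_id = task["task_id"]

# Poll until the task reaches a terminal status (TaskStatus values from engines.py).
while task["task_status"] not in ("success", "failure"):
    time.sleep(2)
    task = httpx.get(
        f"{BASE}/v1alpha/status/poll/{task_id}", params={"wait": 5}
    ).json()
    print(task["task_status"], task.get("task_position"))

# Fetch the conversion result once the task has completed.
result = httpx.get(f"{BASE}/v1alpha/result/{task_id}").json()
print(result["document"]["md_content"][:200])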
docling_serve/datamodel/__init__.py
ADDED
File without changes
docling_serve/datamodel/convert.py
ADDED
@@ -0,0 +1,174 @@

# Define the input options for the API
from typing import Annotated, List, Optional

from pydantic import BaseModel, Field

from docling.datamodel.base_models import InputFormat, OutputFormat
from docling.datamodel.pipeline_options import OcrEngine, PdfBackend, TableFormerMode
from docling_core.types.doc import ImageRefMode


class ConvertDocumentsOptions(BaseModel):
    from_formats: Annotated[
        List[InputFormat],
        Field(
            description=(
                "Input format(s) to convert from. String or list of strings. "
                f"Allowed values: {', '.join([v.value for v in InputFormat])}. "
                "Optional, defaults to all formats."
            ),
            examples=[[v.value for v in InputFormat]],
        ),
    ] = list(InputFormat)

    to_formats: Annotated[
        List[OutputFormat],
        Field(
            description=(
                "Output format(s) to convert to. String or list of strings. "
                f"Allowed values: {', '.join([v.value for v in OutputFormat])}. "
                "Optional, defaults to Markdown."
            ),
            examples=[[OutputFormat.MARKDOWN]],
        ),
    ] = [OutputFormat.MARKDOWN]

    image_export_mode: Annotated[
        ImageRefMode,
        Field(
            description=(
                "Image export mode for the document (in case of JSON,"
                " Markdown or HTML). "
                f"Allowed values: {', '.join([v.value for v in ImageRefMode])}. "
                "Optional, defaults to Embedded."
            ),
            examples=[ImageRefMode.EMBEDDED.value],
            # pattern="embedded|placeholder|referenced",
        ),
    ] = ImageRefMode.EMBEDDED

    do_ocr: Annotated[
        bool,
        Field(
            description=(
                "If enabled, the bitmap content will be processed using OCR. "
                "Boolean. Optional, defaults to true"
            ),
            # examples=[True],
        ),
    ] = True

    force_ocr: Annotated[
        bool,
        Field(
            description=(
                "If enabled, replace existing text with OCR-generated "
                "text over content. Boolean. Optional, defaults to false."
            ),
            # examples=[False],
        ),
    ] = False

    # TODO: use a restricted list based on what is installed on the system
    ocr_engine: Annotated[
        OcrEngine,
        Field(
            description=(
                "The OCR engine to use. String. "
                "Allowed values: easyocr, tesseract, rapidocr. "
                "Optional, defaults to easyocr."
            ),
            examples=[OcrEngine.EASYOCR],
        ),
    ] = OcrEngine.EASYOCR

    ocr_lang: Annotated[
        Optional[List[str]],
        Field(
            description=(
                "List of languages used by the OCR engine. "
                "Note that each OCR engine has "
                "different values for the language names. String or list of strings. "
                "Optional, defaults to empty."
            ),
            examples=[["fr", "de", "es", "en"]],
        ),
    ] = None

    pdf_backend: Annotated[
        PdfBackend,
        Field(
            description=(
                "The PDF backend to use. String. "
                f"Allowed values: {', '.join([v.value for v in PdfBackend])}. "
                f"Optional, defaults to {PdfBackend.DLPARSE_V2.value}."
            ),
            examples=[PdfBackend.DLPARSE_V2],
        ),
    ] = PdfBackend.DLPARSE_V2

    table_mode: Annotated[
        TableFormerMode,
        Field(
            TableFormerMode.FAST,
            description=(
                "Mode to use for table structure, String. "
                f"Allowed values: {', '.join([v.value for v in TableFormerMode])}. "
                "Optional, defaults to fast."
            ),
            examples=[TableFormerMode.FAST],
            # pattern="fast|accurate",
        ),
    ] = TableFormerMode.FAST

    abort_on_error: Annotated[
        bool,
        Field(
            description=(
                "Abort on error if enabled. Boolean. Optional, defaults to false."
            ),
            # examples=[False],
        ),
    ] = False

    return_as_file: Annotated[
        bool,
        Field(
            description=(
                "Return the output as a zip file "
                "(will happen anyway if multiple files are generated). "
                "Boolean. Optional, defaults to false."
            ),
            examples=[False],
        ),
    ] = False

    do_table_structure: Annotated[
        bool,
        Field(
            description=(
                "If enabled, the table structure will be extracted. "
                "Boolean. Optional, defaults to true."
            ),
            examples=[True],
        ),
    ] = True

    include_images: Annotated[
        bool,
        Field(
            description=(
                "If enabled, images will be extracted from the document. "
                "Boolean. Optional, defaults to true."
            ),
            examples=[True],
        ),
    ] = True

    images_scale: Annotated[
        float,
        Field(
            description="Scale factor for images. Float. Optional, defaults to 2.0.",
            examples=[2.0],
        ),
    ] = 2.0
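All of the fields above are optional, so a request body only needs to override what it changes. A sketch of an options payload as it could appear in a request (the concrete values are chosen for illustration and assume the usual enum string values):

# Illustrative ConvertDocumentsOptions payload (values are examples, not defaults).
options = {
    "from_formats": ["pdf", "docx"],     # subset of the supported input formats
    "to_formats": ["md", "json"],        # Markdown plus lossless JSON output
    "image_export_mode": "placeholder",  # embedded | placeholder | referenced
    "do_ocr": True,
    "ocr_engine": "easyocr",
    "ocr_lang": ["en", "de"],
    "pdf_backend": "dlparse_v2",
    "table_mode": "accurate",
    "images_scale": 1.0,
}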
docling_serve/datamodel/engines.py
ADDED
@@ -0,0 +1,30 @@

import enum
from typing import Optional

from pydantic import BaseModel

from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.responses import ConvertDocumentResponse


class TaskStatus(str, enum.Enum):
    SUCCESS = "success"
    PENDING = "pending"
    STARTED = "started"
    FAILURE = "failure"


class AsyncEngine(str, enum.Enum):
    LOCAL = "local"


class Task(BaseModel):
    task_id: str
    task_status: TaskStatus = TaskStatus.PENDING
    request: Optional[ConvertDocumentsRequest]
    result: Optional[ConvertDocumentResponse] = None

    def is_completed(self) -> bool:
        if self.task_status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:
            return True
        return False
docling_serve/datamodel/requests.py
ADDED
@@ -0,0 +1,62 @@

import base64
from io import BytesIO
from typing import Annotated, Any, Dict, List, Union

from pydantic import BaseModel, Field

from docling.datamodel.base_models import DocumentStream

from docling_serve.datamodel.convert import ConvertDocumentsOptions


class DocumentsConvertBase(BaseModel):
    options: ConvertDocumentsOptions = ConvertDocumentsOptions()


class HttpSource(BaseModel):
    url: Annotated[
        str,
        Field(
            description="HTTP url to process",
            examples=["https://arxiv.org/pdf/2206.01062"],
        ),
    ]
    headers: Annotated[
        Dict[str, Any],
        Field(
            description="Additional headers used to fetch the urls, "
            "e.g. authorization, agent, etc"
        ),
    ] = {}


class FileSource(BaseModel):
    base64_string: Annotated[
        str,
        Field(
            description="Content of the file serialized in base64. "
            "For example it can be obtained via "
            "`base64 -w 0 /path/to/file/pdf-to-convert.pdf`."
        ),
    ]
    filename: Annotated[
        str,
        Field(description="Filename of the uploaded document", examples=["file.pdf"]),
    ]

    def to_document_stream(self) -> DocumentStream:
        buf = BytesIO(base64.b64decode(self.base64_string))
        return DocumentStream(stream=buf, name=self.filename)


class ConvertDocumentHttpSourcesRequest(DocumentsConvertBase):
    http_sources: List[HttpSource]


class ConvertDocumentFileSourcesRequest(DocumentsConvertBase):
    file_sources: List[FileSource]


ConvertDocumentsRequest = Union[
    ConvertDocumentFileSourcesRequest, ConvertDocumentHttpSourcesRequest
]
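A quick sketch of how a FileSource-based request body could be assembled from a local PDF (the file name and path are placeholders):

# Illustrative: build a ConvertDocumentFileSourcesRequest body from a local file.
import base64
from pathlib import Path

pdf_path = Path("document.pdf")  # placeholder path
body = {
    "options": {"to_formats": ["md"]},
    "file_sources": [
        {
            "filename": pdf_path.name,
            "base64_string": base64.b64encode(pdf_path.read_bytes()).decode("utf-8"),
        }
    ],
}
# `body` can then be POSTed as JSON, e.g. to the async endpoint added in this commit.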
docling_serve/datamodel/responses.py
ADDED
@@ -0,0 +1,52 @@

import enum
from typing import Dict, List, Optional

from pydantic import BaseModel

from docling.datamodel.document import ConversionStatus, ErrorItem
from docling.utils.profiling import ProfilingItem
from docling_core.types.doc import DoclingDocument


# Status
class HealthCheckResponse(BaseModel):
    status: str = "ok"


class DocumentResponse(BaseModel):
    filename: str
    md_content: Optional[str] = None
    json_content: Optional[DoclingDocument] = None
    html_content: Optional[str] = None
    text_content: Optional[str] = None
    doctags_content: Optional[str] = None


class ConvertDocumentResponse(BaseModel):
    document: DocumentResponse
    status: ConversionStatus
    errors: List[ErrorItem] = []
    processing_time: float
    timings: Dict[str, ProfilingItem] = {}


class ConvertDocumentErrorResponse(BaseModel):
    status: ConversionStatus


class TaskStatusResponse(BaseModel):
    task_id: str
    task_status: str
    task_position: Optional[int] = None


class MessageKind(str, enum.Enum):
    CONNECTION = "connection"
    UPDATE = "update"
    ERROR = "error"


class WebsocketMessage(BaseModel):
    message: MessageKind
    task: Optional[TaskStatusResponse] = None
    error: Optional[str] = None
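Since these are plain pydantic models, a websocket client can decode the status stream with model_validate_json. A minimal sketch, with made-up sample data:

# Illustrative: decode a message received on the status websocket.
from docling_serve.datamodel.responses import MessageKind, WebsocketMessage

raw = (
    '{"message": "update", '
    '"task": {"task_id": "abc", "task_status": "started", "task_position": null}, '
    '"error": null}'
)  # sample payload for illustration
msg = WebsocketMessage.model_validate_json(raw)
if msg.message == MessageKind.UPDATE and msg.task is not None:
    print(msg.task.task_id, msg.task.task_status)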
docling_serve/docling_conversion.py
CHANGED
@@ -1,27 +1,16 @@
-import base64
 import hashlib
 import json
 import logging
-from io import BytesIO
 from pathlib import Path
-from typing import (
-    Annotated,
-    Any,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Type,
-    Union,
-)
+from typing import Any, Dict, Iterable, Iterator, Optional, Tuple, Type, Union
+
+from fastapi import HTTPException

 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
-from docling.datamodel.base_models import DocumentStream, InputFormat
+from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
     EasyOcrOptions,
@@ -35,235 +24,14 @@ from docling.datamodel.pipeline_options import (
 )
 from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
 from docling_core.types.doc import ImageRefMode
-from fastapi import HTTPException
-from pydantic import BaseModel, Field

+from docling_serve.datamodel.convert import ConvertDocumentsOptions
 from docling_serve.helper_functions import _to_list_of_strings
 from docling_serve.settings import docling_serve_settings

 _log = logging.getLogger(__name__)


-# Define the input options for the API
-class ConvertDocumentsOptions(BaseModel):
[... remainder of the removed block: the full ConvertDocumentsOptions model plus DocumentsConvertBase, HttpSource, FileSource, ConvertDocumentHttpSourcesRequest, ConvertDocumentFileSourcesRequest and ConvertDocumentsRequest, identical to the new docling_serve/datamodel/convert.py and docling_serve/datamodel/requests.py shown above ...]
-
-
 # Document converters will be preloaded and stored in a dictionary
 converters: Dict[bytes, DocumentConverter] = {}

ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache
|
| 2 |
+
|
| 3 |
+
from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@lru_cache
|
| 7 |
+
def get_orchestrator() -> AsyncLocalOrchestrator:
|
| 8 |
+
return AsyncLocalOrchestrator()
|
docling_serve/engines/async_local/__init__.py
ADDED
File without changes
docling_serve/engines/async_local/orchestrator.py
ADDED
@@ -0,0 +1,101 @@

import asyncio
import logging
import uuid
from typing import Dict, List, Optional, Set

from fastapi import WebSocket

from docling_serve.datamodel.engines import Task, TaskStatus
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.responses import (
    MessageKind,
    TaskStatusResponse,
    WebsocketMessage,
)
from docling_serve.engines.async_local.worker import AsyncLocalWorker
from docling_serve.engines.base_orchestrator import BaseOrchestrator
from docling_serve.settings import docling_serve_settings

_log = logging.getLogger(__name__)


class OrchestratorError(Exception):
    pass


class TaskNotFoundError(OrchestratorError):
    pass


class AsyncLocalOrchestrator(BaseOrchestrator):
    def __init__(self):
        self.task_queue = asyncio.Queue()
        self.tasks: Dict[str, Task] = {}
        self.queue_list: List[str] = []
        self.task_subscribers: Dict[str, Set[WebSocket]] = {}

    async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
        task_id = str(uuid.uuid4())
        task = Task(task_id=task_id, request=request)
        self.tasks[task_id] = task
        self.queue_list.append(task_id)
        self.task_subscribers[task_id] = set()
        await self.task_queue.put(task_id)
        return task

    async def queue_size(self) -> int:
        return self.task_queue.qsize()

    async def get_queue_position(self, task_id: str) -> Optional[int]:
        return (
            self.queue_list.index(task_id) + 1 if task_id in self.queue_list else None
        )

    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
        if task_id not in self.tasks:
            raise TaskNotFoundError()
        return self.tasks[task_id]

    async def task_result(self, task_id: str):
        if task_id not in self.tasks:
            raise TaskNotFoundError()
        return self.tasks[task_id].result

    async def process_queue(self):
        # Create a pool of workers
        workers = []
        for i in range(docling_serve_settings.eng_loc_num_workers):
            _log.debug(f"Starting worker {i}")
            w = AsyncLocalWorker(i, self)
            worker_task = asyncio.create_task(w.loop())
            workers.append(worker_task)

        # Wait for all workers to complete (they won't, as they run indefinitely)
        await asyncio.gather(*workers)
        _log.debug("All workers completed.")

    async def notify_task_subscribers(self, task_id: str):
        if task_id not in self.task_subscribers:
            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")

        task = self.tasks[task_id]
        task_queue_position = await self.get_queue_position(task_id)
        msg = TaskStatusResponse(
            task_id=task.task_id,
            task_status=task.task_status,
            task_position=task_queue_position,
        )
        for websocket in self.task_subscribers[task_id]:
            await websocket.send_text(
                WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
            )
            if task.is_completed():
                await websocket.close()

    async def notify_queue_positions(self):
        for task_id in self.task_subscribers.keys():
            # notify only pending tasks
            if self.tasks[task_id].task_status != TaskStatus.PENDING:
                continue

            await self.notify_task_subscribers(task_id)
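A client-side sketch of following a task over the new websocket endpoint, using the websockets package added to the dependencies. The base URL and the task id are placeholders; any message the client sends is treated by the server as a request for another update.

# Illustrative websocket status client (not part of this commit).
import asyncio
import json

import websockets  # added as a project dependency in this commit


async def follow_task(task_id: str) -> None:
    uri = f"ws://localhost:5001/v1alpha/status/ws/{task_id}"  # assumed host/port
    async with websockets.connect(uri) as ws:
        while True:
            msg = json.loads(await ws.recv())
            print(msg["message"], msg.get("task"))
            task = msg.get("task") or {}
            if task.get("task_status") in ("success", "failure"):
                break
            # Any client message is interpreted by the server as "send me an update".
            await ws.send("update?")


# asyncio.run(follow_task("<task-id>"))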
docling_serve/engines/async_local/worker.py
ADDED
@@ -0,0 +1,116 @@

import asyncio
import logging
import time
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from fastapi import BackgroundTasks

from docling.datamodel.base_models import DocumentStream

from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.requests import ConvertDocumentFileSourcesRequest
from docling_serve.datamodel.responses import ConvertDocumentResponse
from docling_serve.docling_conversion import convert_documents
from docling_serve.response_preparation import process_results

if TYPE_CHECKING:
    from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator

_log = logging.getLogger(__name__)


class AsyncLocalWorker:
    def __init__(self, worker_id: int, orchestrator: "AsyncLocalOrchestrator"):
        self.worker_id = worker_id
        self.orchestrator = orchestrator

    async def loop(self):
        _log.debug(f"Starting loop for worker {self.worker_id}")
        while True:
            task_id: str = await self.orchestrator.task_queue.get()
            self.orchestrator.queue_list.remove(task_id)

            if task_id not in self.orchestrator.tasks:
                raise RuntimeError(f"Task {task_id} not found.")
            task = self.orchestrator.tasks[task_id]

            try:
                task.task_status = TaskStatus.STARTED
                _log.info(f"Worker {self.worker_id} processing task {task_id}")

                # Notify clients about task updates
                await self.orchestrator.notify_task_subscribers(task_id)

                # Notify clients about queue updates
                await self.orchestrator.notify_queue_positions()

                # Get the current event loop
                asyncio.get_event_loop()

                # Define a callback function to send progress updates to the client.
                # TODO: send partial updates, e.g. when a document in the batch is done
                def run_conversion():
                    sources: List[Union[str, DocumentStream]] = []
                    headers: Optional[Dict[str, Any]] = None
                    if isinstance(task.request, ConvertDocumentFileSourcesRequest):
                        for file_source in task.request.file_sources:
                            sources.append(file_source.to_document_stream())
                    else:
                        for http_source in task.request.http_sources:
                            sources.append(http_source.url)
                            if headers is None and http_source.headers:
                                headers = http_source.headers

                    # Note: results are only an iterator->lazy evaluation
                    results = convert_documents(
                        sources=sources,
                        options=task.request.options,
                        headers=headers,
                    )

                    # The real processing will happen here
                    response = process_results(
                        background_tasks=BackgroundTasks(),
                        conversion_options=task.request.options,
                        conv_results=results,
                    )

                    return response

                # Run the prediction in a thread to avoid blocking the event loop.
                start_time = time.monotonic()
                # future = asyncio.run_coroutine_threadsafe(
                #     run_conversion(),
                #     loop=loop
                # )
                # response = future.result()

                response = await asyncio.to_thread(
                    run_conversion,
                )
                processing_time = time.monotonic() - start_time

                if not isinstance(response, ConvertDocumentResponse):
                    _log.error(
                        f"Worker {self.worker_id} got un-processable "
                        "result for {task_id}: {type(response)}"
                    )
                task.result = response
                task.request = None

                task.task_status = TaskStatus.SUCCESS
                _log.info(
                    f"Worker {self.worker_id} completed job {task_id} "
                    f"in {processing_time:.2f} seconds"
                )

            except Exception as e:
                _log.error(
                    f"Worker {self.worker_id} failed to process job {task_id}: {e}"
                )
                task.task_status = TaskStatus.FAILURE

            finally:
                await self.orchestrator.notify_task_subscribers(task_id)
                self.orchestrator.task_queue.task_done()
                _log.debug(f"Worker {self.worker_id} completely done with {task_id}")
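The worker relies on asyncio.to_thread to keep the event loop responsive while a conversion runs. The pattern in isolation, as a sketch with a stand-in blocking function:

# Minimal illustration of the asyncio.to_thread pattern used by AsyncLocalWorker.
import asyncio
import time


def blocking_conversion() -> str:
    time.sleep(2)  # stands in for the CPU/IO-heavy docling conversion
    return "done"


async def main() -> None:
    # The event loop stays free to serve status polls and websockets while this runs.
    result = await asyncio.to_thread(blocking_conversion)
    print(result)


asyncio.run(main())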
docling_serve/engines/base_orchestrator.py
ADDED
@@ -0,0 +1,21 @@

from abc import ABC, abstractmethod

from docling_serve.datamodel.engines import Task


class BaseOrchestrator(ABC):
    @abstractmethod
    async def enqueue(self, task) -> Task:
        pass

    @abstractmethod
    async def queue_size(self) -> int:
        pass

    @abstractmethod
    async def task_status(self, task_id: str) -> Task:
        pass

    @abstractmethod
    async def task_result(self, task_id: str):
        pass
docling_serve/engines/block_local/__init__.py
ADDED
File without changes
docling_serve/gradio_ui.py
CHANGED
@@ -333,7 +333,6 @@ with gr.Blocks(
     title="Docling Serve",
     delete_cache=(3600, 3600),  # Delete all files older than 1 hour every hour
 ) as ui:
-
     # Constants stored in states to be able to pass them as inputs to functions
     processing_text = gr.State("Processing your document(s), please wait...")
     true_bool = gr.State(True)
@@ -593,9 +592,7 @@
         set_outputs_visibility_direct,
         inputs=[false_bool, false_bool],
         outputs=[content_output, file_output],
-    ).then(
-        clear_url_input, inputs=None, outputs=[url_input]
-    )
+    ).then(clear_url_input, inputs=None, outputs=[url_input])

     # File processing
     file_process_btn.click(
@@ -664,6 +661,4 @@
         set_outputs_visibility_direct,
         inputs=[false_bool, false_bool],
         outputs=[content_output, file_output],
-    ).then(
-        clear_file_input, inputs=None, outputs=[file_input]
-    )
+    ).then(clear_file_input, inputs=None, outputs=[file_input])
docling_serve/response_preparation.py
CHANGED
@@ -4,40 +4,19 @@ import shutil
 import tempfile
 import time
 from pathlib import Path
-from typing import
+from typing import Iterable, Union

-from docling.datamodel.base_models import OutputFormat
-from docling.datamodel.document import ConversionResult, ConversionStatus, ErrorItem
-from docling.utils.profiling import ProfilingItem
-from docling_core.types.doc import DoclingDocument, ImageRefMode
 from fastapi import BackgroundTasks, HTTPException
 from fastapi.responses import FileResponse
-from pydantic import BaseModel

-from docling_serve.docling_conversion import ConvertDocumentsOptions
-
-_log = logging.getLogger(__name__)
-
-
-class DocumentResponse(BaseModel):
-    filename: str
-    md_content: Optional[str] = None
-    json_content: Optional[DoclingDocument] = None
-    html_content: Optional[str] = None
-    text_content: Optional[str] = None
-    doctags_content: Optional[str] = None
-
[... also removed here: the ConvertDocumentResponse and ConvertDocumentErrorResponse models, identical to the ones now defined in docling_serve/datamodel/responses.py ...]
+from docling.datamodel.base_models import OutputFormat
+from docling.datamodel.document import ConversionResult, ConversionStatus
+from docling_core.types.doc import ImageRefMode

+from docling_serve.datamodel.convert import ConvertDocumentsOptions
+from docling_serve.datamodel.responses import ConvertDocumentResponse, DocumentResponse

+_log = logging.getLogger(__name__)


 def _export_document_as_content(
@@ -49,7 +28,6 @@ def _export_document_as_content(
     export_doctags: bool,
     image_mode: ImageRefMode,
 ):
-
     document = DocumentResponse(filename=conv_res.input.file.name)

     if conv_res.status == ConversionStatus.SUCCESS:
@@ -86,7 +64,6 @@ def _export_documents_as_files(
     export_doctags: bool,
     image_export_mode: ImageRefMode,
 ):
-
     success_count = 0
     failure_count = 0

@@ -150,7 +127,6 @@ def process_results(
     conversion_options: ConvertDocumentsOptions,
     conv_results: Iterable[ConversionResult],
 ) -> Union[ConvertDocumentResponse, FileResponse]:
-
     # Let's start by processing the documents
     try:
         start_time = time.monotonic()
docling_serve/settings.py
CHANGED
@@ -3,6 +3,8 @@ from typing import Optional, Union

 from pydantic_settings import BaseSettings, SettingsConfigDict

+from docling_serve.datamodel.engines import AsyncEngine
+

 class UvicornSettings(BaseSettings):
     model_config = SettingsConfigDict(
@@ -28,6 +30,9 @@ class DoclingServeSettings(BaseSettings):
     enable_ui: bool = False
     artifacts_path: Optional[Path] = None

+    eng_kind: AsyncEngine = AsyncEngine.LOCAL
+    eng_loc_num_workers: int = 2
+

 uvicorn_settings = UvicornSettings()
 docling_serve_settings = DoclingServeSettings()
pyproject.toml
CHANGED
@@ -30,7 +30,7 @@ classifiers = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-    "docling~=2.
+    "docling~=2.25.1",
     "fastapi[standard]~=0.115",
     "httpx~=0.28",
     "pydantic~=2.10",
@@ -38,6 +38,7 @@ dependencies = [
     "python-multipart>=0.0.14,<0.1.0",
     "typer~=0.12",
     "uvicorn[standard]>=0.29.0,<1.0.0",
+    "websockets~=14.0",
 ]
 
 [project.optional-dependencies]
@@ -164,9 +165,19 @@ ignore = [
 [tool.ruff.lint.mccabe]
 max-complexity = 15
 
+[tool.ruff.lint.isort.sections]
+"docling" = ["docling", "docling_core"]
+
 [tool.ruff.lint.isort]
 combine-as-imports = true
-
+section-order = [
+    "future",
+    "standard-library",
+    "third-party",
+    "docling",
+    "first-party",
+    "local-folder",
+]
 
 [tool.mypy]
 pretty = true
@@ -180,10 +191,6 @@ module = [
     "easyocr.*",
     "tesserocr.*",
     "rapidocr_onnxruntime.*",
-    "docling_conversion.*",
-    "gradio_ui.*",
-    "response_preparation.*",
-    "helper_functions.*",
     "requests.*",
 ]
 ignore_missing_imports = true
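The new [tool.ruff.lint.isort.sections] table introduces a custom "docling" import section that sorts between third-party and first-party imports, which is exactly the grouping visible in the docling_serve/response_preparation.py import hunk above. A short illustration of the resulting order (the modules are taken from this diff; the grouping comments are only for readability):

# standard library
import time

# third party
import httpx

# "docling" section (docling, docling_core)
from docling.datamodel.document import ConversionResult
from docling_core.types.doc import ImageRefMode

# first party
from docling_serve.datamodel.convert import ConvertDocumentsOptions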
tests/test_1-file-all-outputs.py
CHANGED
@@ -89,7 +89,7 @@ async def test_convert_file(async_client):
     check.is_in(
         '{"schema_name": "DoclingDocument"',
         json.dumps(data["document"]["json_content"]),
-        msg=f
+        msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
     )
     # HTML check
     check.is_in(
tests/test_1-url-all-outputs.py
CHANGED
@@ -83,7 +83,7 @@ async def test_convert_url(async_client):
     check.is_in(
         '{"schema_name": "DoclingDocument"',
         json.dumps(data["document"]["json_content"]),
-        msg=f
+        msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
     )
     # HTML check
     check.is_in(
tests/test_1-url-async-ws.py
ADDED
@@ -0,0 +1,48 @@
+import base64
+from pathlib import Path
+
+import httpx
+import pytest
+import pytest_asyncio
+from websockets.sync.client import connect
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_url(async_client: httpx.AsyncClient):
+    """Test convert URL to all outputs"""
+
+    doc_filename = Path("tests/2408.09869v5.pdf")
+    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
+
+    base_url = "http://localhost:5001/v1alpha"
+    payload = {
+        "options": {
+            "to_formats": ["md", "json"],
+            "image_export_mode": "placeholder",
+            "ocr": True,
+            "abort_on_error": False,
+            "return_as_file": False,
+        },
+        # "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}],
+        "file_sources": [{"base64_string": encoded_doc, "filename": doc_filename.name}],
+    }
+    # print(json.dumps(payload, indent=2))
+
+    for n in range(5):
+        response = await async_client.post(
+            f"{base_url}/convert/source/async", json=payload
+        )
+        assert response.status_code == 200, "Response should be 200 OK"
+
+        task = response.json()
+
+        uri = f"ws://localhost:5001/v1alpha/status/ws/{task['task_id']}"
+        with connect(uri) as websocket:
+            for message in websocket:
+                print(message)
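The test above simply prints every message received from the status websocket. A sketch of how a client could stop listening once the task reaches a terminal state, assuming the messages are JSON payloads carrying the same task_status field that the polling test below inspects (the actual message schema is not shown in this excerpt, and wait_for_completion is a hypothetical helper, not part of the tests):

import json

from websockets.sync.client import connect

def wait_for_completion(base_ws_url: str, task_id: str) -> dict:
    # base_ws_url would be something like "ws://localhost:5001/v1alpha"
    with connect(f"{base_ws_url}/status/ws/{task_id}") as websocket:
        for message in websocket:
            update = json.loads(message)  # assumption: each message is a JSON task-status payload
            if update.get("task_status") in ("success", "failure"):
                return update
    return {}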
tests/test_1-url-async.py
ADDED
@@ -0,0 +1,60 @@
+import json
+import random
+import time
+
+import httpx
+import pytest
+import pytest_asyncio
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_url(async_client):
+    """Test convert URL to all outputs"""
+
+    example_docs = [
+        "https://arxiv.org/pdf/2411.19710",
+        "https://arxiv.org/pdf/2501.17887",
+        "https://www.nature.com/articles/s41467-024-50779-y.pdf",
+        "https://arxiv.org/pdf/2306.12802",
+        "https://arxiv.org/pdf/2311.18481",
+    ]
+
+    base_url = "http://localhost:5001/v1alpha"
+    payload = {
+        "options": {
+            "to_formats": ["md", "json"],
+            "image_export_mode": "placeholder",
+            "ocr": True,
+            "abort_on_error": False,
+            "return_as_file": False,
+        },
+        "http_sources": [{"url": random.choice(example_docs)}],
+    }
+    print(json.dumps(payload, indent=2))
+
+    for n in range(5):
+        response = await async_client.post(
+            f"{base_url}/convert/source/async", json=payload
+        )
+        assert response.status_code == 200, "Response should be 200 OK"
+
+        task = response.json()
+
+        print(json.dumps(task, indent=2))
+
+        while task["task_status"] not in ("success", "failure"):
+            response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
+            assert response.status_code == 200, "Response should be 200 OK"
+            task = response.json()
+            print(f"{task['task_status']=}")
+            print(f"{task['task_position']=}")
+
+            time.sleep(2)
+
+        assert task["task_status"] == "success"
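From the fields these two async tests read, the task object returned by POST /v1alpha/convert/source/async and by GET /v1alpha/status/poll/{task_id} exposes at least task_id, task_status and task_position. A rough sketch of such a payload (field values are illustrative, and additional fields may well be present):

task = {
    "task_id": "<opaque identifier>",  # used to build the poll and websocket URLs
    "task_status": "success",          # terminal values observed in the tests: "success", "failure"
    "task_position": 0,                # queue position reported while the task waits
}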
tests/test_2-files-all-outputs.py
CHANGED
@@ -57,18 +57,18 @@ async def test_convert_file(async_client):
     content_disposition = response.headers.get("content-disposition")
 
     with check:
-        assert (
-
-        )
+        assert content_disposition is not None, (
+            "Content-Disposition header should be present"
+        )
     with check:
         assert "attachment" in content_disposition, "Response should be an attachment"
     with check:
-        assert (
-            '
-        )
+        assert 'filename="converted_docs.zip"' in content_disposition, (
+            "Attachment filename should be 'converted_docs.zip'"
+        )
 
     content_type = response.headers.get("content-type")
     with check:
-        assert (
-
-        )
+        assert content_type == "application/zip", (
+            "Content-Type should be 'application/zip'"
+        )
tests/test_2-urls-all-outputs.py
CHANGED
@@ -50,18 +50,18 @@ async def test_convert_url(async_client):
     content_disposition = response.headers.get("content-disposition")
 
     with check:
-        assert (
-
-        )
+        assert content_disposition is not None, (
+            "Content-Disposition header should be present"
+        )
     with check:
         assert "attachment" in content_disposition, "Response should be an attachment"
     with check:
-        assert (
-            '
-        )
+        assert 'filename="converted_docs.zip"' in content_disposition, (
+            "Attachment filename should be 'converted_docs.zip'"
+        )
 
     content_type = response.headers.get("content-type")
     with check:
-        assert (
-
-        )
+        assert content_type == "application/zip", (
+            "Content-Type should be 'application/zip'"
+        )
uv.lock
CHANGED
@@ -349,38 +349,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2e/38/3fd83c4690dc7d753a442a284b3826ea5e5c380a411443c66421cd823898/cryptography-44.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7", size = 3134657 },
 ]
 
-[[package]]
-name = "deepsearch-glm"
-version = "1.0.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "pywin32", marker = "sys_platform == 'win32' or (extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124')" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/73/d5/a907234e57f5c4f6480c9ddbc3cdacc47f727c768e502be3d361719fac4e/deepsearch_glm-1.0.0.tar.gz", hash = "sha256:e8dce88ac519a693c260f28bd3c4ec409811e65ade84fb508f6c6e37ca065e62", size = 2401014 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/40/65/4b2013784d5ed8d3664a2efa61f15600c8bf090766b0363c036d78aca550/deepsearch_glm-1.0.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:94792b57df7a1c4ba8b47ebd8f36ea0a090d4f27a4fba39bd7b166b6b537260a", size = 6303790 },
-    { url = "https://files.pythonhosted.org/packages/45/2a/1e95260a712948a21b74dcb239032d9e612f7e1a273657008655749f4115/deepsearch_glm-1.0.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ff46e352e96a2f56ce7ae4fdf04b271ee841c29ff159b1dec0e5ecaaadba8d4d", size = 5945851 },
-    { url = "https://files.pythonhosted.org/packages/9e/1a/5c37a98f27644fd02bc447df651e8d5ce484cd6ce7cb178218625b4de5bc/deepsearch_glm-1.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d77d3d94d49641888aa15f3ad23e81158e791aa9d9608dd8168dc71788e56f3", size = 7431282 },
-    { url = "https://files.pythonhosted.org/packages/e8/e2/56b5e7ae3ccc4d8ee758427c8c9a403c985e250a468c53538c269897bef2/deepsearch_glm-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:143de0fd111a570be12935d8799a2715fe1775d4dc4e256337860b429cee5d36", size = 7759571 },
-    { url = "https://files.pythonhosted.org/packages/61/f4/e39a5090a2bf0d641449918865566ad5adabef156993a922bdbf4a3ebb60/deepsearch_glm-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9f2872dd573cd2206ce7f9e2e6016c38b66d9ecbd983283ff5e8c6023813c311", size = 7904646 },
-    { url = "https://files.pythonhosted.org/packages/41/f7/8e8dd9738554f97522b59b0a6d7680ccf2d527bd3471ec4aa4e52acf552a/deepsearch_glm-1.0.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:e64d94ff5209f0a11e8c75c6b28b033ef27b95a22c2fbcbd945e7fe8cc421545", size = 6309301 },
-    { url = "https://files.pythonhosted.org/packages/17/37/4d8514d8ef851e44513a71f675a7ebb373f109aece38e324c7d444ced20c/deepsearch_glm-1.0.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a5702205677b768b51f881d15d933370f6ef3c826dfac3b9aa0b904d2e6c495a", size = 5951522 },
-    { url = "https://files.pythonhosted.org/packages/0c/c6/3680318e66df278fa7f0811dc862d6cb3c328ce168b4f36736eb77120b6d/deepsearch_glm-1.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0417a2ae998e1709f03458cfb9adb55423bb1328224eb055300796baa757879f", size = 7434315 },
-    { url = "https://files.pythonhosted.org/packages/c3/cd/9ffb616d347d568f868f47585b3261c16e277aa7b37740e8720eee71c539/deepsearch_glm-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f0e1efe9af0d28e9b473fe599246deb3a0be7c3d546a478da284747144d086a", size = 7761264 },
-    { url = "https://files.pythonhosted.org/packages/3d/d3/e5ebdda9cee8a1c846e6a960a0e5b97624aff2f248c2bc89ae490b9a1342/deepsearch_glm-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:807faf13eb0deea55a1951d479a85d5e20de0ff8b2e0b57b2f7939552759a426", size = 7908603 },
-    { url = "https://files.pythonhosted.org/packages/60/ca/6adbadc979910b11594cd0242f1991942c22528eead431d47de064ac2860/deepsearch_glm-1.0.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:56d9575df9eceb8c2ae33e3d15e133924cc195714c3d268599b6f8414c1f6bb8", size = 6308715 },
-    { url = "https://files.pythonhosted.org/packages/20/7c/bf1e9c458705c7143c6630cb6847554ad694d25dc6f1f038512b9c86160a/deepsearch_glm-1.0.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:51f5c6522f60ba73eb12eeb7217bd98d871ba7c078337a4059d05878d8baf2d6", size = 5949609 },
-    { url = "https://files.pythonhosted.org/packages/21/b1/eb0cd0db50d05f2d7a510a77960e85e6caee727eb3d931ed0ec067917813/deepsearch_glm-1.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6211eaf497ad7cfcb68f80f9b5387940be0204fe149a9fc03988a95145f410a", size = 7433929 },
-    { url = "https://files.pythonhosted.org/packages/3a/7e/2b7db77ff02fe9eec41f3605fcd72e3eb4e6b48561b344d432b417a75cfe/deepsearch_glm-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b003bf457fce61ea4de79e2d7d0228a1ae349f677eb6570e745f79d4429804f", size = 7760438 },
-    { url = "https://files.pythonhosted.org/packages/ab/97/ffb2bb5d2432c7b0e9f3a3e6b5873fbcd6e19e82b620393bfb8e01bdecb1/deepsearch_glm-1.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9d61f66048e6ab60fe9f84c823fd593bf8517755833bd9efb59156d77a2b42d0", size = 7907583 },
-    { url = "https://files.pythonhosted.org/packages/38/06/08c5fd0e1144c2c8d76d06da1545a9cf589278a37f8b9e6235b5b416eb52/deepsearch_glm-1.0.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:7d558e8b365c27ee665d0589165fd074fb252c73715f9cc6aeb4304a63683f37", size = 6308867 },
-    { url = "https://files.pythonhosted.org/packages/ba/fb/f5f9787876b67ce83d5afa4903901be9f8071530bc0706dc2228afc0b6c0/deepsearch_glm-1.0.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3199093a9472e5756214b9b6563f827c19c001c7dd8ae00e03eed1140c12930d", size = 5949719 },
-    { url = "https://files.pythonhosted.org/packages/83/0f/42b5a4aa798acbc6309d748435b006c489e58102b6cb2278e7b8f0194743/deepsearch_glm-1.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f18d1ee68a0479592e0c714e6cbf9e2d0fa8edd692d580da64431c84cbef5c2", size = 7434981 },
-    { url = "https://files.pythonhosted.org/packages/17/6a/c2c4eaa4470b78dde6c03f055cbb09f3f7f15b8a6ff38f5bea5180339e6f/deepsearch_glm-1.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62c1c0ea0a544219da15c017632f9e0be116ecdc335b865c6c5760429557fe23", size = 7760773 },
-    { url = "https://files.pythonhosted.org/packages/01/0a/7c3cf75bad38a8d6ff3842b78b3263dd81ad4eaf1d859f4b8e1ab465cad5/deepsearch_glm-1.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:962f393dcec2204de1a5cb0f635c65258bde2424ad2d4e0f5df770139c3958de", size = 7908766 },
-    { url = "https://files.pythonhosted.org/packages/1f/cd/e6507d924aa69e9647f917ed671e2d62e19e41d4f120a15fcbb583661667/deepsearch_glm-1.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e2315cc4ffe7032dada294a0cd72a47dbc6c0121fd07d4b5719f9a9e9519d091", size = 14644989 },
-]
-
 [[package]]
 name = "dill"
 version = "0.3.9"
@@ -410,12 +378,11 @@ wheels = [
 
 [[package]]
 name = "docling"
-version = "2.
+version = "2.25.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "beautifulsoup4" },
     { name = "certifi" },
-    { name = "deepsearch-glm" },
     { name = "docling-core", extra = ["chunking"] },
     { name = "docling-ibm-models" },
     { name = "docling-parse" },
@@ -438,9 +405,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typer" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/
+sdist = { url = "https://files.pythonhosted.org/packages/f9/88/b6d782d2cd7ed602d2bae1a01e87a6347a37295ad450d86159cc7c252290/docling-2.25.1.tar.gz", hash = "sha256:ba2fce77659f4ccf1c8a696531ea9f17253215dbebfac6536012bbc6d1c29ce8", size = 112676 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/
+    { url = "https://files.pythonhosted.org/packages/2a/c1/6c58516672f0f60c432ae331391b6548e4fdcb7b6a6dcd7725605284dcf7/docling-2.25.1-py3-none-any.whl", hash = "sha256:92318591342fc50781134fc553c6c57b703ce43e8095a80d59ed02206d0f560c", size = 145677 },
 ]
 
 [[package]]
@@ -472,14 +439,16 @@ chunking = [
 
 [[package]]
 name = "docling-ibm-models"
-version = "3.
+version = "3.4.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
+    { name = "docling-core" },
     { name = "huggingface-hub" },
     { name = "jsonlines" },
     { name = "numpy" },
     { name = "opencv-python-headless" },
     { name = "pillow" },
+    { name = "pydantic" },
     { name = "safetensors", extra = ["torch"] },
     { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'darwin' and extra == 'extra-13-docling-serve-cpu') or (platform_machine == 'x86_64' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (sys_platform != 'darwin' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124')" },
     { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (extra != 'extra-13-docling-serve-cpu' and extra != 'extra-13-docling-serve-cu124')" },
@@ -492,9 +461,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "transformers" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/
+sdist = { url = "https://files.pythonhosted.org/packages/eb/a5/88d5b7c970d5e10a06062fe9e9de3cde6acdefcc1f85854f689a82863c2a/docling_ibm_models-3.4.1.tar.gz", hash = "sha256:093b4dff2ea284a4953c3aa009e29945208b8d389b94fb14940a03a93f673e96", size = 69794 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/
+    { url = "https://files.pythonhosted.org/packages/af/8f/0f2b823fa09d06deacbdfc6d5d7809d462ddc508f43146960083d113c4c6/docling_ibm_models-3.4.1-py3-none-any.whl", hash = "sha256:c3582c99dddfa3f0eafcf80cf1267fd8efa39c4a74cc7a88f9dd49684fac2986", size = 80886 },
 ]
 
 [[package]]
@@ -546,14 +515,15 @@ dependencies = [
     { name = "python-multipart" },
     { name = "typer" },
     { name = "uvicorn", extra = ["standard"] },
+    { name = "websockets" },
 ]
 
 [package.optional-dependencies]
 cpu = [
-    { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "
-    { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "
-    { name = "torchvision", version = "0.21.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'darwin'
-    { name = "torchvision", version = "0.21.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'darwin'
+    { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine != 'x86_64' and sys_platform == 'darwin'" },
+    { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine == 'x86_64' or sys_platform != 'darwin'" },
+    { name = "torchvision", version = "0.21.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux')" },
+    { name = "torchvision", version = "0.21.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 cu124 = [
     { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" } },
@@ -583,7 +553,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "docling", specifier = "~=2.
+    { name = "docling", specifier = "~=2.25.1" },
     { name = "fastapi", extras = ["standard"], specifier = "~=0.115" },
     { name = "gradio", marker = "extra == 'ui'", specifier = "~=5.9" },
     { name = "httpx", specifier = "~=0.28" },
@@ -599,6 +569,7 @@ requires-dist = [
     { name = "torchvision", marker = "extra == 'cu124'", specifier = ">=0.21.0", index = "https://download.pytorch.org/whl/cu124", conflict = { package = "docling-serve", extra = "cu124" } },
     { name = "typer", specifier = "~=0.12" },
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.29.0,<1.0.0" },
+    { name = "websockets", specifier = "~=14.0" },
 ]
 provides-extras = ["ui", "tesserocr", "rapidocr", "cpu", "cu124"]