Michele Dolfi committed on
Commit
360d0c5
·
unverified ·
1 Parent(s): d60458e

chore: Remove deprecated type aliases and run as pre-commit (#79)

Browse files
docling_serve/app.py CHANGED
@@ -5,7 +5,7 @@ import tempfile
5
  from contextlib import asynccontextmanager
6
  from io import BytesIO
7
  from pathlib import Path
8
- from typing import Annotated, Any, Dict, List, Optional, Union
9
 
10
  from fastapi import (
11
  BackgroundTasks,
@@ -205,8 +205,8 @@ def create_app(): # noqa: C901
205
  def process_url(
206
  background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest
207
  ):
208
- sources: List[Union[str, DocumentStream]] = []
209
- headers: Optional[Dict[str, Any]] = None
210
  if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
211
  for file_source in conversion_request.file_sources:
212
  sources.append(file_source.to_document_stream())
@@ -242,7 +242,7 @@ def create_app(): # noqa: C901
242
  )
243
  async def process_file(
244
  background_tasks: BackgroundTasks,
245
- files: List[UploadFile],
246
  options: Annotated[
247
  ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
248
  ],
 
5
  from contextlib import asynccontextmanager
6
  from io import BytesIO
7
  from pathlib import Path
8
+ from typing import Annotated, Any, Optional, Union
9
 
10
  from fastapi import (
11
  BackgroundTasks,
 
205
  def process_url(
206
  background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest
207
  ):
208
+ sources: list[Union[str, DocumentStream]] = []
209
+ headers: Optional[dict[str, Any]] = None
210
  if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
211
  for file_source in conversion_request.file_sources:
212
  sources.append(file_source.to_document_stream())
 
242
  )
243
  async def process_file(
244
  background_tasks: BackgroundTasks,
245
+ files: list[UploadFile],
246
  options: Annotated[
247
  ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
248
  ],
docling_serve/datamodel/convert.py CHANGED
@@ -1,5 +1,5 @@
1
  # Define the input options for the API
2
- from typing import Annotated, List, Optional
3
 
4
  from pydantic import BaseModel, Field
5
 
@@ -10,7 +10,7 @@ from docling_core.types.doc import ImageRefMode
10
 
11
  class ConvertDocumentsOptions(BaseModel):
12
  from_formats: Annotated[
13
- List[InputFormat],
14
  Field(
15
  description=(
16
  "Input format(s) to convert from. String or list of strings. "
@@ -22,7 +22,7 @@ class ConvertDocumentsOptions(BaseModel):
22
  ] = list(InputFormat)
23
 
24
  to_formats: Annotated[
25
- List[OutputFormat],
26
  Field(
27
  description=(
28
  "Output format(s) to convert to. String or list of strings. "
@@ -83,7 +83,7 @@ class ConvertDocumentsOptions(BaseModel):
83
  ] = OcrEngine.EASYOCR
84
 
85
  ocr_lang: Annotated[
86
- Optional[List[str]],
87
  Field(
88
  description=(
89
  "List of languages used by the OCR engine. "
 
1
  # Define the input options for the API
2
+ from typing import Annotated, Optional
3
 
4
  from pydantic import BaseModel, Field
5
 
 
10
 
11
  class ConvertDocumentsOptions(BaseModel):
12
  from_formats: Annotated[
13
+ list[InputFormat],
14
  Field(
15
  description=(
16
  "Input format(s) to convert from. String or list of strings. "
 
22
  ] = list(InputFormat)
23
 
24
  to_formats: Annotated[
25
+ list[OutputFormat],
26
  Field(
27
  description=(
28
  "Output format(s) to convert to. String or list of strings. "
 
83
  ] = OcrEngine.EASYOCR
84
 
85
  ocr_lang: Annotated[
86
+ Optional[list[str]],
87
  Field(
88
  description=(
89
  "List of languages used by the OCR engine. "
docling_serve/datamodel/requests.py CHANGED
@@ -1,6 +1,6 @@
1
  import base64
2
  from io import BytesIO
3
- from typing import Annotated, Any, Dict, List, Union
4
 
5
  from pydantic import BaseModel, Field
6
 
@@ -22,7 +22,7 @@ class HttpSource(BaseModel):
22
  ),
23
  ]
24
  headers: Annotated[
25
- Dict[str, Any],
26
  Field(
27
  description="Additional headers used to fetch the urls, "
28
  "e.g. authorization, agent, etc"
@@ -50,11 +50,11 @@ class FileSource(BaseModel):
50
 
51
 
52
  class ConvertDocumentHttpSourcesRequest(DocumentsConvertBase):
53
- http_sources: List[HttpSource]
54
 
55
 
56
  class ConvertDocumentFileSourcesRequest(DocumentsConvertBase):
57
- file_sources: List[FileSource]
58
 
59
 
60
  ConvertDocumentsRequest = Union[
 
1
  import base64
2
  from io import BytesIO
3
+ from typing import Annotated, Any, Union
4
 
5
  from pydantic import BaseModel, Field
6
 
 
22
  ),
23
  ]
24
  headers: Annotated[
25
+ dict[str, Any],
26
  Field(
27
  description="Additional headers used to fetch the urls, "
28
  "e.g. authorization, agent, etc"
 
50
 
51
 
52
  class ConvertDocumentHttpSourcesRequest(DocumentsConvertBase):
53
+ http_sources: list[HttpSource]
54
 
55
 
56
  class ConvertDocumentFileSourcesRequest(DocumentsConvertBase):
57
+ file_sources: list[FileSource]
58
 
59
 
60
  ConvertDocumentsRequest = Union[
docling_serve/datamodel/responses.py CHANGED
@@ -1,5 +1,5 @@
1
  import enum
2
- from typing import Dict, List, Optional
3
 
4
  from pydantic import BaseModel
5
 
@@ -25,9 +25,9 @@ class DocumentResponse(BaseModel):
25
  class ConvertDocumentResponse(BaseModel):
26
  document: DocumentResponse
27
  status: ConversionStatus
28
- errors: List[ErrorItem] = []
29
  processing_time: float
30
- timings: Dict[str, ProfilingItem] = {}
31
 
32
 
33
  class ConvertDocumentErrorResponse(BaseModel):
 
1
  import enum
2
+ from typing import Optional
3
 
4
  from pydantic import BaseModel
5
 
 
25
  class ConvertDocumentResponse(BaseModel):
26
  document: DocumentResponse
27
  status: ConversionStatus
28
+ errors: list[ErrorItem] = []
29
  processing_time: float
30
+ timings: dict[str, ProfilingItem] = {}
31
 
32
 
33
  class ConvertDocumentErrorResponse(BaseModel):
docling_serve/docling_conversion.py CHANGED
@@ -1,8 +1,9 @@
1
  import hashlib
2
  import json
3
  import logging
 
4
  from pathlib import Path
5
- from typing import Any, Dict, Iterable, Iterator, Optional, Tuple, Type, Union
6
 
7
  from fastapi import HTTPException
8
 
@@ -33,7 +34,7 @@ _log = logging.getLogger(__name__)
33
 
34
 
35
  # Document converters will be preloaded and stored in a dictionary
36
- converters: Dict[bytes, DocumentConverter] = {}
37
 
38
 
39
  # Custom serializer for PdfFormatOption
@@ -69,7 +70,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
69
  # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
70
  def get_pdf_pipeline_opts( # noqa: C901
71
  request: ConvertDocumentsOptions,
72
- ) -> Tuple[PdfFormatOption, bytes]:
73
  if request.ocr_engine == OcrEngine.EASYOCR:
74
  try:
75
  import easyocr # noqa: F401
@@ -129,7 +130,7 @@ def get_pdf_pipeline_opts( # noqa: C901
129
  pipeline_options.images_scale = request.images_scale
130
 
131
  if request.pdf_backend == PdfBackend.DLPARSE_V1:
132
- backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend
133
  elif request.pdf_backend == PdfBackend.DLPARSE_V2:
134
  backend = DoclingParseV2DocumentBackend
135
  elif request.pdf_backend == PdfBackend.PYPDFIUM2:
@@ -177,12 +178,12 @@ def get_pdf_pipeline_opts( # noqa: C901
177
  def convert_documents(
178
  sources: Iterable[Union[Path, str, DocumentStream]],
179
  options: ConvertDocumentsOptions,
180
- headers: Optional[Dict[str, Any]] = None,
181
  ):
182
  pdf_format_option, options_hash = get_pdf_pipeline_opts(options)
183
 
184
  if options_hash not in converters:
185
- format_options: Dict[InputFormat, FormatOption] = {
186
  InputFormat.PDF: pdf_format_option,
187
  InputFormat.IMAGE: pdf_format_option,
188
  }
 
1
  import hashlib
2
  import json
3
  import logging
4
+ from collections.abc import Iterable, Iterator
5
  from pathlib import Path
6
+ from typing import Any, Optional, Union
7
 
8
  from fastapi import HTTPException
9
 
 
34
 
35
 
36
  # Document converters will be preloaded and stored in a dictionary
37
+ converters: dict[bytes, DocumentConverter] = {}
38
 
39
 
40
  # Custom serializer for PdfFormatOption
 
70
  # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
71
  def get_pdf_pipeline_opts( # noqa: C901
72
  request: ConvertDocumentsOptions,
73
+ ) -> tuple[PdfFormatOption, bytes]:
74
  if request.ocr_engine == OcrEngine.EASYOCR:
75
  try:
76
  import easyocr # noqa: F401
 
130
  pipeline_options.images_scale = request.images_scale
131
 
132
  if request.pdf_backend == PdfBackend.DLPARSE_V1:
133
+ backend: type[PdfDocumentBackend] = DoclingParseDocumentBackend
134
  elif request.pdf_backend == PdfBackend.DLPARSE_V2:
135
  backend = DoclingParseV2DocumentBackend
136
  elif request.pdf_backend == PdfBackend.PYPDFIUM2:
 
178
  def convert_documents(
179
  sources: Iterable[Union[Path, str, DocumentStream]],
180
  options: ConvertDocumentsOptions,
181
+ headers: Optional[dict[str, Any]] = None,
182
  ):
183
  pdf_format_option, options_hash = get_pdf_pipeline_opts(options)
184
 
185
  if options_hash not in converters:
186
+ format_options: dict[InputFormat, FormatOption] = {
187
  InputFormat.PDF: pdf_format_option,
188
  InputFormat.IMAGE: pdf_format_option,
189
  }
docling_serve/engines/async_local/orchestrator.py CHANGED
@@ -1,7 +1,7 @@
1
  import asyncio
2
  import logging
3
  import uuid
4
- from typing import Dict, List, Optional, Set
5
 
6
  from fastapi import WebSocket
7
 
@@ -30,9 +30,9 @@ class TaskNotFoundError(OrchestratorError):
30
  class AsyncLocalOrchestrator(BaseOrchestrator):
31
  def __init__(self):
32
  self.task_queue = asyncio.Queue()
33
- self.tasks: Dict[str, Task] = {}
34
- self.queue_list: List[str] = []
35
- self.task_subscribers: Dict[str, Set[WebSocket]] = {}
36
 
37
  async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
38
  task_id = str(uuid.uuid4())
 
1
  import asyncio
2
  import logging
3
  import uuid
4
+ from typing import Optional
5
 
6
  from fastapi import WebSocket
7
 
 
30
  class AsyncLocalOrchestrator(BaseOrchestrator):
31
  def __init__(self):
32
  self.task_queue = asyncio.Queue()
33
+ self.tasks: dict[str, Task] = {}
34
+ self.queue_list: list[str] = []
35
+ self.task_subscribers: dict[str, set[WebSocket]] = {}
36
 
37
  async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
38
  task_id = str(uuid.uuid4())
docling_serve/engines/async_local/worker.py CHANGED
@@ -1,7 +1,7 @@
1
  import asyncio
2
  import logging
3
  import time
4
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
5
 
6
  from fastapi import BackgroundTasks
7
 
@@ -50,8 +50,8 @@ class AsyncLocalWorker:
50
  # Define a callback function to send progress updates to the client.
51
  # TODO: send partial updates, e.g. when a document in the batch is done
52
  def run_conversion():
53
- sources: List[Union[str, DocumentStream]] = []
54
- headers: Optional[Dict[str, Any]] = None
55
  if isinstance(task.request, ConvertDocumentFileSourcesRequest):
56
  for file_source in task.request.file_sources:
57
  sources.append(file_source.to_document_stream())
 
1
  import asyncio
2
  import logging
3
  import time
4
+ from typing import TYPE_CHECKING, Any, Optional, Union
5
 
6
  from fastapi import BackgroundTasks
7
 
 
50
  # Define a callback function to send progress updates to the client.
51
  # TODO: send partial updates, e.g. when a document in the batch is done
52
  def run_conversion():
53
+ sources: list[Union[str, DocumentStream]] = []
54
+ headers: Optional[dict[str, Any]] = None
55
  if isinstance(task.request, ConvertDocumentFileSourcesRequest):
56
  for file_source in task.request.file_sources:
57
  sources.append(file_source.to_document_stream())
docling_serve/helper_functions.py CHANGED
@@ -1,6 +1,6 @@
1
  import inspect
2
  import re
3
- from typing import List, Type, Union
4
 
5
  from fastapi import Depends, Form
6
  from pydantic import BaseModel
@@ -8,7 +8,7 @@ from pydantic import BaseModel
8
 
9
  # Adapted from
10
  # https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
11
- def FormDepends(cls: Type[BaseModel]):
12
  new_parameters = []
13
 
14
  for field_name, model_field in cls.model_fields.items():
@@ -34,8 +34,8 @@ def FormDepends(cls: Type[BaseModel]):
34
  return Depends(as_form_func)
35
 
36
 
37
- def _to_list_of_strings(input_value: Union[str, List[str]]) -> List[str]:
38
- def split_and_strip(value: str) -> List[str]:
39
  if re.search(r"[;,]", value):
40
  return [item.strip() for item in re.split(r"[;,]", value)]
41
  else:
 
1
  import inspect
2
  import re
3
+ from typing import Union
4
 
5
  from fastapi import Depends, Form
6
  from pydantic import BaseModel
 
8
 
9
  # Adapted from
10
  # https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
11
+ def FormDepends(cls: type[BaseModel]):
12
  new_parameters = []
13
 
14
  for field_name, model_field in cls.model_fields.items():
 
34
  return Depends(as_form_func)
35
 
36
 
37
+ def _to_list_of_strings(input_value: Union[str, list[str]]) -> list[str]:
38
+ def split_and_strip(value: str) -> list[str]:
39
  if re.search(r"[;,]", value):
40
  return [item.strip() for item in re.split(r"[;,]", value)]
41
  else:
docling_serve/response_preparation.py CHANGED
@@ -3,8 +3,9 @@ import os
3
  import shutil
4
  import tempfile
5
  import time
 
6
  from pathlib import Path
7
- from typing import Iterable, Union
8
 
9
  from fastapi import BackgroundTasks, HTTPException
10
  from fastapi.responses import FileResponse
 
3
  import shutil
4
  import tempfile
5
  import time
6
+ from collections.abc import Iterable
7
  from pathlib import Path
8
+ from typing import Union
9
 
10
  from fastapi import BackgroundTasks, HTTPException
11
  from fastapi.responses import FileResponse
pyproject.toml CHANGED
@@ -145,7 +145,8 @@ select = [
145
  "S307", # eval
146
  # "T20", # (disallow print statements) keep debugging statements out of the codebase
147
  "W", # pycodestyle warnings
148
- "ASYNC" # async
 
149
  ]
150
 
151
  ignore = [
@@ -154,6 +155,7 @@ ignore = [
154
  "F811", # "redefinition of the same function"
155
  "PL", # Pylint
156
  "RUF012", # Mutable Class Attributes
 
157
  ]
158
 
159
  #extend-select = []
 
145
  "S307", # eval
146
  # "T20", # (disallow print statements) keep debugging statements out of the codebase
147
  "W", # pycodestyle warnings
148
+ "ASYNC", # async
149
+ "UP", # pyupgrade
150
  ]
151
 
152
  ignore = [
 
155
  "F811", # "redefinition of the same function"
156
  "PL", # Pylint
157
  "RUF012", # Mutable Class Attributes
158
+ "UP007", # Option and Union
159
  ]
160
 
161
  #extend-select = []