Eugene committed on
Commit
359c3b5
·
unverified ·
1 Parent(s): 0926049

refactor: Use bytes as options key (#58)

Browse files

Signed-off-by: Eugene <[email protected]>

docling_serve/docling_conversion.py CHANGED
@@ -265,7 +265,7 @@ ConvertDocumentsRequest = Union[
265
 
266
 
267
  # Document converters will be preloaded and stored in a dictionary
268
- converters: Dict[str, DocumentConverter] = {}
269
 
270
 
271
  # Custom serializer for PdfFormatOption
@@ -301,7 +301,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
301
  # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
302
  def get_pdf_pipeline_opts( # noqa: C901
303
  request: ConvertDocumentsOptions,
304
- ) -> Tuple[PdfFormatOption, str]:
305
  if request.ocr_engine == OcrEngine.EASYOCR:
306
  try:
307
  import easyocr # noqa: F401
@@ -401,7 +401,7 @@ def get_pdf_pipeline_opts( # noqa: C901
401
 
402
  serialized_data = _serialize_pdf_format_option(pdf_format_option)
403
 
404
- options_hash = hashlib.sha1(serialized_data.encode()).hexdigest()
405
 
406
  return pdf_format_option, options_hash
407
 
 
265
 
266
 
267
  # Document converters will be preloaded and stored in a dictionary
268
+ converters: Dict[bytes, DocumentConverter] = {}
269
 
270
 
271
  # Custom serializer for PdfFormatOption
 
301
  # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
302
  def get_pdf_pipeline_opts( # noqa: C901
303
  request: ConvertDocumentsOptions,
304
+ ) -> Tuple[PdfFormatOption, bytes]:
305
  if request.ocr_engine == OcrEngine.EASYOCR:
306
  try:
307
  import easyocr # noqa: F401
 
401
 
402
  serialized_data = _serialize_pdf_format_option(pdf_format_option)
403
 
404
+ options_hash = hashlib.sha1(serialized_data.encode()).digest()
405
 
406
  return pdf_format_option, options_hash
407