Spaces:

CerealDev
/

Docling-UI

Configuration error

App Files Community

Tiago Santana committed on Mar 13

Commit

405b27f

unverified ·

1 Parent(s): e32094c

feat: expose options for new features (#92)

Browse files

Signed-off-by: Tiago Santana <[email protected]>

Files changed (3) hide show

docling_serve/datamodel/convert.py +44 -0
docling_serve/docling_conversion.py +4 -0
docling_serve/gradio_ui.py +39 -0

docling_serve/datamodel/convert.py CHANGED Viewed

@@ -172,3 +172,47 @@ class ConvertDocumentsOptions(BaseModel):
             examples=[2.0],
         ),
     ] = 2.0

             examples=[2.0],
         ),
     ] = 2.0
+    do_code_enrichment: Annotated[
+        bool,
+        Field(
+            description=(
+                "If enabled, perform OCR code enrichment. "
+                "Boolean. Optional, defaults to false."
+            ),
+            examples=[False],
+        ),
+    ] = False
+    do_formula_enrichment: Annotated[
+        bool,
+        Field(
+            description=(
+                "If enabled, perform formula OCR, return Latex code. "
+                "Boolean. Optional, defaults to false."
+            ),
+            examples=[False],
+        ),
+    ] = False
+    do_picture_classification: Annotated[
+        bool,
+        Field(
+            description=(
+                "If enabled, classify pictures in documents. "
+                "Boolean. Optional, defaults to false."
+            ),
+            examples=[False],
+        ),
+    ] = False
+    do_picture_description: Annotated[
+        bool,
+        Field(
+            description=(
+                "If enabled, describe pictures in documents. "
+                "Boolean. Optional, defaults to false."
+            ),
+            examples=[False],
+        ),
+    ] = False

docling_serve/docling_conversion.py CHANGED Viewed

@@ -120,6 +120,10 @@ def get_pdf_pipeline_opts(  # noqa: C901
         do_ocr=request.do_ocr,
         ocr_options=ocr_options,
         do_table_structure=request.do_table_structure,
     )
     pipeline_options.table_structure_options.do_cell_matching = True  # do_cell_matching
     pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)

         do_ocr=request.do_ocr,
         ocr_options=ocr_options,
         do_table_structure=request.do_table_structure,
+        do_code_enrichment=request.do_code_enrichment,
+        do_formula_enrichment=request.do_formula_enrichment,
+        do_picture_classification=request.do_picture_classification,
+        do_picture_description=request.do_picture_description,
     )
     pipeline_options.table_structure_options.do_cell_matching = True  # do_cell_matching
     pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)

docling_serve/gradio_ui.py CHANGED Viewed

@@ -185,6 +185,10 @@ def process_url(
     table_mode,
     abort_on_error,
     return_as_file,
 ):
     parameters = {
         "http_sources": [{"url": source} for source in input_sources.split(",")],
@@ -199,6 +203,10 @@ def process_url(
             "table_mode": table_mode,
             "abort_on_error": abort_on_error,
             "return_as_file": return_as_file,
         },
     }
     if (
@@ -237,6 +245,10 @@ def process_file(
     table_mode,
     abort_on_error,
     return_as_file,
 ):
     if not files or len(files) == 0 or files[0] == "":
         logger.error("No files provided.")
@@ -254,6 +266,10 @@ def process_file(
         "table_mode": table_mode,
         "abort_on_error": str(abort_on_error).lower(),
         "return_as_file": str(return_as_file).lower(),
     }
     try:
@@ -478,6 +494,21 @@ with gr.Blocks(
             with gr.Column(scale=1):
                 abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
                 return_as_file = gr.Checkbox(label="Return as File", value=False)
     # Document output
     with gr.Row(visible=False) as content_output:
@@ -561,6 +592,10 @@ with gr.Blocks(
             table_mode,
             abort_on_error,
             return_as_file,
         ],
         outputs=[
             output_markdown,
@@ -630,6 +665,10 @@ with gr.Blocks(
             table_mode,
             abort_on_error,
             return_as_file,
         ],
         outputs=[
             output_markdown,

     table_mode,
     abort_on_error,
     return_as_file,
+    do_code_enrichment,
+    do_formula_enrichment,
+    do_picture_classification,
+    do_picture_description,
 ):
     parameters = {
         "http_sources": [{"url": source} for source in input_sources.split(",")],
             "table_mode": table_mode,
             "abort_on_error": abort_on_error,
             "return_as_file": return_as_file,
+            "do_code_enrichment": do_code_enrichment,
+            "do_formula_enrichment": do_formula_enrichment,
+            "do_picture_classification": do_picture_classification,
+            "do_picture_description": do_picture_description,
         },
     }
     if (
     table_mode,
     abort_on_error,
     return_as_file,
+    do_code_enrichment,
+    do_formula_enrichment,
+    do_picture_classification,
+    do_picture_description,
 ):
     if not files or len(files) == 0 or files[0] == "":
         logger.error("No files provided.")
         "table_mode": table_mode,
         "abort_on_error": str(abort_on_error).lower(),
         "return_as_file": str(return_as_file).lower(),
+        "do_code_enrichment": str(do_code_enrichment).lower(),
+        "do_formula_enrichment": str(do_formula_enrichment).lower(),
+        "do_picture_classification": str(do_picture_classification).lower(),
+        "do_picture_description": str(do_picture_description).lower(),
     }
     try:
             with gr.Column(scale=1):
                 abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
                 return_as_file = gr.Checkbox(label="Return as File", value=False)
+        with gr.Row():
+            with gr.Column():
+                do_code_enrichment = gr.Checkbox(
+                    label="Enable code enrichment", value=False
+                )
+                do_formula_enrichment = gr.Checkbox(
+                    label="Enable formula enrichment", value=False
+                )
+            with gr.Column():
+                do_picture_classification = gr.Checkbox(
+                    label="Enable picture classification", value=False
+                )
+                do_picture_description = gr.Checkbox(
+                    label="Enable picture description", value=False
+                )
     # Document output
     with gr.Row(visible=False) as content_output:
             table_mode,
             abort_on_error,
             return_as_file,
+            do_code_enrichment,
+            do_formula_enrichment,
+            do_picture_classification,
+            do_picture_description,
         ],
         outputs=[
             output_markdown,
             table_mode,
             abort_on_error,
             return_as_file,
+            do_code_enrichment,
+            do_formula_enrichment,
+            do_picture_classification,
+            do_picture_description,
         ],
         outputs=[
             output_markdown,