Spaces:
Configuration error
Configuration error
Tiago Santana
committed on
feat: expose options for new features (#92)
Browse files
Signed-off-by: Tiago Santana <[email protected]>
docling_serve/datamodel/convert.py
CHANGED
|
@@ -172,3 +172,47 @@ class ConvertDocumentsOptions(BaseModel):
|
|
| 172 |
examples=[2.0],
|
| 173 |
),
|
| 174 |
] = 2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
examples=[2.0],
|
| 173 |
),
|
| 174 |
] = 2.0
|
| 175 |
+
|
| 176 |
+
do_code_enrichment: Annotated[
|
| 177 |
+
bool,
|
| 178 |
+
Field(
|
| 179 |
+
description=(
|
| 180 |
+
"If enabled, perform OCR code enrichment. "
|
| 181 |
+
"Boolean. Optional, defaults to false."
|
| 182 |
+
),
|
| 183 |
+
examples=[False],
|
| 184 |
+
),
|
| 185 |
+
] = False
|
| 186 |
+
|
| 187 |
+
do_formula_enrichment: Annotated[
|
| 188 |
+
bool,
|
| 189 |
+
Field(
|
| 190 |
+
description=(
|
| 191 |
+
"If enabled, perform formula OCR, return Latex code. "
|
| 192 |
+
"Boolean. Optional, defaults to false."
|
| 193 |
+
),
|
| 194 |
+
examples=[False],
|
| 195 |
+
),
|
| 196 |
+
] = False
|
| 197 |
+
|
| 198 |
+
do_picture_classification: Annotated[
|
| 199 |
+
bool,
|
| 200 |
+
Field(
|
| 201 |
+
description=(
|
| 202 |
+
"If enabled, classify pictures in documents. "
|
| 203 |
+
"Boolean. Optional, defaults to false."
|
| 204 |
+
),
|
| 205 |
+
examples=[False],
|
| 206 |
+
),
|
| 207 |
+
] = False
|
| 208 |
+
|
| 209 |
+
do_picture_description: Annotated[
|
| 210 |
+
bool,
|
| 211 |
+
Field(
|
| 212 |
+
description=(
|
| 213 |
+
"If enabled, describe pictures in documents. "
|
| 214 |
+
"Boolean. Optional, defaults to false."
|
| 215 |
+
),
|
| 216 |
+
examples=[False],
|
| 217 |
+
),
|
| 218 |
+
] = False
|
docling_serve/docling_conversion.py
CHANGED
|
@@ -120,6 +120,10 @@ def get_pdf_pipeline_opts( # noqa: C901
|
|
| 120 |
do_ocr=request.do_ocr,
|
| 121 |
ocr_options=ocr_options,
|
| 122 |
do_table_structure=request.do_table_structure,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
)
|
| 124 |
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
|
| 125 |
pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)
|
|
|
|
| 120 |
do_ocr=request.do_ocr,
|
| 121 |
ocr_options=ocr_options,
|
| 122 |
do_table_structure=request.do_table_structure,
|
| 123 |
+
do_code_enrichment=request.do_code_enrichment,
|
| 124 |
+
do_formula_enrichment=request.do_formula_enrichment,
|
| 125 |
+
do_picture_classification=request.do_picture_classification,
|
| 126 |
+
do_picture_description=request.do_picture_description,
|
| 127 |
)
|
| 128 |
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
|
| 129 |
pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)
|
docling_serve/gradio_ui.py
CHANGED
|
@@ -185,6 +185,10 @@ def process_url(
|
|
| 185 |
table_mode,
|
| 186 |
abort_on_error,
|
| 187 |
return_as_file,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
):
|
| 189 |
parameters = {
|
| 190 |
"http_sources": [{"url": source} for source in input_sources.split(",")],
|
|
@@ -199,6 +203,10 @@ def process_url(
|
|
| 199 |
"table_mode": table_mode,
|
| 200 |
"abort_on_error": abort_on_error,
|
| 201 |
"return_as_file": return_as_file,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
},
|
| 203 |
}
|
| 204 |
if (
|
|
@@ -237,6 +245,10 @@ def process_file(
|
|
| 237 |
table_mode,
|
| 238 |
abort_on_error,
|
| 239 |
return_as_file,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
):
|
| 241 |
if not files or len(files) == 0 or files[0] == "":
|
| 242 |
logger.error("No files provided.")
|
|
@@ -254,6 +266,10 @@ def process_file(
|
|
| 254 |
"table_mode": table_mode,
|
| 255 |
"abort_on_error": str(abort_on_error).lower(),
|
| 256 |
"return_as_file": str(return_as_file).lower(),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
}
|
| 258 |
|
| 259 |
try:
|
|
@@ -478,6 +494,21 @@ with gr.Blocks(
|
|
| 478 |
with gr.Column(scale=1):
|
| 479 |
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
|
| 480 |
return_as_file = gr.Checkbox(label="Return as File", value=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
# Document output
|
| 483 |
with gr.Row(visible=False) as content_output:
|
|
@@ -561,6 +592,10 @@ with gr.Blocks(
|
|
| 561 |
table_mode,
|
| 562 |
abort_on_error,
|
| 563 |
return_as_file,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
],
|
| 565 |
outputs=[
|
| 566 |
output_markdown,
|
|
@@ -630,6 +665,10 @@ with gr.Blocks(
|
|
| 630 |
table_mode,
|
| 631 |
abort_on_error,
|
| 632 |
return_as_file,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 633 |
],
|
| 634 |
outputs=[
|
| 635 |
output_markdown,
|
|
|
|
| 185 |
table_mode,
|
| 186 |
abort_on_error,
|
| 187 |
return_as_file,
|
| 188 |
+
do_code_enrichment,
|
| 189 |
+
do_formula_enrichment,
|
| 190 |
+
do_picture_classification,
|
| 191 |
+
do_picture_description,
|
| 192 |
):
|
| 193 |
parameters = {
|
| 194 |
"http_sources": [{"url": source} for source in input_sources.split(",")],
|
|
|
|
| 203 |
"table_mode": table_mode,
|
| 204 |
"abort_on_error": abort_on_error,
|
| 205 |
"return_as_file": return_as_file,
|
| 206 |
+
"do_code_enrichment": do_code_enrichment,
|
| 207 |
+
"do_formula_enrichment": do_formula_enrichment,
|
| 208 |
+
"do_picture_classification": do_picture_classification,
|
| 209 |
+
"do_picture_description": do_picture_description,
|
| 210 |
},
|
| 211 |
}
|
| 212 |
if (
|
|
|
|
| 245 |
table_mode,
|
| 246 |
abort_on_error,
|
| 247 |
return_as_file,
|
| 248 |
+
do_code_enrichment,
|
| 249 |
+
do_formula_enrichment,
|
| 250 |
+
do_picture_classification,
|
| 251 |
+
do_picture_description,
|
| 252 |
):
|
| 253 |
if not files or len(files) == 0 or files[0] == "":
|
| 254 |
logger.error("No files provided.")
|
|
|
|
| 266 |
"table_mode": table_mode,
|
| 267 |
"abort_on_error": str(abort_on_error).lower(),
|
| 268 |
"return_as_file": str(return_as_file).lower(),
|
| 269 |
+
"do_code_enrichment": str(do_code_enrichment).lower(),
|
| 270 |
+
"do_formula_enrichment": str(do_formula_enrichment).lower(),
|
| 271 |
+
"do_picture_classification": str(do_picture_classification).lower(),
|
| 272 |
+
"do_picture_description": str(do_picture_description).lower(),
|
| 273 |
}
|
| 274 |
|
| 275 |
try:
|
|
|
|
| 494 |
with gr.Column(scale=1):
|
| 495 |
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
|
| 496 |
return_as_file = gr.Checkbox(label="Return as File", value=False)
|
| 497 |
+
with gr.Row():
|
| 498 |
+
with gr.Column():
|
| 499 |
+
do_code_enrichment = gr.Checkbox(
|
| 500 |
+
label="Enable code enrichment", value=False
|
| 501 |
+
)
|
| 502 |
+
do_formula_enrichment = gr.Checkbox(
|
| 503 |
+
label="Enable formula enrichment", value=False
|
| 504 |
+
)
|
| 505 |
+
with gr.Column():
|
| 506 |
+
do_picture_classification = gr.Checkbox(
|
| 507 |
+
label="Enable picture classification", value=False
|
| 508 |
+
)
|
| 509 |
+
do_picture_description = gr.Checkbox(
|
| 510 |
+
label="Enable picture description", value=False
|
| 511 |
+
)
|
| 512 |
|
| 513 |
# Document output
|
| 514 |
with gr.Row(visible=False) as content_output:
|
|
|
|
| 592 |
table_mode,
|
| 593 |
abort_on_error,
|
| 594 |
return_as_file,
|
| 595 |
+
do_code_enrichment,
|
| 596 |
+
do_formula_enrichment,
|
| 597 |
+
do_picture_classification,
|
| 598 |
+
do_picture_description,
|
| 599 |
],
|
| 600 |
outputs=[
|
| 601 |
output_markdown,
|
|
|
|
| 665 |
table_mode,
|
| 666 |
abort_on_error,
|
| 667 |
return_as_file,
|
| 668 |
+
do_code_enrichment,
|
| 669 |
+
do_formula_enrichment,
|
| 670 |
+
do_picture_classification,
|
| 671 |
+
do_picture_description,
|
| 672 |
],
|
| 673 |
outputs=[
|
| 674 |
output_markdown,
|