Spaces:
Configuration error
Configuration error
Tiago Santana
committed on
feat: expose options for new features (#92)
Browse files
Signed-off-by: Tiago Santana <[email protected]>
docling_serve/datamodel/convert.py
CHANGED
@@ -172,3 +172,47 @@ class ConvertDocumentsOptions(BaseModel):
|
|
172 |
examples=[2.0],
|
173 |
),
|
174 |
] = 2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
examples=[2.0],
|
173 |
),
|
174 |
] = 2.0
|
175 |
+
|
176 |
+
do_code_enrichment: Annotated[
|
177 |
+
bool,
|
178 |
+
Field(
|
179 |
+
description=(
|
180 |
+
"If enabled, perform OCR code enrichment. "
|
181 |
+
"Boolean. Optional, defaults to false."
|
182 |
+
),
|
183 |
+
examples=[False],
|
184 |
+
),
|
185 |
+
] = False
|
186 |
+
|
187 |
+
do_formula_enrichment: Annotated[
|
188 |
+
bool,
|
189 |
+
Field(
|
190 |
+
description=(
|
191 |
+
"If enabled, perform formula OCR, return Latex code. "
|
192 |
+
"Boolean. Optional, defaults to false."
|
193 |
+
),
|
194 |
+
examples=[False],
|
195 |
+
),
|
196 |
+
] = False
|
197 |
+
|
198 |
+
do_picture_classification: Annotated[
|
199 |
+
bool,
|
200 |
+
Field(
|
201 |
+
description=(
|
202 |
+
"If enabled, classify pictures in documents. "
|
203 |
+
"Boolean. Optional, defaults to false."
|
204 |
+
),
|
205 |
+
examples=[False],
|
206 |
+
),
|
207 |
+
] = False
|
208 |
+
|
209 |
+
do_picture_description: Annotated[
|
210 |
+
bool,
|
211 |
+
Field(
|
212 |
+
description=(
|
213 |
+
"If enabled, describe pictures in documents. "
|
214 |
+
"Boolean. Optional, defaults to false."
|
215 |
+
),
|
216 |
+
examples=[False],
|
217 |
+
),
|
218 |
+
] = False
|
docling_serve/docling_conversion.py
CHANGED
@@ -120,6 +120,10 @@ def get_pdf_pipeline_opts( # noqa: C901
|
|
120 |
do_ocr=request.do_ocr,
|
121 |
ocr_options=ocr_options,
|
122 |
do_table_structure=request.do_table_structure,
|
|
|
|
|
|
|
|
|
123 |
)
|
124 |
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
|
125 |
pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)
|
|
|
120 |
do_ocr=request.do_ocr,
|
121 |
ocr_options=ocr_options,
|
122 |
do_table_structure=request.do_table_structure,
|
123 |
+
do_code_enrichment=request.do_code_enrichment,
|
124 |
+
do_formula_enrichment=request.do_formula_enrichment,
|
125 |
+
do_picture_classification=request.do_picture_classification,
|
126 |
+
do_picture_description=request.do_picture_description,
|
127 |
)
|
128 |
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
|
129 |
pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)
|
docling_serve/gradio_ui.py
CHANGED
@@ -185,6 +185,10 @@ def process_url(
|
|
185 |
table_mode,
|
186 |
abort_on_error,
|
187 |
return_as_file,
|
|
|
|
|
|
|
|
|
188 |
):
|
189 |
parameters = {
|
190 |
"http_sources": [{"url": source} for source in input_sources.split(",")],
|
@@ -199,6 +203,10 @@ def process_url(
|
|
199 |
"table_mode": table_mode,
|
200 |
"abort_on_error": abort_on_error,
|
201 |
"return_as_file": return_as_file,
|
|
|
|
|
|
|
|
|
202 |
},
|
203 |
}
|
204 |
if (
|
@@ -237,6 +245,10 @@ def process_file(
|
|
237 |
table_mode,
|
238 |
abort_on_error,
|
239 |
return_as_file,
|
|
|
|
|
|
|
|
|
240 |
):
|
241 |
if not files or len(files) == 0 or files[0] == "":
|
242 |
logger.error("No files provided.")
|
@@ -254,6 +266,10 @@ def process_file(
|
|
254 |
"table_mode": table_mode,
|
255 |
"abort_on_error": str(abort_on_error).lower(),
|
256 |
"return_as_file": str(return_as_file).lower(),
|
|
|
|
|
|
|
|
|
257 |
}
|
258 |
|
259 |
try:
|
@@ -478,6 +494,21 @@ with gr.Blocks(
|
|
478 |
with gr.Column(scale=1):
|
479 |
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
|
480 |
return_as_file = gr.Checkbox(label="Return as File", value=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
481 |
|
482 |
# Document output
|
483 |
with gr.Row(visible=False) as content_output:
|
@@ -561,6 +592,10 @@ with gr.Blocks(
|
|
561 |
table_mode,
|
562 |
abort_on_error,
|
563 |
return_as_file,
|
|
|
|
|
|
|
|
|
564 |
],
|
565 |
outputs=[
|
566 |
output_markdown,
|
@@ -630,6 +665,10 @@ with gr.Blocks(
|
|
630 |
table_mode,
|
631 |
abort_on_error,
|
632 |
return_as_file,
|
|
|
|
|
|
|
|
|
633 |
],
|
634 |
outputs=[
|
635 |
output_markdown,
|
|
|
185 |
table_mode,
|
186 |
abort_on_error,
|
187 |
return_as_file,
|
188 |
+
do_code_enrichment,
|
189 |
+
do_formula_enrichment,
|
190 |
+
do_picture_classification,
|
191 |
+
do_picture_description,
|
192 |
):
|
193 |
parameters = {
|
194 |
"http_sources": [{"url": source} for source in input_sources.split(",")],
|
|
|
203 |
"table_mode": table_mode,
|
204 |
"abort_on_error": abort_on_error,
|
205 |
"return_as_file": return_as_file,
|
206 |
+
"do_code_enrichment": do_code_enrichment,
|
207 |
+
"do_formula_enrichment": do_formula_enrichment,
|
208 |
+
"do_picture_classification": do_picture_classification,
|
209 |
+
"do_picture_description": do_picture_description,
|
210 |
},
|
211 |
}
|
212 |
if (
|
|
|
245 |
table_mode,
|
246 |
abort_on_error,
|
247 |
return_as_file,
|
248 |
+
do_code_enrichment,
|
249 |
+
do_formula_enrichment,
|
250 |
+
do_picture_classification,
|
251 |
+
do_picture_description,
|
252 |
):
|
253 |
if not files or len(files) == 0 or files[0] == "":
|
254 |
logger.error("No files provided.")
|
|
|
266 |
"table_mode": table_mode,
|
267 |
"abort_on_error": str(abort_on_error).lower(),
|
268 |
"return_as_file": str(return_as_file).lower(),
|
269 |
+
"do_code_enrichment": str(do_code_enrichment).lower(),
|
270 |
+
"do_formula_enrichment": str(do_formula_enrichment).lower(),
|
271 |
+
"do_picture_classification": str(do_picture_classification).lower(),
|
272 |
+
"do_picture_description": str(do_picture_description).lower(),
|
273 |
}
|
274 |
|
275 |
try:
|
|
|
494 |
with gr.Column(scale=1):
|
495 |
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
|
496 |
return_as_file = gr.Checkbox(label="Return as File", value=False)
|
497 |
+
with gr.Row():
|
498 |
+
with gr.Column():
|
499 |
+
do_code_enrichment = gr.Checkbox(
|
500 |
+
label="Enable code enrichment", value=False
|
501 |
+
)
|
502 |
+
do_formula_enrichment = gr.Checkbox(
|
503 |
+
label="Enable formula enrichment", value=False
|
504 |
+
)
|
505 |
+
with gr.Column():
|
506 |
+
do_picture_classification = gr.Checkbox(
|
507 |
+
label="Enable picture classification", value=False
|
508 |
+
)
|
509 |
+
do_picture_description = gr.Checkbox(
|
510 |
+
label="Enable picture description", value=False
|
511 |
+
)
|
512 |
|
513 |
# Document output
|
514 |
with gr.Row(visible=False) as content_output:
|
|
|
592 |
table_mode,
|
593 |
abort_on_error,
|
594 |
return_as_file,
|
595 |
+
do_code_enrichment,
|
596 |
+
do_formula_enrichment,
|
597 |
+
do_picture_classification,
|
598 |
+
do_picture_description,
|
599 |
],
|
600 |
outputs=[
|
601 |
output_markdown,
|
|
|
665 |
table_mode,
|
666 |
abort_on_error,
|
667 |
return_as_file,
|
668 |
+
do_code_enrichment,
|
669 |
+
do_formula_enrichment,
|
670 |
+
do_picture_classification,
|
671 |
+
do_picture_description,
|
672 |
],
|
673 |
outputs=[
|
674 |
output_markdown,
|