# Source: Docling-UI / docling_serve/gradio_ui.py
# Last commit: f0dc93c (unverified) by Michele Dolfi
#   fix(ui): use --port parameter and avoid failing when image is not found (#97)
# (Scraped page chrome: "raw", "history blame", "22.2 kB")
import importlib
import json
import logging
import tempfile
from pathlib import Path
import gradio as gr
import requests
from docling_serve.helper_functions import _to_list_of_strings
from docling_serve.settings import uvicorn_settings
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
##############################
# Head JS for web components #
##############################
# Markup passed as `head=` to gr.Blocks in the UI Setup section; it loads a
# script from unpkg as an ES module (presumably the bundle that defines the
# <docling-img> element used by response_to_output — confirm).
# NOTE(review): the package spec in the URL below ("[email protected]") looks
# like an email-obfuscation artifact of a web scrape; it was presumably
# "<package>@<version>". Restore the real value from the upstream repository.
head = """
<script src="https://unpkg.com/@docling/[email protected]" type="module"></script>
"""
#################
# CSS and theme #
#################
# Custom stylesheet passed as `css=` to gr.Blocks. The id selectors match
# elem_id values assigned in the UI Setup section (logo, dark_mode_column,
# title, file_input_zone); ".title-text" matches the title's elem_classes.
# "#custom-container" is not assigned to any component in the visible code.
# The docling-img::part(...) rules style parts exposed by the <docling-img>
# web component.
css = """
#logo {
border-style: none;
background: none;
box-shadow: none;
min-width: 80px;
}
#dark_mode_column {
display: flex;
align-content: flex-end;
}
#title {
text-align: left;
display:block;
height: auto;
padding-top: 5px;
line-height: 0;
}
.title-text h1 > p, .title-text p {
margin-top: 0px !important;
margin-bottom: 2px !important;
}
#custom-container {
border: 0.909091px solid;
padding: 10px;
border-radius: 4px;
}
#custom-container h4 {
font-size: 14px;
}
#file_input_zone {
height: 140px;
}
docling-img::part(pages) {
gap: 1rem;
}
docling-img::part(page) {
box-shadow: 0 0.5rem 1rem 0 rgba(0, 0, 0, 0.2);
}
"""
# Gradio default theme with medium text/spacing sizes and Red Hat fonts; the
# remaining list entries are generic fallback font families.
theme = gr.themes.Default(
text_size="md",
spacing_size="md",
font=[
gr.themes.GoogleFont("Red Hat Display"),
"ui-sans-serif",
"system-ui",
"sans-serif",
],
font_mono=[
gr.themes.GoogleFont("Red Hat Mono"),
"ui-monospace",
"Consolas",
"monospace",
],
)
#############
# Variables #
#############
# Used as the `dir=` argument of tempfile.mkdtemp() in response_to_output;
# while it is None, mkdtemp falls back to the default temporary directory.
gradio_output_dir = None # Will be set by FastAPI when mounted
# NOTE(review): response_to_output assigns a *local* variable with this name
# (there is no `global` statement), so the visible code never updates this
# module-level value.
file_output_path = None # Will be set when a new file is generated
#############
# Functions #
#############
def health_check():
    """Report whether the local docling-serve API answers its health endpoint.

    Returns:
        "Healthy" when ``GET /health`` on the configured uvicorn port answers
        with HTTP 200; "Unhealthy" for any other status or when the request
        itself fails (connection refused, timeout, ...).
    """
    try:
        # Bounded timeout so a stuck server cannot hang the caller forever.
        response = requests.get(
            f"http://localhost:{uvicorn_settings.port}/health", timeout=5
        )
    except requests.RequestException as e:
        # An unreachable server is an unhealthy server, not a crash.
        logger.warning(f"Health check failed: {e}")
        return "Unhealthy"
    if response.status_code == 200:
        return "Healthy"
    return "Unhealthy"
def set_options_visibility(x):
    """Return an "Options" accordion update whose open state is ``x``."""
    accordion_update = gr.Accordion("Options", open=x)
    return accordion_update
def set_outputs_visibility_direct(x, y):
    """Return row updates for the two output areas.

    Args:
        x: Visibility of the document-content row.
        y: Visibility of the file-download row.

    Returns:
        A ``(content_row, file_row)`` pair of ``gr.Row`` updates.
    """
    return gr.Row(visible=x), gr.Row(visible=y)
def set_outputs_visibility_process(x):
    """Show exactly one output area depending on the flag ``x``.

    When ``x`` is truthy the file-download row is shown and the content row
    hidden; otherwise the reverse.

    Returns:
        A ``(content_row, file_row)`` pair of ``gr.Row`` updates.
    """
    show_content = not x
    show_file = x
    return gr.Row(visible=show_content), gr.Row(visible=show_file)
def set_download_button_label(label_text: gr.State):
    """Return a download-button update labelled with ``str(label_text)``."""
    label = str(label_text)
    return gr.DownloadButton(label=label, scale=1)
def clear_outputs():
    """Return empty values for the eight document output components.

    The tuple order is: markdown, rendered markdown, JSON, rendered JSON,
    HTML, rendered HTML, text, doctags.
    """
    blank = ""
    return (blank,) * 8
def clear_url_input():
    """Return the empty string used to reset the URL textbox."""
    cleared = ""
    return cleared
def clear_file_input():
    """Return ``None``, the value used to reset the file upload component."""
    cleared = None
    return cleared
def auto_set_return_as_file(url_input, file_input, image_export_mode):
    """Decide whether the "Return as File" option should be switched on.

    The result is ``True`` when the URL text holds more than one
    comma-separated entry, when more than one file is uploaded, or when the
    image export mode is ``"referenced"``; otherwise ``False``.
    """
    # More than one comma-separated URL entry.
    if len(url_input.split(",")) > 1:
        return True
    # More than one uploaded file.
    if file_input and len(file_input) > 1:
        return True
    # Referenced images force file output as well.
    return image_export_mode == "referenced"
def change_ocr_lang(ocr_engine):
    """Return the default OCR language string for ``ocr_engine``.

    Each engine uses its own language-code format. An engine name that is
    not listed yields ``None``.
    """
    default_langs = {
        "easyocr": "en,fr,de,es",
        "tesseract_cli": "eng,fra,deu,spa",
        "tesseract": "eng,fra,deu,spa",
        "rapidocr": "english,chinese",
    }
    return default_langs.get(ocr_engine)
def process_url(
    input_sources,
    to_formats,
    image_export_mode,
    ocr,
    force_ocr,
    ocr_engine,
    ocr_lang,
    pdf_backend,
    table_mode,
    abort_on_error,
    return_as_file,
    do_code_enrichment,
    do_formula_enrichment,
    do_picture_classification,
    do_picture_description,
):
    """Convert one or more URLs through the local docling-serve API.

    Args:
        input_sources: Comma-separated URLs as typed into the UI textbox.
            Surrounding whitespace is stripped and empty entries are ignored.
        to_formats, image_export_mode, ocr, force_ocr, ocr_engine, ocr_lang,
        pdf_backend, table_mode, abort_on_error, do_code_enrichment,
        do_formula_enrichment, do_picture_classification,
        do_picture_description: Conversion options forwarded in the request's
            ``options`` object (``ocr_lang`` goes through
            ``_to_list_of_strings`` first).
        return_as_file: Forwarded as an option and also used to decide how
            the response is unpacked.

    Returns:
        The tuple produced by ``response_to_output``.

    Raises:
        gr.Error: If no URL is given, the request cannot be sent, or the API
            answers with a status other than 200.
    """
    # Tolerate "a, b" and trailing commas instead of sending blank/padded URLs.
    urls = [url.strip() for url in (input_sources or "").split(",")]
    urls = [url for url in urls if url]
    if not urls:
        logger.error("No input sources provided.")
        raise gr.Error("No input sources provided.", print_exception=False)
    parameters = {
        "http_sources": [{"url": url} for url in urls],
        "options": {
            "to_formats": to_formats,
            "image_export_mode": image_export_mode,
            "ocr": ocr,
            "force_ocr": force_ocr,
            "ocr_engine": ocr_engine,
            "ocr_lang": _to_list_of_strings(ocr_lang),
            "pdf_backend": pdf_backend,
            "table_mode": table_mode,
            "abort_on_error": abort_on_error,
            "return_as_file": return_as_file,
            "do_code_enrichment": do_code_enrichment,
            "do_formula_enrichment": do_formula_enrichment,
            "do_picture_classification": do_picture_classification,
            "do_picture_description": do_picture_description,
        },
    }
    try:
        response = requests.post(
            f"http://localhost:{uvicorn_settings.port}/v1alpha/convert/source",
            json=parameters,
        )
    except Exception as e:
        logger.error(f"Error processing URL: {e}")
        raise gr.Error(f"Error processing URL: {e}", print_exception=False)
    if response.status_code != 200:
        error_message = "An unknown error occurred."
        try:
            data = response.json()
        except ValueError:
            # The error body is not JSON (e.g. an HTML error page).
            data = None
        if isinstance(data, dict):
            error_message = data.get("detail", error_message)
        logger.error(f"Error processing URL: {error_message}")
        raise gr.Error(f"Error processing URL: {error_message}", print_exception=False)
    return response_to_output(response, return_as_file)
def process_file(
    files,
    to_formats,
    image_export_mode,
    ocr,
    force_ocr,
    ocr_engine,
    ocr_lang,
    pdf_backend,
    table_mode,
    abort_on_error,
    return_as_file,
    do_code_enrichment,
    do_formula_enrichment,
    do_picture_classification,
    do_picture_description,
):
    """Convert uploaded files through the local docling-serve API.

    Args:
        files: Uploaded file objects from the UI; each is read from the path
            in its ``name`` attribute.
        to_formats, image_export_mode, ocr_engine, ocr_lang, pdf_backend,
        table_mode: Conversion options sent as form fields (``ocr_lang`` goes
            through ``_to_list_of_strings`` first).
        ocr, force_ocr, abort_on_error, return_as_file, do_code_enrichment,
        do_formula_enrichment, do_picture_classification,
        do_picture_description: Flags sent as lowercase strings
            ("true"/"false") because they travel as form data.

    Returns:
        The tuple produced by ``response_to_output``.

    Raises:
        gr.Error: If no file is given, the request cannot be sent, or the API
            answers with a status other than 200.
    """
    import contextlib  # function-scope import keeps this block self-contained

    if not files or files[0] == "":
        logger.error("No files provided.")
        raise gr.Error("No files provided.", print_exception=False)
    parameters = {
        "to_formats": to_formats,
        "image_export_mode": image_export_mode,
        "ocr": str(ocr).lower(),
        "force_ocr": str(force_ocr).lower(),
        "ocr_engine": ocr_engine,
        "ocr_lang": _to_list_of_strings(ocr_lang),
        "pdf_backend": pdf_backend,
        "table_mode": table_mode,
        "abort_on_error": str(abort_on_error).lower(),
        "return_as_file": str(return_as_file).lower(),
        "do_code_enrichment": str(do_code_enrichment).lower(),
        "do_formula_enrichment": str(do_formula_enrichment).lower(),
        "do_picture_classification": str(do_picture_classification).lower(),
        "do_picture_description": str(do_picture_description).lower(),
    }
    # The ExitStack closes every opened upload once the request is done,
    # whether it succeeded or raised; previously the handles were leaked.
    with contextlib.ExitStack() as stack:
        files_data = [
            ("files", (file.name, stack.enter_context(open(file.name, "rb"))))
            for file in files
        ]
        try:
            response = requests.post(
                f"http://localhost:{uvicorn_settings.port}/v1alpha/convert/file",
                files=files_data,
                data=parameters,
            )
        except Exception as e:
            logger.error(f"Error processing file(s): {e}")
            raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
    if response.status_code != 200:
        error_message = "An unknown error occurred."
        try:
            data = response.json()
        except ValueError:
            # The error body is not JSON (e.g. an HTML error page).
            data = None
        if isinstance(data, dict):
            error_message = data.get("detail", error_message)
        logger.error(f"Error processing file: {error_message}")
        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
    return response_to_output(response, return_as_file)
def response_to_output(response, return_as_file):
    """Turn a conversion API response into values for the UI output components.

    Args:
        response: The HTTP response of a convert call.
        return_as_file: When truthy, the response body is saved to a fresh
            temporary directory and offered through the download button;
            otherwise the body is parsed as JSON and its ``document`` fields
            fill the content tabs.

    Returns:
        A 9-tuple: markdown, markdown (rendered tab), JSON text, HTML snippet
        for the rendered-JSON tab, HTML, HTML (rendered tab), plain text,
        doctags, and the download button update.
    """
    markdown_content = ""
    json_content = ""
    json_rendered_content = ""
    html_content = ""
    text_content = ""
    doctags_content = ""
    download_button = gr.DownloadButton(visible=False, label="Download Output", scale=1)
    if return_as_file:
        # The header may be absent; fall back to a generic name instead of
        # failing with AttributeError/IndexError.
        disposition = response.headers.get("Content-Disposition") or ""
        _, _, raw_name = disposition.partition("filename=")
        raw_name = raw_name.split(";")[0].strip().strip('"')
        # Keep only the last path component so the name cannot point outside
        # the temporary directory.
        filename = Path(raw_name).name or "docling_output"
        tmp_output_dir = Path(tempfile.mkdtemp(dir=gradio_output_dir, prefix="ui_"))
        output_path = tmp_output_dir / filename
        output_path.write_bytes(response.content)
        download_button = gr.DownloadButton(
            visible=True, label=f"Download {filename}", scale=1, value=str(output_path)
        )
    else:
        full_content = response.json()
        document = full_content.get("document") or {}
        markdown_content = document.get("md_content")
        json_content = json.dumps(document.get("json_content"), indent=2)
        # Document text is untrusted: escape characters that could close the
        # <script> element early. In json.dumps output they only occur inside
        # string literals, so \uXXXX escapes keep the JSON equivalent.
        embedded_json = (
            json_content.replace("<", "\\u003c")
            .replace(">", "\\u003e")
            .replace("&", "\\u0026")
        )
        # Embed document JSON and trigger load at client via an image.
        json_rendered_content = f"""
<docling-img id="dclimg" pagenumbers tooltip="parsed"></docling-img>
<script id="dcljson" type="application/json" onload="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);">{embedded_json}</script>
<img src onerror="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);" />
"""
        html_content = document.get("html_content")
        text_content = document.get("text_content")
        doctags_content = document.get("doctags_content")
    return (
        markdown_content,
        markdown_content,
        json_content,
        json_rendered_content,
        html_content,
        html_content,
        text_content,
        doctags_content,
        download_button,
    )
############
# UI Setup #
############
# Builds the Gradio Blocks app `ui`: banner, URL/file input tabs, conversion
# options, output panes, and the event wiring between them.
# NOTE(review): indentation was lost in this copy of the file, so which
# statements belong to which `with` body cannot be read off the text here.
with gr.Blocks(
head=head,
css=css,
theme=theme,
title="Docling Serve",
delete_cache=(3600, 3600), # Delete all files older than 1 hour every hour
) as ui:
# Constants stored in states to be able to pass them as inputs to functions
processing_text = gr.State("Processing your document(s), please wait...")
true_bool = gr.State(True)
false_bool = gr.State(False)
# Banner
with gr.Row(elem_id="check_health"):
# Logo
with gr.Column(scale=1, min_width=90):
# Best effort: if building the logo image raises, only a warning is logged
# and the rest of the UI is still built.
try:
gr.Image(
"https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg",
height=80,
width=80,
show_download_button=False,
show_label=False,
show_fullscreen_button=False,
container=False,
elem_id="logo",
scale=0,
)
except Exception:
logger.warning("Logo not found.")
# Title
with gr.Column(scale=1, min_width=200):
# The title shows the installed docling version.
# NOTE(review): only `import importlib` is visible at the top of the file;
# `importlib.metadata` is a submodule, so this presumably relies on another
# package having imported it already — confirm.
gr.Markdown(
f"# Docling Serve \n(docling version: "
f"{importlib.metadata.version('docling')})",
elem_id="title",
elem_classes=["title-text"],
)
# Dark mode button
with gr.Column(scale=16, elem_id="dark_mode_column"):
dark_mode_btn = gr.Button("Dark/Light Mode", scale=0)
# No Python callback and no inputs/outputs: the click only runs the JS below,
# which removes the 'dark' class wherever it is set, or else adds it to <body>.
dark_mode_btn.click(
None,
None,
None,
js="""() => {
if (document.querySelectorAll('.dark').length) {
document.querySelectorAll('.dark').forEach(
el => el.classList.remove('dark')
);
} else {
document.querySelector('body').classList.add('dark');
}
}""",
show_api=False,
)
# URL Processing Tab
with gr.Tab("Convert URL(s)"):
with gr.Row():
with gr.Column(scale=4):
url_input = gr.Textbox(
label="Input Sources (comma-separated URLs)",
placeholder="https://arxiv.org/pdf/2206.01062",
)
with gr.Column(scale=1):
url_process_btn = gr.Button("Process URL(s)", scale=1)
url_reset_btn = gr.Button("Reset", scale=1)
# File Processing Tab
with gr.Tab("Convert File(s)"):
with gr.Row():
with gr.Column(scale=4):
file_input = gr.File(
elem_id="file_input_zone",
label="Upload Files",
file_types=[
".pdf",
".docx",
".pptx",
".html",
".xlsx",
".asciidoc",
".txt",
".md",
".jpg",
".jpeg",
".png",
".gif",
],
file_count="multiple",
scale=4,
)
with gr.Column(scale=1):
file_process_btn = gr.Button("Process File(s)", scale=1)
file_reset_btn = gr.Button("Reset", scale=1)
# Options
# Each choice below is a (label shown, value sent) pair; the values are what
# process_url / process_file receive.
with gr.Accordion("Options") as options:
with gr.Row():
with gr.Column(scale=1):
to_formats = gr.CheckboxGroup(
[
("Markdown", "md"),
("Docling (JSON)", "json"),
("HTML", "html"),
("Plain Text", "text"),
("Doc Tags", "doctags"),
],
label="To Formats",
value=["md"],
)
with gr.Column(scale=1):
image_export_mode = gr.Radio(
[
("Embedded", "embedded"),
("Placeholder", "placeholder"),
("Referenced", "referenced"),
],
label="Image Export Mode",
value="embedded",
)
with gr.Row():
with gr.Column(scale=1, min_width=200):
ocr = gr.Checkbox(label="Enable OCR", value=True)
force_ocr = gr.Checkbox(label="Force OCR", value=False)
with gr.Column(scale=1):
ocr_engine = gr.Radio(
[
("EasyOCR", "easyocr"),
("Tesseract", "tesseract"),
("RapidOCR", "rapidocr"),
],
label="OCR Engine",
value="easyocr",
)
with gr.Column(scale=1, min_width=200):
ocr_lang = gr.Textbox(
label="OCR Language (beware of the format)", value="en,fr,de,es"
)
# Switching the OCR engine resets the language box to that engine's defaults.
ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
with gr.Row():
with gr.Column(scale=2):
pdf_backend = gr.Radio(
["pypdfium2", "dlparse_v1", "dlparse_v2"],
label="PDF Backend",
value="dlparse_v2",
)
with gr.Column(scale=2):
table_mode = gr.Radio(
["fast", "accurate"], label="Table Mode", value="fast"
)
with gr.Column(scale=1):
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
return_as_file = gr.Checkbox(label="Return as File", value=False)
with gr.Row():
with gr.Column():
do_code_enrichment = gr.Checkbox(
label="Enable code enrichment", value=False
)
do_formula_enrichment = gr.Checkbox(
label="Enable formula enrichment", value=False
)
with gr.Column():
do_picture_classification = gr.Checkbox(
label="Enable picture classification", value=False
)
do_picture_description = gr.Checkbox(
label="Enable picture description", value=False
)
# Document output
# Hidden until a conversion is started; one tab per raw or rendered format.
with gr.Row(visible=False) as content_output:
with gr.Tab("Markdown"):
output_markdown = gr.Code(
language="markdown", wrap_lines=True, show_label=False
)
with gr.Tab("Markdown-Rendered"):
output_markdown_rendered = gr.Markdown(label="Response")
with gr.Tab("Docling (JSON)"):
output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
with gr.Tab("Docling-Rendered"):
output_json_rendered = gr.HTML()
with gr.Tab("HTML"):
output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
with gr.Tab("HTML-Rendered"):
output_html_rendered = gr.HTML(label="Response")
with gr.Tab("Text"):
output_text = gr.Code(wrap_lines=True, show_label=False)
with gr.Tab("DocTags"):
output_doctags = gr.Code(wrap_lines=True, show_label=False)
# File download output
# Hidden until a conversion is started; the button label is replaced later.
with gr.Row(visible=False) as file_output:
download_file_btn = gr.DownloadButton(label="Placeholder", scale=1)
##############
# UI Actions #
##############
# Handle Return as File
# Any change to the URL text, the uploaded files, or the image export mode
# recomputes the "Return as File" checkbox.
url_input.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
file_input.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
image_export_mode.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
# URL processing
# Chain: collapse the options, label the download button with the
# "processing" text, show the content or file row depending on
# return_as_file, blank the outputs, then run the conversion.
url_process_btn.click(
set_options_visibility, inputs=[false_bool], outputs=[options]
).then(
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
).then(
set_outputs_visibility_process,
inputs=[return_as_file],
outputs=[content_output, file_output],
).then(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(
process_url,
inputs=[
url_input,
to_formats,
image_export_mode,
ocr,
force_ocr,
ocr_engine,
ocr_lang,
pdf_backend,
table_mode,
abort_on_error,
return_as_file,
do_code_enrichment,
do_formula_enrichment,
do_picture_classification,
do_picture_description,
],
# Nine outputs, in the order of the tuple returned by response_to_output.
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
download_file_btn,
],
)
# Reset: blank the outputs, reopen the options, hide both output rows, and
# clear the URL textbox.
url_reset_btn.click(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
set_outputs_visibility_direct,
inputs=[false_bool, false_bool],
outputs=[content_output, file_output],
).then(clear_url_input, inputs=None, outputs=[url_input])
# File processing
# Same chain as URL processing, ending in process_file with the uploaded files.
file_process_btn.click(
set_options_visibility, inputs=[false_bool], outputs=[options]
).then(
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
).then(
set_outputs_visibility_process,
inputs=[return_as_file],
outputs=[content_output, file_output],
).then(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(
process_file,
inputs=[
file_input,
to_formats,
image_export_mode,
ocr,
force_ocr,
ocr_engine,
ocr_lang,
pdf_backend,
table_mode,
abort_on_error,
return_as_file,
do_code_enrichment,
do_formula_enrichment,
do_picture_classification,
do_picture_description,
],
# Nine outputs, in the order of the tuple returned by response_to_output.
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
download_file_btn,
],
)
# Reset: same as the URL reset, but clears the file upload instead.
file_reset_btn.click(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
set_outputs_visibility_direct,
inputs=[false_bool, false_bool],
outputs=[content_output, file_output],
).then(clear_file_input, inputs=None, outputs=[file_input])