"""OCR Arena: a small Gradio app that runs Docling on one page of an uploaded PDF."""

from PyPDF2 import PdfReader
import gradio as gr
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.base_models import InputFormat

# A single converter is built at import time and shared by every request.
pipeline_options = PdfPipelineOptions(enable_remote_services=True)
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
    }
)


def get_pdf_page_count(pdf_path) -> int:
    """Return the number of pages in the PDF at ``pdf_path``.

    Args:
        pdf_path: Value handed straight to ``PyPDF2.PdfReader``.
    """
    return len(PdfReader(pdf_path).pages)


def get_docling_ocr(pdf_path, page_num) -> str:
    """Convert a single page of a PDF with Docling and return it as Markdown.

    Args:
        pdf_path: Source document passed to ``converter.convert``.
        page_num: Page to convert; used as both ends of ``page_range`` so
            exactly one page is processed.

    Returns:
        The result of ``export_to_markdown()`` on the converted document.
    """
    result = converter.convert(pdf_path, page_range=(page_num, page_num))
    return result.document.export_to_markdown()


def inference(pdf_path, page_num) -> str:
    """Gradio click handler: run Docling OCR on the selected page.

    Args:
        pdf_path: Value of the file input; ``None`` when nothing is uploaded.
        page_num: Value of the page selector component.

    Returns:
        Markdown text for the requested page.

    Raises:
        gr.Error: If no PDF has been provided.
    """
    # With no upload, the page selector is a Markdown placeholder rather than
    # a slider, so there is no valid page number to convert. Fail with a
    # readable message instead of an opaque converter error.
    if pdf_path is None:
        raise gr.Error("Please upload a PDF before submitting.")
    return get_docling_ocr(pdf_path, page_num)


# NOTE(review): `title` and `description` are not referenced anywhere in the
# visible code; kept as-is in case they are meant to be wired into the UI.
title = "OCR Arena"
description = "A simple Gradio interface to extract text from PDFs and compare OCR models"
examples = [["data/amazon-10-k-2024.pdf"], ["data/goog-10-k-2023.pdf"]]

with gr.Blocks(theme=gr.themes.Glass()) as demo:
    with gr.Row():
        with gr.Column():
            pdf = gr.File(label="Input PDFs", file_types=[".pdf"])

            @gr.render(inputs=pdf)
            def show_slider(pdf_path):
                """Render the page selector and action buttons for the current upload.

                Shows a Markdown placeholder when no PDF is present; otherwise
                shows a slider ranging from 1 to the PDF's page count.
                """
                if pdf_path is None:
                    page_num = gr.Markdown("## No Input Provided")
                else:
                    page_count = get_pdf_page_count(pdf_path)
                    page_num = gr.Slider(
                        1, page_count, value=1, step=1, label="Page Number"
                    )

                with gr.Row():
                    gr.ClearButton(components=[pdf, page_num])
                    submit_btn = gr.Button("Submit", variant='primary')

                # `docling_ocr_out` is assigned further down in this layout;
                # the name is looked up only when this function is called.
                submit_btn.click(
                    inference,
                    inputs=[pdf, page_num],
                    outputs=docling_ocr_out,
                )

        with gr.Column():
            docling_ocr_out = gr.Textbox(label="Docling OCR Output", type="text")

    examples_obj = gr.Examples(examples=examples, inputs=[pdf])

demo.launch()