Spaces:

AkashDataScience
/

OCRArena

Running

App Files Files Community

AkashDataScience committed on Jun 28

Commit

ae5ac9c

1 Parent(s): adf4200

Adding Docling

Browse files

Files changed (2) hide show

app.py +19 -3
requirements.txt +55 -0

app.py CHANGED Viewed

@@ -1,12 +1,28 @@
 from PyPDF2 import PdfReader
 import gradio as gr
 def get_pdf_page_count(pdf_path):
     # Open the PDF at pdf_path with PyPDF2's PdfReader and return its page count.
     reader = PdfReader(pdf_path)
     return len(reader.pages)
 def inference(pdf_path, page_num):
-    return "A"
 title = "OCR Arena"
 description = "A simple Gradio interface to extract text from PDFs and compare OCR models"
@@ -30,10 +46,10 @@ with gr.Blocks(theme=gr.themes.Glass()) as demo:
                         clear_btn = gr.ClearButton(components=[pdf, page_num])
                         submit_btn = gr.Button("Submit", variant='primary')
-                    submit_btn.click(inference, inputs=[pdf, page_num], outputs=ocr_out)
         with gr.Column():
-            ocr_out = gr.Textbox(label="OCR Output", type="text")
     examples_obj = gr.Examples(examples=examples, inputs=[pdf])

 from PyPDF2 import PdfReader
 import gradio as gr
+from docling.document_converter import DocumentConverter, PdfFormatOption
+from docling.datamodel.pipeline_options import PdfPipelineOptions
+from docling.datamodel.base_models import InputFormat
+# Docling PDF pipeline options, constructed with enable_remote_services=True.
+pipeline_options = PdfPipelineOptions(enable_remote_services=True)
+# Module-level converter, built once at import time and used by get_docling_ocr;
+# the pipeline options above are registered for the PDF input format.
+converter = DocumentConverter(
+    format_options={
+        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
+    }
+)
 def get_pdf_page_count(pdf_path):
     # Open the PDF at pdf_path with PyPDF2's PdfReader and return its page count.
     reader = PdfReader(pdf_path)
     return len(reader.pages)
+def get_docling_ocr(pdf_path, page_num):
+    # Run the shared Docling converter on a single page: the page range passed
+    # to convert() starts and ends at page_num.
+    # NOTE(review): assumes page_num is an integer page index in the form
+    # Docling's page_range expects -- confirm against the page_num input component.
+    result = converter.convert(pdf_path, page_range=(page_num, page_num))
+    # Export the converted document as Markdown text and return it.
+    markdown_text_docling = result.document.export_to_markdown()
+    return markdown_text_docling
 def inference(pdf_path, page_num):
+    # Click handler for submit_btn: receives the pdf and page_num inputs and
+    # returns the Docling Markdown text for that page.
+    docling_ocr = get_docling_ocr(pdf_path, page_num)
+    return docling_ocr
 title = "OCR Arena"
 description = "A simple Gradio interface to extract text from PDFs and compare OCR models"
                         clear_btn = gr.ClearButton(components=[pdf, page_num])
                         submit_btn = gr.Button("Submit", variant='primary')
+                    submit_btn.click(inference, inputs=[pdf, page_num], outputs=docling_ocr_out)
         with gr.Column():
+            docling_ocr_out = gr.Textbox(label="Docling OCR Output", type="text")
     examples_obj = gr.Examples(examples=examples, inputs=[pdf])

requirements.txt CHANGED Viewed

@@ -1,13 +1,23 @@
 aiofiles==24.1.0
 annotated-types==0.7.0
 anyio==4.9.0
 certifi==2025.6.15
 charset-normalizer==3.4.2
 click==8.2.1
 colorama==0.4.6
 fastapi==0.115.14
 ffmpy==0.6.0
 filelock==3.18.0
 fsspec==2025.5.1
 gradio==5.35.0
 gradio_client==1.10.4
@@ -17,35 +27,79 @@ httpcore==1.0.9
 httpx==0.28.1
 huggingface-hub==0.33.1
 idna==3.10
 Jinja2==3.1.6
 markdown-it-py==3.0.0
 MarkupSafe==3.0.2
 mdurl==0.1.2
 numpy==2.2.6
 orjson==3.10.18
 packaging==25.0
 pandas==2.3.0
 pillow==11.2.1
 pydantic==2.11.7
 pydantic_core==2.33.2
 pydub==0.25.1
 Pygments==2.19.2
 PyPDF2==3.0.1
 python-dateutil==2.9.0.post0
 python-multipart==0.0.20
 pytz==2025.2
 PyYAML==6.0.2
 requests==2.32.4
 rich==14.0.0
 ruff==0.12.1
 safehttpx==0.1.6
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
 starlette==0.46.2
 tomlkit==0.13.3
 tqdm==4.67.1
 typer==0.16.0
 typing-inspection==0.4.1
 typing_extensions==4.14.0
@@ -53,3 +107,4 @@ tzdata==2025.2
 urllib3==2.5.0
 uvicorn==0.34.3
 websockets==15.0.1

 aiofiles==24.1.0
 annotated-types==0.7.0
 anyio==4.9.0
+attrs==25.3.0
+beautifulsoup4==4.13.4
 certifi==2025.6.15
 charset-normalizer==3.4.2
 click==8.2.1
 colorama==0.4.6
+dill==0.4.0
+docling==2.39.0
+docling-core==2.39.0
+docling-ibm-models==3.6.0
+docling-parse==4.1.0
+easyocr==1.7.2
+et_xmlfile==2.0.0
 fastapi==0.115.14
 ffmpy==0.6.0
 filelock==3.18.0
+filetype==1.2.0
 fsspec==2025.5.1
 gradio==5.35.0
 gradio_client==1.10.4
 httpx==0.28.1
 huggingface-hub==0.33.1
 idna==3.10
+imageio==2.37.0
 Jinja2==3.1.6
+jsonlines==3.1.0
+jsonref==1.1.0
+jsonschema==4.24.0
+jsonschema-specifications==2025.4.1
+latex2mathml==3.78.0
+lazy_loader==0.4
+lxml==5.4.0
 markdown-it-py==3.0.0
+marko==2.1.4
 MarkupSafe==3.0.2
 mdurl==0.1.2
+mpire==2.10.2
+mpmath==1.3.0
+multiprocess==0.70.18
+networkx==3.5
+ninja==1.11.1.4
 numpy==2.2.6
+opencv-python-headless==4.11.0.86
+openpyxl==3.1.5
 orjson==3.10.18
 packaging==25.0
 pandas==2.3.0
 pillow==11.2.1
+pluggy==1.6.0
+pyclipper==1.3.0.post6
 pydantic==2.11.7
+pydantic-settings==2.10.1
 pydantic_core==2.33.2
 pydub==0.25.1
 Pygments==2.19.2
+pylatexenc==2.10
 PyPDF2==3.0.1
+pypdfium2==4.30.1
+python-bidi==0.6.6
 python-dateutil==2.9.0.post0
+python-docx==1.2.0
+python-dotenv==1.1.1
 python-multipart==0.0.20
+python-pptx==1.0.2
 pytz==2025.2
+pywin32==310; sys_platform == "win32"
 PyYAML==6.0.2
+referencing==0.36.2
+regex==2024.11.6
 requests==2.32.4
 rich==14.0.0
+rpds-py==0.25.1
+rtree==1.4.0
 ruff==0.12.1
 safehttpx==0.1.6
+safetensors==0.5.3
+scikit-image==0.25.2
+scipy==1.16.0
 semantic-version==2.10.0
+semchunk==2.2.2
+setuptools==80.9.0
+shapely==2.1.1
 shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
+soupsieve==2.7
 starlette==0.46.2
+sympy==1.14.0
+tabulate==0.9.0
+tifffile==2025.6.11
+tokenizers==0.21.2
 tomlkit==0.13.3
+torch==2.7.1
+torchvision==0.22.1
 tqdm==4.67.1
+transformers==4.53.0
 typer==0.16.0
 typing-inspection==0.4.1
 typing_extensions==4.14.0
 urllib3==2.5.0
 uvicorn==0.34.3
 websockets==15.0.1
+xlsxwriter==3.2.5