Spaces:

AkashDataScience
/

OCRArena

Running

App Files Files Community

AkashDataScience committed on Jun 28

Commit

1402288

1 Parent(s): f449995

Execution changes

Browse files

Files changed (1) hide show

app.py +15 -13

app.py CHANGED Viewed

@@ -35,12 +35,19 @@ def get_pdf_page_count(pdf_path):
     reader = PdfReader(pdf_path)
     return len(reader.pages)
 def get_docling_ocr(pdf_path, page_num):
     result = converter.convert(pdf_path, page_range=(page_num, page_num))
     markdown_text_docling = result.document.export_to_markdown()
     return markdown_text_docling
-def get_paddle_ocr(page_image):
     output = pipeline.predict(input=np.array(page_image))
     markdown_list = []
@@ -52,7 +59,8 @@ def get_paddle_ocr(page_image):
     markdown_text_paddleOCR = pipeline.concatenate_markdown_pages(markdown_list)
     return markdown_text_paddleOCR
-def get_smoldocling_ocr(page_image):
     image = load_image(page_image)
     # Create input messages
@@ -85,16 +93,6 @@ def get_smoldocling_ocr(page_image):
     markdown_text_smoldocling = doc.export_to_markdown()
     return markdown_text_smoldocling
-def inference(pdf_path, page_num):
-    docling_ocr = get_docling_ocr(pdf_path, page_num)
-    # Extract the first page as an image
-    images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
-    page_image = images[0]
-    paddle_ocr = get_paddle_ocr(page_image)
-    smoldocling_ocr = get_smoldocling_ocr(page_image)
-    return page_image, docling_ocr, paddle_ocr, smoldocling_ocr
 title = "OCR Arena"
 description = "A simple Gradio interface to extract text from PDFs and compare OCR models"
@@ -102,6 +100,7 @@ examples = [["data/amazon-10-k-2024.pdf"],
             ["data/goog-10-k-2023.pdf"]]
 with gr.Blocks(theme=gr.themes.Glass()) as demo:
     with gr.Row():
         with gr.Column():
             pdf = gr.File(label="Input PDFs", file_types=[".pdf"])
@@ -118,7 +117,10 @@ with gr.Blocks(theme=gr.themes.Glass()) as demo:
                         clear_btn = gr.ClearButton(components=[pdf, page_num])
                         submit_btn = gr.Button("Submit", variant='primary')
-                    submit_btn.click(inference, inputs=[pdf, page_num], outputs=[original, docling_ocr_out, paddle_ocr_out, smoldocling_ocr_out])
         with gr.Column():
             original = gr.Image(width=640, height=640, label="Original Page", interactive=False)

     reader = PdfReader(pdf_path)
     return len(reader.pages)
+def get_page_image(pdf_path, page_num):
+    images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
+    page_image = images[0]
+    return page_image
 def get_docling_ocr(pdf_path, page_num):
     result = converter.convert(pdf_path, page_range=(page_num, page_num))
     markdown_text_docling = result.document.export_to_markdown()
     return markdown_text_docling
+def get_paddle_ocr(pdf_path, page_num):
+    page_image = get_page_image(pdf_path, page_num)
     output = pipeline.predict(input=np.array(page_image))
     markdown_list = []
     markdown_text_paddleOCR = pipeline.concatenate_markdown_pages(markdown_list)
     return markdown_text_paddleOCR
+def get_smoldocling_ocr(pdf_path, page_num):
+    page_image = get_page_image(pdf_path, page_num)
     image = load_image(page_image)
     # Create input messages
     markdown_text_smoldocling = doc.export_to_markdown()
     return markdown_text_smoldocling
 title = "OCR Arena"
 description = "A simple Gradio interface to extract text from PDFs and compare OCR models"
             ["data/goog-10-k-2023.pdf"]]
 with gr.Blocks(theme=gr.themes.Glass()) as demo:
+    gr.Markdown(f"# {title}\n{description}")
     with gr.Row():
         with gr.Column():
             pdf = gr.File(label="Input PDFs", file_types=[".pdf"])
                         clear_btn = gr.ClearButton(components=[pdf, page_num])
                         submit_btn = gr.Button("Submit", variant='primary')
+                    submit_btn.click(get_page_image, inputs=[pdf, page_num], outputs=original).then(
+                        get_docling_ocr, inputs=[pdf, page_num], outputs=docling_ocr_out).then(
+                        get_paddle_ocr, inputs=[pdf, page_num], outputs=paddle_ocr_out).then(
+                        get_smoldocling_ocr, inputs=[pdf, page_num], outputs=smoldocling_ocr_out)
         with gr.Column():
             original = gr.Image(width=640, height=640, label="Original Page", interactive=False)