File size: 1,220 Bytes
fffbbeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from PyPDF2 import PdfReader
import gradio as gr

def get_pdf_page_count(pdf_path):
    """Return the number of pages in the PDF at ``pdf_path``.

    The document is opened with ``PdfReader`` and the length of its
    ``pages`` collection is returned.
    """
    page_total = len(PdfReader(pdf_path).pages)
    return page_total

def inference(pdf_path, page_num):
    """Placeholder OCR step.

    Both ``pdf_path`` and ``page_num`` are currently ignored; the function
    always returns the fixed string ``"A"``.
    """
    placeholder_text = "A"
    return placeholder_text

# Static text and sample-file paths for the demo.
title = "OCR Arena"
description = (
    "A simple Gradio interface to extract text from PDFs and compare OCR models"
)
# Each entry is a one-element list holding the path of a PDF under data/.
examples = [
    ["data/amazon-10-k-2024.pdf"],
    ["data/goog-10-k-2023.pdf"],
]

# Build the UI: an upload column whose controls are produced by a
# gr.render function taking the File component as input, followed by a
# column holding the OCR output textbox.
with gr.Blocks(theme=gr.themes.Glass()) as demo:
    with gr.Column():
        pdf = gr.File(label="Input PDFs", file_types=[".pdf"])

        @gr.render(inputs=pdf)
        def show_slider(pdf_path):
            """Render the page selector and the Clear/Submit buttons.

            Args:
                pdf_path: Current value of the ``pdf`` File component. It is
                    ``None`` when nothing is uploaded; otherwise it is passed
                    to ``get_pdf_page_count`` (presumably a file path --
                    confirm against the Gradio version in use).
            """
            # An empty File component yields None, and len(None) raises
            # TypeError. A truthiness test covers None as well as the
            # empty string/list the original check handled.
            if not pdf_path:
                page_num = gr.Markdown("## No Input Provided")
            else:
                page_count = get_pdf_page_count(pdf_path)
                # NOTE(review): interactive=False means the user cannot move
                # the slider, so the page stays at 1 -- confirm this is
                # intended.
                page_num = gr.Slider(
                    1,
                    page_count,
                    value=1,
                    step=1,
                    label="Page",
                    interactive=False,
                )

            with gr.Row():
                clear_btn = gr.ClearButton(components=[pdf, page_num])
                submit_btn = gr.Button("Submit", variant='primary')

            # ocr_out is assigned further down at module level; it is only
            # looked up when this function is called, not when it is defined.
            submit_btn.click(inference, inputs=[pdf, page_num], outputs=ocr_out)

    with gr.Column():
        ocr_out = gr.Textbox(label="OCR Output", type="text")

demo.launch()