# Spaces: Running
from PyPDF2 import PdfReader | |
import gradio as gr | |
def get_pdf_page_count(pdf_path) -> int:
    """Return the number of pages in the PDF at ``pdf_path``.

    Args:
        pdf_path: Passed straight to ``PdfReader``.
            NOTE(review): presumably a filesystem path supplied by the
            Gradio file input -- confirm against the installed Gradio version.

    Returns:
        The length of the reader's ``pages`` collection.
    """
    reader = PdfReader(pdf_path)
    return len(reader.pages)
def inference(pdf_path, page_num):
    """Return the OCR output for one page of a PDF.

    NOTE(review): this looks like a placeholder -- both arguments are
    currently ignored and the constant string "A" is returned.

    Args:
        pdf_path: The uploaded PDF (unused for now).
        page_num: The selected page number (unused for now).

    Returns:
        The string ``"A"``.
    """
    return "A"
# Heading text for the demo.
title = "OCR Arena"

# One-line summary of what the demo does.
description = "A simple Gradio interface to extract text from PDFs and compare OCR models"

# Sample inputs offered through gr.Examples; each inner list holds the value
# for the single PDF input component.
examples = [
    ["data/amazon-10-k-2024.pdf"],
    ["data/goog-10-k-2023.pdf"],
]
# Build the UI: the left column holds the PDF input and page selection, the
# right column holds the OCR output and the example files.
with gr.Blocks(theme=gr.themes.Glass(), title=title) as demo:
    # Surface the module-level title/description, which were otherwise unused.
    gr.Markdown(f"# {title}\n\n{description}")

    with gr.Row():
        with gr.Column():
            pdf = gr.File(label="Input PDFs", file_types=[".pdf"])
            # Both widgets are created up front so they are real components
            # that can be wired as event inputs/outputs; show_slider() below
            # only toggles which of the two is visible.
            no_input_msg = gr.Markdown("## No Input Provided")
            page_num = gr.Slider(
                1, 1, value=1, step=1, label="Page Number", visible=False
            )
            with gr.Row():
                clear_btn = gr.ClearButton(components=[pdf, page_num])
                submit_btn = gr.Button("Submit", variant='primary')
        with gr.Column():
            ocr_out = gr.Textbox(label="OCR Output", type="text")
            examples_obj = gr.Examples(examples=examples, inputs=[pdf])

    def show_slider(pdf_path):
        """Swap between the "no input" message and the page slider.

        Args:
            pdf_path: Current value of the PDF input, or None when empty.

        Returns:
            A pair of updates for (no_input_msg, page_num). With a PDF
            present the slider range is reset to 1..page_count.
        """
        if pdf_path is None:
            return gr.update(visible=True), gr.update(visible=False)
        page_count = get_pdf_page_count(pdf_path)
        return (
            gr.update(visible=False),
            gr.update(minimum=1, maximum=page_count, value=1, visible=True),
        )

    # Event wiring happens after every component exists, so ocr_out is
    # defined before it is referenced as an output.
    pdf.change(show_slider, inputs=pdf, outputs=[no_input_msg, page_num])
    submit_btn.click(inference, inputs=[pdf, page_num], outputs=ocr_out)

demo.launch()