Spaces:
Runtime error
Runtime error
import gradio as gr | |
import io | |
from doctr.io import DocumentFile | |
from doctr.models import ocr_predictor | |
# Build the OCR predictor once at import time so that every request handled
# by the app reuses the same instance instead of constructing a new one.
model = ocr_predictor(
    det_arch="db_resnet50",
    reco_arch="crnn_vgg16_bn",
    pretrained=True,
)
def ocr_process(file):
    """Run OCR on an uploaded PDF or image and return the recognized text.

    Args:
        file: The upload value received from the Gradio ``File`` input.
            Handled forms: a filesystem path given as ``str``, an object
            exposing that path through its ``name`` attribute, or ``None``
            when the form is submitted without an upload.

    Returns:
        The recognized text. Words on a line are joined by single spaces,
        each OCR line is on its own text line, and consecutive blocks are
        separated by one blank line. An empty string is returned when no
        file was supplied.
    """
    # Nothing uploaded: return early instead of failing on ``file.name``.
    if file is None:
        return ""

    # Resolve the on-disk path whether we were given the path itself or a
    # file-like wrapper around it.
    path = file if isinstance(file, str) else file.name

    # Hand the path straight to the loaders. The previous code wrapped the
    # image in a BytesIO stream; the loaders are documented to take a path
    # or raw bytes, so the stream form is avoided here.
    if path.lower().endswith(".pdf"):
        doc = DocumentFile.from_pdf(path)
    else:
        # Anything that is not a PDF is treated as an image.
        doc = DocumentFile.from_images(path)

    result = model(doc)

    # Collect one text chunk per block; joining avoids repeated string
    # concatenation and the trailing space it left at the end of each line.
    block_texts = []
    for page in result.pages:
        for block in page.blocks:
            line_texts = [
                " ".join(word.value for word in line.words)
                for line in block.lines
            ]
            block_texts.append("\n".join(line_texts))

    return "\n\n".join(block_texts).strip()
# Describe the UI: a single file upload feeding ocr_process, with the
# recognized text shown in a textbox.
upload_input = gr.File(label="Upload PDF or Image")
text_output = gr.Textbox(label="Extracted Text")

iface = gr.Interface(
    fn=ocr_process,
    inputs=upload_input,
    outputs=text_output,
    title="OCR with doctr",
    description="Upload a PDF or image file to extract text using OCR.",
)

# Start serving the app.
iface.launch()