# Spaces:
# Runtime error
# Runtime error
# File size: 1,242 Bytes
# 5b2c368 966ab7d 6b1b55d 966ab7d 5b2c368 966ab7d 5b2c368 966ab7d 6b1b55d 966ab7d 6b1b55d 966ab7d 41311bb 966ab7d |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import gradio as gr
import io
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
# Build the OCR predictor once at import time so every request reuses it.
# Pretrained weights are requested for both the detection and the
# recognition architecture.
model = ocr_predictor(
    det_arch="db_resnet50",
    reco_arch="crnn_vgg16_bn",
    pretrained=True,
)
def _collect_text(result):
    """Flatten an OCR result into plain text.

    Words of a line are joined with single spaces, lines of a block with
    newlines, and blocks are separated by one blank line. Blocks that
    contain no lines are skipped so they do not add stray blank lines.
    """
    block_texts = []
    for page in result.pages:
        for block in page.blocks:
            line_texts = [
                " ".join(word.value for word in line.words)
                for line in block.lines
            ]
            if line_texts:
                block_texts.append("\n".join(line_texts))
    return "\n\n".join(block_texts).strip()


def ocr_process(file):
    """Run OCR on an uploaded PDF or image and return the extracted text.

    Args:
        file: The upload handed over by the Gradio ``File`` component.
            Either a filesystem path (``str``) or an object exposing the
            path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        The recognized text, with one output line per detected text line
        and a blank line between blocks. An empty string when ``file`` is
        ``None``.
    """
    # Submitting the form without an upload passes None; return empty text
    # instead of failing on attribute access.
    if file is None:
        return ""

    # Accept both a plain path string and a file-like wrapper with `.name`.
    path = file if isinstance(file, str) else file.name

    if path.lower().endswith(".pdf"):
        doc = DocumentFile.from_pdf(path)
    else:
        # Anything that is not a PDF is treated as an image. The path is
        # passed straight to docTR: `from_images` takes a path or raw bytes,
        # not a stream object such as io.BytesIO.
        doc = DocumentFile.from_images(path)

    result = model(doc)
    return _collect_text(result)
# Create the Gradio interface: a single file upload goes to `ocr_process`
# and the returned text is shown in a textbox.
iface = gr.Interface(
    fn=ocr_process,
    inputs=gr.File(label="Upload PDF or Image"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="OCR with doctr",
    description="Upload a PDF or image file to extract text using OCR.",
)

# Launch the interface
iface.launch()