# Hugging Face Spaces page residue (not part of the program):
#   Space status: Runtime error
#   File size: 1,460 Bytes
#   (per-line commit hashes 5b2c368 / 41311bb / 08eaeab and the 1-49
#   line-number gutter from the web file viewer are omitted)
import gradio as gr
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import fitz # PyMuPDF
import io
from PIL import Image
# Initialize the OCR model
# Built once at import time as a module-level global; perform_ocr reuses this
# single predictor for every call instead of constructing a new one.
# NOTE(review): pretrained=True presumably fetches/loads pretrained weights on
# first use — confirm against the doctr documentation.
model = ocr_predictor(pretrained=True)
def perform_ocr(file):
    """Extract text from an uploaded PDF or image with the doctr OCR model.

    Args:
        file: The upload handed over by the UI. Either an object exposing
            its path through a ``name`` attribute, a bare path, or ``None``
            when no file was provided.

    Returns:
        The recognized text. For a PDF, the text of each page is separated
        by a blank line and surrounding whitespace is stripped. For an
        image, the rendered OCR result is returned as-is. An empty string
        is returned when ``file`` is ``None``.
    """
    if file is None:
        # Nothing to process (e.g. the form was submitted without an upload);
        # return empty text instead of failing on ``None.name``.
        return ""

    # Accept both file-like objects (path stored in ``.name``) and bare paths.
    path = str(getattr(file, "name", file))

    if not path.lower().endswith(".pdf"):
        # Process image
        doc = DocumentFile.from_images(path)
        return model(doc).render()

    # Process PDF
    page_texts = []
    # The context manager closes the document even if OCR fails on some page.
    with fitz.open(path) as pdf_document:
        for page in pdf_document:
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            # doctr takes encoded image bytes, so serialize the page as PNG.
            png_buffer = io.BytesIO()
            img.save(png_buffer, format="PNG")
            # Perform OCR on the rendered page
            doc = DocumentFile.from_images(png_buffer.getvalue())
            page_texts.append(model(doc).render())

    # A blank line between pages; strip leading/trailing whitespace overall.
    return "\n\n".join(page_texts).strip()
# Assemble the Gradio UI: one file-upload input routed through perform_ocr,
# with the returned string shown in a plain-text output.
upload_input = gr.File(label="Upload PDF or Image")
iface = gr.Interface(
    fn=perform_ocr,
    inputs=upload_input,
    outputs="text",
    title="OCR with doctr (PDF and Images)",
    description="Upload a PDF file or an image to extract text using OCR.",
)

# Start serving the interface.
iface.launch()
|