Spaces:

GAS17
/

pdfextract

Runtime error

File size: 1,460 Bytes

5b2c368
41311bb
 
 
5b2c368
41311bb
5b2c368
41311bb
 
5b2c368
 
 
41311bb
5b2c368
41311bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b2c368
41311bb
 
 
 
5b2c368
41311bb
5b2c368
 
41311bb
5b2c368
41311bb
 
5b2c368
 
41311bb
08eaeab
41311bb

import gradio as gr
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import fitz  # PyMuPDF
import io
from PIL import Image

# Initialize the OCR model once at import time; perform_ocr() reuses this
# single shared predictor for every request instead of rebuilding it per call.
# NOTE(review): pretrained=True presumably fetches weights on first use — confirm.
model = ocr_predictor(pretrained=True)

def perform_ocr(file):
    """Extract text from an uploaded PDF or image with the docTR predictor.

    PDFs are rasterised page by page with PyMuPDF and every page image is run
    through the module-level ``model``. Any other upload is handed to docTR
    directly as a single image file.

    Args:
        file: The value Gradio passes for the ``gr.File`` input: either a
            plain path string or an object exposing the uploaded file's path
            as ``.name``. ``None`` when nothing was uploaded.

    Returns:
        The recognised text. For PDFs, the text of consecutive pages is
        separated by a blank line and surrounding whitespace is stripped.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        # Submitting the form without a file passes None; surface a readable
        # message in the UI instead of an AttributeError on ``file.name``.
        raise gr.Error("Please upload a PDF or image file.")

    # Accept both a plain path string and a file wrapper carrying ``.name``.
    path = file if isinstance(file, str) else file.name

    if not path.lower().endswith('.pdf'):
        # Process image
        return model(DocumentFile.from_images(path)).render()

    # Process PDF
    page_texts = []
    # The context manager closes the document even if OCR raises midway.
    with fitz.open(path) as pdf_document:
        for page in pdf_document:
            pix = page.get_pixmap()
            # Encode the rendered page straight to PNG bytes; no need for an
            # intermediate PIL image just to re-serialise the same pixels.
            png_bytes = pix.tobytes("png")

            # Perform OCR on the page image
            result = model(DocumentFile.from_images(png_bytes))
            page_texts.append(result.render())

    # Blank line between pages; strip leading/trailing whitespace overall.
    return "\n\n".join(page_texts).strip()

# Build the web UI: one file-upload input wired to perform_ocr, with the
# extracted text shown in a plain text output.
iface = gr.Interface(
    perform_ocr,
    gr.File(label="Upload PDF or Image"),
    "text",
    title="OCR with doctr (PDF and Images)",
    description="Upload a PDF file or an image to extract text using OCR.",
)

# Start serving the interface.
iface.launch()