File size: 1,460 Bytes
5b2c368
41311bb
 
 
5b2c368
41311bb
5b2c368
41311bb
 
5b2c368
 
 
41311bb
5b2c368
41311bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b2c368
41311bb
 
 
 
5b2c368
41311bb
5b2c368
 
41311bb
5b2c368
41311bb
 
5b2c368
 
41311bb
08eaeab
41311bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import fitz  # PyMuPDF
import io
from PIL import Image

# Initialize the OCR model once, at import time; perform_ocr below reuses this
# module-level predictor for every call instead of rebuilding it per request.
# pretrained=True asks docTR for pretrained weights (presumably fetched on
# first use -- confirm against the deployment environment).
model = ocr_predictor(pretrained=True)

def perform_ocr(file):
    """Extract text from an uploaded PDF or image with the module-level OCR model.

    Args:
        file: The upload handed over by the Gradio ``File`` input. Either an
            object exposing its on-disk path as ``.name`` or a plain path
            string is accepted. ``None`` means nothing was uploaded.

    Returns:
        The recognised text as a string. For a PDF, every page is rasterised
        and OCR'd on its own; the page texts are joined with a blank line and
        surrounding whitespace is stripped. Any other file is passed to docTR
        as a single image. An empty string is returned when ``file`` is
        ``None``.
    """
    if file is None:
        # Submitting the form without an upload would otherwise crash on
        # ``file.name``; an empty result is the least surprising answer.
        return ""

    # Accept both file-like wrappers (path in ``.name``) and bare path strings.
    path = getattr(file, "name", file)

    if not str(path).lower().endswith(".pdf"):
        # Anything that is not a PDF is handed to docTR as an image file.
        return model(DocumentFile.from_images(path)).render()

    page_texts = []
    # The context manager closes the PDF even if rendering or OCR raises, so
    # the document handle is no longer leaked on every request.
    with fitz.open(path) as pdf_document:
        for page in pdf_document:
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            # docTR is given encoded image bytes, so serialise the page as PNG.
            png_buffer = io.BytesIO()
            img.save(png_buffer, format="PNG")

            doc = DocumentFile.from_images(png_buffer.getvalue())
            page_texts.append(model(doc).render())

    # A blank line separates pages; strip() drops leading/trailing whitespace.
    return "\n\n".join(page_texts).strip()

# Assemble the web UI: a single file upload goes in, plain text comes out,
# and perform_ocr does the work in between.
_upload_input = gr.File(label="Upload PDF or Image")
iface = gr.Interface(
    title="OCR with doctr (PDF and Images)",
    description="Upload a PDF file or an image to extract text using OCR.",
    fn=perform_ocr,
    inputs=_upload_input,
    outputs="text",
)

# Start serving the app as soon as this module is executed.
iface.launch()