"""Small Gradio app that runs docTR OCR on an uploaded PDF or image."""

import io  # kept for compatibility; ocr_process no longer needs it

import gradio as gr
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

# Initialize the OCR model once at import time so every request reuses it.
model = ocr_predictor(
    det_arch='db_resnet50',
    reco_arch='crnn_vgg16_bn',
    pretrained=True,
)


def _extract_text(result):
    """Flatten an OCR result into plain text.

    Words on a line are joined with single spaces, lines within a block
    with newlines, and blocks (across all pages) with one blank line.
    Blocks that contain no words are skipped.
    """
    block_texts = []
    for page in result.pages:
        for block in page.blocks:
            line_texts = [
                " ".join(word.value for word in line.words)
                for line in block.lines
            ]
            block_text = "\n".join(line_texts).strip()
            if block_text:
                block_texts.append(block_text)
    return "\n\n".join(block_texts)


def ocr_process(file):
    """Run OCR on an uploaded file and return the recognized text.

    Args:
        file: The value Gradio's ``File`` input hands to the callback.
            It may be an object exposing the upload's location as
            ``.name``, a plain path string, or ``None`` when nothing was
            uploaded.

    Returns:
        The extracted text, or an empty string if no file was provided.
    """
    if file is None:
        return ""

    # Resolve a filesystem path whether we got a file-like wrapper
    # (``.name``) or a bare path string.
    path = str(getattr(file, "name", file))

    if path.lower().endswith('.pdf'):
        doc = DocumentFile.from_pdf(path)
    else:
        # Assume it's an image if not PDF. Pass the path itself:
        # from_images expects a path or raw bytes, not a BytesIO stream.
        doc = DocumentFile.from_images(path)

    # Perform OCR
    result = model(doc)

    return _extract_text(result)


# Create Gradio interface
iface = gr.Interface(
    fn=ocr_process,
    inputs=gr.File(label="Upload PDF or Image"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="OCR with doctr",
    description="Upload a PDF or image file to extract text using OCR.",
)

# Launch the interface
iface.launch()