Spaces:

darkbat
/

LayoutLMv3-Document-Analyzer

Running

File size: 2,632 Bytes

c720cc9

import gradio as gr
from transformers import AutoModel, AutoProcessor
from PIL import Image
import torch
import io
import json

# Load the LayoutLMv3 model and processor


def load_model():
    """Fetch the pretrained LayoutLMv3 processor and base model.

    Returns:
        A ``(processor, model)`` tuple, both loaded from the
        ``microsoft/layoutlmv3-base`` checkpoint. The processor is created
        with ``apply_ocr=True``.
    """
    checkpoint = "microsoft/layoutlmv3-base"
    return (
        AutoProcessor.from_pretrained(checkpoint, apply_ocr=True),
        AutoModel.from_pretrained(checkpoint),
    )


# Load once at import time; process_document reads these module-level names
# on every call instead of reloading the checkpoint.
processor, model = load_model()

# Function to process the uploaded image


def process_document(image):
    """Run LayoutLMv3 on an uploaded document image and summarise the output.

    Args:
        image: The PIL image delivered by the ``gr.Image(type="pil")`` input,
            or ``None`` when the button is clicked without an upload.

    Returns:
        A ``(image, text)`` tuple for the two output components. On success
        ``image`` is the RGB-converted input and ``text`` is a JSON report
        containing the shape of the model output. On failure ``image`` is the
        input as received and ``text`` starts with
        ``"Error processing document:"``.
    """
    # Nothing was uploaded: report it plainly instead of surfacing an
    # AttributeError on None.
    if image is None:
        return None, "Error processing document: no image was uploaded."

    try:
        # The input is already a PIL image, so only the mode needs
        # normalising. (PIL images have no `.rgb` attribute; rebuilding the
        # image via Image.frombytes from it failed on every call.)
        rgb_image = image.convert("RGB")

        # Truncate the OCR'd text to the model's 512-token limit so that
        # text-dense pages do not overflow the position embeddings.
        encoding = processor(
            rgb_image,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )

        # Inference only; no gradients needed.
        with torch.no_grad():
            outputs = model(**encoding)

        # Prefer logits when the loaded model exposes them, otherwise fall
        # back to the final hidden states.
        features = getattr(outputs, "logits", None)
        if features is None:
            features = outputs.last_hidden_state

        # Placeholder result; customize based on your task
        # (e.g., token classification, text extraction).
        result = {
            "status": "success",
            "model_output_shape": str(features.shape),
            "message": "Document processed successfully. Customize this section for specific outputs."
        }

        return rgb_image, json.dumps(result, indent=2)

    except Exception as e:
        # UI boundary: show the failure in the results box rather than
        # letting the exception escape the callback.
        return image, f"Error processing document: {str(e)}"


# Gradio Interface
# One row with two columns: the upload widget and button on one side, the
# echoed image and the textual result on the other.
with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
    gr.Markdown("# Document Analysis with LayoutLMv3")
    gr.Markdown(
        "Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")

    with gr.Row():
        with gr.Column():
            # type="pil" asks Gradio to pass the upload to the callback as a
            # PIL image.
            image_input = gr.Image(type="pil", label="Upload Document Image")
            submit_button = gr.Button("Process Document")
        with gr.Column():
            image_output = gr.Image(label="Uploaded Image")
            text_output = gr.Textbox(label="Analysis Results")

    # process_document returns (image, text), matching the order of `outputs`.
    submit_button.click(
        fn=process_document,
        inputs=image_input,
        outputs=[image_output, text_output]
    )

    gr.Markdown("""

    ### Instructions

    1. Upload a document image (PNG, JPG, or JPEG).

    2. Click "Process Document" to analyze the image.

    3. View the results in the output section.

    4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).

    """)

# Launch the Gradio app
# NOTE: there is no __main__ guard, so this runs whenever the module is
# executed or imported.
demo.launch()