|
import gradio as gr |
|
from transformers import AutoModel, AutoProcessor |
|
from PIL import Image |
|
import torch |
|
import json |
|
|
|
|
|
def load_model():
    """Return the LayoutLMv3 ``(processor, model)`` pair.

    Both objects are created from the ``microsoft/layoutlmv3-base``
    checkpoint. The processor is built with ``apply_ocr=True``.

    Returns:
        A 2-tuple ``(processor, model)``.
    """
    checkpoint = "microsoft/layoutlmv3-base"
    # The processor is loaded before the model, same order as before.
    return (
        AutoProcessor.from_pretrained(checkpoint, apply_ocr=True),
        AutoModel.from_pretrained(checkpoint),
    )
|
|
|
# Load once at import time so every request handled by the UI reuses the
# same processor and model instead of reloading them per call.
processor, model = load_model()
|
|
|
|
|
def process_document(image):
    """Run the LayoutLMv3 model on a document image and summarize the output.

    Args:
        image: The uploaded document. Anything that is not a
            ``PIL.Image.Image`` (including ``None`` when nothing was
            uploaded) is rejected.

    Returns:
        A ``(image, text)`` pair matching the two UI outputs:

        * invalid input: ``(None, <error message>)``
        * success: ``(image, <JSON summary string>)``
        * failure during processing: ``(image, <error message>)``
    """
    # Guard clause: the isinstance check cannot raise, so it lives outside
    # the try block.
    if not isinstance(image, Image.Image):
        return None, "Error: Invalid image format. Please upload a valid image."

    try:
        # Normalize to 3-channel RGB; grayscale, RGBA or palette images can
        # otherwise fail in the image processor when this function is called
        # outside the Gradio component.
        rgb_image = image if image.mode == "RGB" else image.convert("RGB")

        # Truncate the OCR'd text to the model's 512-token text limit; without
        # this, dense pages overflow the position embeddings and always end up
        # in the error path below.
        encoding = processor(
            rgb_image,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = model(**encoding)

        # Prefer ``logits`` when the output has them; otherwise fall back to
        # the last hidden state.
        if hasattr(outputs, "logits"):
            features = outputs.logits
        else:
            features = outputs.last_hidden_state

        result = {
            "status": "success",
            "model_output_shape": str(features.shape),
            "message": "Document processed successfully. Customize this section for specific outputs.",
        }
        # Echo the original (unconverted) upload back to the UI.
        return image, json.dumps(result, indent=2)
    except Exception as e:
        # UI boundary: report any failure as text rather than raising.
        return image, f"Error processing document: {str(e)}"
|
|
|
|
|
# Build the UI: upload column on the left, results column on the right.
with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
    gr.Markdown("# Document Analysis with LayoutLMv3")
    gr.Markdown("Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")

    with gr.Row():
        with gr.Column():
            # type="pil" makes the handler receive a PIL image.
            image_input = gr.Image(label="Upload Document Image", type="pil")
            submit_button = gr.Button(value="Process Document")
        with gr.Column():
            image_output = gr.Image(label="Uploaded Image")
            text_output = gr.Textbox(label="Analysis Results")

    # Clicking the button sends the upload to process_document and routes its
    # (image, text) result to the two output components.
    submit_button.click(
        process_document,
        inputs=image_input,
        outputs=[image_output, text_output],
    )

    gr.Markdown("""
    ### Instructions
    1. Upload a document image (PNG, JPG, or JPEG).
    2. Click "Process Document" to analyze the image.
    3. View the results in the output section.
    4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).
    """)

# Start the app as soon as the module is executed.
demo.launch()