darkbat's picture
Update app.py
f3c4f99 verified
raw
history blame
2.64 kB
import gradio as gr
from transformers import AutoModel, AutoProcessor
from PIL import Image
import torch
import json
# Load the LayoutLMv3 model and processor
def load_model(model_name: str = "microsoft/layoutlmv3-base"):
    """Load a LayoutLMv3 processor together with its matching base model.

    Args:
        model_name: Hub id or local path of the checkpoint to load. Defaults
            to "microsoft/layoutlmv3-base", so existing zero-argument calls
            behave exactly as before.

    Returns:
        A ``(processor, model)`` tuple built from the same checkpoint.
    """
    # apply_ocr=True asks the processor to run OCR on the image itself, which
    # is why callers only hand it the raw image (no words or boxes).
    processor = AutoProcessor.from_pretrained(model_name, apply_ocr=True)
    model = AutoModel.from_pretrained(model_name)
    return processor, model
# Load once at import time: process_document reads these module-level names,
# so every request reuses the same processor and model.
processor, model = load_model()
# Function to process the uploaded image
def process_document(image):
    """Run the LayoutLMv3 model on an uploaded image and summarize its output.

    Args:
        image: The uploaded document; expected to be a ``PIL.Image.Image``
            (the Gradio input component is configured with ``type="pil"``).

    Returns:
        A ``(image, text)`` tuple matching the two Gradio output components.
        ``text`` is a JSON summary on success, or an error message otherwise.
        ``image`` is the input echoed back, or ``None`` when the input was not
        a PIL image (for example, nothing was uploaded).
    """
    # Guard first: a missing upload arrives as None, which is not a PIL image.
    if not isinstance(image, Image.Image):
        return None, "Error: Invalid image format. Please upload a valid image."

    try:
        # Uploads can be RGBA, palette or grayscale; the image processor works
        # on 3-channel RGB input, so normalize the mode before preprocessing.
        rgb_image = image if image.mode == "RGB" else image.convert("RGB")

        # truncation=True caps the OCR'd token sequence at the tokenizer's
        # maximum length; without it a text-dense page overflows the model's
        # position embeddings and the forward pass fails.
        encoding = processor(rgb_image, return_tensors="pt", truncation=True)

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = model(**encoding)

        # Extract logits or embeddings (modify based on your task). Task heads
        # expose `logits`; otherwise fall back to the last hidden state.
        logits = outputs.logits if hasattr(outputs, "logits") else outputs.last_hidden_state

        # Placeholder result; customize based on your task
        # (e.g., token classification, text extraction).
        result = {
            "status": "success",
            "model_output_shape": str(logits.shape),
            "message": "Document processed successfully. Customize this section for specific outputs.",
        }
        return image, json.dumps(result, indent=2)
    except Exception as e:
        # UI boundary: surface any failure (OCR, preprocessing, model) as text
        # in the results box instead of crashing the request.
        return image, f"Error processing document: {str(e)}"
# Gradio Interface
with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
    # Page heading and short description.
    gr.Markdown("# Document Analysis with LayoutLMv3")
    gr.Markdown("Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")

    # Two-column layout: upload controls on the left, results on the right.
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Document Image")
            submit_button = gr.Button("Process Document")
        with gr.Column():
            image_output = gr.Image(label="Uploaded Image")
            text_output = gr.Textbox(label="Analysis Results")

    # Wire the button: process_document returns (image, text), which map onto
    # the two output components in order.
    submit_button.click(fn=process_document, inputs=image_input, outputs=[image_output, text_output])

    # Usage notes shown below the widgets. The text stays flush-left so the
    # string content is unchanged.
    gr.Markdown("""
### Instructions
1. Upload a document image (PNG, JPG, or JPEG).
2. Click "Process Document" to analyze the image.
3. View the results in the output section.
4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).
""")
# Launch the Gradio app. launch() is called without arguments, so Gradio's
# default server settings apply.
demo.launch()