Spaces:

darkbat
/

LayoutLMv3-Document-Analyzer

Running

App Files Files Community

LayoutLMv3-Document-Analyzer / app.py

darkbat

Update app.py

f3c4f99 verified about 2 months ago

raw

history blame

2.64 kB

	import gradio as gr
	from transformers import AutoModel, AutoProcessor
	from PIL import Image
	import torch
	import json

	# Load the LayoutLMv3 model and processor
	def load_model():
	    """Load the pretrained LayoutLMv3 processor and base model.

	    Both objects come from the same ``microsoft/layoutlmv3-base``
	    checkpoint; the processor is created with ``apply_ocr=True``.

	    Returns:
	        A ``(processor, model)`` tuple.
	    """
	    checkpoint = "microsoft/layoutlmv3-base"
	    # Tuple elements are evaluated left to right, so the processor is
	    # still loaded before the model.
	    return (
	        AutoProcessor.from_pretrained(checkpoint, apply_ocr=True),
	        AutoModel.from_pretrained(checkpoint),
	    )

	# Load once at import time; process_document reads these module-level
	# names on every call instead of reloading the checkpoint.
	processor, model = load_model()

	# Function to process the uploaded image
	def process_document(image):
	    """Run LayoutLMv3 on an uploaded document image and summarise the output.

	    Args:
	        image: The uploaded document. Must be a ``PIL.Image.Image``; any
	            other value (including ``None`` when nothing was uploaded) is
	            rejected with an error message.

	    Returns:
	        A ``(image, text)`` tuple matching the two Gradio outputs. ``image``
	        is the input echoed back (``None`` when the input was rejected) and
	        ``text`` is either a JSON summary of the model output or an error
	        message. Exceptions raised during processing are reported in
	        ``text`` rather than propagated.
	    """
	    # Validate before entering the try block so the handler below only
	    # covers the preprocessing and inference calls.
	    if not isinstance(image, Image.Image):
	        return None, "Error: Invalid image format. Please upload a valid image."

	    try:
	        # Uploads may arrive as RGBA, grayscale or palette images; the
	        # LayoutLMv3 image pipeline expects three-channel RGB input.
	        rgb_image = image if image.mode == "RGB" else image.convert("RGB")

	        # truncation=True caps the OCR token sequence at the tokenizer's
	        # maximum length; without it a text-dense page produces more tokens
	        # than the model has position embeddings for and inference fails.
	        encoding = processor(rgb_image, return_tensors="pt", truncation=True)

	        # Inference only: skip gradient tracking.
	        with torch.no_grad():
	            outputs = model(**encoding)

	        # Use logits when the loaded model exposes them, otherwise fall
	        # back to the final hidden states.
	        if hasattr(outputs, "logits"):
	            output_tensor = outputs.logits
	        else:
	            output_tensor = outputs.last_hidden_state

	        # Placeholder result; customize based on the task at hand
	        # (e.g., token classification, text extraction).
	        result = {
	            "status": "success",
	            "model_output_shape": str(output_tensor.shape),
	            "message": "Document processed successfully. Customize this section for specific outputs.",
	        }
	        return image, json.dumps(result, indent=2)
	    except Exception as e:
	        # UI boundary: surface the failure in the results textbox instead
	        # of raising, and still echo the uploaded image.
	        return image, f"Error processing document: {str(e)}"

	# Gradio Interface
	with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
	    # Page header.
	    gr.Markdown("# Document Analysis with LayoutLMv3")
	    gr.Markdown("Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")

	    # Two-column layout: upload controls on the left, results on the right.
	    with gr.Row():
	        with gr.Column():
	            image_input = gr.Image(type="pil", label="Upload Document Image")
	            submit_button = gr.Button("Process Document")
	        with gr.Column():
	            image_output = gr.Image(label="Uploaded Image")
	            text_output = gr.Textbox(label="Analysis Results")

	    # Wire the button to the handler: one image in, (image, text) out.
	    submit_button.click(process_document, image_input, [image_output, text_output])

	    gr.Markdown("""
	### Instructions
	1. Upload a document image (PNG, JPG, or JPEG).
	2. Click "Process Document" to analyze the image.
	3. View the results in the output section.
	4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).
	""")

	# Start the Gradio server for the interface.
	demo.launch()