Spaces:

darkbat
/

LayoutLMv3-Document-Analyzer

Running

App Files Files Community

darkbat committed on Jun 29

Commit

f3c4f99

verified ·

1 Parent(s): c720cc9

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -81

app.py CHANGED Viewed

@@ -1,81 +1,72 @@
-import gradio as gr
-from transformers import AutoModel, AutoProcessor
-from PIL import Image
-import torch
-import io
-import json
-# Load the LayoutLMv3 model and processor
-def load_model():
-    processor = AutoProcessor.from_pretrained(
-        "microsoft/layoutlmv3-base", apply_ocr=True)
-    model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
-    return processor, model
-processor, model = load_model()
-# Function to process the uploaded image
-def process_document(image):
-    try:
-        # Convert Gradio image input to PIL Image
-        image = Image.frombytes("RGB", image.size, image.rgb).convert("RGB")
-        # Preprocess the image with the processor
-        encoding = processor(image, return_tensors="pt")
-        # Run the model
-        with torch.no_grad():
-            outputs = model(**encoding)
-        # Extract logits or embeddings (modify based on your task)
-        logits = outputs.logits if hasattr(
-            outputs, 'logits') else outputs.last_hidden_state
-        # Placeholder result; customize based on your task (e.g., token classification, text extraction)
-        result = {
-            "status": "success",
-            "model_output_shape": str(logits.shape),
-            "message": "Document processed successfully. Customize this section for specific outputs."
-        }
-        return image, json.dumps(result, indent=2)
-    except Exception as e:
-        return image, f"Error processing document: {str(e)}"
-# Gradio Interface
-with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
-    gr.Markdown("# Document Analysis with LayoutLMv3")
-    gr.Markdown(
-        "Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(type="pil", label="Upload Document Image")
-            submit_button = gr.Button("Process Document")
-        with gr.Column():
-            image_output = gr.Image(label="Uploaded Image")
-            text_output = gr.Textbox(label="Analysis Results")
-    submit_button.click(
-        fn=process_document,
-        inputs=image_input,
-        outputs=[image_output, text_output]
-    )
-    gr.Markdown("""
-    ### Instructions
-    1. Upload a document image (PNG, JPG, or JPEG).
-    2. Click "Process Document" to analyze the image.
-    3. View the results in the output section.
-    4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).
-    """)
-# Launch the Gradio app
-demo.launch()

+import gradio as gr
+from transformers import AutoModel, AutoProcessor
+from PIL import Image
+import torch
+import json
+# Load the LayoutLMv3 model and processor
+def load_model():
+    """Load the LayoutLMv3 base checkpoint.
+
+    Returns:
+        A ``(processor, model)`` tuple. The processor is created with
+        ``apply_ocr=True``; both come from the same checkpoint name.
+    """
+    checkpoint = "microsoft/layoutlmv3-base"
+    doc_processor = AutoProcessor.from_pretrained(checkpoint, apply_ocr=True)
+    backbone = AutoModel.from_pretrained(checkpoint)
+    return doc_processor, backbone
+# Loaded once at import time; process_document reads these module-level names.
+processor, model = load_model()
+# Function to process the uploaded image
+def process_document(image):
+    """Run an uploaded document image through LayoutLMv3 and summarise the output.
+
+    Args:
+        image: Value passed by the Gradio image input. Expected to be a
+            ``PIL.Image.Image`` because the input component uses ``type="pil"``.
+
+    Returns:
+        A ``(image, text)`` tuple for the two output components. ``text`` is a
+        JSON summary on success or an error message on failure. ``image`` is
+        the uploaded image, or ``None`` when the input was not a PIL image.
+    """
+    # Guard clause: this check cannot raise, so it sits outside the try block.
+    if not isinstance(image, Image.Image):
+        return None, "Error: Invalid image format. Please upload a valid image."
+    try:
+        # Normalise palette / grayscale / RGBA uploads to three channels
+        # before preprocessing; the uploaded image itself is returned as-is.
+        rgb_image = image.convert("RGB")
+        # Truncate the OCR'd token sequence: without this, a dense page can
+        # produce more tokens than the model's position embeddings cover and
+        # the forward pass fails. 512 is the documented limit for the base
+        # checkpoint.
+        encoding = processor(
+            rgb_image,
+            truncation=True,
+            max_length=512,
+            return_tensors="pt",
+        )
+        # Inference only; no gradients are needed.
+        with torch.no_grad():
+            outputs = model(**encoding)
+        # Use logits when the output object has them, else the last hidden state.
+        logits = outputs.logits if hasattr(outputs, 'logits') else outputs.last_hidden_state
+        # Placeholder result; customize based on your task (e.g., token classification, text extraction)
+        result = {
+            "status": "success",
+            "model_output_shape": str(logits.shape),
+            "message": "Document processed successfully. Customize this section for specific outputs."
+        }
+        return image, json.dumps(result, indent=2)
+    except Exception as e:
+        # UI boundary: report the failure in the results box instead of raising.
+        return image, f"Error processing document: {str(e)}"
+# Gradio interface: a two-column layout wired to process_document
+with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
+    gr.Markdown("# Document Analysis with LayoutLMv3")  # page heading
+    gr.Markdown("Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")
+    with gr.Row():
+        with gr.Column():  # first column: upload widget and trigger button
+            image_input = gr.Image(type="pil", label="Upload Document Image")  # type="pil" so the handler receives a PIL image
+            submit_button = gr.Button("Process Document")
+        with gr.Column():  # second column: the two outputs of process_document
+            image_output = gr.Image(label="Uploaded Image")
+            text_output = gr.Textbox(label="Analysis Results")
+    submit_button.click(  # run the handler when the button is clicked
+        fn=process_document,
+        inputs=image_input,
+        outputs=[image_output, text_output]  # same order as the handler's returned tuple
+    )
+    gr.Markdown("""
+    ### Instructions
+    1. Upload a document image (PNG, JPG, or JPEG).
+    2. Click "Process Document" to analyze the image.
+    3. View the results in the output section.
+    4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).
+    """)
+# Launch the Gradio app (runs at module level, i.e. whenever this file is executed or imported)
+demo.launch()