darkbat committed on
Commit
c720cc9
·
verified ·
1 Parent(s): eb38151

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +81 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModel, AutoProcessor
3
+ from PIL import Image
4
+ import torch
5
+ import io
6
+ import json
7
+
8
+ # Load the LayoutLMv3 model and processor
9
+
10
+
11
def load_model():
    """Fetch the pretrained LayoutLMv3 processor and base model.

    Returns:
        A ``(processor, model)`` tuple, both loaded from the
        ``microsoft/layoutlmv3-base`` checkpoint.
    """
    checkpoint = "microsoft/layoutlmv3-base"

    # apply_ocr=True asks the processor to run OCR on the input image itself.
    # NOTE(review): this presumably requires pytesseract and the Tesseract
    # binary at runtime -- confirm they are installed in the deployment.
    layout_processor = AutoProcessor.from_pretrained(checkpoint, apply_ocr=True)
    layout_model = AutoModel.from_pretrained(checkpoint)

    return layout_processor, layout_model


# Loaded once at import time; process_document reads these module globals.
processor, model = load_model()
19
+
20
+ # Function to process the uploaded image
21
+
22
+
23
def process_document(image):
    """Run an uploaded document image through the model and summarize the output.

    Args:
        image: The uploaded document as a PIL image (the input component is
            declared with ``type="pil"``), or ``None`` when the button is
            clicked before anything has been uploaded.

    Returns:
        A ``(image, text)`` tuple for the two output components: the image
        that was analyzed (the unmodified input if processing failed early)
        and either a JSON summary of the model output or an error message.
    """
    # Nothing uploaded yet: report it clearly instead of surfacing an
    # AttributeError about NoneType to the user.
    if image is None:
        return None, "Error processing document: no image was uploaded."

    try:
        # The input is already a PIL image, so it only needs normalizing to
        # RGB (uploads may be RGBA, palette, or grayscale). PIL images have no
        # ``.rgb`` attribute, so rebuilding one via Image.frombytes always
        # raised and every request ended in the error branch.
        image = image.convert("RGB")

        # Preprocess the image with the processor
        encoding = processor(image, return_tensors="pt")

        # Inference only: no gradients needed
        with torch.no_grad():
            outputs = model(**encoding)

        # Outputs without a ``logits`` attribute (e.g. a base model with no
        # task head) fall back to the last hidden state.
        if hasattr(outputs, "logits"):
            logits = outputs.logits
        else:
            logits = outputs.last_hidden_state

        # Placeholder result; customize based on your task
        # (e.g., token classification, text extraction)
        result = {
            "status": "success",
            "model_output_shape": str(logits.shape),
            "message": "Document processed successfully. Customize this section for specific outputs.",
        }

        return image, json.dumps(result, indent=2)

    except Exception as e:
        # UI boundary: show the failure in the results box rather than crash.
        return image, f"Error processing document: {e}"
50
+
51
+
52
+ # Gradio Interface
53
with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
    # Page header and short description
    gr.Markdown("# Document Analysis with LayoutLMv3")
    gr.Markdown("Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")

    # Two-column layout: upload + trigger on the left, results on the right
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                type="pil",
                label="Upload Document Image",
            )
            submit_button = gr.Button("Process Document")

        with gr.Column():
            image_output = gr.Image(label="Uploaded Image")
            text_output = gr.Textbox(label="Analysis Results")

    # Clicking the button sends the uploaded image to process_document and
    # routes its (image, text) return value to the two output components.
    submit_button.click(
        fn=process_document,
        inputs=image_input,
        outputs=[image_output, text_output],
    )

    gr.Markdown("""
    ### Instructions
    1. Upload a document image (PNG, JPG, or JPEG).
    2. Click "Process Document" to analyze the image.
    3. View the results in the output section.
    4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).
    """)

# Start the Gradio server
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ gradio
4
+ pillow