darkbat committed on
Commit
f3c4f99
·
verified ·
1 Parent(s): c720cc9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -81
app.py CHANGED
@@ -1,81 +1,72 @@
1
- import gradio as gr
2
- from transformers import AutoModel, AutoProcessor
3
- from PIL import Image
4
- import torch
5
- import io
6
- import json
7
-
8
- # Load the LayoutLMv3 model and processor
9
-
10
-
11
- def load_model():
12
- processor = AutoProcessor.from_pretrained(
13
- "microsoft/layoutlmv3-base", apply_ocr=True)
14
- model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
15
- return processor, model
16
-
17
-
18
- processor, model = load_model()
19
-
20
- # Function to process the uploaded image
21
-
22
-
23
- def process_document(image):
24
- try:
25
- # Convert Gradio image input to PIL Image
26
- image = Image.frombytes("RGB", image.size, image.rgb).convert("RGB")
27
-
28
- # Preprocess the image with the processor
29
- encoding = processor(image, return_tensors="pt")
30
-
31
- # Run the model
32
- with torch.no_grad():
33
- outputs = model(**encoding)
34
-
35
- # Extract logits or embeddings (modify based on your task)
36
- logits = outputs.logits if hasattr(
37
- outputs, 'logits') else outputs.last_hidden_state
38
-
39
- # Placeholder result; customize based on your task (e.g., token classification, text extraction)
40
- result = {
41
- "status": "success",
42
- "model_output_shape": str(logits.shape),
43
- "message": "Document processed successfully. Customize this section for specific outputs."
44
- }
45
-
46
- return image, json.dumps(result, indent=2)
47
-
48
- except Exception as e:
49
- return image, f"Error processing document: {str(e)}"
50
-
51
-
52
- # Gradio Interface
53
- with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
54
- gr.Markdown("# Document Analysis with LayoutLMv3")
55
- gr.Markdown(
56
- "Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")
57
-
58
- with gr.Row():
59
- with gr.Column():
60
- image_input = gr.Image(type="pil", label="Upload Document Image")
61
- submit_button = gr.Button("Process Document")
62
- with gr.Column():
63
- image_output = gr.Image(label="Uploaded Image")
64
- text_output = gr.Textbox(label="Analysis Results")
65
-
66
- submit_button.click(
67
- fn=process_document,
68
- inputs=image_input,
69
- outputs=[image_output, text_output]
70
- )
71
-
72
- gr.Markdown("""
73
- ### Instructions
74
- 1. Upload a document image (PNG, JPG, or JPEG).
75
- 2. Click "Process Document" to analyze the image.
76
- 3. View the results in the output section.
77
- 4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).
78
- """)
79
-
80
- # Launch the Gradio app
81
- demo.launch()
 
1
+ import gradio as gr
2
+ from transformers import AutoModel, AutoProcessor
3
+ from PIL import Image
4
+ import torch
5
+ import json
6
+
7
+ # Load the LayoutLMv3 model and processor
8
+ def load_model():
9
+ processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
10
+ model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
11
+ return processor, model
12
+
13
+ processor, model = load_model()
14
+
15
+ # Function to process the uploaded image
16
+ def process_document(image):
17
+ try:
18
+ # Ensure image is a PIL Image (Gradio provides it as PIL with type="pil")
19
+ if not isinstance(image, Image.Image):
20
+ return None, "Error: Invalid image format. Please upload a valid image."
21
+
22
+ # Preprocess the image with the processor
23
+ encoding = processor(image, return_tensors="pt")
24
+
25
+ # Run the model
26
+ with torch.no_grad():
27
+ outputs = model(**encoding)
28
+
29
+ # Extract logits or embeddings (modify based on your task)
30
+ logits = outputs.logits if hasattr(outputs, 'logits') else outputs.last_hidden_state
31
+
32
+ # Placeholder result; customize based on your task (e.g., token classification, text extraction)
33
+ result = {
34
+ "status": "success",
35
+ "model_output_shape": str(logits.shape),
36
+ "message": "Document processed successfully. Customize this section for specific outputs."
37
+ }
38
+
39
+ return image, json.dumps(result, indent=2)
40
+
41
+ except Exception as e:
42
+ return image, f"Error processing document: {str(e)}"
43
+
44
+ # Gradio Interface
45
+ with gr.Blocks(title="Document Analysis with LayoutLMv3") as demo:
46
+ gr.Markdown("# Document Analysis with LayoutLMv3")
47
+ gr.Markdown("Upload a document image (PNG, JPG, JPEG) to analyze its layout and extract text.")
48
+
49
+ with gr.Row():
50
+ with gr.Column():
51
+ image_input = gr.Image(type="pil", label="Upload Document Image")
52
+ submit_button = gr.Button("Process Document")
53
+ with gr.Column():
54
+ image_output = gr.Image(label="Uploaded Image")
55
+ text_output = gr.Textbox(label="Analysis Results")
56
+
57
+ submit_button.click(
58
+ fn=process_document,
59
+ inputs=image_input,
60
+ outputs=[image_output, text_output]
61
+ )
62
+
63
+ gr.Markdown("""
64
+ ### Instructions
65
+ 1. Upload a document image (PNG, JPG, or JPEG).
66
+ 2. Click "Process Document" to analyze the image.
67
+ 3. View the results in the output section.
68
+ 4. This is a basic demo; customize the output processing for specific tasks (e.g., text extraction, layout analysis).
69
+ """)
70
+
71
+ # Launch the Gradio app
72
+ demo.launch()