Charles Kabui
committed on
Commit
·
c33e07b
1
Parent(s):
1f9e550
transparent bboxes
Browse files- analysis.ipynb +0 -0
- app.py +1 -0
- main.py +6 -18
- utils/visualize_bboxes_on_image.py +42 -28
analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import os
|
2 |
os.system("apt install -y poppler-utils")
|
3 |
os.system("python -m pip install --upgrade pip")
|
|
|
4 |
os.system("python -m pip install torch==2.1.0")
|
5 |
os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@898507047cf441a1e4be7a729270961c401c4354'")
|
6 |
os.system("python -m pip install layoutparser==0.3.4 layoutparser[layoutmodels] layoutparser[ocr]")
|
|
|
1 |
import os
|
2 |
os.system("apt install -y poppler-utils")
|
3 |
os.system("python -m pip install --upgrade pip")
|
4 |
+
os.system("python -m pip install pdf2image==1.16.3")
|
5 |
os.system("python -m pip install torch==2.1.0")
|
6 |
os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@898507047cf441a1e4be7a729270961c401c4354'")
|
7 |
os.system("python -m pip install layoutparser==0.3.4 layoutparser[layoutmodels] layoutparser[ocr]")
|
main.py
CHANGED
@@ -23,7 +23,7 @@ cache = {
|
|
23 |
pre_message_style = 'overflow:auto;border:2px solid pink;padding:4px;border-radius:4px;'
|
24 |
visualize_bboxes_on_image_kwargs = {
|
25 |
'label_text_color': 'white',
|
26 |
-
'
|
27 |
'label_text_size': 12,
|
28 |
'label_text_padding': 3,
|
29 |
'label_rectangle_left_margin': 0,
|
@@ -31,20 +31,6 @@ visualize_bboxes_on_image_kwargs = {
|
|
31 |
}
|
32 |
vectors_types = ['vectors', 'weighted_vectors', 'reduced_vectors', 'weighted_reduced_vectors']
|
33 |
|
34 |
-
annotation_key = 'is_annotated_document_image'
|
35 |
-
annotation_original_image_key = 'original_image'
|
36 |
-
def annotate_document_image(document_image: Image.Image, original_document_image: Image.Image):
|
37 |
-
document_image.info.update({
|
38 |
-
annotation_key: True,
|
39 |
-
annotation_original_image_key: original_document_image
|
40 |
-
})
|
41 |
-
return document_image
|
42 |
-
|
43 |
-
def get_original_document_image(document_image: Image.Image):
|
44 |
-
if document_image.info.get(annotation_key) == True:
|
45 |
-
return document_image.info.get(annotation_original_image_key)
|
46 |
-
return document_image
|
47 |
-
|
48 |
def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
|
49 |
message = None
|
50 |
annotations = {
|
@@ -87,16 +73,18 @@ def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image
|
|
87 |
document_image_1 = visualize_bboxes_on_image(
|
88 |
image = document_image_1,
|
89 |
bboxes = cache['document_image_1_features'][annotations['predicted_bboxes']],
|
90 |
-
|
91 |
cache['document_image_1_features'][annotations['predicted_labels']],
|
92 |
cache['document_image_1_features'][annotations['predicted_scores']])],
|
|
|
93 |
**visualize_bboxes_on_image_kwargs)
|
94 |
document_image_2 = visualize_bboxes_on_image(
|
95 |
image = document_image_2,
|
96 |
bboxes = cache['document_image_2_features'][annotations['predicted_bboxes']],
|
97 |
-
|
98 |
cache['document_image_2_features'][annotations['predicted_labels']],
|
99 |
cache['document_image_2_features'][annotations['predicted_scores']])],
|
|
|
100 |
**visualize_bboxes_on_image_kwargs)
|
101 |
|
102 |
cache['output_document_image_1_hash'] = str(average_hash(document_image_1))
|
@@ -141,7 +129,7 @@ def document_view(document_number: int):
|
|
141 |
gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
|
142 |
with gr.Tabs() as document_tabs:
|
143 |
with gr.Tab("From Image", id=0):
|
144 |
-
document = gr.Image(type="pil", label=f"Document {document_number}", visible=False, interactive=False, show_download_button=
|
145 |
document_error_message = gr.HTML(label="Error Message", visible=False)
|
146 |
document_preview = gr.UploadButton(
|
147 |
"Upload PDF or Document Image",
|
|
|
23 |
pre_message_style = 'overflow:auto;border:2px solid pink;padding:4px;border-radius:4px;'
|
24 |
visualize_bboxes_on_image_kwargs = {
|
25 |
'label_text_color': 'white',
|
26 |
+
'label_fill_color': 'black',
|
27 |
'label_text_size': 12,
|
28 |
'label_text_padding': 3,
|
29 |
'label_rectangle_left_margin': 0,
|
|
|
31 |
}
|
32 |
vectors_types = ['vectors', 'weighted_vectors', 'reduced_vectors', 'weighted_reduced_vectors']
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
|
35 |
message = None
|
36 |
annotations = {
|
|
|
73 |
document_image_1 = visualize_bboxes_on_image(
|
74 |
image = document_image_1,
|
75 |
bboxes = cache['document_image_1_features'][annotations['predicted_bboxes']],
|
76 |
+
labels = [f'{label}, score:{round(score, 2)}' for label, score in zip(
|
77 |
cache['document_image_1_features'][annotations['predicted_labels']],
|
78 |
cache['document_image_1_features'][annotations['predicted_scores']])],
|
79 |
+
bbox_outline_color = [color_map[label] for label in cache['document_image_1_features'][annotations['predicted_labels']]],
|
80 |
**visualize_bboxes_on_image_kwargs)
|
81 |
document_image_2 = visualize_bboxes_on_image(
|
82 |
image = document_image_2,
|
83 |
bboxes = cache['document_image_2_features'][annotations['predicted_bboxes']],
|
84 |
+
labels = [f'{label}, score:{score}' for label, score in zip(
|
85 |
cache['document_image_2_features'][annotations['predicted_labels']],
|
86 |
cache['document_image_2_features'][annotations['predicted_scores']])],
|
87 |
+
bbox_outline_color = [color_map[label] for label in cache['document_image_2_features'][annotations['predicted_labels']]],
|
88 |
**visualize_bboxes_on_image_kwargs)
|
89 |
|
90 |
cache['output_document_image_1_hash'] = str(average_hash(document_image_1))
|
|
|
129 |
gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
|
130 |
with gr.Tabs() as document_tabs:
|
131 |
with gr.Tab("From Image", id=0):
|
132 |
+
document = gr.Image(type="pil", label=f"Document {document_number}", visible=False, interactive=False, show_download_button=True)
|
133 |
document_error_message = gr.HTML(label="Error Message", visible=False)
|
134 |
document_preview = gr.UploadButton(
|
135 |
"Upload PDF or Document Image",
|
utils/visualize_bboxes_on_image.py
CHANGED
@@ -7,14 +7,15 @@ from typing import List
|
|
7 |
from functools import cache
|
8 |
|
9 |
DEFAULTS = {
|
10 |
-
'
|
11 |
-
'
|
|
|
12 |
'label_text_color': "black",
|
13 |
-
'
|
14 |
'label_text_padding': 0,
|
15 |
'label_rectangle_left_margin': 0,
|
16 |
'label_rectangle_top_margin': 0,
|
17 |
-
'label_text_size': 12
|
18 |
}
|
19 |
|
20 |
@cache
|
@@ -27,11 +28,12 @@ def get_font(path_or_url: str = 'https://github.com/googlefonts/roboto/raw/main/
|
|
27 |
def visualize_bboxes_on_image(
|
28 |
image: Image.Image,
|
29 |
bboxes: List[List[int]],
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
35 |
label_text_padding = DEFAULTS["label_text_padding"],
|
36 |
label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
|
37 |
label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
|
@@ -42,33 +44,45 @@ def visualize_bboxes_on_image(
|
|
42 |
Args:
|
43 |
image: Image to visualize
|
44 |
bboxes: List of bounding boxes
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
48 |
label_text_color: Color of the label text
|
49 |
-
|
50 |
-
convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
|
51 |
label_text_padding: Padding of the label text
|
52 |
-
|
53 |
-
|
54 |
label_text_size: Font size of the label text
|
|
|
55 |
Returns:
|
56 |
-
Image: Image with bounding boxes
|
57 |
'''
|
58 |
image = image.copy().convert("RGB")
|
59 |
draw = ImageDraw.Draw(image)
|
60 |
font = get_font(size = label_text_size)
|
61 |
-
|
62 |
-
|
|
|
|
|
63 |
x0, y0, x1, y1 = convert_to_x0y0x1y1(bbox) if convert_to_x0y0x1y1 is not None else bbox
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
draw_text_on_image(
|
67 |
draw,
|
68 |
[x0, y0],
|
69 |
-
|
70 |
label_text_color,
|
71 |
-
|
72 |
label_text_padding,
|
73 |
label_rectangle_left_margin,
|
74 |
label_rectangle_top_margin,
|
@@ -79,9 +93,9 @@ def visualize_bboxes_on_image(
|
|
79 |
def draw_text_on_image(
|
80 |
image_or_draw: Image.Image | ImageDraw.ImageDraw,
|
81 |
text_position_xy: List[int],
|
82 |
-
|
83 |
label_text_color = DEFAULTS["label_text_color"],
|
84 |
-
|
85 |
label_text_padding = DEFAULTS["label_text_padding"],
|
86 |
label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
|
87 |
label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
|
@@ -93,13 +107,13 @@ def draw_text_on_image(
|
|
93 |
x0, y0 = text_position_xy
|
94 |
text_position = (x0 - label_rectangle_left_margin + label_text_padding, y0 - label_rectangle_top_margin + label_text_padding)
|
95 |
draw = ImageDraw.Draw(image) if is_image else image_or_draw
|
96 |
-
|
97 |
xy = [
|
98 |
text_position[0] - label_text_padding,
|
99 |
text_position[1] - label_text_padding,
|
100 |
text_bbox_right + label_text_padding + label_text_padding,
|
101 |
text_bbox_bottom + label_text_padding + label_text_padding
|
102 |
]
|
103 |
-
draw.rectangle(xy, fill =
|
104 |
-
draw.text(text_position,
|
105 |
return image
|
|
|
7 |
from functools import cache
|
8 |
|
9 |
DEFAULTS = {
|
10 |
+
'bbox_outline_width': 2,
|
11 |
+
'bbox_outline_color': (0, 0, 256, 123), # RGBA; each channel, including alpha, runs from 0 to 255 in Pillow
|
12 |
+
'bbox_fill_color': (256, 0, 0, 50), # RGBA; each channel, including alpha, runs from 0 to 255 in Pillow
|
13 |
'label_text_color': "black",
|
14 |
+
'label_fill_color': "red",
|
15 |
'label_text_padding': 0,
|
16 |
'label_rectangle_left_margin': 0,
|
17 |
'label_rectangle_top_margin': 0,
|
18 |
+
'label_text_size': 12,
|
19 |
}
|
20 |
|
21 |
@cache
|
|
|
28 |
def visualize_bboxes_on_image(
|
29 |
image: Image.Image,
|
30 |
bboxes: List[List[int]],
|
31 |
+
labels: List[str] = None,
|
32 |
+
bbox_outline_width = DEFAULTS["bbox_outline_width"],
|
33 |
+
bbox_outline_color = DEFAULTS["bbox_outline_color"],
|
34 |
+
bbox_fill_color: str | list[tuple | str] = DEFAULTS["bbox_fill_color"],
|
35 |
+
label_text_color: str | list[tuple | str] = DEFAULTS["label_text_color"],
|
36 |
+
label_fill_color = DEFAULTS["label_fill_color"],
|
37 |
label_text_padding = DEFAULTS["label_text_padding"],
|
38 |
label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
|
39 |
label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
|
|
|
44 |
Args:
|
45 |
image: Image to visualize
|
46 |
bboxes: List of bounding boxes
|
47 |
+
labels: Titles of the bounding boxes
|
48 |
+
bbox_outline_width: Width of the bounding box
|
49 |
+
bbox_outline_color: Color of the bounding box
|
50 |
+
bbox_fill_color: Fill color of the bounding box
|
51 |
label_text_color: Color of the label text
|
52 |
+
label_fill_color: Color of the label rectangle
|
|
|
53 |
label_text_padding: Padding of the label text
|
54 |
+
label_rectangle_left_margin: Left padding of the label rectangle
|
55 |
+
label_rectangle_top_margin: Top padding of the label rectangle
|
56 |
label_text_size: Font size of the label text
|
57 |
+
convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
|
58 |
Returns:
|
59 |
+
Image: Image annotated with bounding boxes
|
60 |
'''
|
61 |
image = image.copy().convert("RGB")
|
62 |
draw = ImageDraw.Draw(image)
|
63 |
font = get_font(size = label_text_size)
|
64 |
+
labels = (labels or []) + np.full(len(bboxes) - len(labels or []), None).tolist()
|
65 |
+
bbox_fill_colors = bbox_fill_color if isinstance(bbox_fill_color, list) else [bbox_fill_color] * len(bboxes)
|
66 |
+
bbox_outline_colors = bbox_outline_color if isinstance(bbox_outline_color, list) else [bbox_outline_color] * len(bboxes)
|
67 |
+
for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
|
68 |
x0, y0, x1, y1 = convert_to_x0y0x1y1(bbox) if convert_to_x0y0x1y1 is not None else bbox
|
69 |
+
|
70 |
+
rectangle_image = Image.new('RGBA', image.size)
|
71 |
+
rectangle_image_draw = ImageDraw.Draw(rectangle_image)
|
72 |
+
rectangle_image_draw.rectangle(
|
73 |
+
xy = [x0, y0, x1, y1],
|
74 |
+
fill = _bbox_fill_color,
|
75 |
+
outline = _bbox_outline_color,
|
76 |
+
width = bbox_outline_width)
|
77 |
+
image.paste(im = rectangle_image, mask = rectangle_image)
|
78 |
+
|
79 |
+
if label is not None:
|
80 |
draw_text_on_image(
|
81 |
draw,
|
82 |
[x0, y0],
|
83 |
+
label,
|
84 |
label_text_color,
|
85 |
+
label_fill_color,
|
86 |
label_text_padding,
|
87 |
label_rectangle_left_margin,
|
88 |
label_rectangle_top_margin,
|
|
|
93 |
def draw_text_on_image(
|
94 |
image_or_draw: Image.Image | ImageDraw.ImageDraw,
|
95 |
text_position_xy: List[int],
|
96 |
+
label: str,
|
97 |
label_text_color = DEFAULTS["label_text_color"],
|
98 |
+
label_fill_color = DEFAULTS["label_fill_color"],
|
99 |
label_text_padding = DEFAULTS["label_text_padding"],
|
100 |
label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
|
101 |
label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
|
|
|
107 |
x0, y0 = text_position_xy
|
108 |
text_position = (x0 - label_rectangle_left_margin + label_text_padding, y0 - label_rectangle_top_margin + label_text_padding)
|
109 |
draw = ImageDraw.Draw(image) if is_image else image_or_draw
|
110 |
+
_, _, text_bbox_right, text_bbox_bottom = draw.textbbox(text_position, label, font=font)
|
111 |
xy = [
|
112 |
text_position[0] - label_text_padding,
|
113 |
text_position[1] - label_text_padding,
|
114 |
text_bbox_right + label_text_padding + label_text_padding,
|
115 |
text_bbox_bottom + label_text_padding + label_text_padding
|
116 |
]
|
117 |
+
draw.rectangle(xy, fill = label_fill_color)
|
118 |
+
draw.text(text_position, label, font=font, fill=label_text_color)
|
119 |
return image
|