Charles Kabui committed on
Commit
c33e07b
·
1 Parent(s): 1f9e550

transparent bboxes

Browse files
Files changed (4) hide show
  1. analysis.ipynb +0 -0
  2. app.py +1 -0
  3. main.py +6 -18
  4. utils/visualize_bboxes_on_image.py +42 -28
analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  os.system("apt install -y poppler-utils")
3
  os.system("python -m pip install --upgrade pip")
 
4
  os.system("python -m pip install torch==2.1.0")
5
  os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@898507047cf441a1e4be7a729270961c401c4354'")
6
  os.system("python -m pip install layoutparser==0.3.4 layoutparser[layoutmodels] layoutparser[ocr]")
 
1
  import os
2
  os.system("apt install -y poppler-utils")
3
  os.system("python -m pip install --upgrade pip")
4
+ os.system("python -m pip install pdf2image==1.16.3")
5
  os.system("python -m pip install torch==2.1.0")
6
  os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@898507047cf441a1e4be7a729270961c401c4354'")
7
  os.system("python -m pip install layoutparser==0.3.4 layoutparser[layoutmodels] layoutparser[ocr]")
main.py CHANGED
@@ -23,7 +23,7 @@ cache = {
23
  pre_message_style = 'overflow:auto;border:2px solid pink;padding:4px;border-radius:4px;'
24
  visualize_bboxes_on_image_kwargs = {
25
  'label_text_color': 'white',
26
- 'label_rectangle_color': 'black',
27
  'label_text_size': 12,
28
  'label_text_padding': 3,
29
  'label_rectangle_left_margin': 0,
@@ -31,20 +31,6 @@ visualize_bboxes_on_image_kwargs = {
31
  }
32
  vectors_types = ['vectors', 'weighted_vectors', 'reduced_vectors', 'weighted_reduced_vectors']
33
 
34
- annotation_key = 'is_annotated_document_image'
35
- annotation_original_image_key = 'original_image'
36
- def annotate_document_image(document_image: Image.Image, original_document_image: Image.Image):
37
- document_image.info.update({
38
- annotation_key: True,
39
- annotation_original_image_key: original_document_image
40
- })
41
- return document_image
42
-
43
- def get_original_document_image(document_image: Image.Image):
44
- if document_image.info.get(annotation_key) == True:
45
- return document_image.info.get(annotation_original_image_key)
46
- return document_image
47
-
48
  def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
49
  message = None
50
  annotations = {
@@ -87,16 +73,18 @@ def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image
87
  document_image_1 = visualize_bboxes_on_image(
88
  image = document_image_1,
89
  bboxes = cache['document_image_1_features'][annotations['predicted_bboxes']],
90
- titles = [f'{label}, score:{round(score, 2)}' for label, score in zip(
91
  cache['document_image_1_features'][annotations['predicted_labels']],
92
  cache['document_image_1_features'][annotations['predicted_scores']])],
 
93
  **visualize_bboxes_on_image_kwargs)
94
  document_image_2 = visualize_bboxes_on_image(
95
  image = document_image_2,
96
  bboxes = cache['document_image_2_features'][annotations['predicted_bboxes']],
97
- titles = [f'{label}, score:{score}' for label, score in zip(
98
  cache['document_image_2_features'][annotations['predicted_labels']],
99
  cache['document_image_2_features'][annotations['predicted_scores']])],
 
100
  **visualize_bboxes_on_image_kwargs)
101
 
102
  cache['output_document_image_1_hash'] = str(average_hash(document_image_1))
@@ -141,7 +129,7 @@ def document_view(document_number: int):
141
  gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
142
  with gr.Tabs() as document_tabs:
143
  with gr.Tab("From Image", id=0):
144
- document = gr.Image(type="pil", label=f"Document {document_number}", visible=False, interactive=False, show_download_button=False)
145
  document_error_message = gr.HTML(label="Error Message", visible=False)
146
  document_preview = gr.UploadButton(
147
  "Upload PDF or Document Image",
 
23
  pre_message_style = 'overflow:auto;border:2px solid pink;padding:4px;border-radius:4px;'
24
  visualize_bboxes_on_image_kwargs = {
25
  'label_text_color': 'white',
26
+ 'label_fill_color': 'black',
27
  'label_text_size': 12,
28
  'label_text_padding': 3,
29
  'label_rectangle_left_margin': 0,
 
31
  }
32
  vectors_types = ['vectors', 'weighted_vectors', 'reduced_vectors', 'weighted_reduced_vectors']
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
35
  message = None
36
  annotations = {
 
73
  document_image_1 = visualize_bboxes_on_image(
74
  image = document_image_1,
75
  bboxes = cache['document_image_1_features'][annotations['predicted_bboxes']],
76
+ labels = [f'{label}, score:{round(score, 2)}' for label, score in zip(
77
  cache['document_image_1_features'][annotations['predicted_labels']],
78
  cache['document_image_1_features'][annotations['predicted_scores']])],
79
+ bbox_outline_color = [color_map[label] for label in cache['document_image_1_features'][annotations['predicted_labels']]],
80
  **visualize_bboxes_on_image_kwargs)
81
  document_image_2 = visualize_bboxes_on_image(
82
  image = document_image_2,
83
  bboxes = cache['document_image_2_features'][annotations['predicted_bboxes']],
84
+ labels = [f'{label}, score:{score}' for label, score in zip(
85
  cache['document_image_2_features'][annotations['predicted_labels']],
86
  cache['document_image_2_features'][annotations['predicted_scores']])],
87
+ bbox_outline_color = [color_map[label] for label in cache['document_image_2_features'][annotations['predicted_labels']]],
88
  **visualize_bboxes_on_image_kwargs)
89
 
90
  cache['output_document_image_1_hash'] = str(average_hash(document_image_1))
 
129
  gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
130
  with gr.Tabs() as document_tabs:
131
  with gr.Tab("From Image", id=0):
132
+ document = gr.Image(type="pil", label=f"Document {document_number}", visible=False, interactive=False, show_download_button=True)
133
  document_error_message = gr.HTML(label="Error Message", visible=False)
134
  document_preview = gr.UploadButton(
135
  "Upload PDF or Document Image",
utils/visualize_bboxes_on_image.py CHANGED
@@ -7,14 +7,15 @@ from typing import List
7
  from functools import cache
8
 
9
  DEFAULTS = {
10
- 'width': 2,
11
- 'bbox_color': "red",
 
12
  'label_text_color': "black",
13
- 'label_rectangle_color': "red",
14
  'label_text_padding': 0,
15
  'label_rectangle_left_margin': 0,
16
  'label_rectangle_top_margin': 0,
17
- 'label_text_size': 12
18
  }
19
 
20
  @cache
@@ -27,11 +28,12 @@ def get_font(path_or_url: str = 'https://github.com/googlefonts/roboto/raw/main/
27
  def visualize_bboxes_on_image(
28
  image: Image.Image,
29
  bboxes: List[List[int]],
30
- titles: List[str] = None,
31
- width = DEFAULTS["width"],
32
- bbox_color = DEFAULTS["bbox_color"],
33
- label_text_color = DEFAULTS["label_text_color"],
34
- label_rectangle_color = DEFAULTS["label_rectangle_color"],
 
35
  label_text_padding = DEFAULTS["label_text_padding"],
36
  label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
37
  label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
@@ -42,33 +44,45 @@ def visualize_bboxes_on_image(
42
  Args:
43
  image: Image to visualize
44
  bboxes: List of bounding boxes
45
- titles: Titles of the bounding boxes
46
- width: Width of the bounding box
47
- bbox_color: Color of the bounding box
 
48
  label_text_color: Color of the label text
49
- label_rectangle_color: Color of the label rectangle
50
- convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
51
  label_text_padding: Padding of the label text
52
- label_rectangle_left_padding: Left padding of the label rectangle
53
- label_rectangle_top_padding: Top padding of the label rectangle
54
  label_text_size: Font size of the label text
 
55
  Returns:
56
- Image: Image with bounding boxes
57
  '''
58
  image = image.copy().convert("RGB")
59
  draw = ImageDraw.Draw(image)
60
  font = get_font(size = label_text_size)
61
- titles = (titles or []) + np.full(len(bboxes) - len(titles or []), None).tolist()
62
- for bbox, title in zip(bboxes, titles):
 
 
63
  x0, y0, x1, y1 = convert_to_x0y0x1y1(bbox) if convert_to_x0y0x1y1 is not None else bbox
64
- draw.rectangle([x0, y0, x1, y1], outline=bbox_color, width=width)
65
- if title is not None:
 
 
 
 
 
 
 
 
 
66
  draw_text_on_image(
67
  draw,
68
  [x0, y0],
69
- title,
70
  label_text_color,
71
- label_rectangle_color,
72
  label_text_padding,
73
  label_rectangle_left_margin,
74
  label_rectangle_top_margin,
@@ -79,9 +93,9 @@ def visualize_bboxes_on_image(
79
  def draw_text_on_image(
80
  image_or_draw: Image.Image | ImageDraw.ImageDraw,
81
  text_position_xy: List[int],
82
- title: str,
83
  label_text_color = DEFAULTS["label_text_color"],
84
- label_rectangle_color = DEFAULTS["label_rectangle_color"],
85
  label_text_padding = DEFAULTS["label_text_padding"],
86
  label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
87
  label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
@@ -93,13 +107,13 @@ def draw_text_on_image(
93
  x0, y0 = text_position_xy
94
  text_position = (x0 - label_rectangle_left_margin + label_text_padding, y0 - label_rectangle_top_margin + label_text_padding)
95
  draw = ImageDraw.Draw(image) if is_image else image_or_draw
96
- text_bbox_left, text_bbox_top, text_bbox_right, text_bbox_bottom = draw.textbbox(text_position, title, font=font)
97
  xy = [
98
  text_position[0] - label_text_padding,
99
  text_position[1] - label_text_padding,
100
  text_bbox_right + label_text_padding + label_text_padding,
101
  text_bbox_bottom + label_text_padding + label_text_padding
102
  ]
103
- draw.rectangle(xy, fill = label_rectangle_color)
104
- draw.text(text_position, title, font=font, fill=label_text_color)
105
  return image
 
7
  from functools import cache
8
 
9
  DEFAULTS = {
10
+ 'bbox_outline_width': 2,
11
+ 'bbox_outline_color': (0, 0, 256, 123), # alpha runs from 0 (transparent) to 255 (opaque)
12
+ 'bbox_fill_color': (256, 0, 0, 50), # alpha runs from 0 (transparent) to 255 (opaque)
13
  'label_text_color': "black",
14
+ 'label_fill_color': "red",
15
  'label_text_padding': 0,
16
  'label_rectangle_left_margin': 0,
17
  'label_rectangle_top_margin': 0,
18
+ 'label_text_size': 12,
19
  }
20
 
21
  @cache
 
28
  def visualize_bboxes_on_image(
29
  image: Image.Image,
30
  bboxes: List[List[int]],
31
+ labels: List[str] = None,
32
+ bbox_outline_width = DEFAULTS["bbox_outline_width"],
33
+ bbox_outline_color = DEFAULTS["bbox_outline_color"],
34
+ bbox_fill_color: str | list[tuple | str] = DEFAULTS["bbox_fill_color"],
35
+ label_text_color: str | list[tuple | str] = DEFAULTS["label_text_color"],
36
+ label_fill_color = DEFAULTS["label_fill_color"],
37
  label_text_padding = DEFAULTS["label_text_padding"],
38
  label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
39
  label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
 
44
  Args:
45
  image: Image to visualize
46
  bboxes: List of bounding boxes
47
+ labels: Titles of the bounding boxes
48
+ bbox_outline_width: Width of the bounding box
49
+ bbox_outline_color: Color of the bounding box
50
+ bbox_fill_color: Fill color of the bounding box
51
  label_text_color: Color of the label text
52
+ label_fill_color: Color of the label rectangle
 
53
  label_text_padding: Padding of the label text
54
+ label_rectangle_left_margin: Left margin of the label rectangle
55
+ label_rectangle_top_margin: Top margin of the label rectangle
56
  label_text_size: Font size of the label text
57
+ convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
58
  Returns:
59
+ Image: Image annotated with bounding boxes
60
  '''
61
  image = image.copy().convert("RGB")
62
  draw = ImageDraw.Draw(image)
63
  font = get_font(size = label_text_size)
64
+ labels = (labels or []) + np.full(len(bboxes) - len(labels or []), None).tolist()
65
+ bbox_fill_colors = bbox_fill_color if isinstance(bbox_fill_color, list) else [bbox_fill_color] * len(bboxes)
66
+ bbox_outline_colors = bbox_outline_color if isinstance(bbox_outline_color, list) else [bbox_outline_color] * len(bboxes)
67
+ for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
68
  x0, y0, x1, y1 = convert_to_x0y0x1y1(bbox) if convert_to_x0y0x1y1 is not None else bbox
69
+
70
+ rectangle_image = Image.new('RGBA', image.size)
71
+ rectangle_image_draw = ImageDraw.Draw(rectangle_image)
72
+ rectangle_image_draw.rectangle(
73
+ xy = [x0, y0, x1, y1],
74
+ fill = _bbox_fill_color,
75
+ outline = _bbox_outline_color,
76
+ width = bbox_outline_width)
77
+ image.paste(im = rectangle_image, mask = rectangle_image)
78
+
79
+ if label is not None:
80
  draw_text_on_image(
81
  draw,
82
  [x0, y0],
83
+ label,
84
  label_text_color,
85
+ label_fill_color,
86
  label_text_padding,
87
  label_rectangle_left_margin,
88
  label_rectangle_top_margin,
 
93
  def draw_text_on_image(
94
  image_or_draw: Image.Image | ImageDraw.ImageDraw,
95
  text_position_xy: List[int],
96
+ label: str,
97
  label_text_color = DEFAULTS["label_text_color"],
98
+ label_fill_color = DEFAULTS["label_fill_color"],
99
  label_text_padding = DEFAULTS["label_text_padding"],
100
  label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
101
  label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
 
107
  x0, y0 = text_position_xy
108
  text_position = (x0 - label_rectangle_left_margin + label_text_padding, y0 - label_rectangle_top_margin + label_text_padding)
109
  draw = ImageDraw.Draw(image) if is_image else image_or_draw
110
+ _, _, text_bbox_right, text_bbox_bottom = draw.textbbox(text_position, label, font=font)
111
  xy = [
112
  text_position[0] - label_text_padding,
113
  text_position[1] - label_text_padding,
114
  text_bbox_right + label_text_padding + label_text_padding,
115
  text_bbox_bottom + label_text_padding + label_text_padding
116
  ]
117
+ draw.rectangle(xy, fill = label_fill_color)
118
+ draw.text(text_position, label, font=font, fill=label_text_color)
119
  return image