Charles Kabui committed on
Commit
0da14c5
·
1 Parent(s): c33e07b

replaced pdf2image with PyMuPDF

Browse files
analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -1,15 +1,3 @@
1
- import os
2
- os.system("apt install -y poppler-utils")
3
- os.system("python -m pip install --upgrade pip")
4
- os.system("python -m pip install pdf2image==1.16.3")
5
- os.system("python -m pip install torch==2.1.0")
6
- os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@898507047cf441a1e4be7a729270961c401c4354'")
7
- os.system("python -m pip install layoutparser==0.3.4 layoutparser[layoutmodels] layoutparser[ocr]")
8
- os.system("python -m pip install Pillow==9.5.0")
9
- os.system("python -m pip install imagehash==4.3.1")
10
- os.system("python -m pip install tensorflow==2.15.0 tensorflow-estimator==2.15.0")
11
- os.system("python -m pip install scikit-learn==1.3.2")
12
-
13
  from main import app
14
 
15
  model_path = './model/trained_model/model_final.pth'
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from main import app
2
 
3
  model_path = './model/trained_model/model_final.pth'
main.py CHANGED
@@ -1,17 +1,19 @@
1
  import traceback
2
  import gradio as gr
3
  from utils.get_RGB_image import get_RGB_image, is_online_file, steam_online_file
4
- from pdf2image import convert_from_path, convert_from_bytes
5
  import layoutparser as lp
6
  from PIL import Image
7
  from utils.get_features import get_features
8
  from imagehash import average_hash
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  from utils.visualize_bboxes_on_image import visualize_bboxes_on_image
 
11
 
12
- label_map = {0: 'Caption', 1: 'Footnote', 2: 'Formula', 3: 'List-item', 4: 'Page-footer', 5: 'Page-header', 6: 'Picture', 7: 'Section-header', 8: 'Table', 9: 'Text', 10: 'Title'}
 
13
  label_names = list(label_map.values())
14
- color_map = {'Caption': '#acc2d9', 'Footnote': '#56ae57', 'Formula': '#b2996e', 'List-item': '#a8ff04', 'Page-footer': '#69d84f', 'Page-header': '#894585', 'Picture': '#70b23f', 'Section-header': '#d4ffff', 'Table': '#65ab7c', 'Text': '#952e8f', 'Title': '#fcfc81'}
 
15
  cache = {
16
  'output_document_image_1_hash': None,
17
  'output_document_image_2_hash': None,
@@ -29,14 +31,16 @@ visualize_bboxes_on_image_kwargs = {
29
  'label_rectangle_left_margin': 0,
30
  'label_rectangle_top_margin': 0
31
  }
32
- vectors_types = ['vectors', 'weighted_vectors', 'reduced_vectors', 'weighted_reduced_vectors']
 
 
33
 
34
  def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
35
  message = None
36
  annotations = {
37
- 'predicted_bboxes': 'predicted_bboxes' if vectors_type in ['vectors', 'weighted_vectors'] else 'reduced_predicted_bboxes',
38
- 'predicted_scores': 'predicted_scores' if vectors_type in ['vectors', 'weighted_vectors'] else 'reduced_predicted_scores',
39
- 'predicted_labels': 'predicted_labels' if vectors_type in ['vectors', 'weighted_vectors'] else 'reduced_predicted_labels',
40
  }
41
  show_vectors_type = False
42
  try:
@@ -50,7 +54,8 @@ def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image
50
  document_image_1_features = cache['document_image_1_features']
51
  document_image_1 = cache['original_document_image_1']
52
  else:
53
- document_image_1_features = get_features(document_image_1, model, label_names)
 
54
  cache['document_image_1_features'] = document_image_1_features
55
  cache['original_document_image_1'] = document_image_1
56
 
@@ -58,105 +63,126 @@ def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image
58
  document_image_2_features = cache['document_image_2_features']
59
  document_image_2 = cache['original_document_image_2']
60
  else:
61
- document_image_2_features = get_features(document_image_2, model, label_names)
 
62
  cache['document_image_2_features'] = document_image_2_features
63
  cache['original_document_image_2'] = document_image_2
64
 
65
  [[similarity]] = cosine_similarity(
66
  [
67
  cache['document_image_1_features'][vectors_type]
68
- ],
69
  [
70
  cache['document_image_2_features'][vectors_type]
71
  ])
72
  message = f'<pre style="{pre_message_style}">Similarity between the two documents is: {round(similarity, 4)}<pre>'
73
  document_image_1 = visualize_bboxes_on_image(
74
- image = document_image_1,
75
- bboxes = cache['document_image_1_features'][annotations['predicted_bboxes']],
76
- labels = [f'{label}, score:{round(score, 2)}' for label, score in zip(
77
- cache['document_image_1_features'][annotations['predicted_labels']],
78
  cache['document_image_1_features'][annotations['predicted_scores']])],
79
- bbox_outline_color = [color_map[label] for label in cache['document_image_1_features'][annotations['predicted_labels']]],
 
 
 
80
  **visualize_bboxes_on_image_kwargs)
81
  document_image_2 = visualize_bboxes_on_image(
82
- image = document_image_2,
83
- bboxes = cache['document_image_2_features'][annotations['predicted_bboxes']],
84
- labels = [f'{label}, score:{score}' for label, score in zip(
85
- cache['document_image_2_features'][annotations['predicted_labels']],
86
  cache['document_image_2_features'][annotations['predicted_scores']])],
87
- bbox_outline_color = [color_map[label] for label in cache['document_image_2_features'][annotations['predicted_labels']]],
 
 
 
88
  **visualize_bboxes_on_image_kwargs)
89
-
90
- cache['output_document_image_1_hash'] = str(average_hash(document_image_1))
91
- cache['output_document_image_2_hash'] = str(average_hash(document_image_2))
 
 
92
 
93
  show_vectors_type = True
94
  except Exception as e:
95
  message = f'<pre style="{pre_message_style}">{traceback.format_exc()}<pre>'
96
  return [
97
- gr.HTML(message, visible=True),
98
- document_image_1,
99
  document_image_2,
100
  gr.Dropdown(visible=show_vectors_type)
101
  ]
102
-
103
- def load_image(filename, page = 0):
 
104
  try:
105
  image = None
 
106
  try:
107
  if (is_online_file(filename)):
108
- image = get_RGB_image(convert_from_bytes(steam_online_file(filename))[page])
109
  else:
110
- image = get_RGB_image(convert_from_path(filename)[page])
111
- except:
 
 
112
  image = get_RGB_image(filename)
113
  return [
114
- gr.Image(value=image, visible=True),
115
  None
116
  ]
117
- except:
118
- error = traceback.format_exc()
119
  return [None, gr.HTML(value=error, visible=True)]
120
-
121
- def preview_url(url, page = 0):
122
- [image, error] = load_image(url, page = page)
 
123
  if image:
124
  return [gr.Tabs(selected=0), image, error]
125
  else:
126
- return [gr.Tabs(selected=1), image, error]
 
127
 
128
  def document_view(document_number: int):
129
- gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
 
130
  with gr.Tabs() as document_tabs:
131
  with gr.Tab("From Image", id=0):
132
- document = gr.Image(type="pil", label=f"Document {document_number}", visible=False, interactive=False, show_download_button=True)
133
- document_error_message = gr.HTML(label="Error Message", visible=False)
 
 
134
  document_preview = gr.UploadButton(
135
- "Upload PDF or Document Image",
136
- file_types=["image", ".pdf"],
137
  file_count="single")
138
  with gr.Tab("From URL", id=1):
139
  document_url = gr.Textbox(
140
  label=f"Document {document_number} URL",
141
  info="Paste a Link/URL to PDF or Document Image",
142
  placeholder="https://datasets-server.huggingface.co/.../image.jpg")
143
- document_url_error_message = gr.HTML(label="Error Message", visible=False)
144
- document_url_preview = gr.Button(value="Preview", variant="primary")
 
 
145
  document_preview.upload(
146
- fn = lambda file: load_image(file.name),
147
- inputs = [document_preview],
148
- outputs = [document, document_error_message])
149
  document_url_preview.click(
150
- fn = preview_url,
151
- inputs = [document_url],
152
- outputs = [document_tabs, document, document_url_error_message])
153
  return document
154
 
155
- def app(*, model_path, config_path, debug = False):
 
156
  model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
157
- config_path = config_path,
158
- model_path = model_path,
159
- label_map = label_map)
160
  title = 'Document Similarity Search Using Visual Layout Features'
161
  description = f"<h2>{title}<h2>"
162
  css = '''
@@ -167,7 +193,7 @@ def app(*, model_path, config_path, debug = False):
167
  with gr.Blocks(title=title, css=css) as app:
168
  with gr.Row():
169
  gr.HTML(value=description, elem_classes=['center'])
170
- with gr.Row(equal_height = False):
171
  with gr.Column():
172
  document_1_image = document_view(1)
173
  with gr.Column():
@@ -178,22 +204,23 @@ def app(*, model_path, config_path, debug = False):
178
  submit = gr.Button(value="Get Similarity", variant="primary")
179
  with gr.Column():
180
  vectors_type = gr.Dropdown(
181
- choices = vectors_types,
182
- value = vectors_types[0],
183
- visible = False,
184
- label = "Vectors Type",
185
- info = "Select the Vectors Type to use for Similarity Calculation")
186
- similarity_output = gr.HTML(label="Similarity Score", visible=False)
 
187
  reset = gr.Button(value="Reset", variant="secondary")
188
  kwargs = {
189
  'fn': lambda document_1_image, document_2_image, vectors_type: similarity_fn(
190
- model,
191
- document_1_image,
192
- document_2_image,
193
  vectors_type),
194
  'inputs': [document_1_image, document_2_image, vectors_type],
195
  'outputs': [similarity_output, document_1_image, document_2_image, vectors_type]
196
  }
197
  submit.click(**kwargs)
198
  vectors_type.change(**kwargs)
199
- return app.launch(debug=debug)
 
1
  import traceback
2
  import gradio as gr
3
  from utils.get_RGB_image import get_RGB_image, is_online_file, steam_online_file
 
4
  import layoutparser as lp
5
  from PIL import Image
6
  from utils.get_features import get_features
7
  from imagehash import average_hash
8
  from sklearn.metrics.pairwise import cosine_similarity
9
  from utils.visualize_bboxes_on_image import visualize_bboxes_on_image
10
+ import fitz
11
 
12
+ label_map = {0: 'Caption', 1: 'Footnote', 2: 'Formula', 3: 'List-item', 4: 'Page-footer',
13
+ 5: 'Page-header', 6: 'Picture', 7: 'Section-header', 8: 'Table', 9: 'Text', 10: 'Title'}
14
  label_names = list(label_map.values())
15
+ color_map = {'Caption': '#FF0000', 'Footnote': '#00FF00', 'Formula': '#0000FF', 'List-item': '#FF00FF', 'Page-footer': '#FFFF00',
16
+ 'Page-header': '#000000', 'Picture': '#FFFFFF', 'Section-header': '#40E0D0', 'Table': '#F28030', 'Text': '#7F00FF', 'Title': '#C0C0C0'}
17
  cache = {
18
  'output_document_image_1_hash': None,
19
  'output_document_image_2_hash': None,
 
31
  'label_rectangle_left_margin': 0,
32
  'label_rectangle_top_margin': 0
33
  }
34
+ vectors_types = ['vectors', 'weighted_vectors',
35
+ 'reduced_vectors', 'weighted_reduced_vectors']
36
+
37
 
38
  def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
39
  message = None
40
  annotations = {
41
+ 'predicted_bboxes': 'predicted_bboxes' if vectors_type in ['vectors', 'weighted_vectors'] else 'reduced_predicted_bboxes',
42
+ 'predicted_scores': 'predicted_scores' if vectors_type in ['vectors', 'weighted_vectors'] else 'reduced_predicted_scores',
43
+ 'predicted_labels': 'predicted_labels' if vectors_type in ['vectors', 'weighted_vectors'] else 'reduced_predicted_labels',
44
  }
45
  show_vectors_type = False
46
  try:
 
54
  document_image_1_features = cache['document_image_1_features']
55
  document_image_1 = cache['original_document_image_1']
56
  else:
57
+ document_image_1_features = get_features(
58
+ document_image_1, model, label_names)
59
  cache['document_image_1_features'] = document_image_1_features
60
  cache['original_document_image_1'] = document_image_1
61
 
 
63
  document_image_2_features = cache['document_image_2_features']
64
  document_image_2 = cache['original_document_image_2']
65
  else:
66
+ document_image_2_features = get_features(
67
+ document_image_2, model, label_names)
68
  cache['document_image_2_features'] = document_image_2_features
69
  cache['original_document_image_2'] = document_image_2
70
 
71
  [[similarity]] = cosine_similarity(
72
  [
73
  cache['document_image_1_features'][vectors_type]
74
+ ],
75
  [
76
  cache['document_image_2_features'][vectors_type]
77
  ])
78
  message = f'<pre style="{pre_message_style}">Similarity between the two documents is: {round(similarity, 4)}<pre>'
79
  document_image_1 = visualize_bboxes_on_image(
80
+ image=document_image_1,
81
+ bboxes=cache['document_image_1_features'][annotations['predicted_bboxes']],
82
+ labels=[f'{label}, score:{round(score, 2)}' for label, score in zip(
83
+ cache['document_image_1_features'][annotations['predicted_labels']],
84
  cache['document_image_1_features'][annotations['predicted_scores']])],
85
+ bbox_outline_color=[
86
+ color_map[label] for label in cache['document_image_1_features'][annotations['predicted_labels']]],
87
+ bbox_fill_color=[
88
+ (color_map[label], 50) for label in cache['document_image_1_features'][annotations['predicted_labels']]],
89
  **visualize_bboxes_on_image_kwargs)
90
  document_image_2 = visualize_bboxes_on_image(
91
+ image=document_image_2,
92
+ bboxes=cache['document_image_2_features'][annotations['predicted_bboxes']],
93
+ labels=[f'{label}, score:{round(score, 2)}' for label, score in zip(
94
+ cache['document_image_2_features'][annotations['predicted_labels']],
95
  cache['document_image_2_features'][annotations['predicted_scores']])],
96
+ bbox_outline_color=[
97
+ color_map[label] for label in cache['document_image_2_features'][annotations['predicted_labels']]],
98
+ bbox_fill_color=[
99
+ (color_map[label], 50) for label in cache['document_image_2_features'][annotations['predicted_labels']]],
100
  **visualize_bboxes_on_image_kwargs)
101
+
102
+ cache['output_document_image_1_hash'] = str(
103
+ average_hash(document_image_1))
104
+ cache['output_document_image_2_hash'] = str(
105
+ average_hash(document_image_2))
106
 
107
  show_vectors_type = True
108
  except Exception as e:
109
  message = f'<pre style="{pre_message_style}">{traceback.format_exc()}<pre>'
110
  return [
111
+ gr.HTML(message, visible=True),
112
+ document_image_1,
113
  document_image_2,
114
  gr.Dropdown(visible=show_vectors_type)
115
  ]
116
+
117
+
118
def load_image(filename, page=0):
    """Load one page of a document as a Gradio image component.

    The input is first opened with PyMuPDF (``fitz``) and the requested page
    is rasterized. If that fails for any reason, ``get_RGB_image`` is tried
    on the same input as a fallback (presumably for plain image files --
    confirm against ``utils/get_RGB_image.py``).

    Args:
        filename: Local path or http(s) URL of the document.
        page: Zero-based index of the page to render.

    Returns:
        ``[gr.Image, None]`` on success, or ``[None, gr.HTML]`` carrying the
        traceback plus both error messages when both strategies fail.
    """
    first_error = None
    try:
        try:
            if is_online_file(filename):
                pdf_document = fitz.open("pdf", steam_online_file(filename))
            else:
                pdf_document = fitz.open(filename)
            # Close the document deterministically instead of leaking the
            # underlying file handle / buffer until garbage collection.
            with pdf_document:
                pixmap = pdf_document[page].get_pixmap()
                # Build the PIL image while the document is still open so the
                # pixel buffer is copied before PyMuPDF releases it.
                image = Image.frombytes(
                    "RGB", [pixmap.width, pixmap.height], pixmap.samples)
        except Exception as e:
            # Remember why the PyMuPDF path failed so it can be reported if
            # the fallback fails as well.
            first_error = e
            image = get_RGB_image(filename)
        return [
            gr.Image(value=image, visible=True),
            None
        ]
    except Exception as second_error:
        error = f'{traceback.format_exc()}\n\nFirst Error:\n{first_error}\n\nSecond Error:\n{second_error}'
        return [None, gr.HTML(value=error, visible=True)]
138
+
139
+
140
def preview_url(url, page=0):
    """Load a document from *url* and pick which tab to show.

    Args:
        url: Location passed straight to ``load_image``.
        page: Page index forwarded to ``load_image``.

    Returns:
        ``[gr.Tabs, image, error]`` where tab 0 is selected when an image was
        produced and tab 1 otherwise.
    """
    image, error = load_image(url, page=page)
    # Tab 0 when loading succeeded, tab 1 when it did not.
    selected_tab = 0 if image else 1
    return [gr.Tabs(selected=selected_tab), image, error]
146
+
147
 
148
  def document_view(document_number: int):
149
+ gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=[
150
+ 'center'])
151
  with gr.Tabs() as document_tabs:
152
  with gr.Tab("From Image", id=0):
153
+ document = gr.Image(
154
+ type="pil", label=f"Document {document_number}", visible=False, interactive=False, show_download_button=True)
155
+ document_error_message = gr.HTML(
156
+ label="Error Message", visible=False)
157
  document_preview = gr.UploadButton(
158
+ "Upload PDF or Document Image",
159
+ file_types=["image", ".pdf"],
160
  file_count="single")
161
  with gr.Tab("From URL", id=1):
162
  document_url = gr.Textbox(
163
  label=f"Document {document_number} URL",
164
  info="Paste a Link/URL to PDF or Document Image",
165
  placeholder="https://datasets-server.huggingface.co/.../image.jpg")
166
+ document_url_error_message = gr.HTML(
167
+ label="Error Message", visible=False)
168
+ document_url_preview = gr.Button(
169
+ value="Preview", variant="primary")
170
  document_preview.upload(
171
+ fn=lambda file: load_image(file.name),
172
+ inputs=[document_preview],
173
+ outputs=[document, document_error_message])
174
  document_url_preview.click(
175
+ fn=preview_url,
176
+ inputs=[document_url],
177
+ outputs=[document_tabs, document, document_url_error_message])
178
  return document
179
 
180
+
181
+ def app(*, model_path, config_path, debug=False):
182
  model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
183
+ config_path=config_path,
184
+ model_path=model_path,
185
+ label_map=label_map)
186
  title = 'Document Similarity Search Using Visual Layout Features'
187
  description = f"<h2>{title}<h2>"
188
  css = '''
 
193
  with gr.Blocks(title=title, css=css) as app:
194
  with gr.Row():
195
  gr.HTML(value=description, elem_classes=['center'])
196
+ with gr.Row(equal_height=False):
197
  with gr.Column():
198
  document_1_image = document_view(1)
199
  with gr.Column():
 
204
  submit = gr.Button(value="Get Similarity", variant="primary")
205
  with gr.Column():
206
  vectors_type = gr.Dropdown(
207
+ choices=vectors_types,
208
+ value=vectors_types[0],
209
+ visible=False,
210
+ label="Vectors Type",
211
+ info="Select the Vectors Type to use for Similarity Calculation")
212
+ similarity_output = gr.HTML(
213
+ label="Similarity Score", visible=False)
214
  reset = gr.Button(value="Reset", variant="secondary")
215
  kwargs = {
216
  'fn': lambda document_1_image, document_2_image, vectors_type: similarity_fn(
217
+ model,
218
+ document_1_image,
219
+ document_2_image,
220
  vectors_type),
221
  'inputs': [document_1_image, document_2_image, vectors_type],
222
  'outputs': [similarity_output, document_1_image, document_2_image, vectors_type]
223
  }
224
  submit.click(**kwargs)
225
  vectors_type.change(**kwargs)
226
+ return app.launch(debug=debug)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ PyMuPDF==1.23.26
2
+ scikit-learn==1.3.2
3
+ torch==2.1.0
4
+ torchvision==0.16.0
5
+ tensorflow==2.15.0
6
+ ImageHash==4.3.1
7
+ Pillow==9.5.0
8
+ layoutparser[layoutmodels,ocr]==0.3.4
9
+ detectron2 @ git+https://github.com/facebookresearch/detectron2.git@898507047cf441a1e4be7a729270961c401c4354
10
+
utils/get_RGB_image.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from PIL import Image
2
  from urllib.parse import urlparse
3
  import requests
@@ -6,7 +7,7 @@ def is_online_file(url: str) -> bool:
6
  return urlparse(url).scheme in ["http", "https"]
7
 
8
  def steam_online_file(url: str) -> bytes:
9
- return requests.get(url, stream=True).raw
10
 
11
  def get_RGB_image(image_or_path: str | Image.Image) -> bytes:
12
  if isinstance(image_or_path, str):
 
1
+ import io
2
  from PIL import Image
3
  from urllib.parse import urlparse
4
  import requests
 
7
  return urlparse(url).scheme in ["http", "https"]
8
 
9
  def steam_online_file(url: str) -> bytes:
10
+ return io.BytesIO(requests.get(url, stream=True).content)
11
 
12
  def get_RGB_image(image_or_path: str | Image.Image) -> bytes:
13
  if isinstance(image_or_path, str):
utils/visualize_bboxes_on_image.py CHANGED
@@ -5,115 +5,144 @@ import numpy as np
5
  import requests
6
  from typing import List
7
  from functools import cache
 
8
 
9
  DEFAULTS = {
10
- 'bbox_outline_width': 2,
11
- 'bbox_outline_color': (0, 0, 256, 123), # alpha runs from 0 to 127
12
- 'bbox_fill_color': (256, 0, 0, 50), # alpha runs from 0 to 127
13
- 'label_text_color': "black",
14
- 'label_fill_color': "red",
15
- 'label_text_padding': 0,
16
- 'label_rectangle_left_margin': 0,
17
- 'label_rectangle_top_margin': 0,
18
- 'label_text_size': 12,
 
 
 
 
19
  }
20
 
 
21
  @cache
22
  def get_font(path_or_url: str = 'https://github.com/googlefonts/roboto/raw/main/src/hinted/Roboto-Regular.ttf', size: int = DEFAULTS['label_text_size']):
23
- if urlparse(path_or_url).scheme in ["http", "https"]: # Online
24
- return ImageFont.truetype(requests.get(path_or_url, stream=True).raw, size=size)
25
- else: # Local
26
- return ImageFont.truetype(path_or_url, size=size)
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def visualize_bboxes_on_image(
29
- image: Image.Image,
30
- bboxes: List[List[int]],
31
- labels: List[str] = None,
32
- bbox_outline_width = DEFAULTS["bbox_outline_width"],
33
- bbox_outline_color = DEFAULTS["bbox_outline_color"],
34
- bbox_fill_color: str | list[tuple | str] = DEFAULTS["bbox_fill_color"],
35
- label_text_color: str | list[tuple | str] = DEFAULTS["label_text_color"],
36
- label_fill_color = DEFAULTS["label_fill_color"],
37
- label_text_padding = DEFAULTS["label_text_padding"],
38
- label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
39
- label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
40
- label_text_size = DEFAULTS["label_text_size"],
41
- convert_to_x0y0x1y1 = None) -> Image.Image:
42
- '''
43
- Visualize bounding boxes on an image
44
- Args:
45
- image: Image to visualize
46
- bboxes: List of bounding boxes
47
- labels: Titles of the bounding boxes
48
- bbox_outline_width: Width of the bounding box
49
- bbox_outline_color: Color of the bounding box
50
- bbox_fill_color: Fill color of the bounding box
51
- label_text_color: Color of the label text
52
- label_fill_color: Color of the label rectangle
53
- label_text_padding: Padding of the label text
54
- label_rectangle_left_margin: Left padding of the label rectangle
55
- label_rectangle_top_margin: Top padding of the label rectangle
56
- label_text_size: Font size of the label text
57
- convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
58
- Returns:
59
- Image: Image annotated with bounding boxes
60
- '''
61
- image = image.copy().convert("RGB")
62
- draw = ImageDraw.Draw(image)
63
- font = get_font(size = label_text_size)
64
- labels = (labels or []) + np.full(len(bboxes) - len(labels or []), None).tolist()
65
- bbox_fill_colors = bbox_fill_color if isinstance(bbox_fill_color, list) else [bbox_fill_color] * len(bboxes)
66
- bbox_outline_colors = bbox_outline_color if isinstance(bbox_outline_color, list) else [bbox_outline_color] * len(bboxes)
67
- for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
68
- x0, y0, x1, y1 = convert_to_x0y0x1y1(bbox) if convert_to_x0y0x1y1 is not None else bbox
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- rectangle_image = Image.new('RGBA', image.size)
71
- rectangle_image_draw = ImageDraw.Draw(rectangle_image)
72
- rectangle_image_draw.rectangle(
73
- xy = [x0, y0, x1, y1],
74
- fill = _bbox_fill_color,
75
- outline = _bbox_outline_color,
76
- width = bbox_outline_width)
77
- image.paste(im = rectangle_image, mask = rectangle_image)
 
 
 
 
 
78
 
79
- if label is not None:
80
- draw_text_on_image(
81
- draw,
82
- [x0, y0],
83
- label,
84
- label_text_color,
85
- label_fill_color,
86
- label_text_padding,
87
- label_rectangle_left_margin,
88
- label_rectangle_top_margin,
89
- label_text_size,
90
- font)
91
- return image
92
 
93
  def draw_text_on_image(
94
- image_or_draw: Image.Image | ImageDraw.ImageDraw,
95
- text_position_xy: List[int],
96
- label: str,
97
- label_text_color = DEFAULTS["label_text_color"],
98
- label_fill_color = DEFAULTS["label_fill_color"],
99
- label_text_padding = DEFAULTS["label_text_padding"],
100
- label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
101
- label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
102
- label_text_size = DEFAULTS["label_text_size"],
103
- font: ImageFont.FreeTypeFont = None) -> Image.Image:
104
- is_image = isinstance(image_or_draw, Image.Image)
105
- image = image_or_draw.copy().convert("RGB") if is_image else None
106
- font = font or get_font(size = label_text_size)
107
- x0, y0 = text_position_xy
108
- text_position = (x0 - label_rectangle_left_margin + label_text_padding, y0 - label_rectangle_top_margin + label_text_padding)
109
- draw = ImageDraw.Draw(image) if is_image else image_or_draw
110
- _, _, text_bbox_right, text_bbox_bottom = draw.textbbox(text_position, label, font=font)
111
- xy = [
112
- text_position[0] - label_text_padding,
113
- text_position[1] - label_text_padding,
114
- text_bbox_right + label_text_padding + label_text_padding,
115
- text_bbox_bottom + label_text_padding + label_text_padding
116
- ]
117
- draw.rectangle(xy, fill = label_fill_color)
118
- draw.text(text_position, label, font=font, fill=label_text_color)
119
- return image
 
 
 
5
  import requests
6
  from typing import List
7
  from functools import cache
8
+ import matplotlib.colors as colors
9
 
10
  DEFAULTS = {
11
+ 'bbox_outline_width': 2,
12
+ # color name or hex code or tuple of RGBA or tuple of RGB or tuple (color_name, alpha)
13
+ # between 0 (fully transparent) and 255 (fully opaque)
14
+ 'bbox_outline_color': ('blue', 123),
15
+ # color name or hex code or tuple of RGBA or tuple of RGB or tuple (color_name, alpha)
16
+ # between 0 (fully transparent) and 255 (fully opaque)
17
+ 'bbox_fill_color': ('red', 50),
18
+ 'label_text_color': "black",
19
+ 'label_fill_color': "red",
20
+ 'label_text_padding': 0,
21
+ 'label_rectangle_left_margin': 0,
22
+ 'label_rectangle_top_margin': 0,
23
+ 'label_text_size': 12,
24
  }
25
 
26
+
27
  @cache
28
  def get_font(path_or_url: str = 'https://github.com/googlefonts/roboto/raw/main/src/hinted/Roboto-Regular.ttf', size: int = DEFAULTS['label_text_size']):
29
+ if urlparse(path_or_url).scheme in ["http", "https"]: # Online
30
+ return ImageFont.truetype(requests.get(path_or_url, stream=True).raw, size=size)
31
+ else: # Local
32
+ return ImageFont.truetype(path_or_url, size=size)
33
+
34
+ named_colors_mapping = colors.get_named_colors_mapping()
35
@cache
def get_color(color: str | tuple) -> tuple | str:
    """Resolve a ``(color, alpha)`` pair into an integer RGBA tuple.

    Args:
        color: Either a 2-tuple ``(color_spec, alpha)`` where ``color_spec``
            is anything matplotlib recognizes as a color and ``alpha`` is
            convertible to ``int``, or any other color value.

    Returns:
        ``(r, g, b, alpha)`` with 0-255 integer channels when ``color`` is a
        recognized ``(color_spec, alpha)`` pair; otherwise ``color`` unchanged.
    """
    if isinstance(color, tuple) and len(color) == 2:
        real_color, alpha = color[0], int(color[1])
        if colors.is_color_like(real_color):
            real_color_rgb = colors.hex2color(
                named_colors_mapping.get(real_color, real_color))
            if len(real_color_rgb) == 3:
                # Scale the 0..1 floats to 0..255 BEFORE converting to int.
                # Casting first truncated every channel to 0 or 1, so colors
                # such as '#40E0D0' collapsed to black.
                channels = tuple(
                    int(round(channel * 255)) for channel in real_color_rgb)
                return channels + (alpha,)
    return color
46
 
47
  def visualize_bboxes_on_image(
48
+ image: Image.Image,
49
+ bboxes: List[List[int]],
50
+ labels: List[str] = None,
51
+ bbox_outline_width=DEFAULTS["bbox_outline_width"],
52
+ bbox_outline_color=DEFAULTS["bbox_outline_color"],
53
+ bbox_fill_color: str | list[tuple | str] = DEFAULTS["bbox_fill_color"],
54
+ label_text_color: str | list[tuple |
55
+ str] = DEFAULTS["label_text_color"],
56
+ label_fill_color=DEFAULTS["label_fill_color"],
57
+ label_text_padding=DEFAULTS["label_text_padding"],
58
+ label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
59
+ label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
60
+ label_text_size=DEFAULTS["label_text_size"],
61
+ convert_to_x0y0x1y1=None) -> Image.Image:
62
+ '''
63
+ Visualize bounding boxes on an image
64
+ Args:
65
+ image: Image to visualize
66
+ bboxes: List of bounding boxes
67
+ labels: Titles of the bounding boxes
68
+ bbox_outline_width: Width of the bounding box
69
+ bbox_outline_color: Color of the bounding box
70
+ bbox_fill_color: Fill color of the bounding box
71
+ label_text_color: Color of the label text
72
+ label_fill_color: Color of the label rectangle
73
+ label_text_padding: Padding of the label text
74
+ label_rectangle_left_margin: Left padding of the label rectangle
75
+ label_rectangle_top_margin: Top padding of the label rectangle
76
+ label_text_size: Font size of the label text
77
+ convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
78
+ Returns:
79
+ Image: Image annotated with bounding boxes
80
+ '''
81
+ image = image.copy().convert("RGB")
82
+ draw = ImageDraw.Draw(image)
83
+ font = get_font(size=label_text_size)
84
+ labels = (labels or []) + np.full(len(bboxes) -
85
+ len(labels or []), None).tolist()
86
+ bbox_fill_colors = bbox_fill_color if isinstance(bbox_fill_color, list) else [
87
+ bbox_fill_color] * len(bboxes)
88
+ bbox_outline_colors = bbox_outline_color if isinstance(
89
+ bbox_outline_color, list) else [bbox_outline_color] * len(bboxes)
90
+
91
+ for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
92
+ x0, y0, x1, y1 = convert_to_x0y0x1y1(
93
+ bbox) if convert_to_x0y0x1y1 is not None else bbox
94
+ _bbox_fill_color = get_color(_bbox_fill_color)
95
+ _bbox_outline_color = get_color(_bbox_outline_color)
96
+ rectangle_image = Image.new('RGBA', image.size)
97
+ rectangle_image_draw = ImageDraw.Draw(rectangle_image)
98
+ rectangle_image_draw.rectangle(
99
+ xy=[x0, y0, x1, y1],
100
+ fill=_bbox_fill_color,
101
+ outline=_bbox_outline_color,
102
+ width=bbox_outline_width)
103
+ image.paste(im=rectangle_image, mask=rectangle_image)
104
 
105
+ if label is not None:
106
+ draw_text_on_image(
107
+ draw,
108
+ [x0, y0],
109
+ label,
110
+ label_text_color,
111
+ label_fill_color,
112
+ label_text_padding,
113
+ label_rectangle_left_margin,
114
+ label_rectangle_top_margin,
115
+ label_text_size,
116
+ font)
117
+ return image
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  def draw_text_on_image(
121
+ image_or_draw: Image.Image | ImageDraw.ImageDraw,
122
+ text_position_xy: List[int],
123
+ label: str,
124
+ label_text_color=DEFAULTS["label_text_color"],
125
+ label_fill_color=DEFAULTS["label_fill_color"],
126
+ label_text_padding=DEFAULTS["label_text_padding"],
127
+ label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
128
+ label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
129
+ label_text_size=DEFAULTS["label_text_size"],
130
+ font: ImageFont.FreeTypeFont = None) -> Image.Image:
131
+ is_image = isinstance(image_or_draw, Image.Image)
132
+ image = image_or_draw.copy().convert("RGB") if is_image else None
133
+ font = font or get_font(size=label_text_size)
134
+ x0, y0 = text_position_xy
135
+ text_position = (x0 - label_rectangle_left_margin + label_text_padding,
136
+ y0 - label_rectangle_top_margin + label_text_padding)
137
+ draw = ImageDraw.Draw(image) if is_image else image_or_draw
138
+ _, _, text_bbox_right, text_bbox_bottom = draw.textbbox(
139
+ text_position, label, font=font)
140
+ xy = [
141
+ text_position[0] - label_text_padding,
142
+ text_position[1] - label_text_padding,
143
+ text_bbox_right + label_text_padding + label_text_padding,
144
+ text_bbox_bottom + label_text_padding + label_text_padding
145
+ ]
146
+ draw.rectangle(xy, fill=label_fill_color)
147
+ draw.text(text_position, label, font=font, fill=label_text_color)
148
+ return image