Spaces:

mckabue
/

document-similarity-matching-using-visual-layout-features-archive

Build error

App Files Files Community

Charles Kabui committed on Mar 13, 2024

Commit

c33e07b

1 Parent(s): 1f9e550

transparent bboxes

Browse files

Files changed (4) hide show

analysis.ipynb +0 -0
app.py +1 -0
main.py +6 -18
utils/visualize_bboxes_on_image.py +42 -28

analysis.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 os.system("apt install -y poppler-utils")
 os.system("python -m pip install --upgrade pip")
 os.system("python -m pip install torch==2.1.0")
 os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@898507047cf441a1e4be7a729270961c401c4354'")
 os.system("python -m pip install layoutparser==0.3.4 layoutparser[layoutmodels] layoutparser[ocr]")

 import os
 os.system("apt install -y poppler-utils")
 os.system("python -m pip install --upgrade pip")
+os.system("python -m pip install pdf2image==1.16.3")
 os.system("python -m pip install torch==2.1.0")
 os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@898507047cf441a1e4be7a729270961c401c4354'")
 os.system("python -m pip install layoutparser==0.3.4 layoutparser[layoutmodels] layoutparser[ocr]")

main.py CHANGED Viewed

@@ -23,7 +23,7 @@ cache = {
 pre_message_style = 'overflow:auto;border:2px solid pink;padding:4px;border-radius:4px;'
 visualize_bboxes_on_image_kwargs = {
     'label_text_color': 'white',
-    'label_rectangle_color': 'black',
     'label_text_size': 12,
     'label_text_padding': 3,
     'label_rectangle_left_margin': 0,
@@ -31,20 +31,6 @@ visualize_bboxes_on_image_kwargs = {
 }
 vectors_types = ['vectors', 'weighted_vectors', 'reduced_vectors', 'weighted_reduced_vectors']
-annotation_key = 'is_annotated_document_image'
-annotation_original_image_key = 'original_image'
-def annotate_document_image(document_image: Image.Image, original_document_image: Image.Image):
-    document_image.info.update({
-        annotation_key: True,
-        annotation_original_image_key: original_document_image
-    })
-    return document_image
-def get_original_document_image(document_image: Image.Image):
-    if document_image.info.get(annotation_key) == True:
-        return document_image.info.get(annotation_original_image_key)
-    return document_image
 def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
     message = None
     annotations = {
@@ -87,16 +73,18 @@ def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image
             document_image_1 = visualize_bboxes_on_image(
                 image = document_image_1,
                 bboxes = cache['document_image_1_features'][annotations['predicted_bboxes']],
-                titles = [f'{label}, score:{round(score, 2)}' for label, score in zip(
                     cache['document_image_1_features'][annotations['predicted_labels']],
                     cache['document_image_1_features'][annotations['predicted_scores']])],
                 **visualize_bboxes_on_image_kwargs)
             document_image_2 = visualize_bboxes_on_image(
                 image = document_image_2,
                 bboxes = cache['document_image_2_features'][annotations['predicted_bboxes']],
-                titles = [f'{label}, score:{score}' for label, score in zip(
                     cache['document_image_2_features'][annotations['predicted_labels']],
                     cache['document_image_2_features'][annotations['predicted_scores']])],
                 **visualize_bboxes_on_image_kwargs)
             cache['output_document_image_1_hash'] = str(average_hash(document_image_1))
@@ -141,7 +129,7 @@ def document_view(document_number: int):
     gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
     with gr.Tabs() as document_tabs:
         with gr.Tab("From Image", id=0):
-            document = gr.Image(type="pil", label=f"Document {document_number}", visible=False, interactive=False, show_download_button=False)
             document_error_message = gr.HTML(label="Error Message", visible=False)
             document_preview = gr.UploadButton(
                 "Upload PDF or Document Image",

 pre_message_style = 'overflow:auto;border:2px solid pink;padding:4px;border-radius:4px;'
 visualize_bboxes_on_image_kwargs = {
     'label_text_color': 'white',
+    'label_fill_color': 'black',
     'label_text_size': 12,
     'label_text_padding': 3,
     'label_rectangle_left_margin': 0,
 }
 vectors_types = ['vectors', 'weighted_vectors', 'reduced_vectors', 'weighted_reduced_vectors']
 def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
     message = None
     annotations = {
             document_image_1 = visualize_bboxes_on_image(
                 image = document_image_1,
                 bboxes = cache['document_image_1_features'][annotations['predicted_bboxes']],
+                labels = [f'{label}, score:{round(score, 2)}' for label, score in zip(
                     cache['document_image_1_features'][annotations['predicted_labels']],
                     cache['document_image_1_features'][annotations['predicted_scores']])],
+                bbox_outline_color = [color_map[label] for label in cache['document_image_1_features'][annotations['predicted_labels']]],
                 **visualize_bboxes_on_image_kwargs)
             document_image_2 = visualize_bboxes_on_image(
                 image = document_image_2,
                 bboxes = cache['document_image_2_features'][annotations['predicted_bboxes']],
+                labels = [f'{label}, score:{score}' for label, score in zip(
                     cache['document_image_2_features'][annotations['predicted_labels']],
                     cache['document_image_2_features'][annotations['predicted_scores']])],
+                bbox_outline_color = [color_map[label] for label in cache['document_image_2_features'][annotations['predicted_labels']]],
                 **visualize_bboxes_on_image_kwargs)
             cache['output_document_image_1_hash'] = str(average_hash(document_image_1))
     gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
     with gr.Tabs() as document_tabs:
         with gr.Tab("From Image", id=0):
+            document = gr.Image(type="pil", label=f"Document {document_number}", visible=False, interactive=False, show_download_button=True)
             document_error_message = gr.HTML(label="Error Message", visible=False)
             document_preview = gr.UploadButton(
                 "Upload PDF or Document Image",

utils/visualize_bboxes_on_image.py CHANGED Viewed

@@ -7,14 +7,15 @@ from typing import List
 from functools import cache
 DEFAULTS = {
-  'width': 2,
-  'bbox_color': "red",
   'label_text_color': "black",
-  'label_rectangle_color': "red",
   'label_text_padding': 0,
   'label_rectangle_left_margin': 0,
   'label_rectangle_top_margin': 0,
-  'label_text_size': 12
 }
 @cache
@@ -27,11 +28,12 @@ def get_font(path_or_url: str = 'https://github.com/googlefonts/roboto/raw/main/
 def visualize_bboxes_on_image(
     image: Image.Image,
     bboxes: List[List[int]],
-    titles: List[str] = None,
-    width = DEFAULTS["width"],
-    bbox_color = DEFAULTS["bbox_color"],
-    label_text_color = DEFAULTS["label_text_color"],
-    label_rectangle_color = DEFAULTS["label_rectangle_color"],
     label_text_padding = DEFAULTS["label_text_padding"],
     label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
     label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
@@ -42,33 +44,45 @@ def visualize_bboxes_on_image(
   Args:
     image: Image to visualize
     bboxes: List of bounding boxes
-    titles: Titles of the bounding boxes
-    width: Width of the bounding box
-    bbox_color: Color of the bounding box
     label_text_color: Color of the label text
-    label_rectangle_color: Color of the label rectangle
-    convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
     label_text_padding: Padding of the label text
-    label_rectangle_left_padding: Left padding of the label rectangle
-    label_rectangle_top_padding: Top padding of the label rectangle
     label_text_size: Font size of the label text
   Returns:
-    Image: Image with bounding boxes
   '''
   image = image.copy().convert("RGB")
   draw = ImageDraw.Draw(image)
   font = get_font(size = label_text_size)
-  titles = (titles or []) + np.full(len(bboxes) - len(titles or []), None).tolist()
-  for bbox, title in zip(bboxes, titles):
     x0, y0, x1, y1 = convert_to_x0y0x1y1(bbox) if convert_to_x0y0x1y1 is not None else bbox
-    draw.rectangle([x0, y0, x1, y1], outline=bbox_color, width=width)
-    if title is not None:
       draw_text_on_image(
         draw,
         [x0, y0],
-        title,
         label_text_color,
-        label_rectangle_color,
         label_text_padding,
         label_rectangle_left_margin,
         label_rectangle_top_margin,
@@ -79,9 +93,9 @@ def visualize_bboxes_on_image(
 def draw_text_on_image(
     image_or_draw: Image.Image | ImageDraw.ImageDraw,
     text_position_xy: List[int],
-    title: str,
     label_text_color = DEFAULTS["label_text_color"],
-    label_rectangle_color = DEFAULTS["label_rectangle_color"],
     label_text_padding = DEFAULTS["label_text_padding"],
     label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
     label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
@@ -93,13 +107,13 @@ def draw_text_on_image(
   x0, y0 = text_position_xy
   text_position = (x0 - label_rectangle_left_margin + label_text_padding, y0 - label_rectangle_top_margin + label_text_padding)
   draw = ImageDraw.Draw(image) if is_image else image_or_draw
-  text_bbox_left, text_bbox_top, text_bbox_right, text_bbox_bottom = draw.textbbox(text_position, title, font=font)
   xy = [
     text_position[0] - label_text_padding,
     text_position[1] - label_text_padding,
     text_bbox_right + label_text_padding + label_text_padding,
     text_bbox_bottom + label_text_padding + label_text_padding
   ]
-  draw.rectangle(xy, fill = label_rectangle_color)
-  draw.text(text_position, title, font=font, fill=label_text_color)
   return image

 from functools import cache
 DEFAULTS = {
+  'bbox_outline_width': 2,
+  'bbox_outline_color': (0, 0, 256, 123), # (R, G, B, A) drawn on an 'RGBA' overlay. NOTE(review): 8-bit channels presumably span 0-255 (not 0-127), so 256 is out of range - confirm against Pillow
+  'bbox_fill_color': (256, 0, 0, 50), # (R, G, B, A) drawn on an 'RGBA' overlay. NOTE(review): 8-bit channels presumably span 0-255 (not 0-127), so 256 is out of range - confirm against Pillow
   'label_text_color': "black",
+  'label_fill_color': "red",
   'label_text_padding': 0,
   'label_rectangle_left_margin': 0,
   'label_rectangle_top_margin': 0,
+  'label_text_size': 12,
 }
 @cache
 def visualize_bboxes_on_image(
     image: Image.Image,
     bboxes: List[List[int]],
+    labels: List[str] = None,
+    bbox_outline_width = DEFAULTS["bbox_outline_width"],
+    bbox_outline_color = DEFAULTS["bbox_outline_color"],
+    bbox_fill_color: str | list[tuple | str] = DEFAULTS["bbox_fill_color"],
+    label_text_color: str | list[tuple | str] = DEFAULTS["label_text_color"],
+    label_fill_color = DEFAULTS["label_fill_color"],
     label_text_padding = DEFAULTS["label_text_padding"],
     label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
     label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
   Args:
     image: Image to visualize
     bboxes: List of bounding boxes
+    labels: Titles of the bounding boxes
+    bbox_outline_width: Width of the bounding box
+    bbox_outline_color: Color of the bounding box
+    bbox_fill_color: Fill color of the bounding box
     label_text_color: Color of the label text
+    label_fill_color: Color of the label rectangle
     label_text_padding: Padding of the label text
+    label_rectangle_left_margin: Left padding of the label rectangle
+    label_rectangle_top_margin: Top padding of the label rectangle
     label_text_size: Font size of the label text
+    convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
   Returns:
+    Image: Image annotated with bounding boxes
   '''
   image = image.copy().convert("RGB")
   draw = ImageDraw.Draw(image)
   font = get_font(size = label_text_size)
+  labels = (labels or []) + np.full(len(bboxes) - len(labels or []), None).tolist()
+  bbox_fill_colors = bbox_fill_color if isinstance(bbox_fill_color, list) else [bbox_fill_color] * len(bboxes)
+  bbox_outline_colors = bbox_outline_color if isinstance(bbox_outline_color, list) else [bbox_outline_color] * len(bboxes)
+  for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
     x0, y0, x1, y1 = convert_to_x0y0x1y1(bbox) if convert_to_x0y0x1y1 is not None else bbox
+    rectangle_image = Image.new('RGBA', image.size)
+    rectangle_image_draw = ImageDraw.Draw(rectangle_image)
+    rectangle_image_draw.rectangle(
+      xy = [x0, y0, x1, y1],
+      fill = _bbox_fill_color,
+      outline = _bbox_outline_color,
+      width = bbox_outline_width)
+    image.paste(im = rectangle_image, mask = rectangle_image)
+    if label is not None:
       draw_text_on_image(
         draw,
         [x0, y0],
+        label,
         label_text_color,
+        label_fill_color,
         label_text_padding,
         label_rectangle_left_margin,
         label_rectangle_top_margin,
 def draw_text_on_image(
     image_or_draw: Image.Image | ImageDraw.ImageDraw,
     text_position_xy: List[int],
+    label: str,
     label_text_color = DEFAULTS["label_text_color"],
+    label_fill_color = DEFAULTS["label_fill_color"],
     label_text_padding = DEFAULTS["label_text_padding"],
     label_rectangle_left_margin = DEFAULTS["label_rectangle_left_margin"],
     label_rectangle_top_margin = DEFAULTS['label_rectangle_top_margin'],
   x0, y0 = text_position_xy
   text_position = (x0 - label_rectangle_left_margin + label_text_padding, y0 - label_rectangle_top_margin + label_text_padding)
   draw = ImageDraw.Draw(image) if is_image else image_or_draw
+  _, _, text_bbox_right, text_bbox_bottom = draw.textbbox(text_position, label, font=font)
   xy = [
     text_position[0] - label_text_padding,
     text_position[1] - label_text_padding,
     text_bbox_right + label_text_padding + label_text_padding,
     text_bbox_bottom + label_text_padding + label_text_padding
   ]
+  draw.rectangle(xy, fill = label_fill_color)
+  draw.text(text_position, label, font=font, fill=label_text_color)
   return image