Charles Kabui
rotating bboxes
68847fc
raw
history blame
5.3 kB
import layoutparser as lp
from PIL import Image
import tensorflow as tf
import numpy as np
import torch
import torchvision.ops.boxes as box_ops
from typing import List, Tuple
from .split_image import split_image
from .get_unique_values import get_unique_values
def get_vectors(*,
                predicted_bboxes: List[Tuple[int, int, int, int]],
                predicted_scores: List[float],
                predicted_labels: List[str],
                label_names: List[str],
                sub_images_bboxes: List[Tuple[int, int, int, int]],
                index_start: float = 0.17,
                index_end: float = 1,
                weighted_jaccard_index: bool = False):
    """Yield one scalar feature per sub-image region.

    For every region in ``sub_images_bboxes`` the predicted box that matches
    it best is selected, and the yielded value is the product::

        region_nonce * IoU(region, best_box) * label_nonce * best_box_score

    Args:
        predicted_bboxes: Predicted boxes in ``(x1, y1, x2, y2)`` format.
        predicted_scores: Confidence score of each predicted box.
        predicted_labels: Label of each predicted box; every label must be
            present in ``label_names`` (a missing label raises ``KeyError``).
        label_names: All labels that may occur; each one is paired with a
            value obtained from ``get_unique_values``.
        sub_images_bboxes: Regions to describe, in ``(x1, y1, x2, y2)`` format.
        index_start: ``start`` argument forwarded to ``get_unique_values``.
        index_end: ``end`` argument forwarded to ``get_unique_values``.
        weighted_jaccard_index: When ``False`` the best box is the one with
            the largest IoU; when ``True`` it is the one with the largest
            ``IoU * score``, which balances detection confidence against
            relative position.

    Yields:
        A Python ``float`` per region, in the order of ``sub_images_bboxes``.
        When there are no predictions at all, ``0.0`` is yielded for every
        region (the same value a region without any overlap receives).
    """
    # Nothing was detected: box_iou/argmax cannot work on an empty set of
    # boxes, and a region that overlaps nothing contributes 0 anyway.
    if len(predicted_bboxes) == 0:
        for _ in sub_images_bboxes:
            yield 0.0
        return

    bboxes_tensor = torch.tensor(predicted_bboxes)

    # label -> nonce lookup.
    # NOTE(review): get_unique_values is presumably returning `count`
    # distinct values between `start` and `end` - confirm against its source.
    label_values = get_unique_values(start=index_start, end=index_end, count=len(label_names))
    labels_nonce = {label: nonce for nonce, label in zip(label_values, list(label_names))}

    # Convert the scores once instead of once per region.
    scores_array = np.asarray(predicted_scores) if weighted_jaccard_index else None

    def get_vector(bbox: Tuple[int, int, int, int], region_nonce: float) -> float:
        # bbox: expected in (x1, y1, x2, y2) format with 0 <= x1 < x2 and 0 <= y1 < y2.
        # box_iou returns a (1, len(predicted_bboxes)) matrix; keep its only row.
        jaccard_indexes = box_ops.box_iou(torch.tensor([bbox]), bboxes_tensor)[0]
        if weighted_jaccard_index:
            best = int((jaccard_indexes.numpy() * scores_array).argmax())
        else:
            best = int(jaccard_indexes.argmax())
        jaccard_index = jaccard_indexes[best]
        label_nonce = labels_nonce[predicted_labels[best]]
        best_score = predicted_scores[best]
        vector = region_nonce * jaccard_index * label_nonce * best_score
        return vector.item()

    sub_images_nonces = get_unique_values(start=index_start, end=index_end, count=len(sub_images_bboxes))
    for sub_image_bbox, region_nonce in zip(sub_images_bboxes, sub_images_nonces):
        yield get_vector(sub_image_bbox, region_nonce)
def get_predictions(
        image: Image.Image,
        model: lp.Detectron2LayoutModel,
        predictions_reducer=lambda *args: args):
    """Run layout detection on ``image`` and return the predictions as lists.

    Args:
        image: Image handed to ``model.detect``.
        model: Object whose ``detect(image)`` returns a sized iterable of
            blocks exposing ``coordinates``, ``score`` and ``type``.
        predictions_reducer: Callable invoked as
            ``predictions_reducer(bboxes, scores, labels)`` to post-process
            the raw predictions. It may return either a 3-item sequence
            ``(bboxes, scores, labels)`` or a dict with the keys
            ``'predicted_bboxes'``, ``'predicted_scores'`` and
            ``'predicted_labels'``. The default returns its input unchanged.

    Returns:
        A dict with the keys ``'predicted_bboxes'``, ``'predicted_scores'``
        and ``'predicted_labels'``. All three are empty lists when nothing
        was detected; the reducer is not called in that case.
    """
    layout_predicted = model.detect(image)
    if len(layout_predicted) == 0:
        return {
            'predicted_bboxes': [],
            'predicted_scores': [],
            'predicted_labels': [],
        }

    predicted_bboxes = [block.coordinates for block in layout_predicted]
    predicted_scores = [block.score for block in layout_predicted]
    predicted_labels = [block.type for block in layout_predicted]

    reduced = predictions_reducer(predicted_bboxes, predicted_scores, predicted_labels)
    if isinstance(reduced, dict):
        # A dict-returning reducer must not be unpacked positionally: that
        # would bind the dict's *keys* instead of the reduced predictions.
        predicted_bboxes = reduced['predicted_bboxes']
        predicted_scores = reduced['predicted_scores']
        predicted_labels = reduced['predicted_labels']
    else:
        predicted_bboxes, predicted_scores, predicted_labels = reduced

    return {
        'predicted_bboxes': predicted_bboxes,
        'predicted_scores': predicted_scores,
        'predicted_labels': predicted_labels,
    }
def predictions_reducer(
        predicted_bboxes: List[Tuple[int, int, int, int]],
        predicted_scores: List[float],
        predicted_labels: List[str],
        iou_threshold: float = 0.01):
    """Drop overlapping predictions with non-max suppression.

    Args:
        predicted_bboxes: Predicted boxes, four coordinates each.
        predicted_scores: Confidence score of each predicted box.
        predicted_labels: Label of each predicted box.
        iou_threshold: ``iou_threshold`` forwarded to
            ``tf.image.non_max_suppression``. The default of ``0.01`` is the
            value this function has always used.

    Returns:
        A dict with the keys ``'predicted_bboxes'`` (list of 4-item lists),
        ``'predicted_scores'`` (list of floats) and ``'predicted_labels'``
        (list of strings) holding only the predictions selected by the
        suppression. Empty input yields three empty lists.
    """
    # non_max_suppression needs a rank-2 boxes tensor; an empty list would be
    # converted to a rank-1 tensor and rejected, so short-circuit here.
    if len(predicted_bboxes) == 0:
        return {
            'predicted_bboxes': [],
            'predicted_scores': [],
            'predicted_labels': [],
        }

    selected_indices = tf.image.non_max_suppression(
        boxes=predicted_bboxes,
        scores=predicted_scores,
        # Allow every box to be kept; only the IoU threshold prunes.
        max_output_size=len(predicted_bboxes),
        iou_threshold=iou_threshold)

    return {
        'predicted_bboxes': tf.gather(predicted_bboxes, selected_indices).numpy().tolist(),  # List[List[int, int, int, int]]
        'predicted_scores': tf.gather(predicted_scores, selected_indices).numpy().astype(float).tolist(),
        'predicted_labels': tf.gather(predicted_labels, selected_indices).numpy().astype(str).tolist()
    }
def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_names: List[str], width_parts = 100, height_parts = 100):
    """Build the full feature dictionary for one image.

    The image is run through ``get_predictions``; the result is additionally
    filtered with ``predictions_reducer``. The image is split into regions
    with ``split_image(..., result = 'bboxes')`` and ``get_vectors`` is
    evaluated over those regions for both prediction sets, once with the
    plain and once with the score-weighted Jaccard index.

    Returns a dict holding the raw predictions (``predicted_*``), their
    ``vectors`` / ``weighted_vectors``, the reduced predictions
    (``reduced_predicted_*``) and their ``reduced_vectors`` /
    ``reduced_weighted_vectors``.
    """
    raw = get_predictions(image, model)
    reduced = predictions_reducer(**raw)
    regions = list(split_image(np.array(image), width_parts, height_parts, result = 'bboxes'))

    def vectors_for(source, weighted):
        # get_vectors is a generator function: nothing is computed until the
        # result is materialised in the return statement below.
        return get_vectors(
            sub_images_bboxes = regions,
            label_names = label_names,
            weighted_jaccard_index = weighted,
            **source)

    plain = vectors_for(raw, False)
    weighted = vectors_for(raw, True)
    reduced_plain = vectors_for(reduced, False)
    reduced_weighted = vectors_for(reduced, True)

    features = {'predicted_' + field: raw['predicted_' + field]
                for field in ('bboxes', 'scores', 'labels')}
    features['vectors'] = list(plain)
    features['weighted_vectors'] = list(weighted)
    for field in ('bboxes', 'scores', 'labels'):
        features['reduced_predicted_' + field] = reduced['predicted_' + field]
    features['reduced_vectors'] = list(reduced_plain)
    features['reduced_weighted_vectors'] = list(reduced_weighted)
    return features