Spaces:
Running
Running
import cv2 | |
import numpy as np | |
import detect_compo.lib_ip.ip_draw as draw | |
import detect_compo.lib_ip.ip_preprocessing as pre | |
from detect_compo.lib_ip.Component import Component | |
import detect_compo.lib_ip.Component as Compo | |
from config.CONFIG_UIED import Config | |
C = Config() | |
def merge_intersected_corner(compos, org, is_merge_contained_ele, max_gap=(0, 0), max_ele_height=25): | |
''' | |
:param is_merge_contained_ele: if true, merge compos nested in others | |
:param max_gap: (horizontal_distance, vertical_distance) to be merge into one line/column | |
:param max_ele_height: if higher than it, recognize the compo as text | |
:return: | |
''' | |
changed = False | |
new_compos = [] | |
Compo.compos_update(compos, org.shape) | |
for i in range(len(compos)): | |
merged = False | |
cur_compo = compos[i] | |
for j in range(len(new_compos)): | |
relation = cur_compo.compo_relation(new_compos[j], max_gap) | |
# print(relation) | |
# draw.draw_bounding_box(org, [cur_compo, new_compos[j]], name='b-merge', show=True) | |
# merge compo[i] to compo[j] if | |
# 1. compo[j] contains compo[i] | |
# 2. compo[j] intersects with compo[i] with certain iou | |
# 3. is_merge_contained_ele and compo[j] is contained in compo[i] | |
if relation == 1 or \ | |
relation == 2 or \ | |
(is_merge_contained_ele and relation == -1): | |
# (relation == 2 and new_compos[j].height < max_ele_height and cur_compo.height < max_ele_height) or\ | |
new_compos[j].compo_merge(cur_compo) | |
cur_compo = new_compos[j] | |
# draw.draw_bounding_box(org, [new_compos[j]], name='a-merge', show=True) | |
merged = True | |
changed = True | |
# break | |
if not merged: | |
new_compos.append(compos[i]) | |
if not changed: | |
return compos | |
else: | |
return merge_intersected_corner(new_compos, org, is_merge_contained_ele, max_gap, max_ele_height) | |
def merge_intersected_compos(compos): | |
changed = True | |
while changed: | |
changed = False | |
temp_set = [] | |
for compo_a in compos: | |
merged = False | |
for compo_b in temp_set: | |
if compo_a.compo_relation(compo_b) == 2: | |
compo_b.compo_merge(compo_a) | |
merged = True | |
changed = True | |
break | |
if not merged: | |
temp_set.append(compo_a) | |
compos = temp_set.copy() | |
return compos | |
def rm_contained_compos_not_in_block(compos): | |
''' | |
remove all components contained by others that are not Block | |
''' | |
marked = np.full(len(compos), False) | |
for i in range(len(compos) - 1): | |
for j in range(i + 1, len(compos)): | |
relation = compos[i].compo_relation(compos[j]) | |
if relation == -1 and compos[j].category != 'Block': | |
marked[i] = True | |
if relation == 1 and compos[i].category != 'Block': | |
marked[j] = True | |
new_compos = [] | |
for i in range(len(marked)): | |
if not marked[i]: | |
new_compos.append(compos[i]) | |
return new_compos | |
def merge_text(compos, org_shape, max_word_gad=4, max_word_height=20): | |
def is_text_line(compo_a, compo_b): | |
(col_min_a, row_min_a, col_max_a, row_max_a) = compo_a.put_bbox() | |
(col_min_b, row_min_b, col_max_b, row_max_b) = compo_b.put_bbox() | |
col_min_s = max(col_min_a, col_min_b) | |
col_max_s = min(col_max_a, col_max_b) | |
row_min_s = max(row_min_a, row_min_b) | |
row_max_s = min(row_max_a, row_max_b) | |
# on the same line | |
# if abs(row_min_a - row_min_b) < max_word_gad and abs(row_max_a - row_max_b) < max_word_gad: | |
if row_min_s < row_max_s: | |
# close distance | |
if col_min_s < col_max_s or \ | |
(0 < col_min_b - col_max_a < max_word_gad) or (0 < col_min_a - col_max_b < max_word_gad): | |
return True | |
return False | |
changed = False | |
new_compos = [] | |
row, col = org_shape[:2] | |
for i in range(len(compos)): | |
merged = False | |
height = compos[i].height | |
# ignore non-text | |
# if height / row > max_word_height_ratio\ | |
# or compos[i].category != 'Text': | |
if height > max_word_height: | |
new_compos.append(compos[i]) | |
continue | |
for j in range(len(new_compos)): | |
# if compos[j].category != 'Text': | |
# continue | |
if is_text_line(compos[i], new_compos[j]): | |
new_compos[j].compo_merge(compos[i]) | |
merged = True | |
changed = True | |
break | |
if not merged: | |
new_compos.append(compos[i]) | |
if not changed: | |
return compos | |
else: | |
return merge_text(new_compos, org_shape) | |
def rm_top_or_bottom_corners(components, org_shape, top_bottom_height=C.THRESHOLD_TOP_BOTTOM_BAR): | |
new_compos = [] | |
height, width = org_shape[:2] | |
for compo in components: | |
(column_min, row_min, column_max, row_max) = compo.put_bbox() | |
# remove big ones | |
# if (row_max - row_min) / height > 0.65 and (column_max - column_min) / width > 0.8: | |
# continue | |
if not (row_max < height * top_bottom_height[0] or row_min > height * top_bottom_height[1]): | |
new_compos.append(compo) | |
return new_compos | |
def rm_line_v_h(binary, show=False, max_line_thickness=C.THRESHOLD_LINE_THICKNESS): | |
def check_continuous_line(line, edge): | |
continuous_length = 0 | |
line_start = -1 | |
for j, p in enumerate(line): | |
if p > 0: | |
if line_start == -1: | |
line_start = j | |
continuous_length += 1 | |
elif continuous_length > 0: | |
if continuous_length / edge > 0.6: | |
return [line_start, j] | |
continuous_length = 0 | |
line_start = -1 | |
if continuous_length / edge > 0.6: | |
return [line_start, len(line)] | |
else: | |
return None | |
def extract_line_area(line, start_idx, flag='v'): | |
for e, l in enumerate(line): | |
if flag == 'v': | |
map_line[start_idx + e, l[0]:l[1]] = binary[start_idx + e, l[0]:l[1]] | |
map_line = np.zeros(binary.shape[:2], dtype=np.uint8) | |
cv2.imshow('binary', binary) | |
width = binary.shape[1] | |
start_row = -1 | |
line_area = [] | |
for i, row in enumerate(binary): | |
line_v = check_continuous_line(row, width) | |
if line_v is not None: | |
# new line | |
if start_row == -1: | |
start_row = i | |
line_area = [] | |
line_area.append(line_v) | |
else: | |
# checking line | |
if start_row != -1: | |
if i - start_row < max_line_thickness: | |
# binary[start_row: i] = 0 | |
# map_line[start_row: i] = binary[start_row: i] | |
print(line_area, start_row, i) | |
extract_line_area(line_area, start_row) | |
start_row = -1 | |
height = binary.shape[0] | |
start_col = -1 | |
for i in range(width): | |
col = binary[:, i] | |
line_h = check_continuous_line(col, height) | |
if line_h is not None: | |
# new line | |
if start_col == -1: | |
start_col = i | |
else: | |
# checking line | |
if start_col != -1: | |
if i - start_col < max_line_thickness: | |
# binary[:, start_col: i] = 0 | |
map_line[:, start_col: i] = binary[:, start_col: i] | |
start_col = -1 | |
binary -= map_line | |
if show: | |
cv2.imshow('no-line', binary) | |
cv2.imshow('lines', map_line) | |
cv2.waitKey() | |
def rm_line(binary, | |
max_line_thickness=C.THRESHOLD_LINE_THICKNESS, | |
min_line_length_ratio=C.THRESHOLD_LINE_MIN_LENGTH, | |
show=False, wait_key=0): | |
def is_valid_line(line): | |
line_length = 0 | |
line_gap = 0 | |
for j in line: | |
if j > 0: | |
if line_gap > 5: | |
return False | |
line_length += 1 | |
line_gap = 0 | |
elif line_length > 0: | |
line_gap += 1 | |
if line_length / width > 0.95: | |
return True | |
return False | |
height, width = binary.shape[:2] | |
board = np.zeros(binary.shape[:2], dtype=np.uint8) | |
start_row, end_row = -1, -1 | |
check_line = False | |
check_gap = False | |
for i, row in enumerate(binary): | |
# line_ratio = (sum(row) / 255) / width | |
# if line_ratio > 0.9: | |
if is_valid_line(row): | |
# new start: if it is checking a new line, mark this row as start | |
if not check_line: | |
start_row = i | |
check_line = True | |
else: | |
# end the line | |
if check_line: | |
# thin enough to be a line, then start checking gap | |
if i - start_row < max_line_thickness: | |
end_row = i | |
check_gap = True | |
else: | |
start_row, end_row = -1, -1 | |
check_line = False | |
# check gap | |
if check_gap and i - end_row > max_line_thickness: | |
binary[start_row: end_row] = 0 | |
start_row, end_row = -1, -1 | |
check_line = False | |
check_gap = False | |
if (check_line and (height - start_row) < max_line_thickness) or check_gap: | |
binary[start_row: end_row] = 0 | |
if show: | |
cv2.imshow('no-line binary', binary) | |
if wait_key is not None: | |
cv2.waitKey(wait_key) | |
if wait_key == 0: | |
cv2.destroyWindow('no-line binary') | |
def rm_noise_compos(compos): | |
compos_new = [] | |
for compo in compos: | |
if compo.category == 'Noise': | |
continue | |
compos_new.append(compo) | |
return compos_new | |
def rm_noise_in_large_img(compos, org, | |
max_compo_scale=C.THRESHOLD_COMPO_MAX_SCALE): | |
row, column = org.shape[:2] | |
remain = np.full(len(compos), True) | |
new_compos = [] | |
for compo in compos: | |
if compo.category == 'Image': | |
for i in compo.contain: | |
remain[i] = False | |
for i in range(len(remain)): | |
if remain[i]: | |
new_compos.append(compos[i]) | |
return new_compos | |
def detect_compos_in_img(compos, binary, org, max_compo_scale=C.THRESHOLD_COMPO_MAX_SCALE, show=False): | |
compos_new = [] | |
row, column = binary.shape[:2] | |
for compo in compos: | |
if compo.category == 'Image': | |
compo.compo_update_bbox_area() | |
# org_clip = compo.compo_clipping(org) | |
# bin_clip = pre.binarization(org_clip, show=show) | |
bin_clip = compo.compo_clipping(binary) | |
bin_clip = pre.reverse_binary(bin_clip, show=show) | |
compos_rec, compos_nonrec = component_detection(bin_clip, test=False, step_h=10, step_v=10, rec_detect=True) | |
for compo_rec in compos_rec: | |
compo_rec.compo_relative_position(compo.bbox.col_min, compo.bbox.row_min) | |
if compo_rec.bbox_area / compo.bbox_area < 0.8 and compo_rec.bbox.height > 20 and compo_rec.bbox.width > 20: | |
compos_new.append(compo_rec) | |
# draw.draw_bounding_box(org, [compo_rec], show=True) | |
# compos_inner = component_detection(bin_clip, rec_detect=False) | |
# for compo_inner in compos_inner: | |
# compo_inner.compo_relative_position(compo.bbox.col_min, compo.bbox.row_min) | |
# draw.draw_bounding_box(org, [compo_inner], show=True) | |
# if compo_inner.bbox_area / compo.bbox_area < 0.8: | |
# compos_new.append(compo_inner) | |
compos += compos_new | |
def compo_filter(compos, min_area, img_shape): | |
max_height = img_shape[0] * 0.8 | |
compos_new = [] | |
for compo in compos: | |
if compo.area < min_area: | |
continue | |
if compo.height > max_height: | |
continue | |
ratio_h = compo.width / compo.height | |
ratio_w = compo.height / compo.width | |
if ratio_h > 50 or ratio_w > 40 or \ | |
(min(compo.height, compo.width) < 8 and max(ratio_h, ratio_w) > 10): | |
continue | |
compos_new.append(compo) | |
return compos_new | |
def is_block(clip, thread=0.15): | |
''' | |
Block is a rectangle border enclosing a group of compos (consider it as a wireframe) | |
Check if a compo is block by checking if the inner side of its border is blank | |
''' | |
side = 4 # scan 4 lines inner forward each border | |
# top border - scan top down | |
blank_count = 0 | |
for i in range(1, 5): | |
if sum(clip[side + i].astype(np.int64)) / 255 > thread * clip.shape[1]: | |
blank_count += 1 | |
if blank_count > 2: return False | |
# left border - scan left to right | |
blank_count = 0 | |
for i in range(1, 5): | |
if sum(clip[:, side + i].astype(np.int64)) / 255 > thread * clip.shape[0]: | |
blank_count += 1 | |
if blank_count > 2: return False | |
side = -4 | |
# bottom border - scan bottom up | |
blank_count = 0 | |
for i in range(-1, -5, -1): | |
if sum(clip[side + i].astype(np.int64)) / 255 > thread * clip.shape[1]: | |
blank_count += 1 | |
if blank_count > 2: return False | |
# right border - scan right to left | |
blank_count = 0 | |
for i in range(-1, -5, -1): | |
if sum(clip[:, side + i].astype(np.int64)) / 255 > thread * clip.shape[0]: | |
blank_count += 1 | |
if blank_count > 2: return False | |
return True | |
def compo_block_recognition(binary, compos, block_side_length=0.15): | |
height, width = binary.shape | |
for compo in compos: | |
if compo.height / height > block_side_length and compo.width / width > block_side_length: | |
clip = compo.compo_clipping(binary) | |
if is_block(clip): | |
compo.category = 'Block' | |
# take the binary image as input | |
# calculate the connected regions -> get the bounding boundaries of them -> check if those regions are rectangles | |
# return all boundaries and boundaries of rectangles | |
def component_detection(binary, min_obj_area, | |
line_thickness=C.THRESHOLD_LINE_THICKNESS, | |
min_rec_evenness=C.THRESHOLD_REC_MIN_EVENNESS, | |
max_dent_ratio=C.THRESHOLD_REC_MAX_DENT_RATIO, | |
step_h = 5, step_v = 2, | |
rec_detect=False, show=False, test=False): | |
""" | |
:param binary: Binary image from pre-processing | |
:param min_obj_area: If not pass then ignore the small object | |
:param min_obj_perimeter: If not pass then ignore the small object | |
:param line_thickness: If not pass then ignore the slim object | |
:param min_rec_evenness: If not pass then this object cannot be rectangular | |
:param max_dent_ratio: If not pass then this object cannot be rectangular | |
:return: boundary: [top, bottom, left, right] | |
-> up, bottom: list of (column_index, min/max row border) | |
-> left, right: list of (row_index, min/max column border) detect range of each row | |
""" | |
mask = np.zeros((binary.shape[0] + 2, binary.shape[1] + 2), dtype=np.uint8) | |
compos_all = [] | |
compos_rec = [] | |
compos_nonrec = [] | |
row, column = binary.shape[0], binary.shape[1] | |
for i in range(0, row, step_h): | |
for j in range(i % 2, column, step_v): | |
if binary[i, j] == 255 and mask[i, j] == 0: | |
# get connected area | |
# region = util.boundary_bfs_connected_area(binary, i, j, mask) | |
mask_copy = mask.copy() | |
ff = cv2.floodFill(binary, mask, (j, i), None, 0, 0, cv2.FLOODFILL_MASK_ONLY) | |
if ff[0] < min_obj_area: continue | |
mask_copy = mask - mask_copy | |
region = np.reshape(cv2.findNonZero(mask_copy[1:-1, 1:-1]), (-1, 2)) | |
region = [(p[1], p[0]) for p in region] | |
# filter out some compos | |
component = Component(region, binary.shape) | |
# calculate the boundary of the connected area | |
# ignore small area | |
if component.width <= 3 or component.height <= 3: | |
continue | |
# check if it is line by checking the length of edges | |
# if component.compo_is_line(line_thickness): | |
# continue | |
if test: | |
print('Area:%d' % (len(region))) | |
draw.draw_boundary([component], binary.shape, show=True) | |
compos_all.append(component) | |
if rec_detect: | |
# rectangle check | |
if component.compo_is_rectangle(min_rec_evenness, max_dent_ratio): | |
component.rect_ = True | |
compos_rec.append(component) | |
else: | |
component.rect_ = False | |
compos_nonrec.append(component) | |
if show: | |
print('Area:%d' % (len(region))) | |
draw.draw_boundary(compos_all, binary.shape, show=True) | |
# draw.draw_boundary(compos_all, binary.shape, show=True) | |
if rec_detect: | |
return compos_rec, compos_nonrec | |
else: | |
return compos_all | |
def nested_components_detection(grey, org, grad_thresh, | |
show=False, write_path=None, | |
step_h=10, step_v=10, | |
line_thickness=C.THRESHOLD_LINE_THICKNESS, | |
min_rec_evenness=C.THRESHOLD_REC_MIN_EVENNESS, | |
max_dent_ratio=C.THRESHOLD_REC_MAX_DENT_RATIO): | |
''' | |
:param grey: grey-scale of original image | |
:return: corners: list of [(top_left, bottom_right)] | |
-> top_left: (column_min, row_min) | |
-> bottom_right: (column_max, row_max) | |
''' | |
compos = [] | |
mask = np.zeros((grey.shape[0]+2, grey.shape[1]+2), dtype=np.uint8) | |
broad = np.zeros((grey.shape[0], grey.shape[1], 3), dtype=np.uint8) | |
broad_all = broad.copy() | |
row, column = grey.shape[0], grey.shape[1] | |
for x in range(0, row, step_h): | |
for y in range(0, column, step_v): | |
if mask[x, y] == 0: | |
# region = flood_fill_bfs(grey, x, y, mask) | |
# flood fill algorithm to get background (layout block) | |
mask_copy = mask.copy() | |
ff = cv2.floodFill(grey, mask, (y, x), None, grad_thresh, grad_thresh, cv2.FLOODFILL_MASK_ONLY) | |
# ignore small regions | |
if ff[0] < 500: continue | |
mask_copy = mask - mask_copy | |
region = np.reshape(cv2.findNonZero(mask_copy[1:-1, 1:-1]), (-1, 2)) | |
region = [(p[1], p[0]) for p in region] | |
compo = Component(region, grey.shape) | |
# draw.draw_region(region, broad_all) | |
# if block.height < 40 and block.width < 40: | |
# continue | |
if compo.height < 30: | |
continue | |
# print(block.area / (row * column)) | |
if compo.area / (row * column) > 0.9: | |
continue | |
elif compo.area / (row * column) > 0.7: | |
compo.redundant = True | |
# get the boundary of this region | |
# ignore lines | |
if compo.compo_is_line(line_thickness): | |
continue | |
# ignore non-rectangle as blocks must be rectangular | |
if not compo.compo_is_rectangle(min_rec_evenness, max_dent_ratio): | |
continue | |
# if block.height/row < min_block_height_ratio: | |
# continue | |
compos.append(compo) | |
# draw.draw_region(region, broad) | |
if show: | |
cv2.imshow('flood-fill all', broad_all) | |
cv2.imshow('block', broad) | |
cv2.waitKey() | |
if write_path is not None: | |
cv2.imwrite(write_path, broad) | |
return compos | |