# Jimmyzheng-10's picture
# Update
# 0246ff9
import cv2
import numpy as np
import detect_compo.lib_ip.ip_draw as draw
import detect_compo.lib_ip.ip_preprocessing as pre
from detect_compo.lib_ip.Component import Component
import detect_compo.lib_ip.Component as Compo
from config.CONFIG_UIED import Config
# Module-wide UIED configuration instance; its THRESHOLD_* attributes supply the
# default arguments of several functions in this file.
C = Config()
def merge_intersected_corner(compos, org, is_merge_contained_ele, max_gap=(0, 0), max_ele_height=25):
    '''
    Fold related components into one another, pass after pass, until a whole
    pass performs no merge.

    :param compos: list of components; the surviving ones are merged into in place
    :param org: original image; only org.shape is read (handed to Compo.compos_update)
    :param is_merge_contained_ele: if true, relation code -1 also triggers a merge
    :param max_gap: (horizontal_distance, vertical_distance) forwarded to compo_relation
    :param max_ele_height: accepted for interface compatibility; the active merge rule does not read it
    :return: the component list of the first pass in which nothing was merged
    '''
    # Relation codes 1 and 2 always trigger a merge; -1 joins them on request.
    # NOTE(review): the codes are defined by Component.compo_relation - presumably
    # containment one way / intersection / containment the other way; confirm there.
    merge_codes = (1, 2, -1) if is_merge_contained_ele else (1, 2)

    while True:
        Compo.compos_update(compos, org.shape)
        kept = []
        any_merge = False
        for candidate in compos:
            absorbed = False
            current = candidate
            for target in kept:
                if current.compo_relation(target, max_gap) in merge_codes:
                    target.compo_merge(current)
                    # no break: keep scanning with the grown component so that
                    # it can be folded into later entries as well
                    current = target
                    absorbed = True
                    any_merge = True
            if not absorbed:
                kept.append(candidate)

        if not any_merge:
            return compos
        # something changed: run another pass over the reduced list
        compos = kept
def merge_intersected_compos(compos):
    '''
    Merge components whose compo_relation equals 2 until a pass merges nothing.

    Each component is compared with the ones already kept in the current pass
    and merged (via compo_merge, in place) into the first one it relates to.

    :param compos: iterable of components exposing compo_relation / compo_merge
    :return: new list holding the surviving components
    '''
    while True:
        survivors = []
        merged_any = False
        for compo in compos:
            for kept in survivors:
                if compo.compo_relation(kept) == 2:
                    kept.compo_merge(compo)
                    merged_any = True
                    break
            else:
                # related to nothing kept so far
                survivors.append(compo)
        compos = survivors.copy()
        if not merged_any:
            return compos
def rm_contained_compos_not_in_block(compos):
    '''
    Remove all components contained by others that are not Block.

    Every unordered pair is compared once through compo_relation.  A
    relation of -1 drops the first component of the pair and a relation of 1
    drops the second, unless the other component of the pair has category
    'Block'.
    (presumably -1 means "first lies inside second" and 1 the opposite -
    confirm against Component.compo_relation)

    :param compos: list of components
    :return: new list with the surviving components in their original order
    '''
    total = len(compos)
    dropped = np.zeros(total, dtype=bool)
    for i in range(total - 1):
        first = compos[i]
        for j in range(i + 1, total):
            second = compos[j]
            relation = first.compo_relation(second)
            if relation == -1:
                if second.category != 'Block':
                    dropped[i] = True
            elif relation == 1:
                if first.category != 'Block':
                    dropped[j] = True
    return [compo for compo, gone in zip(compos, dropped) if not gone]
def merge_text(compos, org_shape, max_word_gad=4, max_word_height=20):
    '''
    Merge short components lying on the same text line into one component.

    Components taller than max_word_height are never merged into something
    else (they can still absorb a short neighbour).  A short component is
    merged, in place via compo_merge, into the first already-kept component
    whose rows overlap it and whose columns either overlap it or are less
    than max_word_gad apart.  Passes repeat until one of them merges nothing.

    :param compos: list of components exposing height, put_bbox() and compo_merge()
    :param org_shape: shape of the original image; kept for interface compatibility, not read
    :param max_word_gad: exclusive upper bound of the horizontal gap between two words
    :param max_word_height: components higher than this are treated as non-text
    :return: the component list of the first pass in which nothing was merged
    '''
    def is_text_line(compo_a, compo_b):
        (col_min_a, row_min_a, col_max_a, row_max_a) = compo_a.put_bbox()
        (col_min_b, row_min_b, col_max_b, row_max_b) = compo_b.put_bbox()

        # on the same line: the row ranges must overlap
        if max(row_min_a, row_min_b) >= min(row_max_a, row_max_b):
            return False
        # the column ranges overlap
        if max(col_min_a, col_min_b) < min(col_max_a, col_max_b):
            return True
        # side by side with a small positive gap, in either order
        return (0 < col_min_b - col_max_a < max_word_gad) or \
               (0 < col_min_a - col_max_b < max_word_gad)

    # Iterating here (rather than recursing without the keyword arguments)
    # keeps the caller's thresholds in force for every pass, not just the first.
    while True:
        changed = False
        new_compos = []
        for compo in compos:
            # ignore non-text
            if compo.height > max_word_height:
                new_compos.append(compo)
                continue
            for target in new_compos:
                if is_text_line(compo, target):
                    target.compo_merge(compo)
                    changed = True
                    break
            else:
                new_compos.append(compo)

        if not changed:
            return compos
        compos = new_compos
def rm_top_or_bottom_corners(components, org_shape, top_bottom_height=C.THRESHOLD_TOP_BOTTOM_BAR):
    '''
    Discard components lying entirely inside the top or the bottom bar area.

    :param components: iterable of components exposing put_bbox() ->
                       (column_min, row_min, column_max, row_max)
    :param org_shape: shape of the original image; its first entry is the height
    :param top_bottom_height: (top, bottom) fractions of the image height that
                              bound the two bar areas
    :return: new list with the remaining components
    '''
    height, _ = org_shape[:2]
    top_limit = height * top_bottom_height[0]
    bottom_limit = height * top_bottom_height[1]

    kept = []
    for compo in components:
        _, row_min, _, row_max = compo.put_bbox()
        # ends above the top limit, or starts below the bottom limit
        if row_max < top_limit or row_min > bottom_limit:
            continue
        kept.append(compo)
    return kept
def rm_line_v_h(binary, show=False, max_line_thickness=C.THRESHOLD_LINE_THICKNESS):
    '''
    Subtract thin horizontal and vertical line bands from a binary image, in place.

    A row (or column) is line-like when it contains an unbroken run of
    non-zero pixels longer than 60% of the image width (or height).
    Consecutive line-like rows/columns form a band; a band spanning fewer than
    max_line_thickness rows/columns is copied into a line map, and that map is
    finally subtracted from binary.  For row bands only the run of each row is
    copied, for column bands the whole columns are.

    NOTE(review): a band that is still open at the last row/column is never
    copied, because no flush follows either loop.

    :param binary: 2-D image array, modified in place (assumed uint8 0/255 - TODO confirm)
    :param show: if true, display the input, the result and the line map, print
                 the detected row bands, and block on cv2.waitKey()
    :param max_line_thickness: bands at least this thick are left untouched
    :return: None
    '''
    def check_continuous_line(line, edge):
        # Return [start, end) of the first non-zero run longer than 60% of edge, else None.
        continuous_length = 0
        line_start = -1
        for j, p in enumerate(line):
            if p > 0:
                if line_start == -1:
                    line_start = j
                continuous_length += 1
            elif continuous_length > 0:
                if continuous_length / edge > 0.6:
                    return [line_start, j]
                continuous_length = 0
                line_start = -1

        # the run may extend to the very end of the line
        if continuous_length / edge > 0.6:
            return [line_start, len(line)]
        return None

    def extract_line_area(line, start_idx, flag='v'):
        # Copy each recorded run of a band from binary into map_line.
        for e, l in enumerate(line):
            if flag == 'v':
                map_line[start_idx + e, l[0]:l[1]] = binary[start_idx + e, l[0]:l[1]]

    map_line = np.zeros(binary.shape[:2], dtype=np.uint8)
    # debug window only on request, so the function also runs without a display
    if show:
        cv2.imshow('binary', binary)

    # pass 1: bands of line-like rows
    width = binary.shape[1]
    start_row = -1
    line_area = []
    for i, row in enumerate(binary):
        line_v = check_continuous_line(row, width)
        if line_v is not None:
            # new line
            if start_row == -1:
                start_row = i
                line_area = []
            line_area.append(line_v)
        else:
            # checking line
            if start_row != -1:
                if i - start_row < max_line_thickness:
                    if show:
                        print(line_area, start_row, i)
                    extract_line_area(line_area, start_row)
                start_row = -1

    # pass 2: bands of line-like columns
    height = binary.shape[0]
    start_col = -1
    for i in range(width):
        col = binary[:, i]
        line_h = check_continuous_line(col, height)
        if line_h is not None:
            # new line
            if start_col == -1:
                start_col = i
        else:
            # checking line
            if start_col != -1:
                if i - start_col < max_line_thickness:
                    map_line[:, start_col: i] = binary[:, start_col: i]
                start_col = -1

    binary -= map_line

    if show:
        cv2.imshow('no-line', binary)
        cv2.imshow('lines', map_line)
        cv2.waitKey()
def rm_line(binary,
            max_line_thickness=C.THRESHOLD_LINE_THICKNESS,
            min_line_length_ratio=C.THRESHOLD_LINE_MIN_LENGTH,
            show=False, wait_key=0):
    '''
    Blank out thin, (almost) full-width horizontal lines of a binary image, in place.

    A row is line-like when more than 95% of its pixels are non-zero and no
    gap between non-zero pixels is wider than 5 pixels.  A band of line-like
    rows thinner than max_line_thickness is set to 0 once more than
    max_line_thickness rows have passed since it ended, or when the image
    ends first.

    :param binary: 2-D image array, modified in place
    :param max_line_thickness: upper bound (exclusive) of a band's thickness, also
                               the number of rows waited before the band is cleared
    :param min_line_length_ratio: NOTE(review): accepted but not read - the 0.95
                                  ratio is hard-coded in is_valid_line; confirm
                                  whether it was meant to replace that constant
    :param show: if true, display the result in the window 'no-line binary'
    :param wait_key: delay passed to cv2.waitKey when not None; with 0 the window
                     is destroyed after the key press
    :return: None
    '''
    def is_valid_line(line):
        line_length = 0
        line_gap = 0
        for j in line:
            if j > 0:
                # a gap wider than 5 pixels inside the run disqualifies the row
                if line_gap > 5:
                    return False
                line_length += 1
                line_gap = 0
            elif line_length > 0:
                line_gap += 1
        return line_length / width > 0.95

    height, width = binary.shape[:2]

    start_row, end_row = -1, -1
    check_line = False   # currently inside a band of line-like rows
    check_gap = False    # a thin band has ended and waits to be cleared
    for i, row in enumerate(binary):
        if is_valid_line(row):
            # new start: if it is checking a new line, mark this row as start
            if not check_line:
                start_row = i
                check_line = True
        else:
            # end the line
            if check_line:
                # thin enough to be a line, then start checking gap
                if i - start_row < max_line_thickness:
                    end_row = i
                    check_gap = True
                else:
                    start_row, end_row = -1, -1
                check_line = False
        # check gap
        if check_gap and i - end_row > max_line_thickness:
            binary[start_row: end_row] = 0
            start_row, end_row = -1, -1
            check_line = False
            check_gap = False

    if (check_line and (height - start_row) < max_line_thickness) or check_gap:
        # A band that reaches the bottom edge has no end_row yet (it is still -1);
        # slicing to -1 would leave the last row in place, so clear through the
        # final row instead.
        stop_row = end_row if check_gap else height
        binary[start_row: stop_row] = 0

    if show:
        cv2.imshow('no-line binary', binary)
        if wait_key is not None:
            cv2.waitKey(wait_key)
        if wait_key == 0:
            cv2.destroyWindow('no-line binary')
def rm_noise_compos(compos):
    '''
    Return the components whose category is not 'Noise', in their original order.

    :param compos: iterable of components exposing a category attribute
    :return: new list without the 'Noise' components
    '''
    return [compo for compo in compos if compo.category != 'Noise']
def rm_noise_in_large_img(compos, org,
                          max_compo_scale=C.THRESHOLD_COMPO_MAX_SCALE):
    '''
    Drop every component listed in the contain attribute of an 'Image' component.

    :param compos: list of components; contain is read as a collection of
                   indices into this list
    :param org: original image; only the first two entries of org.shape are unpacked
    :param max_compo_scale: accepted for interface compatibility; not read
    :return: new list with the remaining components in their original order
    '''
    _rows, _cols = org.shape[:2]
    keep = np.ones(len(compos), dtype=bool)
    for compo in compos:
        if compo.category != 'Image':
            continue
        for idx in compo.contain:
            keep[idx] = False
    return [compo for compo, kept in zip(compos, keep) if kept]
def detect_compos_in_img(compos, binary, org, max_compo_scale=C.THRESHOLD_COMPO_MAX_SCALE, show=False,
                         min_obj_area=1):
    '''
    Detect rectangular components inside every 'Image' component and append
    them to compos in place.

    The binary map is clipped to each 'Image' component, inverted with
    pre.reverse_binary and searched by component_detection.  A detected
    rectangle is kept when its bbox area is below 80% of the enclosing
    component's bbox area and its bbox is more than 20 px wide and high; its
    position is shifted by the enclosing component's top-left corner first.

    :param compos: list of components; extended in place with the new ones
    :param binary: binary map of the whole image
    :param org: original image; accepted for interface compatibility, not read
    :param max_compo_scale: accepted for interface compatibility; not read
    :param show: forwarded to pre.reverse_binary
    :param min_obj_area: minimum flood-filled area forwarded to component_detection,
                         which requires it; the default of 1 only rejects empty
                         fills, the size filter above does the real pruning
    :return: None
    '''
    compos_new = []
    for compo in compos:
        if compo.category != 'Image':
            continue
        compo.compo_update_bbox_area()
        bin_clip = compo.compo_clipping(binary)
        bin_clip = pre.reverse_binary(bin_clip, show=show)

        # component_detection has no default for min_obj_area, so it must be passed
        compos_rec, compos_nonrec = component_detection(bin_clip, min_obj_area, test=False,
                                                        step_h=10, step_v=10, rec_detect=True)
        for compo_rec in compos_rec:
            compo_rec.compo_relative_position(compo.bbox.col_min, compo.bbox.row_min)
            if compo_rec.bbox_area / compo.bbox_area < 0.8 and compo_rec.bbox.height > 20 and compo_rec.bbox.width > 20:
                compos_new.append(compo_rec)

    # extend only after the loop so the new components are not re-examined
    compos += compos_new
def compo_filter(compos, min_area, img_shape):
    '''
    Drop components that are too small, too tall or too elongated.

    A component is removed when any of the following holds:
      - its area is below min_area
      - its height exceeds 80% of the image height
      - its width or height is not positive (degenerate, would divide by zero)
      - width / height > 50 or height / width > 40
      - its shorter side is below 8 and its aspect ratio exceeds 10

    :param compos: iterable of components exposing area, width and height
    :param min_area: minimum area a component needs to be kept
    :param img_shape: shape of the image; its first entry is the height
    :return: new list with the remaining components in their original order
    '''
    max_height = img_shape[0] * 0.8
    compos_new = []
    for compo in compos:
        if compo.area < min_area:
            continue
        if compo.height > max_height:
            continue
        # a zero-sized side means an infinitely elongated shape: filter it out
        # instead of failing on the ratio computation below
        if compo.width <= 0 or compo.height <= 0:
            continue
        ratio_h = compo.width / compo.height
        ratio_w = compo.height / compo.width
        if ratio_h > 50 or ratio_w > 40 or \
                (min(compo.height, compo.width) < 8 and max(ratio_h, ratio_w) > 10):
            continue
        compos_new.append(compo)
    return compos_new
def is_block(clip, thread=0.15):
    '''
    Block is a rectangle border enclosing a group of compos (consider it as a wireframe)
    Check if a compo is block by checking if the inner side of its border is blank

    Four lines are probed just inside each border (indices 5..8 from the top
    and the left, -5..-8 from the bottom and the right).  A probed line is
    occupied when its pixel sum / 255 exceeds thread times the line length;
    more than two occupied lines on any side means the clip is not a block.

    :param clip: 2-D binary clip of the component (assumed 0/255 values - TODO confirm)
    :param thread: occupancy threshold, as a fraction of the probed line's length
    :return: True if the inner side of all four borders is blank, else False;
             False as well when the clip is too small to be probed
    '''
    rows, cols = clip.shape[0], clip.shape[1]
    # the probes reach index 8 on both axes: a smaller clip has no inner margin
    # to inspect and cannot be a wireframe
    if rows < 9 or cols < 9:
        return False

    def occupied(band, axis, limit):
        # widen before summing so uint8 pixel values cannot overflow
        pixels_per_line = band.astype(np.int64).sum(axis=axis) / 255
        return np.count_nonzero(pixels_per_line > limit) > 2

    row_limit = thread * cols
    col_limit = thread * rows

    # top border - rows 5..8
    if occupied(clip[5:9], 1, row_limit):
        return False
    # left border - columns 5..8
    if occupied(clip[:, 5:9], 0, col_limit):
        return False
    # bottom border - rows -8..-5
    if occupied(clip[-8:-4], 1, row_limit):
        return False
    # right border - columns -8..-5
    if occupied(clip[:, -8:-4], 0, col_limit):
        return False
    return True
def compo_block_recognition(binary, compos, block_side_length=0.15):
    '''
    Tag large components that look like an empty frame with category 'Block'.

    Only components whose height and width both exceed block_side_length of
    the image height and width are clipped from binary and passed to is_block.

    :param binary: 2-D binary map of the whole image
    :param compos: iterable of components; category is updated in place
    :param block_side_length: minimum side length as a fraction of the image side
    :return: None
    '''
    img_height, img_width = binary.shape
    for compo in compos:
        if not compo.height / img_height > block_side_length:
            continue
        if not compo.width / img_width > block_side_length:
            continue
        if is_block(compo.compo_clipping(binary)):
            compo.category = 'Block'
# take the binary image as input
# calculate the connected regions -> get the bounding boundaries of them -> check if those regions are rectangles
# return all boundaries and boundaries of rectangles
def component_detection(binary, min_obj_area,
                        line_thickness=C.THRESHOLD_LINE_THICKNESS,
                        min_rec_evenness=C.THRESHOLD_REC_MIN_EVENNESS,
                        max_dent_ratio=C.THRESHOLD_REC_MAX_DENT_RATIO,
                        step_h = 5, step_v = 2,
                        rec_detect=False, show=False, test=False):
    """
    Collect the connected foreground regions of a binary image as Components.

    The image is sampled on a grid; every sampled pixel that equals 255 and is
    not yet covered by an earlier fill seeds a mask-only flood fill, and the
    newly filled pixels become one Component.

    :param binary: Binary image from pre-processing (foreground pixels are 255)
    :param min_obj_area: Regions whose flood-filled area is below this are ignored
    :param line_thickness: Not read by the active code (the line check is disabled)
    :param min_rec_evenness: Forwarded to compo_is_rectangle when rec_detect is set
    :param max_dent_ratio: Forwarded to compo_is_rectangle when rec_detect is set
    :param step_h: Sampling step between rows
    :param step_v: Sampling step between columns (odd rows start at column 1)
    :param rec_detect: If true, classify every component as rectangle / non-rectangle
    :param show: If true, print the area and draw all components found so far
                 after each accepted component
    :param test: If true, print the area and draw each accepted component on its own
    :return: (compos_rec, compos_nonrec) if rec_detect else compos_all
    """
    # floodFill needs a mask one pixel larger than the image on every side
    mask = np.zeros((binary.shape[0] + 2, binary.shape[1] + 2), dtype=np.uint8)
    compos_all = []
    compos_rec = []
    compos_nonrec = []
    row, column = binary.shape[0], binary.shape[1]
    for i in range(0, row, step_h):
        for j in range(i % 2, column, step_v):
            # image pixel (i, j) lives at mask[i + 1, j + 1] because of the
            # one-pixel padding (the same offset is stripped again below)
            if binary[i, j] != 255 or mask[i + 1, j + 1] != 0:
                continue

            # get connected area: the mask difference isolates this fill
            mask_before = mask.copy()
            ff = cv2.floodFill(binary, mask, (j, i), None, 0, 0, cv2.FLOODFILL_MASK_ONLY)
            if ff[0] < min_obj_area:
                continue
            filled = cv2.findNonZero((mask - mask_before)[1:-1, 1:-1])
            if filled is None:
                # nothing was filled (only possible with min_obj_area <= 0)
                continue
            # findNonZero yields (x, y); regions are stored as (row, column)
            region = [(p[1], p[0]) for p in np.reshape(filled, (-1, 2))]

            component = Component(region, binary.shape)
            # ignore small area
            if component.width <= 3 or component.height <= 3:
                continue

            if test:
                print('Area:%d' % (len(region)))
                draw.draw_boundary([component], binary.shape, show=True)

            compos_all.append(component)

            if rec_detect:
                # rectangle check
                if component.compo_is_rectangle(min_rec_evenness, max_dent_ratio):
                    component.rect_ = True
                    compos_rec.append(component)
                else:
                    component.rect_ = False
                    compos_nonrec.append(component)

            if show:
                print('Area:%d' % (len(region)))
                draw.draw_boundary(compos_all, binary.shape, show=True)

    if rec_detect:
        return compos_rec, compos_nonrec
    else:
        return compos_all
def nested_components_detection(grey, org, grad_thresh,
                                show=False, write_path=None,
                                step_h=10, step_v=10,
                                line_thickness=C.THRESHOLD_LINE_THICKNESS,
                                min_rec_evenness=C.THRESHOLD_REC_MIN_EVENNESS,
                                max_dent_ratio=C.THRESHOLD_REC_MAX_DENT_RATIO):
    '''
    Detect rectangular layout regions by flood-filling the grey-scale image.

    The image is sampled on a grid; each sampled pixel not yet covered by an
    earlier fill seeds a mask-only flood fill with tolerance grad_thresh.
    A filled region becomes a component unless its filled area is below 500
    pixels, it is less than 30 high, it covers more than 90% of the image, or
    it is a line or not a rectangle.  Regions covering more than 70% of the
    image are kept but flagged with redundant = True.

    :param grey: grey-scale of original image
    :param org: original image; accepted for interface compatibility, not read
    :param grad_thresh: lower and upper difference passed to cv2.floodFill
    :param show: if true, display the two drawing boards and wait for a key
    :param write_path: if not None, the 'block' board is written to this path
    :param step_h: sampling step between rows
    :param step_v: sampling step between columns
    :param line_thickness: forwarded to compo_is_line
    :param min_rec_evenness: forwarded to compo_is_rectangle
    :param max_dent_ratio: forwarded to compo_is_rectangle
    :return: list of the detected components
    '''
    compos = []
    # floodFill needs a mask one pixel larger than the image on every side
    mask = np.zeros((grey.shape[0]+2, grey.shape[1]+2), dtype=np.uint8)
    # NOTE(review): nothing in the active code draws on these boards (the
    # draw_region calls are commented out), so they are shown/written blank
    broad = np.zeros((grey.shape[0], grey.shape[1], 3), dtype=np.uint8)
    broad_all = broad.copy()

    row, column = grey.shape[0], grey.shape[1]
    image_area = row * column
    for x in range(0, row, step_h):
        for y in range(0, column, step_v):
            # image pixel (x, y) lives at mask[x + 1, y + 1] because of the
            # one-pixel padding (the same offset is stripped again below)
            if mask[x + 1, y + 1] != 0:
                continue

            # flood fill algorithm to get background (layout block);
            # the mask difference isolates this fill
            mask_before = mask.copy()
            ff = cv2.floodFill(grey, mask, (y, x), None, grad_thresh, grad_thresh, cv2.FLOODFILL_MASK_ONLY)
            # ignore small regions
            if ff[0] < 500:
                continue
            filled = cv2.findNonZero((mask - mask_before)[1:-1, 1:-1])
            # findNonZero yields (x, y); regions are stored as (row, column)
            region = [(p[1], p[0]) for p in np.reshape(filled, (-1, 2))]

            compo = Component(region, grey.shape)
            if compo.height < 30:
                continue

            if compo.area / image_area > 0.9:
                continue
            elif compo.area / image_area > 0.7:
                compo.redundant = True

            # ignore lines
            if compo.compo_is_line(line_thickness):
                continue
            # ignore non-rectangle as blocks must be rectangular
            if not compo.compo_is_rectangle(min_rec_evenness, max_dent_ratio):
                continue
            compos.append(compo)

    if show:
        cv2.imshow('flood-fill all', broad_all)
        cv2.imshow('block', broad)
        cv2.waitKey()
    if write_path is not None:
        cv2.imwrite(write_path, broad)
    return compos