Spaces:
Running
Running
File size: 8,743 Bytes
a383d0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 |
import json
import cv2
import numpy as np
from os.path import join as pjoin
import os
import time
import shutil
from detect_merge.Element import Element
def show_elements(org_img, eles, show=False, win_name='element', wait_key=0, shown_resize=None, line=2):
    '''
    Draw every element on a copy of the image and optionally display it.

    :param org_img: image to annotate; it is copied, so the caller's array is not drawn on
    :param eles: elements exposing ``category`` and ``visualize_element(img, color, line)``
    :param show: when true, open an OpenCV window showing the result
    :param win_name: title of that window
    :param wait_key: delay handed to ``cv2.waitKey``; with 0 the window is destroyed
                     once ``waitKey`` returns
    :param shown_resize: optional target size handed to ``cv2.resize``
    :param line: line thickness forwarded to ``visualize_element``
    :return: the annotated image, resized when ``shown_resize`` is given
    '''
    color_map = {'Text': (0, 0, 255), 'Compo': (0, 255, 0), 'Block': (0, 255, 0), 'Text Content': (255, 0, 255)}
    # Any category missing from the map used to raise KeyError and abort the
    # whole visualisation; draw such elements with the component colour instead.
    default_color = color_map['Compo']

    img = org_img.copy()
    for ele in eles:
        ele.visualize_element(img, color_map.get(ele.category, default_color), line)

    img_resize = img if shown_resize is None else cv2.resize(img, shown_resize)

    if show:
        cv2.imshow(win_name, img_resize)
        cv2.waitKey(wait_key)
        if wait_key == 0:
            cv2.destroyWindow(win_name)
    return img_resize
def save_elements(output_file, elements, img_shape):
    '''
    Serialise the elements to a JSON file and return the same structure.

    :param output_file: path of the JSON file to (over)write
    :param elements: elements exposing ``wrap_info()``, whose result is stored per element
    :param img_shape: image shape stored under the ``'img_shape'`` key
    :return: dict of the form ``{'compos': [...], 'img_shape': img_shape}``
    '''
    components = {'compos': [ele.wrap_info() for ele in elements], 'img_shape': img_shape}
    # Use a context manager so the handle is flushed and closed even when
    # serialisation fails; the original left the file object open.
    with open(output_file, 'w') as f:
        json.dump(components, f, indent=4)
    return components
def reassign_ids(elements):
    '''
    Renumber the elements in place so that each ``id`` equals its list position.
    '''
    position = 0
    for item in elements:
        item.id = position
        position += 1
def refine_texts(texts, img_shape, max_height_ratio=0.075, min_content_length=2):
    '''
    Filter out text elements that look like detection noise.

    A text is kept when its content has at least ``min_content_length``
    characters and its height is less than ``max_height_ratio`` of the image
    height.

    :param texts: elements exposing ``text_content`` and ``height``
    :param img_shape: image shape; only ``img_shape[0]`` (the height) is read
    :param max_height_ratio: exclusive upper bound on ``height / img_shape[0]``
    :param min_content_length: minimum number of characters in ``text_content``
    :return: new list with the surviving texts in their original order
    '''
    img_height = img_shape[0]
    return [
        text for text in texts
        if len(text.text_content) >= min_content_length
        and text.height / img_height < max_height_ratio
    ]
def merge_text_line_to_paragraph(elements, max_line_gap=5):
    '''
    Fuse 'Text' elements whose boxes intersect once a vertical tolerance is applied.

    Elements are split into texts and non-texts. Passes over the texts are
    repeated until one completes without any merge; in each pass a text is
    merged (via ``element_merge``) into the first already-kept text it
    intersects, using ``bias=(0, max_line_gap)`` for the intersection test.

    :param elements: elements exposing ``category``; texts also need
                     ``calc_intersection_area`` and ``element_merge``
    :param max_line_gap: second component of the bias given to ``calc_intersection_area``
    :return: non-text elements followed by the remaining (merged) texts
    '''
    paragraphs = []
    others = []
    for item in elements:
        (paragraphs if item.category == 'Text' else others).append(item)

    while True:
        merged_any = False
        kept = []
        for candidate in paragraphs:
            for anchor in kept:
                overlap, _iou, _ioa, _iob = candidate.calc_intersection_area(anchor, bias=(0, max_line_gap))
                if overlap > 0:
                    # absorb the candidate into the text that was kept earlier
                    anchor.element_merge(candidate)
                    merged_any = True
                    break
            else:
                # nothing kept so far touches this text
                kept.append(candidate)
        paragraphs = kept
        if not merged_any:
            break
    return others + paragraphs
def refine_elements(compos, texts, intersection_bias=(2, 2), containment_ratio=0.8):
    '''
    Reconcile non-text components with detected texts.

    1. remove compos contained in text (``ioa >= containment_ratio``)
    2. remove compos whose intersecting text area reaches ``containment_ratio`` of their own area
    3. drop a text from the result when a *kept* non-'Block' compo contains it
       (``iob >= containment_ratio``)

    :param compos: non-text elements exposing ``category``, ``area`` and
                   ``calc_intersection_area(text, bias=...)`` returning ``(inter, iou, ioa, iob)``
    :param texts: text elements
    :param intersection_bias: bias forwarded to ``calc_intersection_area``
    :param containment_ratio: threshold shared by the three rules above
    :return: kept compos followed by the texts that were not absorbed
    '''
    elements = []
    absorbed_texts = []
    for compo in compos:
        is_valid = True
        text_area = 0
        # Texts inside this compo are collected locally and only committed when
        # the compo is kept. Recording them in the shared list straight away
        # discarded texts whose container was itself removed, losing both.
        inside_compo = []
        for text in texts:
            inter, iou, ioa, iob = compo.calc_intersection_area(text, bias=intersection_bias)
            if not inter > 0:
                continue
            # the non-text is contained in the text compo
            if ioa >= containment_ratio:
                is_valid = False
                break
            text_area += inter
            # the text is contained in the non-text compo
            if iob >= containment_ratio and compo.category != 'Block':
                inside_compo.append(text)
        if is_valid and text_area / compo.area < containment_ratio:
            elements.append(compo)
            absorbed_texts.extend(inside_compo)

    for text in texts:
        if text not in absorbed_texts:
            elements.append(text)
    return elements
def check_containment(elements):
    '''
    Link parents and children for every unordered pair of elements, in place.

    For each pair (earlier, later) in list order, ``earlier.element_relation(later, bias=(2, 2))``
    is evaluated:
      -1 -> earlier is appended to later's children and gets later's id as parent_id
       1 -> later is appended to earlier's children and gets earlier's id as parent_id
    Any other value leaves the pair untouched.
    '''
    for index, earlier in enumerate(elements[:-1]):
        for later in elements[index + 1:]:
            relation = earlier.element_relation(later, bias=(2, 2))
            if relation == -1:
                later.children.append(earlier)
                earlier.parent_id = later.id
            elif relation == 1:
                earlier.children.append(later)
                later.parent_id = earlier.id
def remove_top_bar(elements, img_height, max_row_min=10, max_height_ratio=0.04):
    '''
    Drop thin elements that sit at the very top of the image.

    An element is removed when ``row_min < max_row_min`` and its height is less
    than ``img_height * max_height_ratio``.

    :param elements: elements exposing ``row_min`` and ``height``
    :param img_height: height of the image the element coordinates refer to
    :param max_row_min: exclusive bound on ``row_min`` for an element to count as "at the top"
    :param max_height_ratio: exclusive bound on the element height, as a fraction of ``img_height``
    :return: new list without the removed elements, original order preserved
    '''
    max_height = img_height * max_height_ratio
    return [
        ele for ele in elements
        if not (ele.row_min < max_row_min and ele.height < max_height)
    ]
def remove_bottom_bar(elements, img_height):
    '''
    Drop small, roughly square elements located near the bottom of the image.

    The reference thresholds (row_min > 750, height and width within 20..30)
    were tuned for an 800-pixel-high GUI. They are scaled by ``img_height / 800``
    so the filter keeps working on other image heights; at 800 the thresholds
    are exactly the reference values.

    :param elements: elements exposing ``row_min``, ``height`` and ``width``
    :param img_height: height of the image the element coordinates refer to
    :return: new list without the removed elements, original order preserved
    '''
    # parameters for 800-height GUI, rescaled to the actual image height
    scale = img_height / 800
    min_row = 750 * scale
    min_side = 20 * scale
    max_side = 30 * scale
    return [
        ele for ele in elements
        if not (ele.row_min > min_row
                and min_side <= ele.height <= max_side
                and min_side <= ele.width <= max_side)
    ]
def compos_clip_and_fill(clip_root, org, compos):
    '''
    Save a crop of every component and a background image with the components painted out.

    ``clip_root`` is deleted if it exists and then recreated. Every component
    whose class is not 'Background' is cropped from ``org`` and written to
    ``<clip_root>/<class>/<id>.jpg``; the path is stored in ``compo['path']``.
    'Background' entries only receive the path of ``bkg.png``. Finally a copy of
    ``org`` in which every cropped box is filled with the dominant colour of
    its surroundings is written to ``<clip_root>/bkg.png``.

    :param clip_root: output directory (wiped before use)
    :param org: image array indexed as [row, column, channel]; the first three
                channels are read and must hold non-negative integers (np.bincount)
    :param compos: dicts with 'class', 'id' and 'position' keys, where 'position'
                   holds 'column_min', 'row_min', 'column_max' and 'row_max';
                   each dict gains a 'path' key
    '''
    def most_pix_around(col_min, row_min, col_max, row_max, pad=6, offset=2):
        '''
        determine the filled background color according to the most surrounding pixel
        '''
        up = max(row_min - pad, 0)
        left = max(col_min - pad, 0)
        bottom = min(row_max + pad, org.shape[0] - 1)
        right = min(col_max + pad, org.shape[1] - 1)
        # Clamp the inner edges at 0. For a box closer than `offset` to the top
        # or left border the slice stop became negative, wrapped around and
        # sampled almost the whole image (including the component itself).
        inner_top = max(row_min - offset, 0)
        inner_left = max(col_min - offset, 0)
        inner_bottom = row_max + offset
        inner_right = col_max + offset

        most = []
        for channel in range(3):
            ring = np.concatenate((org[up:inner_top, left:right, channel].flatten(),
                                   org[inner_bottom:bottom, left:right, channel].flatten(),
                                   org[up:bottom, left:inner_left, channel].flatten(),
                                   org[up:bottom, inner_right:right, channel].flatten()))
            if ring.size == 0:
                # No pixels around the box (e.g. it spans the whole image):
                # np.argmax would fail on an empty histogram, so fall back to
                # the dominant value of the entire channel.
                ring = org[:, :, channel].flatten()
            most.append(int(np.argmax(np.bincount(ring))))
        return most

    if os.path.exists(clip_root):
        shutil.rmtree(clip_root)
    # makedirs also creates missing parent directories
    os.makedirs(clip_root)

    bkg = org.copy()
    created_dirs = set()
    for compo in compos:
        cls = compo['class']
        if cls == 'Background':
            compo['path'] = pjoin(clip_root, 'bkg.png')
            continue

        c_root = pjoin(clip_root, cls)
        c_path = pjoin(c_root, str(compo['id']) + '.jpg')
        compo['path'] = c_path
        if cls not in created_dirs:
            os.mkdir(c_root)
            created_dirs.add(cls)

        position = compo['position']
        col_min, row_min = position['column_min'], position['row_min']
        col_max, row_max = position['column_max'], position['row_max']
        cv2.imwrite(c_path, org[row_min:row_max, col_min:col_max])
        # Fill up the background area
        fill_color = most_pix_around(col_min, row_min, col_max, row_max)
        cv2.rectangle(bkg, (col_min, row_min), (col_max, row_max), fill_color, -1)
    cv2.imwrite(pjoin(clip_root, 'bkg.png'), bkg)
def merge(img_path, compo_path, text_path, merge_root=None, is_paragraph=False, is_remove_bar=True, show=False, wait_key=0):
    '''
    Merge detected non-text components and texts of one image and save the result.

    :param img_path: path of the input image
    :param compo_path: JSON file with a 'compos' list (each entry holding 'column_min',
                       'row_min', 'column_max', 'row_max' and 'class') and an 'img_shape'
    :param text_path: JSON file with a 'texts' list (each entry holding the four box
                      keys and 'content') and an 'img_shape'
    :param merge_root: output directory; although it defaults to None it must be a
                       path, because the JSON and the annotated image are always
                       written to it
    :param is_paragraph: also merge neighbouring text lines into paragraphs
    :param is_remove_bar: filter out top-bar and bottom-bar elements
    :param show: display the elements before and after merging
    :param wait_key: delay forwarded to the display windows
    :return: ``(board, components)`` - the annotated image and the saved element dict
    '''
    # Context managers close the files; the original leaked both handles.
    with open(compo_path, 'r') as compo_file:
        compo_json = json.load(compo_file)
    with open(text_path, 'r') as text_file:
        text_json = json.load(text_file)

    # load text and non-text compo; ids run consecutively, compos first
    compos = [
        Element(idx, (c['column_min'], c['row_min'], c['column_max'], c['row_max']), c['class'])
        for idx, c in enumerate(compo_json['compos'])
    ]
    texts = [
        Element(idx, (t['column_min'], t['row_min'], t['column_max'], t['row_max']), 'Text', text_content=t['content'])
        for idx, t in enumerate(text_json['texts'], start=len(compos))
    ]
    if compo_json['img_shape'] != text_json['img_shape']:
        # bring the texts into the coordinate space of the components
        resize_ratio = compo_json['img_shape'][0] / text_json['img_shape'][0]
        for text in texts:
            text.resize(resize_ratio)

    # check the original detected elements
    img_height, img_width = compo_json['img_shape'][0], compo_json['img_shape'][1]
    img = cv2.imread(img_path)
    img_resize = cv2.resize(img, (img_width, img_height))
    show_elements(img_resize, texts + compos, show=show, win_name='all elements before merging', wait_key=wait_key)

    # refine elements
    texts = refine_texts(texts, compo_json['img_shape'])
    elements = refine_elements(compos, texts)
    if is_remove_bar:
        elements = remove_top_bar(elements, img_height=img_height)
        elements = remove_bottom_bar(elements, img_height=img_height)
    if is_paragraph:
        elements = merge_text_line_to_paragraph(elements, max_line_gap=7)
    reassign_ids(elements)
    check_containment(elements)
    board = show_elements(img_resize, elements, show=show, win_name='elements after merging', wait_key=wait_key)

    # save all merged elements, clips and blank background
    # splitext handles extensions of any length ('.jpeg', none at all), whereas
    # slicing off the last four characters only worked for three-letter ones.
    file_name = img_path.replace('\\', '/').split('/')[-1]
    name = os.path.splitext(file_name)[0]
    components = save_elements(pjoin(merge_root, name + '.json'), elements, img_resize.shape)
    cv2.imwrite(pjoin(merge_root, name + '.jpg'), board)
    print('[Merge Completed] Input: %s Output: %s' % (img_path, pjoin(merge_root, name + '.jpg')))
    return board, components
|