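"""
Evaluate UIED component detections against Rico COCO-style ground truth:
match detected boxes to ground-truth boxes by IoU / IoD and report TP, FP,
FN, precision, recall and F1, bucketed by box width (<64 px, 64-127 px,
>=128 px after rescaling everything to a 1024 px-tall canvas).
"""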
import json
import numpy as np
import cv2
from glob import glob
from os.path import join as pjoin, basename
from tqdm import tqdm
def resize_label(bboxes, d_height, gt_height, bias=0):
    bboxes_new = []
    scale = gt_height / d_height
    for bbox in bboxes:
        bbox = [int(b * scale + bias) for b in bbox]
        bboxes_new.append(bbox)
    return bboxes_new
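# Quick check of resize_label (hypothetical numbers): scaling an 800 px-tall
# layout to the 1024 px evaluation height multiplies every coordinate by 1.28,
# so resize_label([[10, 20, 30, 40]], 800, 1024) returns [[12, 25, 38, 51]].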
def draw_bounding_box(org, corners, color=(0, 255, 0), line=2, show=False):
    """Draw rectangles on a copy of org resized to 608x1024."""
    board = cv2.resize(org, (608, 1024))
    for i in range(len(corners)):
        board = cv2.rectangle(board, (corners[i][0], corners[i][1]),
                              (corners[i][2], corners[i][3]), color, line)
    if show:
        cv2.imshow('a', board)
        cv2.waitKey(0)
    return board
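# Minimal usage sketch for draw_bounding_box (the image path is hypothetical):
# img = cv2.imread('screenshot.jpg')
# draw_bounding_box(img, [[10, 20, 110, 220]], color=(255, 0, 0), show=True)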
def load_detect_result_json(result_file_root, shrink=3):
    """Load detection JSONs, drop tiny boxes and boxes in the status-bar /
    navigation-bar bands, and shrink each remaining box by `shrink` px."""
    def is_bottom_or_top(corner):
        # Boxes entirely above row 36 or below row 725 sit in the status bar
        # or navigation bar of the resized screenshot and are ignored.
        column_min, row_min, column_max, row_max = corner
        return row_max < 36 or row_min > 725

    result_files = glob(pjoin(result_file_root, '*.json'))
    compos_reform = {}
    print('Loading %d detection results' % len(result_files))
    for result_file in tqdm(result_files):
        img_name = basename(result_file).split('.')[0]
        with open(result_file, 'r') as f:
            compos = json.load(f)['compos']
        for compo in compos:
            # Skip components narrower or shorter than 10 px
            if compo['column_max'] - compo['column_min'] < 10 or compo['row_max'] - compo['row_min'] < 10:
                continue
            if is_bottom_or_top((compo['column_min'], compo['row_min'], compo['column_max'], compo['row_max'])):
                continue
            bbox = [compo['column_min'] + shrink, compo['row_min'] + shrink,
                    compo['column_max'] - shrink, compo['row_max'] - shrink]
            if img_name not in compos_reform:
                compos_reform[img_name] = {'bboxes': [bbox], 'categories': [compo['category']]}
            else:
                compos_reform[img_name]['bboxes'].append(bbox)
                compos_reform[img_name]['categories'].append(compo['category'])
    return compos_reform
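# Each detection JSON is expected to follow this shape (a minimal, hypothetical
# example inferred from the keys read above):
# {"compos": [{"column_min": 10, "row_min": 50, "column_max": 200,
#              "row_max": 120, "category": "TextView"}]}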
def load_ground_truth_json(gt_file):
    """Load COCO-style ground truth and convert each [x, y, w, h] box to
    [col_min, row_min, col_max, row_max]."""
    def get_img_by_id(img_id):
        for image in images:
            if image['id'] == img_id:
                return image['file_name'].split('/')[-1][:-4], (image['height'], image['width'])

    def cvt_bbox(bbox):
        """
        :param bbox: [x, y, width, height]
        :return: [col_min, row_min, col_max, row_max]
        """
        bbox = [int(b) for b in bbox]
        return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]

    with open(gt_file, 'r') as f:
        data = json.load(f)
    images = data['images']
    annots = data['annotations']
    compos = {}
    print('Loading %d ground truth annotations' % len(annots))
    for annot in tqdm(annots):
        img_name, size = get_img_by_id(annot['image_id'])
        if img_name not in compos:
            compos[img_name] = {'bboxes': [cvt_bbox(annot['bbox'])],
                                'categories': [annot['category_id']], 'size': size}
        else:
            compos[img_name]['bboxes'].append(cvt_bbox(annot['bbox']))
            compos[img_name]['categories'].append(annot['category_id'])
    return compos
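# The ground truth file is COCO-style; a minimal, hypothetical example with
# only the fields this loader reads:
# {"images": [{"id": 1, "file_name": "combined/100.jpg",
#              "height": 2560, "width": 1440}],
#  "annotations": [{"image_id": 1, "bbox": [24, 80, 300, 48], "category_id": 14}]}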
def evaluate(detection, ground_truth, img_root, show=True, no_text=False, only_text=False):
    """Match detections to ground truth per image and print TP/FP/FN,
    precision, recall and F1, each bucketed by box width."""
    def compo_filter(compos, flag):
        # Optionally keep only text components (only_text) or drop them
        # (no_text): detections label text 'TextView', the ground truth
        # uses category id 14.
        if not no_text and not only_text:
            return compos
        compos_new = {'bboxes': [], 'categories': []}
        for k, category in enumerate(compos['categories']):
            if only_text:
                if flag == 'det' and category != 'TextView':
                    continue
                if flag == 'gt' and int(category) != 14:
                    continue
            elif no_text:
                if flag == 'det' and category == 'TextView':
                    continue
                if flag == 'gt' and int(category) == 14:
                    continue
            compos_new['bboxes'].append(compos['bboxes'][k])
            compos_new['categories'].append(category)
        return compos_new
    def match(org, d_bbox, gt_bboxes, matched):
        """
        :param org: image the boxes belong to (only used by the debug drawing below)
        :param d_bbox: detected box [col_min, row_min, col_max, row_max]
        :param gt_bboxes: list of ground truth boxes [[col_min, row_min, col_max, row_max]]
        :param matched: per-ground-truth flags; 1 means not yet matched
        :return: (True, size) if IoU > 0.9 or the detection is fully contained
                 by a ground truth box (IoD == 1); otherwise (False, -1)
        """
        area_d = (d_bbox[2] - d_bbox[0]) * (d_bbox[3] - d_bbox[1])
        size = -1
        for i, gt_bbox in enumerate(gt_bboxes):
            if matched[i] == 0:
                continue
            area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[1])
            col_min = max(d_bbox[0], gt_bbox[0])
            row_min = max(d_bbox[1], gt_bbox[1])
            col_max = min(d_bbox[2], gt_bbox[2])
            row_max = min(d_bbox[3], gt_bbox[3])
            # If the boxes do not intersect, the intersection area is 0
            w = max(0, col_max - col_min)
            h = max(0, row_max - row_min)
            area_inter = w * h
            if area_inter == 0:
                continue
            iod = area_inter / area_d
            iou = area_inter / (area_d + area_gt - area_inter)
            # if show:
            #     cv2.putText(org, str(round(iou, 2)) + ',' + str(round(iod, 2)),
            #                 (d_bbox[0], d_bbox[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            if iou > 0.9 or iod == 1:
                # Bucket the matched ground truth by width: <64, 64-127, >=128 px
                gt_w = gt_bbox[2] - gt_bbox[0]
                if gt_w < 64:
                    size = 0
                elif gt_w < 128:
                    size = 1
                else:
                    size = 2
                matched[i] = 0
                return True, size
        return False, size
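    # Worked example for match (hypothetical boxes): d_bbox = [0, 0, 10, 10]
    # against gt_bbox = [5, 5, 15, 15] gives a 5x5 intersection, so
    # IoU = 25 / (100 + 100 - 25) ~= 0.14 and IoD = 25 / 100 = 0.25; neither
    # clears the IoU > 0.9 / IoD == 1 bar, so the detection is not matched.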
    amount = len(detection)
    TP, FP, FN = [0, 0, 0], [0, 0, 0], [0, 0, 0]
    for i, image_id in enumerate(detection):
        if image_id not in ground_truth:
            continue
        img = cv2.imread(pjoin(img_root, image_id + '.jpg'))
        d_compos = detection[image_id]
        gt_compos = ground_truth[image_id]
        org_height = gt_compos['size'][0]

        d_compos = compo_filter(d_compos, 'det')
        gt_compos = compo_filter(gt_compos, 'gt')

        # Detection ran on 800 px-tall images; rescale detections and ground
        # truth to a common 1024 px height before matching.
        d_compos['bboxes'] = resize_label(d_compos['bboxes'], 800, 1024)
        gt_compos['bboxes'] = resize_label(gt_compos['bboxes'], org_height, 1024)

        matched = np.ones(len(gt_compos['bboxes']), dtype=int)
        for d_bbox in d_compos['bboxes']:
            m, size = match(img, d_bbox, gt_compos['bboxes'], matched)
            if m:
                TP[size] += 1
            else:
                # Unmatched detection: bucket the false positive by box width
                w = d_bbox[2] - d_bbox[0]
                if w < 64:
                    size = 0
                elif w < 128:
                    size = 1
                else:
                    size = 2
                FP[size] += 1

        # Ground truth boxes that no detection matched are false negatives
        gt_bboxes = gt_compos['bboxes']
        for k in range(len(matched)):
            if matched[k] == 1:
                w = gt_bboxes[k][2] - gt_bboxes[k][0]
                if w < 64:
                    size = 0
                elif w < 128:
                    size = 1
                else:
                    size = 2
                FN[size] += 1

        if show:
            print(image_id + '.jpg')
            # cv2.imshow('org', cv2.resize(img, (500, 1000)))
            board = draw_bounding_box(img, d_compos['bboxes'], color=(255, 0, 0), line=3)
            draw_bounding_box(board, gt_compos['bboxes'], color=(0, 0, 255), show=True, line=2)
        if i % 200 == 0:
            # Guard denominators: a size bucket may still be empty
            precision = [round(TP[j] / max(TP[j] + FP[j], 1), 3) for j in range(3)]
            recall = [round(TP[j] / max(TP[j] + FN[j], 1), 3) for j in range(3)]
            f1 = [round(2 * precision[j] * recall[j] / max(precision[j] + recall[j], 1e-9), 3) for j in range(3)]
            print('[%d/%d] TP:%s, FP:%s, FN:%s, Precision:%s, Recall:%s, F1:%s' % (
                i, amount, str(TP), str(FP), str(FN), str(precision), str(recall), str(f1)))

    precision = [round(TP[j] / max(TP[j] + FP[j], 1), 3) for j in range(3)]
    recall = [round(TP[j] / max(TP[j] + FN[j], 1), 3) for j in range(3)]
    f1 = [round(2 * precision[j] * recall[j] / max(precision[j] + recall[j], 1e-9), 3) for j in range(3)]
    print('[%d/%d] TP:%s, FP:%s, FN:%s, Precision:%s, Recall:%s, F1:%s' % (
        i, amount, str(TP), str(FP), str(FN), str(precision), str(recall), str(f1)))
if __name__ == '__main__':
    no_text = False
    only_text = False
    detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_v3\\merge')
    gt = load_ground_truth_json('E:\\Mulong\\Datasets\\rico\\instances_test.json')
    evaluate(detect, gt, 'E:\\Mulong\\Datasets\\rico\\combined', show=False, no_text=no_text, only_text=only_text)