|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | import copy | 
					
						
						|  | import time | 
					
						
						|  | import os | 
					
						
						|  |  | 
					
						
						|  | from huggingface_hub import snapshot_download | 
					
						
						|  |  | 
					
						
						|  | from api.utils.file_utils import get_project_base_directory | 
					
						
						|  | from .operators import * | 
					
						
						|  | import numpy as np | 
					
						
						|  | import onnxruntime as ort | 
					
						
						|  |  | 
					
						
						|  | from .postprocess import build_post_process | 
					
						
						|  | from rag.settings import cron_logger | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def transform(data, ops=None): | 
					
						
						|  | """ transform """ | 
					
						
						|  | if ops is None: | 
					
						
						|  | ops = [] | 
					
						
						|  | for op in ops: | 
					
						
						|  | data = op(data) | 
					
						
						|  | if data is None: | 
					
						
						|  | return None | 
					
						
						|  | return data | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def create_operators(op_param_list, global_config=None): | 
					
						
						|  | """ | 
					
						
						|  | create operators based on the config | 
					
						
						|  |  | 
					
						
						|  | Args: | 
					
						
						|  | params(list): a dict list, used to create some operators | 
					
						
						|  | """ | 
					
						
						|  | assert isinstance( | 
					
						
						|  | op_param_list, list), ('operator config should be a list') | 
					
						
						|  | ops = [] | 
					
						
						|  | for operator in op_param_list: | 
					
						
						|  | assert isinstance(operator, | 
					
						
						|  | dict) and len(operator) == 1, "yaml format error" | 
					
						
						|  | op_name = list(operator)[0] | 
					
						
						|  | param = {} if operator[op_name] is None else operator[op_name] | 
					
						
						|  | if global_config is not None: | 
					
						
						|  | param.update(global_config) | 
					
						
						|  | op = eval(op_name)(**param) | 
					
						
						|  | ops.append(op) | 
					
						
						|  | return ops | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def load_model(model_dir, nm): | 
					
						
						|  | model_file_path = os.path.join(model_dir, nm + ".onnx") | 
					
						
						|  | if not os.path.exists(model_file_path): | 
					
						
						|  | raise ValueError("not find model file path {}".format( | 
					
						
						|  | model_file_path)) | 
					
						
						|  |  | 
					
						
						|  | options = ort.SessionOptions() | 
					
						
						|  | options.enable_cpu_mem_arena = False | 
					
						
						|  | options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL | 
					
						
						|  | options.intra_op_num_threads = 2 | 
					
						
						|  | options.inter_op_num_threads = 2 | 
					
						
						|  | if False and ort.get_device() == "GPU": | 
					
						
						|  | sess = ort.InferenceSession( | 
					
						
						|  | model_file_path, | 
					
						
						|  | options=options, | 
					
						
						|  | providers=['CUDAExecutionProvider']) | 
					
						
						|  | else: | 
					
						
						|  | sess = ort.InferenceSession( | 
					
						
						|  | model_file_path, | 
					
						
						|  | options=options, | 
					
						
						|  | providers=['CPUExecutionProvider']) | 
					
						
						|  | return sess, sess.get_inputs()[0] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | class TextRecognizer(object): | 
					
						
						|  | def __init__(self, model_dir): | 
					
						
						|  | self.rec_image_shape = [int(v) for v in "3, 48, 320".split(",")] | 
					
						
						|  | self.rec_batch_num = 16 | 
					
						
						|  | postprocess_params = { | 
					
						
						|  | 'name': 'CTCLabelDecode', | 
					
						
						|  | "character_dict_path": os.path.join(model_dir, "ocr.res"), | 
					
						
						|  | "use_space_char": True | 
					
						
						|  | } | 
					
						
						|  | self.postprocess_op = build_post_process(postprocess_params) | 
					
						
						|  | self.predictor, self.input_tensor = load_model(model_dir, 'rec') | 
					
						
						|  |  | 
					
						
						|  | def resize_norm_img(self, img, max_wh_ratio): | 
					
						
						|  | imgC, imgH, imgW = self.rec_image_shape | 
					
						
						|  |  | 
					
						
						|  | assert imgC == img.shape[2] | 
					
						
						|  | imgW = int((imgH * max_wh_ratio)) | 
					
						
						|  | w = self.input_tensor.shape[3:][0] | 
					
						
						|  | if isinstance(w, str): | 
					
						
						|  | pass | 
					
						
						|  | elif w is not None and w > 0: | 
					
						
						|  | imgW = w | 
					
						
						|  | h, w = img.shape[:2] | 
					
						
						|  | ratio = w / float(h) | 
					
						
						|  | if math.ceil(imgH * ratio) > imgW: | 
					
						
						|  | resized_w = imgW | 
					
						
						|  | else: | 
					
						
						|  | resized_w = int(math.ceil(imgH * ratio)) | 
					
						
						|  |  | 
					
						
						|  | resized_image = cv2.resize(img, (resized_w, imgH)) | 
					
						
						|  | resized_image = resized_image.astype('float32') | 
					
						
						|  | resized_image = resized_image.transpose((2, 0, 1)) / 255 | 
					
						
						|  | resized_image -= 0.5 | 
					
						
						|  | resized_image /= 0.5 | 
					
						
						|  | padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) | 
					
						
						|  | padding_im[:, :, 0:resized_w] = resized_image | 
					
						
						|  | return padding_im | 
					
						
						|  |  | 
					
						
						|  | def resize_norm_img_vl(self, img, image_shape): | 
					
						
						|  |  | 
					
						
						|  | imgC, imgH, imgW = image_shape | 
					
						
						|  | img = img[:, :, ::-1] | 
					
						
						|  | resized_image = cv2.resize( | 
					
						
						|  | img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) | 
					
						
						|  | resized_image = resized_image.astype('float32') | 
					
						
						|  | resized_image = resized_image.transpose((2, 0, 1)) / 255 | 
					
						
						|  | return resized_image | 
					
						
						|  |  | 
					
						
						|  | def resize_norm_img_srn(self, img, image_shape): | 
					
						
						|  | imgC, imgH, imgW = image_shape | 
					
						
						|  |  | 
					
						
						|  | img_black = np.zeros((imgH, imgW)) | 
					
						
						|  | im_hei = img.shape[0] | 
					
						
						|  | im_wid = img.shape[1] | 
					
						
						|  |  | 
					
						
						|  | if im_wid <= im_hei * 1: | 
					
						
						|  | img_new = cv2.resize(img, (imgH * 1, imgH)) | 
					
						
						|  | elif im_wid <= im_hei * 2: | 
					
						
						|  | img_new = cv2.resize(img, (imgH * 2, imgH)) | 
					
						
						|  | elif im_wid <= im_hei * 3: | 
					
						
						|  | img_new = cv2.resize(img, (imgH * 3, imgH)) | 
					
						
						|  | else: | 
					
						
						|  | img_new = cv2.resize(img, (imgW, imgH)) | 
					
						
						|  |  | 
					
						
						|  | img_np = np.asarray(img_new) | 
					
						
						|  | img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) | 
					
						
						|  | img_black[:, 0:img_np.shape[1]] = img_np | 
					
						
						|  | img_black = img_black[:, :, np.newaxis] | 
					
						
						|  |  | 
					
						
						|  | row, col, c = img_black.shape | 
					
						
						|  | c = 1 | 
					
						
						|  |  | 
					
						
						|  | return np.reshape(img_black, (c, row, col)).astype(np.float32) | 
					
						
						|  |  | 
					
						
						|  | def srn_other_inputs(self, image_shape, num_heads, max_text_length): | 
					
						
						|  |  | 
					
						
						|  | imgC, imgH, imgW = image_shape | 
					
						
						|  | feature_dim = int((imgH / 8) * (imgW / 8)) | 
					
						
						|  |  | 
					
						
						|  | encoder_word_pos = np.array(range(0, feature_dim)).reshape( | 
					
						
						|  | (feature_dim, 1)).astype('int64') | 
					
						
						|  | gsrm_word_pos = np.array(range(0, max_text_length)).reshape( | 
					
						
						|  | (max_text_length, 1)).astype('int64') | 
					
						
						|  |  | 
					
						
						|  | gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) | 
					
						
						|  | gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape( | 
					
						
						|  | [-1, 1, max_text_length, max_text_length]) | 
					
						
						|  | gsrm_slf_attn_bias1 = np.tile( | 
					
						
						|  | gsrm_slf_attn_bias1, | 
					
						
						|  | [1, num_heads, 1, 1]).astype('float32') * [-1e9] | 
					
						
						|  |  | 
					
						
						|  | gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape( | 
					
						
						|  | [-1, 1, max_text_length, max_text_length]) | 
					
						
						|  | gsrm_slf_attn_bias2 = np.tile( | 
					
						
						|  | gsrm_slf_attn_bias2, | 
					
						
						|  | [1, num_heads, 1, 1]).astype('float32') * [-1e9] | 
					
						
						|  |  | 
					
						
						|  | encoder_word_pos = encoder_word_pos[np.newaxis, :] | 
					
						
						|  | gsrm_word_pos = gsrm_word_pos[np.newaxis, :] | 
					
						
						|  |  | 
					
						
						|  | return [ | 
					
						
						|  | encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, | 
					
						
						|  | gsrm_slf_attn_bias2 | 
					
						
						|  | ] | 
					
						
						|  |  | 
					
						
						|  | def process_image_srn(self, img, image_shape, num_heads, max_text_length): | 
					
						
						|  | norm_img = self.resize_norm_img_srn(img, image_shape) | 
					
						
						|  | norm_img = norm_img[np.newaxis, :] | 
					
						
						|  |  | 
					
						
						|  | [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \ | 
					
						
						|  | self.srn_other_inputs(image_shape, num_heads, max_text_length) | 
					
						
						|  |  | 
					
						
						|  | gsrm_slf_attn_bias1 = gsrm_slf_attn_bias1.astype(np.float32) | 
					
						
						|  | gsrm_slf_attn_bias2 = gsrm_slf_attn_bias2.astype(np.float32) | 
					
						
						|  | encoder_word_pos = encoder_word_pos.astype(np.int64) | 
					
						
						|  | gsrm_word_pos = gsrm_word_pos.astype(np.int64) | 
					
						
						|  |  | 
					
						
						|  | return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, | 
					
						
						|  | gsrm_slf_attn_bias2) | 
					
						
						|  |  | 
					
						
						|  | def resize_norm_img_sar(self, img, image_shape, | 
					
						
						|  | width_downsample_ratio=0.25): | 
					
						
						|  | imgC, imgH, imgW_min, imgW_max = image_shape | 
					
						
						|  | h = img.shape[0] | 
					
						
						|  | w = img.shape[1] | 
					
						
						|  | valid_ratio = 1.0 | 
					
						
						|  |  | 
					
						
						|  | width_divisor = int(1 / width_downsample_ratio) | 
					
						
						|  |  | 
					
						
						|  | ratio = w / float(h) | 
					
						
						|  | resize_w = math.ceil(imgH * ratio) | 
					
						
						|  | if resize_w % width_divisor != 0: | 
					
						
						|  | resize_w = round(resize_w / width_divisor) * width_divisor | 
					
						
						|  | if imgW_min is not None: | 
					
						
						|  | resize_w = max(imgW_min, resize_w) | 
					
						
						|  | if imgW_max is not None: | 
					
						
						|  | valid_ratio = min(1.0, 1.0 * resize_w / imgW_max) | 
					
						
						|  | resize_w = min(imgW_max, resize_w) | 
					
						
						|  | resized_image = cv2.resize(img, (resize_w, imgH)) | 
					
						
						|  | resized_image = resized_image.astype('float32') | 
					
						
						|  |  | 
					
						
						|  | if image_shape[0] == 1: | 
					
						
						|  | resized_image = resized_image / 255 | 
					
						
						|  | resized_image = resized_image[np.newaxis, :] | 
					
						
						|  | else: | 
					
						
						|  | resized_image = resized_image.transpose((2, 0, 1)) / 255 | 
					
						
						|  | resized_image -= 0.5 | 
					
						
						|  | resized_image /= 0.5 | 
					
						
						|  | resize_shape = resized_image.shape | 
					
						
						|  | padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32) | 
					
						
						|  | padding_im[:, :, 0:resize_w] = resized_image | 
					
						
						|  | pad_shape = padding_im.shape | 
					
						
						|  |  | 
					
						
						|  | return padding_im, resize_shape, pad_shape, valid_ratio | 
					
						
						|  |  | 
					
						
						|  | def resize_norm_img_spin(self, img): | 
					
						
						|  | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) | 
					
						
						|  |  | 
					
						
						|  | img = cv2.resize(img, tuple([100, 32]), cv2.INTER_CUBIC) | 
					
						
						|  | img = np.array(img, np.float32) | 
					
						
						|  | img = np.expand_dims(img, -1) | 
					
						
						|  | img = img.transpose((2, 0, 1)) | 
					
						
						|  | mean = [127.5] | 
					
						
						|  | std = [127.5] | 
					
						
						|  | mean = np.array(mean, dtype=np.float32) | 
					
						
						|  | std = np.array(std, dtype=np.float32) | 
					
						
						|  | mean = np.float32(mean.reshape(1, -1)) | 
					
						
						|  | stdinv = 1 / np.float32(std.reshape(1, -1)) | 
					
						
						|  | img -= mean | 
					
						
						|  | img *= stdinv | 
					
						
						|  | return img | 
					
						
						|  |  | 
					
						
						|  | def resize_norm_img_svtr(self, img, image_shape): | 
					
						
						|  |  | 
					
						
						|  | imgC, imgH, imgW = image_shape | 
					
						
						|  | resized_image = cv2.resize( | 
					
						
						|  | img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) | 
					
						
						|  | resized_image = resized_image.astype('float32') | 
					
						
						|  | resized_image = resized_image.transpose((2, 0, 1)) / 255 | 
					
						
						|  | resized_image -= 0.5 | 
					
						
						|  | resized_image /= 0.5 | 
					
						
						|  | return resized_image | 
					
						
						|  |  | 
					
						
						|  | def resize_norm_img_abinet(self, img, image_shape): | 
					
						
						|  |  | 
					
						
						|  | imgC, imgH, imgW = image_shape | 
					
						
						|  |  | 
					
						
						|  | resized_image = cv2.resize( | 
					
						
						|  | img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) | 
					
						
						|  | resized_image = resized_image.astype('float32') | 
					
						
						|  | resized_image = resized_image / 255. | 
					
						
						|  |  | 
					
						
						|  | mean = np.array([0.485, 0.456, 0.406]) | 
					
						
						|  | std = np.array([0.229, 0.224, 0.225]) | 
					
						
						|  | resized_image = ( | 
					
						
						|  | resized_image - mean[None, None, ...]) / std[None, None, ...] | 
					
						
						|  | resized_image = resized_image.transpose((2, 0, 1)) | 
					
						
						|  | resized_image = resized_image.astype('float32') | 
					
						
						|  |  | 
					
						
						|  | return resized_image | 
					
						
						|  |  | 
					
						
						|  | def norm_img_can(self, img, image_shape): | 
					
						
						|  |  | 
					
						
						|  | img = cv2.cvtColor( | 
					
						
						|  | img, cv2.COLOR_BGR2GRAY) | 
					
						
						|  |  | 
					
						
						|  | if self.rec_image_shape[0] == 1: | 
					
						
						|  | h, w = img.shape | 
					
						
						|  | _, imgH, imgW = self.rec_image_shape | 
					
						
						|  | if h < imgH or w < imgW: | 
					
						
						|  | padding_h = max(imgH - h, 0) | 
					
						
						|  | padding_w = max(imgW - w, 0) | 
					
						
						|  | img_padded = np.pad(img, ((0, padding_h), (0, padding_w)), | 
					
						
						|  | 'constant', | 
					
						
						|  | constant_values=(255)) | 
					
						
						|  | img = img_padded | 
					
						
						|  |  | 
					
						
						|  | img = np.expand_dims(img, 0) / 255.0 | 
					
						
						|  | img = img.astype('float32') | 
					
						
						|  |  | 
					
						
						|  | return img | 
					
						
						|  |  | 
					
						
						|  | def __call__(self, img_list): | 
					
						
						|  | img_num = len(img_list) | 
					
						
						|  |  | 
					
						
						|  | width_list = [] | 
					
						
						|  | for img in img_list: | 
					
						
						|  | width_list.append(img.shape[1] / float(img.shape[0])) | 
					
						
						|  |  | 
					
						
						|  | indices = np.argsort(np.array(width_list)) | 
					
						
						|  | rec_res = [['', 0.0]] * img_num | 
					
						
						|  | batch_num = self.rec_batch_num | 
					
						
						|  | st = time.time() | 
					
						
						|  |  | 
					
						
						|  | for beg_img_no in range(0, img_num, batch_num): | 
					
						
						|  | end_img_no = min(img_num, beg_img_no + batch_num) | 
					
						
						|  | norm_img_batch = [] | 
					
						
						|  | imgC, imgH, imgW = self.rec_image_shape[:3] | 
					
						
						|  | max_wh_ratio = imgW / imgH | 
					
						
						|  |  | 
					
						
						|  | for ino in range(beg_img_no, end_img_no): | 
					
						
						|  | h, w = img_list[indices[ino]].shape[0:2] | 
					
						
						|  | wh_ratio = w * 1.0 / h | 
					
						
						|  | max_wh_ratio = max(max_wh_ratio, wh_ratio) | 
					
						
						|  | for ino in range(beg_img_no, end_img_no): | 
					
						
						|  | norm_img = self.resize_norm_img(img_list[indices[ino]], | 
					
						
						|  | max_wh_ratio) | 
					
						
						|  | norm_img = norm_img[np.newaxis, :] | 
					
						
						|  | norm_img_batch.append(norm_img) | 
					
						
						|  | norm_img_batch = np.concatenate(norm_img_batch) | 
					
						
						|  | norm_img_batch = norm_img_batch.copy() | 
					
						
						|  |  | 
					
						
						|  | input_dict = {} | 
					
						
						|  | input_dict[self.input_tensor.name] = norm_img_batch | 
					
						
						|  | for i in range(100000): | 
					
						
						|  | try: | 
					
						
						|  | outputs = self.predictor.run(None, input_dict) | 
					
						
						|  | break | 
					
						
						|  | except Exception as e: | 
					
						
						|  | if i >= 3: | 
					
						
						|  | raise e | 
					
						
						|  | time.sleep(5) | 
					
						
						|  | preds = outputs[0] | 
					
						
						|  | rec_result = self.postprocess_op(preds) | 
					
						
						|  | for rno in range(len(rec_result)): | 
					
						
						|  | rec_res[indices[beg_img_no + rno]] = rec_result[rno] | 
					
						
						|  |  | 
					
						
						|  | return rec_res, time.time() - st | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | class TextDetector(object): | 
					
						
						|  | def __init__(self, model_dir): | 
					
						
						|  | pre_process_list = [{ | 
					
						
						|  | 'DetResizeForTest': { | 
					
						
						|  | 'limit_side_len': 960, | 
					
						
						|  | 'limit_type': "max", | 
					
						
						|  | } | 
					
						
						|  | }, { | 
					
						
						|  | 'NormalizeImage': { | 
					
						
						|  | 'std': [0.229, 0.224, 0.225], | 
					
						
						|  | 'mean': [0.485, 0.456, 0.406], | 
					
						
						|  | 'scale': '1./255.', | 
					
						
						|  | 'order': 'hwc' | 
					
						
						|  | } | 
					
						
						|  | }, { | 
					
						
						|  | 'ToCHWImage': None | 
					
						
						|  | }, { | 
					
						
						|  | 'KeepKeys': { | 
					
						
						|  | 'keep_keys': ['image', 'shape'] | 
					
						
						|  | } | 
					
						
						|  | }] | 
					
						
						|  | postprocess_params = {"name": "DBPostProcess", "thresh": 0.3, "box_thresh": 0.5, "max_candidates": 1000, | 
					
						
						|  | "unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"} | 
					
						
						|  |  | 
					
						
						|  | self.postprocess_op = build_post_process(postprocess_params) | 
					
						
						|  | self.predictor, self.input_tensor = load_model(model_dir, 'det') | 
					
						
						|  |  | 
					
						
						|  | img_h, img_w = self.input_tensor.shape[2:] | 
					
						
						|  | if isinstance(img_h, str) or isinstance(img_w, str): | 
					
						
						|  | pass | 
					
						
						|  | elif img_h is not None and img_w is not None and img_h > 0 and img_w > 0: | 
					
						
						|  | pre_process_list[0] = { | 
					
						
						|  | 'DetResizeForTest': { | 
					
						
						|  | 'image_shape': [img_h, img_w] | 
					
						
						|  | } | 
					
						
						|  | } | 
					
						
						|  | self.preprocess_op = create_operators(pre_process_list) | 
					
						
						|  |  | 
					
						
						|  | def order_points_clockwise(self, pts): | 
					
						
						|  | rect = np.zeros((4, 2), dtype="float32") | 
					
						
						|  | s = pts.sum(axis=1) | 
					
						
						|  | rect[0] = pts[np.argmin(s)] | 
					
						
						|  | rect[2] = pts[np.argmax(s)] | 
					
						
						|  | tmp = np.delete(pts, (np.argmin(s), np.argmax(s)), axis=0) | 
					
						
						|  | diff = np.diff(np.array(tmp), axis=1) | 
					
						
						|  | rect[1] = tmp[np.argmin(diff)] | 
					
						
						|  | rect[3] = tmp[np.argmax(diff)] | 
					
						
						|  | return rect | 
					
						
						|  |  | 
					
						
						|  | def clip_det_res(self, points, img_height, img_width): | 
					
						
						|  | for pno in range(points.shape[0]): | 
					
						
						|  | points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) | 
					
						
						|  | points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) | 
					
						
						|  | return points | 
					
						
						|  |  | 
					
						
						|  | def filter_tag_det_res(self, dt_boxes, image_shape): | 
					
						
						|  | img_height, img_width = image_shape[0:2] | 
					
						
						|  | dt_boxes_new = [] | 
					
						
						|  | for box in dt_boxes: | 
					
						
						|  | if isinstance(box, list): | 
					
						
						|  | box = np.array(box) | 
					
						
						|  | box = self.order_points_clockwise(box) | 
					
						
						|  | box = self.clip_det_res(box, img_height, img_width) | 
					
						
						|  | rect_width = int(np.linalg.norm(box[0] - box[1])) | 
					
						
						|  | rect_height = int(np.linalg.norm(box[0] - box[3])) | 
					
						
						|  | if rect_width <= 3 or rect_height <= 3: | 
					
						
						|  | continue | 
					
						
						|  | dt_boxes_new.append(box) | 
					
						
						|  | dt_boxes = np.array(dt_boxes_new) | 
					
						
						|  | return dt_boxes | 
					
						
						|  |  | 
					
						
						|  | def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): | 
					
						
						|  | img_height, img_width = image_shape[0:2] | 
					
						
						|  | dt_boxes_new = [] | 
					
						
						|  | for box in dt_boxes: | 
					
						
						|  | if isinstance(box, list): | 
					
						
						|  | box = np.array(box) | 
					
						
						|  | box = self.clip_det_res(box, img_height, img_width) | 
					
						
						|  | dt_boxes_new.append(box) | 
					
						
						|  | dt_boxes = np.array(dt_boxes_new) | 
					
						
						|  | return dt_boxes | 
					
						
						|  |  | 
					
						
						|  | def __call__(self, img): | 
					
						
						|  | ori_im = img.copy() | 
					
						
						|  | data = {'image': img} | 
					
						
						|  |  | 
					
						
						|  | st = time.time() | 
					
						
						|  | data = transform(data, self.preprocess_op) | 
					
						
						|  | img, shape_list = data | 
					
						
						|  | if img is None: | 
					
						
						|  | return None, 0 | 
					
						
						|  | img = np.expand_dims(img, axis=0) | 
					
						
						|  | shape_list = np.expand_dims(shape_list, axis=0) | 
					
						
						|  | img = img.copy() | 
					
						
						|  | input_dict = {} | 
					
						
						|  | input_dict[self.input_tensor.name] = img | 
					
						
						|  | for i in range(100000): | 
					
						
						|  | try: | 
					
						
						|  | outputs = self.predictor.run(None, input_dict) | 
					
						
						|  | break | 
					
						
						|  | except Exception as e: | 
					
						
						|  | if i >= 3: | 
					
						
						|  | raise e | 
					
						
						|  | time.sleep(5) | 
					
						
						|  |  | 
					
						
						|  | post_result = self.postprocess_op({"maps": outputs[0]}, shape_list) | 
					
						
						|  | dt_boxes = post_result[0]['points'] | 
					
						
						|  | dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) | 
					
						
						|  |  | 
					
						
						|  | return dt_boxes, time.time() - st | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | class OCR(object): | 
					
						
						|  | def __init__(self, model_dir=None): | 
					
						
						|  | """ | 
					
						
						|  | If you have trouble downloading HuggingFace models, -_^ this might help!! | 
					
						
						|  |  | 
					
						
						|  | For Linux: | 
					
						
						|  | export HF_ENDPOINT=https://hf-mirror.com | 
					
						
						|  |  | 
					
						
						|  | For Windows: | 
					
						
						|  | Good luck | 
					
						
						|  | ^_- | 
					
						
						|  |  | 
					
						
						|  | """ | 
					
						
						|  | if not model_dir: | 
					
						
						|  | try: | 
					
						
						|  | model_dir = os.path.join( | 
					
						
						|  | get_project_base_directory(), | 
					
						
						|  | "rag/res/deepdoc") | 
					
						
						|  | self.text_detector = TextDetector(model_dir) | 
					
						
						|  | self.text_recognizer = TextRecognizer(model_dir) | 
					
						
						|  | except Exception as e: | 
					
						
						|  | model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc") | 
					
						
						|  | self.text_detector = TextDetector(model_dir) | 
					
						
						|  | self.text_recognizer = TextRecognizer(model_dir) | 
					
						
						|  |  | 
					
						
						|  | self.drop_score = 0.5 | 
					
						
						|  | self.crop_image_res_index = 0 | 
					
						
						|  |  | 
					
						
						|  | def get_rotate_crop_image(self, img, points): | 
					
						
						|  | ''' | 
					
						
						|  | img_height, img_width = img.shape[0:2] | 
					
						
						|  | left = int(np.min(points[:, 0])) | 
					
						
						|  | right = int(np.max(points[:, 0])) | 
					
						
						|  | top = int(np.min(points[:, 1])) | 
					
						
						|  | bottom = int(np.max(points[:, 1])) | 
					
						
						|  | img_crop = img[top:bottom, left:right, :].copy() | 
					
						
						|  | points[:, 0] = points[:, 0] - left | 
					
						
						|  | points[:, 1] = points[:, 1] - top | 
					
						
						|  | ''' | 
					
						
						|  | assert len(points) == 4, "shape of points must be 4*2" | 
					
						
						|  | img_crop_width = int( | 
					
						
						|  | max( | 
					
						
						|  | np.linalg.norm(points[0] - points[1]), | 
					
						
						|  | np.linalg.norm(points[2] - points[3]))) | 
					
						
						|  | img_crop_height = int( | 
					
						
						|  | max( | 
					
						
						|  | np.linalg.norm(points[0] - points[3]), | 
					
						
						|  | np.linalg.norm(points[1] - points[2]))) | 
					
						
						|  | pts_std = np.float32([[0, 0], [img_crop_width, 0], | 
					
						
						|  | [img_crop_width, img_crop_height], | 
					
						
						|  | [0, img_crop_height]]) | 
					
						
						|  | M = cv2.getPerspectiveTransform(points, pts_std) | 
					
						
						|  | dst_img = cv2.warpPerspective( | 
					
						
						|  | img, | 
					
						
						|  | M, (img_crop_width, img_crop_height), | 
					
						
						|  | borderMode=cv2.BORDER_REPLICATE, | 
					
						
						|  | flags=cv2.INTER_CUBIC) | 
					
						
						|  | dst_img_height, dst_img_width = dst_img.shape[0:2] | 
					
						
						|  | if dst_img_height * 1.0 / dst_img_width >= 1.5: | 
					
						
						|  | dst_img = np.rot90(dst_img) | 
					
						
						|  | return dst_img | 
					
						
						|  |  | 
					
						
						|  | def sorted_boxes(self, dt_boxes): | 
					
						
						|  | """ | 
					
						
						|  | Sort text boxes in order from top to bottom, left to right | 
					
						
						|  | args: | 
					
						
						|  | dt_boxes(array):detected text boxes with shape [4, 2] | 
					
						
						|  | return: | 
					
						
						|  | sorted boxes(array) with shape [4, 2] | 
					
						
						|  | """ | 
					
						
						|  | num_boxes = dt_boxes.shape[0] | 
					
						
						|  | sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) | 
					
						
						|  | _boxes = list(sorted_boxes) | 
					
						
						|  |  | 
					
						
						|  | for i in range(num_boxes - 1): | 
					
						
						|  | for j in range(i, -1, -1): | 
					
						
						|  | if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \ | 
					
						
						|  | (_boxes[j + 1][0][0] < _boxes[j][0][0]): | 
					
						
						|  | tmp = _boxes[j] | 
					
						
						|  | _boxes[j] = _boxes[j + 1] | 
					
						
						|  | _boxes[j + 1] = tmp | 
					
						
						|  | else: | 
					
						
						|  | break | 
					
						
						|  | return _boxes | 
					
						
						|  |  | 
					
						
						|  | def detect(self, img): | 
					
						
						|  | time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0} | 
					
						
						|  |  | 
					
						
						|  | if img is None: | 
					
						
						|  | return None, None, time_dict | 
					
						
						|  |  | 
					
						
						|  | start = time.time() | 
					
						
						|  | dt_boxes, elapse = self.text_detector(img) | 
					
						
						|  | time_dict['det'] = elapse | 
					
						
						|  |  | 
					
						
						|  | if dt_boxes is None: | 
					
						
						|  | end = time.time() | 
					
						
						|  | time_dict['all'] = end - start | 
					
						
						|  | return None, None, time_dict | 
					
						
						|  | else: | 
					
						
						|  | cron_logger.debug("dt_boxes num : {}, elapsed : {}".format( | 
					
						
						|  | len(dt_boxes), elapse)) | 
					
						
						|  |  | 
					
						
						|  | return zip(self.sorted_boxes(dt_boxes), [ | 
					
						
						|  | ("", 0) for _ in range(len(dt_boxes))]) | 
					
						
						|  |  | 
					
						
						|  | def recognize(self, ori_im, box): | 
					
						
						|  | img_crop = self.get_rotate_crop_image(ori_im, box) | 
					
						
						|  |  | 
					
						
						|  | rec_res, elapse = self.text_recognizer([img_crop]) | 
					
						
						|  | text, score = rec_res[0] | 
					
						
						|  | if score < self.drop_score: | 
					
						
						|  | return "" | 
					
						
						|  | return text | 
					
						
						|  |  | 
					
						
						|  | def __call__(self, img, cls=True): | 
					
						
						|  | time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0} | 
					
						
						|  |  | 
					
						
						|  | if img is None: | 
					
						
						|  | return None, None, time_dict | 
					
						
						|  |  | 
					
						
						|  | start = time.time() | 
					
						
						|  | ori_im = img.copy() | 
					
						
						|  | dt_boxes, elapse = self.text_detector(img) | 
					
						
						|  | time_dict['det'] = elapse | 
					
						
						|  |  | 
					
						
						|  | if dt_boxes is None: | 
					
						
						|  | end = time.time() | 
					
						
						|  | time_dict['all'] = end - start | 
					
						
						|  | return None, None, time_dict | 
					
						
						|  | else: | 
					
						
						|  | cron_logger.debug("dt_boxes num : {}, elapsed : {}".format( | 
					
						
						|  | len(dt_boxes), elapse)) | 
					
						
						|  | img_crop_list = [] | 
					
						
						|  |  | 
					
						
						|  | dt_boxes = self.sorted_boxes(dt_boxes) | 
					
						
						|  |  | 
					
						
						|  | for bno in range(len(dt_boxes)): | 
					
						
						|  | tmp_box = copy.deepcopy(dt_boxes[bno]) | 
					
						
						|  | img_crop = self.get_rotate_crop_image(ori_im, tmp_box) | 
					
						
						|  | img_crop_list.append(img_crop) | 
					
						
						|  |  | 
					
						
						|  | rec_res, elapse = self.text_recognizer(img_crop_list) | 
					
						
						|  |  | 
					
						
						|  | time_dict['rec'] = elapse | 
					
						
						|  | cron_logger.debug("rec_res num  : {}, elapsed : {}".format( | 
					
						
						|  | len(rec_res), elapse)) | 
					
						
						|  |  | 
					
						
						|  | filter_boxes, filter_rec_res = [], [] | 
					
						
						|  | for box, rec_result in zip(dt_boxes, rec_res): | 
					
						
						|  | text, score = rec_result | 
					
						
						|  | if score >= self.drop_score: | 
					
						
						|  | filter_boxes.append(box) | 
					
						
						|  | filter_rec_res.append(rec_result) | 
					
						
						|  | end = time.time() | 
					
						
						|  | time_dict['all'] = end - start | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | return list(zip([a.tolist() for a in filter_boxes], filter_rec_res)) | 
					
						
						|  |  |