"""Helpers for post-processing receipt token-classification output.

Contains bounding-box (un)normalisation helpers, a routine that draws
predicted labels on an image, and utilities that turn per-word predictions
into a pandas DataFrame and render human-readable summaries from it.
"""
import numpy as np
import pandas as pd
from datasets import load_metric
from PIL import ImageDraw, ImageFont

# Loaded at import time. Nothing in the functions below reads it.
# NOTE(review): presumably consumed by evaluation code elsewhere - confirm.
metric = load_metric("seqeval")

# Labels kept by create_df() when the caller does not pass its own selection.
# A tuple (not a list) so the shared default can never be mutated by accident.
_DEFAULT_LABELS = (
    'SHOP_NAME', 'ADDR', 'TITLE', 'PHONE', 'PRODUCT_NAME', 'AMOUNT', 'UNIT',
    'UPRICE', 'SUB_TPRICE', 'UDISCOUNT', 'TAMOUNT', 'TPRICE', 'FPRICE',
    'TDISCOUNT', 'RECEMONEY', 'REMAMONEY', 'BILLID', 'DATETIME', 'CASHIER',
)

# Labels whose text is passed through reformat() before being stored.
_REFORMATTED_LABELS = frozenset({
    'AMOUNT', 'UNIT', 'UDISCOUNT', 'UPRICE', 'SUB_TPRICE', 'TAMOUNT',
    'TPRICE', 'FPRICE', 'TDISCOUNT', 'RECEMONEY', 'REMAMONEY',
})

# Labels whose rows are tagged with the running product index.
_PRODUCT_LABELS = frozenset({'AMOUNT', 'SUB_TPRICE', 'UPRICE', 'PRODUCT_NAME'})

# Characters removed by reformat() before attempting the int conversion.
_STRIP_TABLE = str.maketrans('', '', '.,:/|\\)(-;_')

_NOT_FOUND_MESSAGE = "Không tìm thấy item nào phù hợp."


def unnormalize_box(bbox, width, height):
    """Scale a box from the 0-1000 coordinate space to pixel coordinates.

    Args:
        bbox: Sequence of four numbers; indices 0 and 2 are scaled by
            ``width``, indices 1 and 3 by ``height``.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        A list of four numbers in pixel coordinates (not rounded).
    """
    return [
        width * (bbox[0] / 1000),
        height * (bbox[1] / 1000),
        width * (bbox[2] / 1000),
        height * (bbox[3] / 1000),
    ]


def normalize_box(bbox, width, height):
    """Scale a pixel-coordinate box into the 0-1000 coordinate space.

    Inverse of :func:`unnormalize_box` up to integer truncation.

    Args:
        bbox: Sequence of four numbers in pixel coordinates.
        width: Image width in pixels (must be non-zero).
        height: Image height in pixels (must be non-zero).

    Returns:
        A list of four ints, each truncated towards zero.
    """
    return [
        int((bbox[0] / width) * 1000),
        int((bbox[1] / height) * 1000),
        int((bbox[2] / width) * 1000),
        int((bbox[3] / height) * 1000),
    ]


def draw_output(image, true_predictions, true_boxes):
    """Draw each box and its lower-cased label on ``image`` in red.

    The image is modified in place and also returned.

    Args:
        image: Image accepted by ``PIL.ImageDraw.Draw``.
        true_predictions: Iterable of label strings, one per box. Falsy
            entries (``None``, ``''``) are rendered as ``'other'``.
        true_boxes: Iterable of ``[x0, y0, x1, y1]`` boxes.
            NOTE(review): presumably already in pixel coordinates (e.g. via
            unnormalize_box) - confirm against the caller.

    Returns:
        The same ``image`` object, with rectangles and labels drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    for prediction, box in zip(true_predictions, true_boxes):
        caption = (prediction or 'other').lower()
        draw.rectangle(box, outline='red')
        # Offset the caption 10px right of and 10px above the box's corner.
        draw.text((box[0] + 10, box[1] - 10), text=caption, fill='red', font=font)

    return image


def create_df(true_texts, true_predictions, chosen_labels=_DEFAULT_LABELS):
    """Build a DataFrame of the recognised fields.

    Pairs of ``(text, prediction)`` whose prediction is not in
    ``chosen_labels`` are dropped. Every ``PRODUCT_NAME`` starts a new
    product; ``PRODUCT_NAME``/``AMOUNT``/``UPRICE``/``SUB_TPRICE`` rows carry
    the index of the current product so they can be grouped later.

    Args:
        true_texts: Iterable of recognised text fragments.
        true_predictions: Iterable of predicted labels, aligned with
            ``true_texts``.
        chosen_labels: Container of labels to keep.

    Returns:
        A DataFrame with columns ``text``, ``class_label`` and
        ``product_id``. ``product_id`` is an int for product-related rows
        (``-1`` if such a row precedes the first ``PRODUCT_NAME``) and ``''``
        for all other rows. Quantity/price-like texts are passed through
        :func:`reformat`.
    """
    data = {'text': [], 'class_label': [], 'product_id': []}
    product_id = -1

    for text, prediction in zip(true_texts, true_predictions):
        if prediction not in chosen_labels:
            continue
        if prediction == 'PRODUCT_NAME':
            product_id += 1
        if prediction in _REFORMATTED_LABELS:
            text = reformat(text)

        data['product_id'].append(product_id if prediction in _PRODUCT_LABELS else '')
        data['class_label'].append(prediction)
        data['text'].append(text)

    return pd.DataFrame(data)


def reformat(text: str):
    """Strip punctuation from ``text`` and convert it to ``int`` if possible.

    The characters ``. , : / | \\ ) ( - ; _`` are removed first (so a leading
    minus sign is dropped as well).

    Args:
        text: Raw recognised text.

    Returns:
        The integer value when the stripped text parses as an int; otherwise
        the *stripped* text. Non-string input is returned unchanged.
    """
    try:
        text = text.translate(_STRIP_TABLE)
        return int(text)
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: input is not a str; ValueError: not an int.
        return text


def _first_text(df, label, description):
    """Return the first ``text`` whose ``class_label`` equals ``label``.

    When there is no such row (or the expected columns are missing), prints
    ``"Error: cannot find <description>"`` and returns ``''``.
    """
    try:
        return df.loc[df['class_label'] == label, 'text'].iloc[0]
    except (IndexError, KeyError):
        print(f"Error: cannot find {description}")
        return ''


def find_product(product_name, df):
    """Describe the first product whose name contains ``product_name``.

    Matching is a case-insensitive *literal* substring search, so names
    containing regex metacharacters (``+``, ``(``, ``.`` ...) are safe.

    Args:
        product_name: Text to look for inside ``PRODUCT_NAME`` rows.
        df: DataFrame shaped like the output of :func:`create_df`.

    Returns:
        A multi-line summary (name, amount, unit price, sub-total) of the
        first matching product, or a "not found" message. Missing amount /
        unit price / sub-total values are reported on stdout and rendered as
        empty strings.
    """
    needle = product_name.lower()
    product_df = df[df['class_label'] == 'PRODUCT_NAME']

    # Without product rows there is nothing to match; this also avoids using
    # the .str accessor on a purely numeric 'text' column, which would raise.
    if product_df.empty:
        return _NOT_FOUND_MESSAGE

    mask = product_df['text'].str.lower().str.contains(needle, regex=False, na=False)
    if not mask.any():
        return _NOT_FOUND_MESSAGE

    product_id = product_df.loc[mask, 'product_id'].iloc[0]
    product_info = df[df['product_id'] == product_id]

    prod_name = product_info.loc[product_info['class_label'] == 'PRODUCT_NAME', 'text'].iloc[0]
    amount = _first_text(product_info, 'AMOUNT', 'amount')
    uprice = _first_text(product_info, 'UPRICE', 'unit price')
    sub_tprice = _first_text(product_info, 'SUB_TPRICE', 'sub total price')

    return f"Sản phẩm: {prod_name}\n Số lượng: {amount}\n Đơn giá: {uprice}\n Thành tiền: {sub_tprice}"


def get_info(df):
    """Print and return the receipt header information.

    Looks up the first ``SHOP_NAME``, ``ADDR``, ``BILLID``, ``DATETIME`` and
    ``CASHIER`` rows. Each value is printed as it is resolved; a missing
    field is reported on stdout and rendered as an empty string.

    Args:
        df: DataFrame shaped like the output of :func:`create_df`.

    Returns:
        A multi-line string with one ``caption: value`` entry per field.
    """
    # (class label, wording used in the error message, caption in the output)
    fields = (
        ('SHOP_NAME', 'shop name', 'Tên siêu thị'),
        ('ADDR', 'address', 'Địa chỉ'),
        ('BILLID', 'bill id', 'ID hóa đơn'),
        ('DATETIME', 'date and time', 'Ngày'),
        ('CASHIER', 'cashier', 'Nhân viên'),
    )

    entries = []
    for label, description, caption in fields:
        value = _first_text(df, label, description)
        print(f"{caption}: ", value)
        entries.append(f"{caption}: {value}")

    return "\n ".join(entries) + "\n"