Spaces:
Running
Running
Delete utils.py
Browse files
utils.py
DELETED
@@ -1,140 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from datasets import load_metric
|
3 |
-
from PIL import ImageDraw, ImageFont
|
4 |
-
import pandas as pd
|
5 |
-
|
6 |
-
|
7 |
-
# Module-level seqeval metric object, loaded once when this module is imported.
# NOTE(review): no function visible in this file references `metric` -- confirm
# that importers actually use it before paying the load cost at import time.
# NOTE(review): `datasets.load_metric` is reportedly deprecated upstream in
# favour of the separate `evaluate` package -- verify against the pinned
# `datasets` version.
metric = load_metric("seqeval")
|
8 |
-
|
9 |
-
|
10 |
-
def unnormalize_box(bbox, width, height):
    """Scale a box from the 0-1000 grid to ``width`` x ``height`` units.

    Positions 0 and 2 of ``bbox`` are multiplied by ``width / 1000`` and
    positions 1 and 3 by ``height / 1000``.

    Args:
        bbox: Indexable with at least four numeric entries.
        width: Scale applied to entries 0 and 2.
        height: Scale applied to entries 1 and 3.

    Returns:
        A new four-element list of scaled values.
    """
    # Even positions pair with the width, odd positions with the height.
    extents = (width, height, width, height)
    return [extent * (bbox[i] / 1000) for i, extent in enumerate(extents)]
|
17 |
-
|
18 |
-
|
19 |
-
def normalize_box(bbox, width, height):
    """Map a box onto the integer 0-1000 grid.

    Positions 0 and 2 of ``bbox`` are divided by ``width`` and positions 1
    and 3 by ``height``; each ratio is multiplied by 1000 and truncated
    with ``int``.

    Args:
        bbox: Indexable with at least four numeric entries.
        width: Divisor for entries 0 and 2.
        height: Divisor for entries 1 and 3.

    Returns:
        A new four-element list of ints.
    """
    # Even positions pair with the width, odd positions with the height.
    extents = (width, height, width, height)
    return [int((bbox[i] / extent) * 1000) for i, extent in enumerate(extents)]
|
26 |
-
|
27 |
-
|
28 |
-
def draw_output(image, true_predictions, true_boxes):
    """Outline each box in red on ``image`` and caption it with its label.

    Predictions and boxes are paired positionally with ``zip``, so surplus
    items in the longer sequence are ignored.

    Args:
        image: Image handed to ``ImageDraw.Draw``.
        true_predictions: Label per box; a falsy label is shown as 'other'.
        true_boxes: Boxes passed to ``rectangle``; entries 0 and 1 of each
            box position the caption.

    Returns:
        The same ``image`` object that was passed in.
    """
    canvas = ImageDraw.Draw(image)
    default_font = ImageFont.load_default()

    for raw_label, rect in zip(true_predictions, true_boxes):
        # Falsy predictions (empty string, None) fall back to 'other'.
        caption = (raw_label or 'other').lower()
        canvas.rectangle(rect, outline='red')
        # Caption sits 10 to the right of and 10 above the box's first corner.
        anchor = (rect[0] + 10, rect[1] - 10)
        canvas.text(anchor, text=caption, fill='red', font=default_font)

    return image
|
53 |
-
|
54 |
-
|
55 |
-
# Labels whose text is passed through ``reformat`` before being stored.
_REFORMATTED_LABELS = (
    'AMOUNT', 'UNIT', 'UDISCOUNT', 'UPRICE', 'SUB_TPRICE',
    'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
    'RECEMONEY', 'REMAMONEY',
)

# Labels whose rows are tagged with the running product index.
_PRODUCT_LINE_LABELS = ('AMOUNT', 'SUB_TPRICE', 'UPRICE', 'PRODUCT_NAME')


def create_df(true_texts,
              true_predictions,
              chosen_labels=('SHOP_NAME', 'ADDR', 'TITLE', 'PHONE',
                             'PRODUCT_NAME', 'AMOUNT', 'UNIT', 'UPRICE',
                             'SUB_TPRICE', 'UDISCOUNT',
                             'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
                             'RECEMONEY', 'REMAMONEY',
                             'BILLID', 'DATETIME', 'CASHIER')
              ):
    """Build a DataFrame of the kept (text, label) pairs.

    Texts and predictions are paired positionally with ``zip``. Pairs whose
    prediction is not in ``chosen_labels`` are dropped. Every
    'PRODUCT_NAME' prediction advances a running product index (starting
    at 0); rows labelled PRODUCT_NAME, AMOUNT, UPRICE or SUB_TPRICE carry
    the current index in ``product_id`` and all other rows carry ``''``.
    Texts of the numeric-style labels are passed through ``reformat``.

    Args:
        true_texts: Iterable of texts.
        true_predictions: Iterable of predicted labels, parallel to
            ``true_texts``.
        chosen_labels: Container of labels to keep. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        A ``pandas.DataFrame`` with columns ``text``, ``class_label`` and
        ``product_id``, in input order.
    """
    data = {'text': [], 'class_label': [], 'product_id': []}
    product_id = -1  # becomes 0 at the first PRODUCT_NAME

    for text, prediction in zip(true_texts, true_predictions):
        if prediction not in chosen_labels:
            continue

        if prediction == 'PRODUCT_NAME':
            product_id += 1

        if prediction in _REFORMATTED_LABELS:
            text = reformat(text)

        # Only per-product fields are linked to a product; the rest get ''.
        if prediction in _PRODUCT_LINE_LABELS:
            data['product_id'].append(product_id)
        else:
            data['product_id'].append('')

        data['class_label'].append(prediction)
        data['text'].append(text)

    return pd.DataFrame(data)
|
93 |
-
|
94 |
-
|
95 |
-
# Deletion table for the separator characters stripped before integer parsing.
_SEPARATOR_TABLE = str.maketrans('', '', '.,:/|\\)(-;_')


def reformat(text: str):
    """Strip separator characters from ``text`` and parse it as an int.

    The characters ``. , : / | \\ ) ( - ; _`` are removed, then the result
    is handed to ``int``.

    Args:
        text: Text to clean. A non-string value is returned unchanged.

    Returns:
        The parsed ``int`` when the stripped text is a valid integer;
        otherwise the stripped text (not the original) as a ``str``.
    """
    if not isinstance(text, str):
        # Nothing to strip or parse; hand the value back untouched.
        return text

    text = text.translate(_SEPARATOR_TABLE)
    try:
        return int(text)
    except ValueError:
        # Not a plain integer (e.g. contains letters or is empty).
        return text
|
102 |
-
|
103 |
-
def _first_text_for_label(product_info, label, missing_message):
    """Return the first ``text`` value labelled ``label``, or ``''`` if none.

    ``missing_message`` is printed when no row carries the label.
    """
    values = product_info.loc[product_info['class_label'] == label, 'text']
    if values.empty:
        print(missing_message)
        return ''
    return values.iloc[0]


def find_product(product_name, df):
    """Look up a product by name and summarise its amount and prices.

    The search is a case-insensitive *literal* substring match against the
    ``text`` of rows labelled 'PRODUCT_NAME', so characters such as ``(``,
    ``+`` or ``.`` in ``product_name`` are matched as written and are not
    interpreted as a regular expression. The first matching row decides
    the ``product_id``; the first AMOUNT, UPRICE and SUB_TPRICE rows with
    that id fill in the summary, and a missing field is reported with a
    printed message and left blank.

    Args:
        product_name: Name, or part of a name, to search for.
        df: DataFrame with ``text``, ``class_label`` and ``product_id``
            columns.

    Returns:
        A formatted multi-line summary string, or the "not found" message
        when no product name contains ``product_name``.
    """
    needle = product_name.lower()
    product_df = df[df['class_label'] == 'PRODUCT_NAME']
    # regex=False: the query is user text, not a pattern.
    mask = product_df['text'].str.lower().str.contains(
        needle, regex=False, na=False)

    if not mask.any():
        return "Không tìm thấy item nào phù hợp."

    product_id = product_df.loc[mask, 'product_id'].iloc[0]
    product_info = df[df['product_id'] == product_id]

    prod_name = product_info.loc[
        product_info['class_label'] == 'PRODUCT_NAME', 'text'].iloc[0]
    amount = _first_text_for_label(
        product_info, 'AMOUNT', "Error: cannot find amount")
    uprice = _first_text_for_label(
        product_info, 'UPRICE', "Error: cannot find unit price")
    sub_tprice = _first_text_for_label(
        product_info, 'SUB_TPRICE', "Error: cannot find sub total price")

    return f"Sản phẩm: {prod_name}\n Số lượng: {amount}\n Đơn giá: {uprice}\n Thành tiền: {sub_tprice}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|