Huy0502 committed on
Commit
2ce2db1
·
verified ·
1 Parent(s): b6ea1b2

Delete utils.py

Browse files
Files changed (1) hide show
  1. utils.py +0 -140
utils.py DELETED
@@ -1,140 +0,0 @@
1
- import numpy as np
2
- from datasets import load_metric
3
- from PIL import ImageDraw, ImageFont
4
- import pandas as pd
5
-
6
-
7
- metric = load_metric("seqeval")  # seqeval metric loaded once at import time; no function visible in this file references it
8
-
9
-
10
def unnormalize_box(bbox, width, height):
    """Rescale a box from the 0-1000 grid to an image of size width x height.

    Positions 0 and 2 of ``bbox`` are multiplied by ``width`` and positions
    1 and 3 by ``height`` after dividing by 1000 (presumably the layout is
    (x0, y0, x1, y1) -- confirm against callers).

    Returns a new four-element list; ``bbox`` itself is not modified.
    """
    # Extent used for each of the four positions, in order.
    extents = (width, height, width, height)
    return [extents[i] * (bbox[i] / 1000) for i in range(4)]
17
-
18
-
19
def normalize_box(bbox, width, height):
    """Map a box given in image units onto the integer 0-1000 grid.

    Positions 0 and 2 of ``bbox`` are divided by ``width`` and positions
    1 and 3 by ``height``; each ratio is multiplied by 1000 and truncated
    with ``int``.

    Returns a new four-element list of ints.  A zero ``width`` or ``height``
    raises ``ZeroDivisionError``.
    """
    # Extent used for each of the four positions, in order.
    extents = (width, height, width, height)
    return [int((bbox[i] / extents[i]) * 1000) for i in range(4)]
26
-
27
-
28
def draw_output(image, true_predictions, true_boxes):
    """Outline every box on ``image`` in red and write its label next to it.

    ``true_predictions`` and ``true_boxes`` are walked in lockstep with
    ``zip``, so surplus items in the longer one are ignored.  Each box is
    handed directly to ``ImageDraw.rectangle``, so it must already be in the
    image's own coordinates; no filtering or rescaling happens here.

    The drawing is done in place on ``image``, which is also returned.
    """
    canvas = ImageDraw.Draw(image)
    default_font = ImageFont.load_default()

    for label, box in zip(true_predictions, true_boxes):
        # A falsy prediction (empty string, None) is shown as 'other'.
        caption = (label or 'other').lower()
        canvas.rectangle(box, outline='red')
        # Caption goes slightly right of and above the box's first corner.
        anchor = (box[0] + 10, box[1] - 10)
        canvas.text(anchor, text=caption, fill='red', font=default_font)

    return image
53
-
54
-
55
# Labels kept by create_df when the caller does not pass chosen_labels.
# A tuple, so the default can never be mutated between calls.
_DEFAULT_CHOSEN_LABELS = (
    'SHOP_NAME', 'ADDR', 'TITLE', 'PHONE',
    'PRODUCT_NAME', 'AMOUNT', 'UNIT', 'UPRICE', 'SUB_TPRICE', 'UDISCOUNT',
    'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
    'RECEMONEY', 'REMAMONEY',
    'BILLID', 'DATETIME', 'CASHIER',
)

# Labels whose text is passed through reformat() before being stored.
_REFORMATTED_LABELS = frozenset({
    'AMOUNT', 'UNIT', 'UDISCOUNT', 'UPRICE', 'SUB_TPRICE',
    'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
    'RECEMONEY', 'REMAMONEY',
})

# Labels whose rows are tagged with the running product index.
_PRODUCT_ROW_LABELS = frozenset({'AMOUNT', 'SUB_TPRICE', 'UPRICE', 'PRODUCT_NAME'})


def create_df(true_texts, true_predictions, chosen_labels=None):
    """Build a DataFrame of the (text, label) pairs whose label is selected.

    Args:
        true_texts: Iterable of text values, aligned with ``true_predictions``.
        true_predictions: Iterable of predicted labels, one per text.
        chosen_labels: Container of labels to keep.  ``None`` (the default)
            selects the built-in label set in ``_DEFAULT_CHOSEN_LABELS``.

    Returns:
        A ``pandas.DataFrame`` with columns ``text``, ``class_label`` and
        ``product_id``, one row per kept pair, in input order.

        * ``text`` is passed through ``reformat`` for the labels in
          ``_REFORMATTED_LABELS`` and stored unchanged otherwise.
        * ``product_id`` is a counter that starts at -1 and is incremented
          on every ``PRODUCT_NAME`` row, so the first product gets 0.  Rows
          labelled ``AMOUNT``, ``SUB_TPRICE``, ``UPRICE`` or ``PRODUCT_NAME``
          receive the current counter (which is -1 if no product name has
          been seen yet); every other row receives ``''``.
    """
    labels = _DEFAULT_CHOSEN_LABELS if chosen_labels is None else chosen_labels

    data = {'text': [], 'class_label': [], 'product_id': []}
    product_id = -1
    for text, prediction in zip(true_texts, true_predictions):
        if prediction not in labels:
            continue

        # Each product name opens a new group of product rows.
        if prediction == 'PRODUCT_NAME':
            product_id += 1

        if prediction in _REFORMATTED_LABELS:
            text = reformat(text)

        data['product_id'].append(
            product_id if prediction in _PRODUCT_ROW_LABELS else ''
        )
        data['class_label'].append(prediction)
        data['text'].append(text)

    return pd.DataFrame(data)
93
-
94
-
95
# Deletes the separator/noise characters . , : / | \ ) ( - ; _ in one pass.
_REFORMAT_STRIP_TABLE = str.maketrans('', '', '.,:/|\\)(-;_')


def reformat(text: str):
    """Strip separator characters from ``text`` and convert it to ``int``.

    The characters ``. , : / | \\ ) ( - ; _`` are removed, then the result is
    parsed with ``int``.

    Returns:
        The parsed ``int`` on success.  If the stripped text is not a valid
        integer, the *stripped* string is returned (not the original).  If
        ``text`` is not a string at all (e.g. ``None``), it is returned
        unchanged.

    Note:
        Because ``-`` is one of the removed characters, a leading minus sign
        is dropped: ``"-5"`` becomes ``5``.
    """
    try:
        text = text.translate(_REFORMAT_STRIP_TABLE)
        return int(text)
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: text is not a str; ValueError: not an
        # integer after stripping.  Anything else is left to propagate.
        return text
102
-
103
def _first_text_for_label(product_info, label, description):
    """Return the first ``text`` in ``product_info`` whose label is ``label``.

    If there is no such row, print ``Error: cannot find <description>`` and
    return ``''``.
    """
    matches = product_info.loc[product_info['class_label'] == label, 'text']
    if matches.empty:
        print(f"Error: cannot find {description}")
        return ''
    return matches.iloc[0]


def find_product(product_name, df):
    """Look up a product by name and summarise its amount and prices.

    Args:
        product_name: Text to search for.  It is matched as a literal,
            case-insensitive substring of the ``PRODUCT_NAME`` rows, so
            characters such as ``(``, ``.`` or ``+`` have no special meaning.
        df: DataFrame with ``text``, ``class_label`` and ``product_id``
            columns.

    Returns:
        For the first matching product, a four-line string with the product
        name, amount, unit price and sub-total taken from the rows that share
        its ``product_id``.  A missing field is left empty and an error line
        is printed.  If nothing matches, a fixed "not found" message is
        returned.
    """
    not_found = "Không tìm thấy item nào phù hợp."

    needle = product_name.lower()
    product_df = df[df['class_label'] == 'PRODUCT_NAME']
    if product_df.empty:
        return not_found

    # regex=False: the query is plain text, not a pattern.
    mask = product_df['text'].str.lower().str.contains(needle, regex=False, na=False)
    if not mask.any():
        return not_found

    product_id = product_df.loc[mask, 'product_id'].iloc[0]
    product_info = df[df['product_id'] == product_id]

    prod_name = product_info.loc[product_info['class_label'] == 'PRODUCT_NAME', 'text'].iloc[0]
    amount = _first_text_for_label(product_info, 'AMOUNT', 'amount')
    uprice = _first_text_for_label(product_info, 'UPRICE', 'unit price')
    sub_tprice = _first_text_for_label(product_info, 'SUB_TPRICE', 'sub total price')

    return f"Sản phẩm: {prod_name}\n Số lượng: {amount}\n Đơn giá: {uprice}\n Thành tiền: {sub_tprice}"