Spaces:
Running
Running
Update cached embeddings with new sizes, enhance prediction output by adding standard and aggregate unit columns, and refactor loading logic in the sentence transformer service for improved error handling.
Browse files
- data/cached_embeddings_abstract.pkl +2 -2
- data/cached_embeddings_name.pkl +2 -2
- data/cached_embeddings_sub_subject.pkl +2 -2
- data/cached_embeddings_subject.pkl +2 -2
- data/cached_embeddings_unit.pkl +2 -2
- data/outputData.csv +0 -0
- data/outputDataUnit.csv +116 -116
- data/unitMapData.csv +58 -58
- routes/predict.py +10 -8
- services/sentence_transformer_service.py +26 -21
data/cached_embeddings_abstract.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f83a38e1839d4169d2e9154428627435bc270485ecbc136a17c4d6c5dfb3e046
|
3 |
+
size 6243374
|
data/cached_embeddings_name.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a0c8ee1c0be9dcd79c7fccd6f67fb51925980e5b45707705f56093f707fc52e
|
3 |
+
size 27426684
|
data/cached_embeddings_sub_subject.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cded7544600ebfb2154c2f36de3268f3ae6b8966c175eb2f57635225ceae0d9a
|
3 |
+
size 978708
|
data/cached_embeddings_subject.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55ff495af4f2403b0f213c5ee2543ab0c50ff24f126e1d6c1579c4d1113d07a4
|
3 |
+
size 562493
|
data/cached_embeddings_unit.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c241a434f1ec36358410bb9782036d5cacb18fd6a766bc1cf78707b08ffa7c0
|
3 |
+
size 369237
|
data/outputData.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/outputDataUnit.csv
CHANGED
@@ -1,116 +1,116 @@
|
|
1 |
-
単位,単位文,出力_基準単位,出力_単位類似度,出力_
|
2 |
-
m,単位:m。,m,99.99995827674866,m
|
3 |
-
か所,単位:か所。,か所,99.99998807907104
|
4 |
-
人日,単位:人日。,人日,99.99998807907104
|
5 |
-
式,単位:式。,式,100.00001192092896
|
6 |
-
建発m3,単位:建発m3。,m3,96.60412073135376,m3
|
7 |
-
㎡,単位:㎡。,m2,100.00002384185791,m2
|
8 |
-
往復,単位:往復。,往復,100.0
|
9 |
-
t,単位:t。,t,100.00001192092896,t
|
10 |
-
個,単位:個。,個,100.0
|
11 |
-
組,単位:組。,組,99.99997019767761
|
12 |
-
枚,単位:枚。,枚,99.99998211860657
|
13 |
-
本,単位:本。,本,100.0
|
14 |
-
延㎡,単位:延㎡。,m2,97.71089553833008,m2
|
15 |
-
㎏,単位:㎏。,kg,99.99998807907104,kg
|
16 |
-
台,単位:台。,台,100.00001192092896
|
17 |
-
回,単位:回。,回,99.99995231628418
|
18 |
-
kg,単位:kg。,kg,99.99998807907104,kg
|
19 |
-
トン,単位:トン。,トン,100.00001192092896,t
|
20 |
-
%,単位:%。,%,100.0
|
21 |
-
面,単位:面。,面,99.99995231628418
|
22 |
-
基,単位:基。,基,100.0
|
23 |
-
日,単位:日。,日,100.00001192092896
|
24 |
-
人,単位:人。,人,99.99998211860657
|
25 |
-
検体,単位:検体。,検体,99.99999403953552
|
26 |
-
株,単位:株。,株,99.99999403953552
|
27 |
-
m3,単位:m3。,m3,100.00002384185791,m3
|
28 |
-
セット,単位:セット。,セット,99.99998807907104
|
29 |
-
架㎡,単位:架㎡。,m2,98.18716049194336,m2
|
30 |
-
床㎡,単位:床㎡。,m2,97.14738130569458,m2
|
31 |
-
室,単位:室。,室,100.0
|
32 |
-
m2,単位:m2。,m2,100.00004768371582,m2
|
33 |
-
セット,単位:セット。,セット,99.99997615814209
|
34 |
-
|
35 |
-
t,単位:t。,t,100.0,t
|
36 |
-
帖,単位:帖。,帖,100.0
|
37 |
-
L,単位:L。,L,99.99998807907104,L
|
38 |
-
業務,単位:業務。,業務,100.0
|
39 |
-
利権者,単位:利権者。,利権者,99.99999403953552
|
40 |
-
棟,単位:棟。,棟,100.0
|
41 |
-
部,単位:部。,部,100.00001192092896
|
42 |
-
点,単位:点。,点,99.99998807907104
|
43 |
-
月,単位:月。,月,100.0
|
44 |
-
1,単位:1。,1,100.00002384185791
|
45 |
-
1工事,単位:1工事。,1工事,99.99998211860657
|
46 |
-
脚,単位:脚。,脚,99.99998807907104
|
47 |
-
台月,単位:台月。,台月,99.99998807907104
|
48 |
-
m,単位:m。,m,99.99996423721313,m
|
49 |
-
一式,単位:一式。,一式,100.0
|
50 |
-
ユニット,単位:ユニット。,ユニット,100.00001192092896
|
51 |
-
張,単位:張。,張,100.00001192092896
|
52 |
-
POT,単位:POT。,POT,100.00001192092896
|
53 |
-
軒,単位:軒。,軒,100.00001192092896
|
54 |
-
鉢,単位:鉢。,鉢,100.00001192092896
|
55 |
-
畳,単位:畳。,畳,100.00001192092896
|
56 |
-
建新共人日,単位:建新共人日。,人日,95.0712502002716
|
57 |
-
建新共日,単位:建新共日。,日,93.64779591560364
|
58 |
-
建新共か所,単位:建新共か所。,か所,95.1797604560852
|
59 |
-
件,単位:件。,件,99.99998807907104
|
60 |
-
建新式,単位:建新式。,式,96.09522223472595
|
61 |
-
建新㎡,単位:建新㎡。,m2,98.18736910820007,m2
|
62 |
-
建新本,単位:建新本。,本,96.02546691894531
|
63 |
-
建新m3,単位:建新m3。,m3,98.15722107887268,m3
|
64 |
-
建新t,単位:建新t。,t,97.09546566009521,t
|
65 |
-
建新か所,単位:建新か所。,か所,97.41055965423584
|
66 |
-
建新m,単位:建新m。,m,94.88064050674438,m
|
67 |
-
建新鉄t,単位:建新鉄t。,t,96.54421806335449,t
|
68 |
-
建新鉄式,単位:建新鉄式。,式,94.90609765052795
|
69 |
-
建新鉄㎡,単位:建新鉄㎡。,m2,96.9235897064209,m2
|
70 |
-
建新鉄か所,単位:建新鉄か所。,か所,92.79875755310059
|
71 |
-
電新台,単位:電新台。,台,94.87690925598145
|
72 |
-
電新個,単位:電新個。,個,96.62517309188843
|
73 |
-
電新式,単位:電新式。,式,93.80441904067993
|
74 |
-
電新組,単位:電新組。,組,95.89881300926208
|
75 |
-
電新基,単位:電新基。,基,95.84553837776184
|
76 |
-
電新面,単位:電新面。,面,95.46024799346924
|
77 |
-
機新台,単位:機新台。,台,95.1753318309784
|
78 |
-
機新式,単位:機新式。,式,92.62499809265137
|
79 |
-
機新個,単位:機新個。,個,95.99997997283936
|
80 |
-
機新m,単位:機新m。,m,95.33501863479614,m
|
81 |
-
機新か所,単位:機新か所。,か所,96.62094712257385
|
82 |
-
機新組,単位:機新組。,組,96.22829556465149
|
83 |
-
機新枚,単位:機新枚。,枚,97.6352870464325
|
84 |
-
対,単位:対。,対,99.99999403953552
|
85 |
-
建新共m,単位:建新共m。,m2,88.79318833351135,m2
|
86 |
-
建新共㎡,単位:建新共㎡。,m2,97.5206732749939,m2
|
87 |
-
建新共台,単位:建新共台。,台,93.8029944896698
|
88 |
-
建新往復,単位:建新往復。,往復,98.01614284515381
|
89 |
-
建新組,単位:建新組。,組,96.89183235168457
|
90 |
-
建新回,単位:建新回。,回,96.91262245178223
|
91 |
-
建新鉄m,単位:建新鉄m。,m,94.20540928840637,m
|
92 |
-
建新鉄本,単位:建新鉄本。,鉄本,96.31228446960449
|
93 |
-
建新kg,単位:建新kg。,kg,97.65897393226624,kg
|
94 |
-
建新鉄,単位:建新鉄。,鉄,95.79679369926453
|
95 |
-
建新基,単位:建新基。,基,96.83878421783447
|
96 |
-
建新枚,単位:建新枚。,枚,97.5138783454895
|
97 |
-
建新1,単位:建新1。,1,91.50975942611694
|
98 |
-
建そ㎡,単位:建そ㎡。,m2,98.02416563034058,m2
|
99 |
-
建そか所,単位:建そか所。,か所,84.75893139839172
|
100 |
-
建そm,単位:建そm。,m,94.04593706130981,m
|
101 |
-
建そ式,単位:建そ式。,式,94.24981474876404
|
102 |
-
建そ本,単位:建そ本。,本,94.68826055526733
|
103 |
-
建そm3,単位:建そm3。,m3,97.72946834564209,m3
|
104 |
-
建そ株,単位:建そ株。,株,97.26487994194031
|
105 |
-
m3,単位:m3。,m3,100.00002384185791,m3
|
106 |
-
建発t,単位:建発t。,t,95.43354511260986,t
|
107 |
-
建発kg,単位:建発kg。,kg,95.56557536125183,kg
|
108 |
-
建そm3,単位:建そm3。,m3,97.72946834564209,m3
|
109 |
-
基日,単位:基日。,基日,100.0
|
110 |
-
人月,単位:人月。,人月,100.0
|
111 |
-
台日,単位:台日。,台日,100.00002384185791
|
112 |
-
箱,単位:箱。,箱,99.99998807907104
|
113 |
-
m3,単位:m3。,m3,100.00002384185791,m3
|
114 |
-
巻,単位:巻。,巻,99.99997615814209
|
115 |
-
型,単位:型。,型,99.99998211860657
|
116 |
-
F,単位:F。,F,99.99998211860657,F
|
|
|
1 |
+
単位,単位文,出力_基準単位,出力_単位類似度,出力_集計用単位,出力_標準単位
|
2 |
+
m,単位:m。,m,99.99995827674866,m,m
|
3 |
+
か所,単位:か所。,か所,99.99998807907104,か所・個・回・本・式,か所
|
4 |
+
人日,単位:人日。,人日,99.99998807907104,人日,人日
|
5 |
+
式,単位:式。,式,100.00001192092896,か所・個・回・本・式,式
|
6 |
+
建発m3,単位:建発m3。,m3,96.60412073135376,m3,m3
|
7 |
+
㎡,単位:㎡。,m2,100.00002384185791,m2,m2
|
8 |
+
往復,単位:往復。,往復,100.0,か所・個・回・本・式,往復
|
9 |
+
t,単位:t。,t,100.00001192092896,t,t
|
10 |
+
個,単位:個。,個,100.0,か所・個・回・本・式,個
|
11 |
+
組,単位:組。,組,99.99997019767761,か所・個・回・本・式,組
|
12 |
+
枚,単位:枚。,枚,99.99998211860657,か所・個・回・本・式,枚
|
13 |
+
本,単位:本。,本,100.0,か所・個・回・本・式,本
|
14 |
+
延㎡,単位:延㎡。,m2,97.71089553833008,m2,m2
|
15 |
+
㎏,単位:㎏。,kg,99.99998807907104,kg,kg
|
16 |
+
台,単位:台。,台,100.00001192092896,か所・個・回・本・式,台
|
17 |
+
回,単位:回。,回,99.99995231628418,か所・個・回・本・式,回
|
18 |
+
kg,単位:kg。,kg,99.99998807907104,kg,kg
|
19 |
+
トン,単位:トン。,トン,100.00001192092896,t,t
|
20 |
+
%,単位:%。,%,100.0,%,%
|
21 |
+
面,単位:面。,面,99.99995231628418,か所・個・回・本・式,面
|
22 |
+
基,単位:基。,基,100.0,か所・個・回・本・式,基
|
23 |
+
日,単位:日。,日,100.00001192092896,日,日
|
24 |
+
人,単位:人。,人,99.99998211860657,人,人
|
25 |
+
検体,単位:検体。,検体,99.99999403953552,か所・個・回・本・式,検体
|
26 |
+
株,単位:株。,株,99.99999403953552,か所・個・回・本・式,株
|
27 |
+
m3,単位:m3。,m3,100.00002384185791,m3,m3
|
28 |
+
セット,単位:セット。,セット,99.99998807907104,か所・個・回・本・式,セット
|
29 |
+
架㎡,単位:架㎡。,m2,98.18716049194336,m2,m2
|
30 |
+
床㎡,単位:床㎡。,m2,97.14738130569458,m2,m2
|
31 |
+
室,単位:室。,室,100.0,か所・個・回・本・式,室
|
32 |
+
m2,単位:m2。,m2,100.00004768371582,m2,m2
|
33 |
+
セット,単位:セット。,セット,99.99997615814209,か所・個・回・本・式,セット
|
34 |
+
箇所,単位:箇所。,箇所,100.00003576278687,か所・個・回・本・式,箇所
|
35 |
+
t,単位:t。,t,100.0,t,t
|
36 |
+
帖,単位:帖。,帖,100.0,か所・個・回・本・式,帖
|
37 |
+
L,単位:L。,L,99.99998807907104,L,L
|
38 |
+
業務,単位:業務。,業務,100.0,か所・個・回・本・式,業務
|
39 |
+
利権者,単位:利権者。,利権者,99.99999403953552,人,人
|
40 |
+
棟,単位:棟。,棟,100.0,か所・個・回・本・式,棟
|
41 |
+
部,単位:部。,部,100.00001192092896,か所・個・回・本・式,部
|
42 |
+
点,単位:点。,点,99.99998807907104,か所・個・回・本・式,点
|
43 |
+
月,単位:月。,月,100.0,月,月
|
44 |
+
1,単位:1。,1,100.00002384185791,か所・個・回・本・式,個
|
45 |
+
1工事,単位:1工事。,1工事,99.99998211860657,か所・個・回・本・式,工事
|
46 |
+
脚,単位:脚。,脚,99.99998807907104,か所・個・回・本・式,脚
|
47 |
+
台月,単位:台月。,台月,99.99998807907104,台日,台日
|
48 |
+
m,単位:m。,m,99.99996423721313,m,m
|
49 |
+
一式,単位:一式。,一式,100.0,か所・個・回・本・式,一式
|
50 |
+
ユニット,単位:ユニット。,ユニット,100.00001192092896,か所・個・回・本・式,ユニット
|
51 |
+
張,単位:張。,張,100.00001192092896,か所・個・回・本・式,張
|
52 |
+
POT,単位:POT。,POT,100.00001192092896,か所・個・回・本・式,POT
|
53 |
+
軒,単位:軒。,軒,100.00001192092896,か所・個・回・本・式,軒
|
54 |
+
鉢,単位:鉢。,鉢,100.00001192092896,か所・個・回・本・式,鉢
|
55 |
+
畳,単位:畳。,畳,100.00001192092896,か所・個・回・本・式,畳
|
56 |
+
建新共人日,単位:建新共人日。,人日,95.0712502002716,人日,人日
|
57 |
+
建新共日,単位:建新共日。,日,93.64779591560364,日,日
|
58 |
+
建新共か所,単位:建新共か所。,か所,95.1797604560852,か所・個・回・本・式,か所
|
59 |
+
件,単位:件。,件,99.99998807907104,か所・個・回・本・式,件
|
60 |
+
建新式,単位:建新式。,式,96.09522223472595,か所・個・回・本・式,式
|
61 |
+
建新㎡,単位:建新㎡。,m2,98.18736910820007,m2,m2
|
62 |
+
建新本,単位:建新本。,本,96.02546691894531,か所・個・回・本・式,本
|
63 |
+
建新m3,単位:建新m3。,m3,98.15722107887268,m3,m3
|
64 |
+
建新t,単位:建新t。,t,97.09546566009521,t,t
|
65 |
+
建新か所,単位:建新か所。,か所,97.41055965423584,か所・個・回・本・式,か所
|
66 |
+
建新m,単位:建新m。,m,94.88064050674438,m,m
|
67 |
+
建新鉄t,単位:建新鉄t。,t,96.54421806335449,t,t
|
68 |
+
建新鉄式,単位:建新鉄式。,式,94.90609765052795,か所・個・回・本・式,式
|
69 |
+
建新鉄㎡,単位:建新鉄㎡。,m2,96.9235897064209,m2,m2
|
70 |
+
建新鉄か所,単位:建新鉄か所。,か所,92.79875755310059,か所・個・回・本・式,か所
|
71 |
+
電新台,単位:電新台。,台,94.87690925598145,か所・個・回・本・式,台
|
72 |
+
電新個,単位:電新個。,個,96.62517309188843,か所・個・回・本・式,個
|
73 |
+
電新式,単位:電新式。,式,93.80441904067993,か所・個・回・本・式,式
|
74 |
+
電新組,単位:電新組。,組,95.89881300926208,か所・個・回・本・式,組
|
75 |
+
電新基,単位:電新基。,基,95.84553837776184,か所・個・回・本・式,基
|
76 |
+
電新面,単位:電新面。,面,95.46024799346924,か所・個・回・本・式,面
|
77 |
+
機新台,単位:機新台。,台,95.1753318309784,か所・個・回・本・式,台
|
78 |
+
機新式,単位:機新式。,式,92.62499809265137,か所・個・回・本・式,式
|
79 |
+
機新個,単位:機新個。,個,95.99997997283936,か所・個・回・本・式,個
|
80 |
+
機新m,単位:機新m。,m,95.33501863479614,m,m
|
81 |
+
機新か所,単位:機新か所。,か所,96.62094712257385,か所・個・回・本・式,か所
|
82 |
+
機新組,単位:機新組。,組,96.22829556465149,か所・個・回・本・式,組
|
83 |
+
機新枚,単位:機新枚。,枚,97.6352870464325,か所・個・回・本・式,枚
|
84 |
+
対,単位:対。,対,99.99999403953552,か所・個・回・本・式,対
|
85 |
+
建新共m,単位:建新共m。,m2,88.79318833351135,m2,m2
|
86 |
+
建新共㎡,単位:建新共㎡。,m2,97.5206732749939,m2,m2
|
87 |
+
建新共台,単位:建新共台。,台,93.8029944896698,か所・個・回・本・式,台
|
88 |
+
建新往復,単位:建新往復。,往復,98.01614284515381,か所・個・回・本・式,往復
|
89 |
+
建新組,単位:建新組。,組,96.89183235168457,か所・個・回・本・式,組
|
90 |
+
建新回,単位:建新回。,回,96.91262245178223,か所・個・回・本・式,回
|
91 |
+
建新鉄m,単位:建新鉄m。,m,94.20540928840637,m,m
|
92 |
+
建新鉄本,単位:建新鉄本。,鉄本,96.31228446960449,か所・個・回・本・式,鉄本
|
93 |
+
建新kg,単位:建新kg。,kg,97.65897393226624,kg,kg
|
94 |
+
建新鉄,単位:建新鉄。,鉄,95.79679369926453,か所・個・回・本・式,鉄
|
95 |
+
建新基,単位:建新基。,基,96.83878421783447,か所・個・回・本・式,基
|
96 |
+
建新枚,単位:建新枚。,枚,97.5138783454895,か所・個・回・本・式,枚
|
97 |
+
建新1,単位:建新1。,1,91.50975942611694,か所・個・回・本・式,個
|
98 |
+
建そ㎡,単位:建そ㎡。,m2,98.02416563034058,m2,m2
|
99 |
+
建そか所,単位:建そか所。,か所,84.75893139839172,か所・個・回・本・式,か所
|
100 |
+
建そm,単位:建そm。,m,94.04593706130981,m,m
|
101 |
+
建そ式,単位:建そ式。,式,94.24981474876404,か所・個・回・本・式,式
|
102 |
+
建そ本,単位:建そ本。,本,94.68826055526733,か所・個・回・本・式,本
|
103 |
+
建そm3,単位:建そm3。,m3,97.72946834564209,m3,m3
|
104 |
+
建そ株,単位:建そ株。,株,97.26487994194031,か所・個・回・本・式,株
|
105 |
+
m3,単位:m3。,m3,100.00002384185791,m3,m3
|
106 |
+
建発t,単位:建発t。,t,95.43354511260986,t,t
|
107 |
+
建発kg,単位:建発kg。,kg,95.56557536125183,kg,kg
|
108 |
+
建そm3,単位:建そm3。,m3,97.72946834564209,m3,m3
|
109 |
+
基日,単位:基日。,基日,100.0,基日,基日
|
110 |
+
人月,単位:人月。,人月,100.0,人日,人日
|
111 |
+
台日,単位:台日。,台日,100.00002384185791,台日,台日
|
112 |
+
箱,単位:箱。,箱,99.99998807907104,か所・個・回・本・式,箱
|
113 |
+
m3,単位:m3。,m3,100.00002384185791,m3,m3
|
114 |
+
巻,単位:巻。,巻,99.99997615814209,か所・個・回・本・式,巻
|
115 |
+
型,単位:型。,型,99.99998211860657,か所・個・回・本・式,型
|
116 |
+
F,単位:F。,F,99.99998211860657,F,F
|
data/unitMapData.csv
CHANGED
@@ -1,58 +1,58 @@
|
|
1 |
-
|
2 |
-
m,m
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
m3,m3
|
7 |
-
m2,m2
|
8 |
-
|
9 |
-
t,t
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
kg,kg
|
15 |
-
|
16 |
-
|
17 |
-
トン,t
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
L,L
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
1
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
m,m
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
POT,か所・個・回・本・式
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
F,F
|
|
|
1 |
+
基準単位,標準単位,集計用単位
|
2 |
+
m,m,m
|
3 |
+
か所,か所,か所・個・回・本・式
|
4 |
+
人日,人日,人日
|
5 |
+
式,式,か所・個・回・本・式
|
6 |
+
m3,m3,m3
|
7 |
+
m2,m2,m2
|
8 |
+
往復,往復,か所・個・回・本・式
|
9 |
+
t,t,t
|
10 |
+
個,個,か所・個・回・本・式
|
11 |
+
組,組,か所・個・回・本・式
|
12 |
+
枚,枚,か所・個・回・本・式
|
13 |
+
本,本,か所・個・回・本・式
|
14 |
+
kg,kg,kg
|
15 |
+
台,台,か所・個・回・本・式
|
16 |
+
回,回,か所・個・回・本・式
|
17 |
+
トン,t,t
|
18 |
+
%,%,%
|
19 |
+
面,面,か所・個・回・本・式
|
20 |
+
基,基,か所・個・回・本・式
|
21 |
+
日,日,日
|
22 |
+
人,人,人
|
23 |
+
検体,検体,か所・個・回・本・式
|
24 |
+
株,株,か所・個・回・本・式
|
25 |
+
セット,セット,か所・個・回・本・式
|
26 |
+
室,室,か所・個・回・本・式
|
27 |
+
箇所,箇所,か所・個・回・本・式
|
28 |
+
帖,帖,か所・個・回・本・式
|
29 |
+
L,L,L
|
30 |
+
業務,業務,か所・個・回・本・式
|
31 |
+
利権者,人,人
|
32 |
+
棟,棟,か所・個・回・本・式
|
33 |
+
部,部,か所・個・回・本・式
|
34 |
+
点,点,か所・個・回・本・式
|
35 |
+
月,月,月
|
36 |
+
1,個,か所・個・回・本・式
|
37 |
+
1工事,工事,か所・個・回・本・式
|
38 |
+
脚,脚,か所・個・回・本・式
|
39 |
+
台月,台日,台日
|
40 |
+
m,m,m
|
41 |
+
一式,一式,か所・個・回・本・式
|
42 |
+
ユニット,ユニット,か所・個・回・本・式
|
43 |
+
張,張,か所・個・回・本・式
|
44 |
+
POT,POT,か所・個・回・本・式
|
45 |
+
軒,軒,か所・個・回・本・式
|
46 |
+
鉢,鉢,か所・個・回・本・式
|
47 |
+
畳,畳,か所・個・回・本・式
|
48 |
+
件,件,か所・個・回・本・式
|
49 |
+
対,対,か所・個・回・本・式
|
50 |
+
鉄本,鉄本,か所・個・回・本・式
|
51 |
+
鉄,鉄,か所・個・回・本・式
|
52 |
+
基日,基日,基日
|
53 |
+
人月,人日,人日
|
54 |
+
台日,台日,台日
|
55 |
+
箱,箱,か所・個・回・本・式
|
56 |
+
巻,巻,か所・個・回・本・式
|
57 |
+
型,型,か所・個・回・本・式
|
58 |
+
F,F,F
|
routes/predict.py
CHANGED
@@ -257,7 +257,8 @@ async def predict(
|
|
257 |
"出力_科目": "",
|
258 |
"出力_中科目": "",
|
259 |
"出力_項目名": "",
|
260 |
-
"出力_
|
|
|
261 |
"出力_確率度": 0.0,
|
262 |
}
|
263 |
|
@@ -284,13 +285,13 @@ async def predict(
|
|
284 |
elif "出力_基準名称" in df_output_data.columns:
|
285 |
df_output_data["出力_項目名"] = df_output_data["出力_基準名称"]
|
286 |
|
287 |
-
# 出力_
|
|
|
|
|
|
|
|
|
288 |
if "出力_集計用単位" in df_output_data.columns:
|
289 |
-
df_output_data["出力_
|
290 |
-
elif "出力_標準単位" in df_output_data.columns:
|
291 |
-
df_output_data["出力_単位"] = df_output_data["出力_標準単位"]
|
292 |
-
elif "出力_基準単位" in df_output_data.columns:
|
293 |
-
df_output_data["出力_単位"] = df_output_data["出力_基準単位"]
|
294 |
|
295 |
# 出力_確率度 mapping - use the name similarity as main probability
|
296 |
if "出力_名称類似度" in df_output_data.columns:
|
@@ -347,8 +348,9 @@ async def predict(
|
|
347 |
"出力_科目",
|
348 |
"出力_中科目",
|
349 |
"出力_項目名",
|
350 |
-
"出力_単位",
|
351 |
"出力_確率度",
|
|
|
|
|
352 |
]
|
353 |
|
354 |
# Save with utf_8_sig encoding for Japanese Excel compatibility
|
|
|
257 |
"出力_科目": "",
|
258 |
"出力_中科目": "",
|
259 |
"出力_項目名": "",
|
260 |
+
"出力_標準単位": "",
|
261 |
+
"出力_集計用単位": "",
|
262 |
"出力_確率度": 0.0,
|
263 |
}
|
264 |
|
|
|
285 |
elif "出力_基準名称" in df_output_data.columns:
|
286 |
df_output_data["出力_項目名"] = df_output_data["出力_基準名称"]
|
287 |
|
288 |
+
# 出力_標準単位 mapping - use unit mapper result
|
289 |
+
if "出力_標準単位" in df_output_data.columns:
|
290 |
+
df_output_data["出力_標準単位"] = df_output_data["出力_標準単位"]
|
291 |
+
|
292 |
+
# 出力_集計用単位 mapping - use unit mapper result
|
293 |
if "出力_集計用単位" in df_output_data.columns:
|
294 |
+
df_output_data["出力_集計用単位"] = df_output_data["出力_集計用単位"]
|
|
|
|
|
|
|
|
|
295 |
|
296 |
# 出力_確率度 mapping - use the name similarity as main probability
|
297 |
if "出力_名称類似度" in df_output_data.columns:
|
|
|
348 |
"出力_科目",
|
349 |
"出力_中科目",
|
350 |
"出力_項目名",
|
|
|
351 |
"出力_確率度",
|
352 |
+
"出力_標準単位",
|
353 |
+
"出力_集計用単位",
|
354 |
]
|
355 |
|
356 |
# Save with utf_8_sig encoding for Japanese Excel compatibility
|
services/sentence_transformer_service.py
CHANGED
@@ -53,7 +53,7 @@ def load_cached_embeddings_by_type(cache_type):
|
|
53 |
cache_file = cache_files.get(cache_type)
|
54 |
if not cache_file:
|
55 |
print(f"Unknown cache type: {cache_type}")
|
56 |
-
return {}
|
57 |
|
58 |
if os.path.exists(cache_file):
|
59 |
try:
|
@@ -62,15 +62,15 @@ def load_cached_embeddings_by_type(cache_type):
|
|
62 |
print(
|
63 |
f"Loaded {cache_type} embeddings with {len(cached_embeddings)} entries from {cache_file}"
|
64 |
)
|
65 |
-
return cached_embeddings
|
66 |
except Exception as e:
|
67 |
print(f"Error loading {cache_type} embeddings: {e}")
|
68 |
-
return {}
|
69 |
else:
|
70 |
print(
|
71 |
f"No {cache_type} embeddings cache file found. Starting with empty cache."
|
72 |
)
|
73 |
-
return {}
|
74 |
|
75 |
|
76 |
def save_cached_embeddings_by_type(cached_embedding_helper, cache_type):
|
@@ -103,10 +103,10 @@ def save_cached_embeddings_by_type(cached_embedding_helper, cache_type):
|
|
103 |
|
104 |
def create_cached_embedding_helper_for_type(sentence_transformer, cache_type):
|
105 |
"""Create a CachedEmbeddingHelper for specific embedding type"""
|
106 |
-
cached_embeddings = load_cached_embeddings_by_type(cache_type)
|
107 |
return CachedEmbeddingHelper(
|
108 |
sentence_transformer, cached_sentence_embeddings=cached_embeddings
|
109 |
-
)
|
110 |
|
111 |
|
112 |
class SentenceTransformerService:
|
@@ -115,10 +115,15 @@ class SentenceTransformerService:
|
|
115 |
|
116 |
# Different cached embedding helpers for different types
|
117 |
self.unit_cached_embedding_helper = None
|
|
|
118 |
self.subject_cached_embedding_helper = None
|
|
|
119 |
self.sub_subject_cached_embedding_helper = None
|
|
|
120 |
self.name_cached_embedding_helper = None
|
|
|
121 |
self.abstract_cached_embedding_helper = None
|
|
|
122 |
|
123 |
# Map data holders
|
124 |
self.df_unit_map_data = None
|
@@ -144,21 +149,21 @@ class SentenceTransformerService:
|
|
144 |
)
|
145 |
|
146 |
# Create different cached embedding helpers for different types
|
147 |
-
self.unit_cached_embedding_helper = create_cached_embedding_helper_for_type(
|
148 |
self.sentenceTransformerHelper, "unit"
|
149 |
)
|
150 |
-
self.subject_cached_embedding_helper = create_cached_embedding_helper_for_type(
|
151 |
self.sentenceTransformerHelper, "subject"
|
152 |
)
|
153 |
-
self.sub_subject_cached_embedding_helper = (
|
154 |
create_cached_embedding_helper_for_type(
|
155 |
self.sentenceTransformerHelper, "sub_subject"
|
156 |
)
|
157 |
)
|
158 |
-
self.name_cached_embedding_helper = create_cached_embedding_helper_for_type(
|
159 |
self.sentenceTransformerHelper, "name"
|
160 |
)
|
161 |
-
self.abstract_cached_embedding_helper = create_cached_embedding_helper_for_type(
|
162 |
self.sentenceTransformerHelper, "abstract"
|
163 |
)
|
164 |
|
@@ -250,23 +255,23 @@ class SentenceTransformerService:
|
|
250 |
def save_all_caches(self):
|
251 |
"""Save all cached embeddings"""
|
252 |
try:
|
253 |
-
if self.
|
254 |
save_cached_embeddings_by_type(
|
255 |
self.unit_cached_embedding_helper, "unit"
|
256 |
)
|
257 |
-
if self.
|
258 |
save_cached_embeddings_by_type(
|
259 |
self.subject_cached_embedding_helper, "subject"
|
260 |
)
|
261 |
-
if self.
|
262 |
save_cached_embeddings_by_type(
|
263 |
self.sub_subject_cached_embedding_helper, "sub_subject"
|
264 |
)
|
265 |
-
if self.
|
266 |
save_cached_embeddings_by_type(
|
267 |
self.name_cached_embedding_helper, "name"
|
268 |
)
|
269 |
-
if self.
|
270 |
save_cached_embeddings_by_type(
|
271 |
self.abstract_cached_embedding_helper, "abstract"
|
272 |
)
|
@@ -277,35 +282,35 @@ class SentenceTransformerService:
|
|
277 |
print("=" * 60)
|
278 |
|
279 |
total_cache_size = 0
|
280 |
-
if self.
|
281 |
unit_size = len(
|
282 |
self.unit_cached_embedding_helper._cached_sentence_embeddings
|
283 |
)
|
284 |
total_cache_size += unit_size
|
285 |
print(f"Unit cache: {unit_size} embeddings")
|
286 |
|
287 |
-
if self.
|
288 |
subject_size = len(
|
289 |
self.subject_cached_embedding_helper._cached_sentence_embeddings
|
290 |
)
|
291 |
total_cache_size += subject_size
|
292 |
print(f"Subject cache: {subject_size} embeddings")
|
293 |
|
294 |
-
if self.
|
295 |
sub_subject_size = len(
|
296 |
self.sub_subject_cached_embedding_helper._cached_sentence_embeddings
|
297 |
)
|
298 |
total_cache_size += sub_subject_size
|
299 |
print(f"Sub-subject cache: {sub_subject_size} embeddings")
|
300 |
|
301 |
-
if self.
|
302 |
name_size = len(
|
303 |
self.name_cached_embedding_helper._cached_sentence_embeddings
|
304 |
)
|
305 |
total_cache_size += name_size
|
306 |
print(f"Name cache: {name_size} embeddings")
|
307 |
|
308 |
-
if self.
|
309 |
abstract_size = len(
|
310 |
self.abstract_cached_embedding_helper._cached_sentence_embeddings
|
311 |
)
|
|
|
53 |
cache_file = cache_files.get(cache_type)
|
54 |
if not cache_file:
|
55 |
print(f"Unknown cache type: {cache_type}")
|
56 |
+
return {}, False
|
57 |
|
58 |
if os.path.exists(cache_file):
|
59 |
try:
|
|
|
62 |
print(
|
63 |
f"Loaded {cache_type} embeddings with {len(cached_embeddings)} entries from {cache_file}"
|
64 |
)
|
65 |
+
return cached_embeddings, True
|
66 |
except Exception as e:
|
67 |
print(f"Error loading {cache_type} embeddings: {e}")
|
68 |
+
return {}, False
|
69 |
else:
|
70 |
print(
|
71 |
f"No {cache_type} embeddings cache file found. Starting with empty cache."
|
72 |
)
|
73 |
+
return {}, False
|
74 |
|
75 |
|
76 |
def save_cached_embeddings_by_type(cached_embedding_helper, cache_type):
|
|
|
103 |
|
104 |
def create_cached_embedding_helper_for_type(sentence_transformer, cache_type):
|
105 |
"""Create a CachedEmbeddingHelper for specific embedding type"""
|
106 |
+
cached_embeddings, is_loaded = load_cached_embeddings_by_type(cache_type)
|
107 |
return CachedEmbeddingHelper(
|
108 |
sentence_transformer, cached_sentence_embeddings=cached_embeddings
|
109 |
+
), is_loaded
|
110 |
|
111 |
|
112 |
class SentenceTransformerService:
|
|
|
115 |
|
116 |
# Different cached embedding helpers for different types
|
117 |
self.unit_cached_embedding_helper = None
|
118 |
+
self.unit_is_loaded = False
|
119 |
self.subject_cached_embedding_helper = None
|
120 |
+
self.subject_is_loaded = False
|
121 |
self.sub_subject_cached_embedding_helper = None
|
122 |
+
self.sub_subject_is_loaded = False
|
123 |
self.name_cached_embedding_helper = None
|
124 |
+
self.name_is_loaded = False
|
125 |
self.abstract_cached_embedding_helper = None
|
126 |
+
self.abstract_is_loaded = False
|
127 |
|
128 |
# Map data holders
|
129 |
self.df_unit_map_data = None
|
|
|
149 |
)
|
150 |
|
151 |
# Create different cached embedding helpers for different types
|
152 |
+
self.unit_cached_embedding_helper, self.unit_is_loaded = create_cached_embedding_helper_for_type(
|
153 |
self.sentenceTransformerHelper, "unit"
|
154 |
)
|
155 |
+
self.subject_cached_embedding_helper, self.subject_is_loaded = create_cached_embedding_helper_for_type(
|
156 |
self.sentenceTransformerHelper, "subject"
|
157 |
)
|
158 |
+
self.sub_subject_cached_embedding_helper, self.sub_subject_is_loaded = (
|
159 |
create_cached_embedding_helper_for_type(
|
160 |
self.sentenceTransformerHelper, "sub_subject"
|
161 |
)
|
162 |
)
|
163 |
+
self.name_cached_embedding_helper, self.name_is_loaded = create_cached_embedding_helper_for_type(
|
164 |
self.sentenceTransformerHelper, "name"
|
165 |
)
|
166 |
+
self.abstract_cached_embedding_helper, self.abstract_is_loaded = create_cached_embedding_helper_for_type(
|
167 |
self.sentenceTransformerHelper, "abstract"
|
168 |
)
|
169 |
|
|
|
255 |
def save_all_caches(self):
|
256 |
"""Save all cached embeddings"""
|
257 |
try:
|
258 |
+
if not self.unit_is_loaded:
|
259 |
save_cached_embeddings_by_type(
|
260 |
self.unit_cached_embedding_helper, "unit"
|
261 |
)
|
262 |
+
if not self.subject_is_loaded:
|
263 |
save_cached_embeddings_by_type(
|
264 |
self.subject_cached_embedding_helper, "subject"
|
265 |
)
|
266 |
+
if not self.sub_subject_is_loaded:
|
267 |
save_cached_embeddings_by_type(
|
268 |
self.sub_subject_cached_embedding_helper, "sub_subject"
|
269 |
)
|
270 |
+
if not self.name_is_loaded:
|
271 |
save_cached_embeddings_by_type(
|
272 |
self.name_cached_embedding_helper, "name"
|
273 |
)
|
274 |
+
if not self.abstract_is_loaded:
|
275 |
save_cached_embeddings_by_type(
|
276 |
self.abstract_cached_embedding_helper, "abstract"
|
277 |
)
|
|
|
282 |
print("=" * 60)
|
283 |
|
284 |
total_cache_size = 0
|
285 |
+
if not self.unit_is_loaded:
|
286 |
unit_size = len(
|
287 |
self.unit_cached_embedding_helper._cached_sentence_embeddings
|
288 |
)
|
289 |
total_cache_size += unit_size
|
290 |
print(f"Unit cache: {unit_size} embeddings")
|
291 |
|
292 |
+
if not self.subject_is_loaded:
|
293 |
subject_size = len(
|
294 |
self.subject_cached_embedding_helper._cached_sentence_embeddings
|
295 |
)
|
296 |
total_cache_size += subject_size
|
297 |
print(f"Subject cache: {subject_size} embeddings")
|
298 |
|
299 |
+
if not self.sub_subject_is_loaded:
|
300 |
sub_subject_size = len(
|
301 |
self.sub_subject_cached_embedding_helper._cached_sentence_embeddings
|
302 |
)
|
303 |
total_cache_size += sub_subject_size
|
304 |
print(f"Sub-subject cache: {sub_subject_size} embeddings")
|
305 |
|
306 |
+
if not self.name_is_loaded:
|
307 |
name_size = len(
|
308 |
self.name_cached_embedding_helper._cached_sentence_embeddings
|
309 |
)
|
310 |
total_cache_size += name_size
|
311 |
print(f"Name cache: {name_size} embeddings")
|
312 |
|
313 |
+
if not self.abstract_is_loaded:
|
314 |
abstract_size = len(
|
315 |
self.abstract_cached_embedding_helper._cached_sentence_embeddings
|
316 |
)
|