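"""Prediction endpoints.

Chains the similarity mappers from meisai-check-ai (subject, sub-subject,
name, abstract, unit, standard-name) over uploaded line-item data and returns
the standardized columns, backed by cached sentence-transformer embeddings.
"""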
import os
import time
import shutil
import pandas as pd
import traceback
import sys
from pathlib import Path
from fastapi import APIRouter, UploadFile, File, HTTPException, Depends
from fastapi.responses import FileResponse
from custom_auth import get_current_user_from_token
from services.sentence_transformer_service import SentenceTransformerService, sentence_transformer_service

# Add the path to import modules from meisai-check-ai
sys.path.append(os.path.join(os.path.dirname(__file__), "..", "meisai-check-ai"))

from mapping_lib.standard_subject_data_mapper import StandardSubjectDataMapper
from mapping_lib.subject_similarity_mapper import SubjectSimilarityMapper
from mapping_lib.sub_subject_similarity_mapper import SubSubjectSimilarityMapper
from mapping_lib.name_similarity_mapper import NameSimilarityMapper
from mapping_lib.sub_subject_and_name_data_mapper import SubSubjectAndNameDataMapper
from mapping_lib.abstract_similarity_mapper import AbstractSimilarityMapper
from mapping_lib.name_and_abstract_mapper import NameAndAbstractDataMapper
from mapping_lib.unit_similarity_mapper import UnitSimilarityMapper
from mapping_lib.standard_name_mapper import StandardNameMapper

from config import UPLOAD_DIR, OUTPUT_DIR
from models import (
    EmbeddingRequest,
    PredictRawRequest,
    PredictRawResponse,
    PredictRecord,
    PredictResult,
)

router = APIRouter()
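
# All endpoints below require a valid token (resolved by
# get_current_user_from_token) and share the singleton
# sentence_transformer_service instance injected via Depends.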


@router.post("/predict")
async def predict(
    current_user=Depends(get_current_user_from_token),
    file: UploadFile = File(...),
    sentence_service: SentenceTransformerService = Depends(
        lambda: sentence_transformer_service
    ),
):
    """
    Process an input CSV file and return standardized names (requires authentication)
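
    Example (a sketch, assuming the router is mounted at the app root on
    localhost:8000 and that the token is sent as a bearer header):

        curl -X POST http://localhost:8000/predict \
            -H "Authorization: Bearer <token>" \
            -F "file=@input.csv" \
            -o output.csv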
    """
    if not file.filename.endswith(".csv"):
        raise HTTPException(status_code=400, detail="Only CSV files are supported")

    # Save uploaded file
    timestamp = int(time.time())
    input_file_path = os.path.join(UPLOAD_DIR, f"input_{timestamp}_{current_user.username}.csv")
    output_file_path = os.path.join(OUTPUT_DIR, f"output_{timestamp}_{current_user.username}.csv")

    try:
        with open(input_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    finally:
        file.file.close()

    try:
        # Load input data
        start_time = time.time()
        df_input_data = pd.read_csv(input_file_path)

        # Ensure basic columns exist with default values
        basic_columns = {
            "シート名": "",
            "行": "",
            "科目": "",
            "中科目": "",
            "分類": "",
            "名称": "",
            "単位": "",
            "摘要": "",
            "備考": "",
        }

        for col, default_value in basic_columns.items():
            if col not in df_input_data.columns:
                df_input_data[col] = default_value

        # Process data using the new mapping system similar to predict.py
        try:
            # Subject mapping
            if sentence_service.df_subject_map_data is not None:
                subject_similarity_mapper = SubjectSimilarityMapper(
                    cached_embedding_helper=sentence_service.subject_cached_embedding_helper,
                    df_map_data=sentence_service.df_subject_map_data,
                )

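                # Embed only the unique 科目 values, then broadcast the
                # predictions back to every row through a lookup map so that
                # duplicate subjects are not re-embedded.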
                list_input_subject = df_input_data["科目"].unique()
                df_subject_data = pd.DataFrame({"科目": list_input_subject})

                subject_similarity_mapper.predict_input(df_input_data=df_subject_data)

                output_subject_map = dict(
                    zip(df_subject_data["科目"], df_subject_data["出力_科目"])
                )
                df_input_data["標準科目"] = df_input_data["科目"].map(
                    output_subject_map
                )
                df_input_data["出力_科目"] = df_input_data["科目"].map(
                    output_subject_map
                )

        except Exception as e:
            print(f"Error processing SubjectSimilarityMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        try:
            # Standard subject mapping
            if sentence_service.df_standard_subject_map_data is not None:
                standard_subject_data_mapper = StandardSubjectDataMapper(
                    df_map_data=sentence_service.df_standard_subject_map_data
                )
                df_output_data = standard_subject_data_mapper.map_data(
                    df_input_data=df_input_data,
                    input_key_columns=["出力_科目"],
                    in_place=True,
                )
            else:
                df_output_data = df_input_data.copy()

        except Exception as e:
            print(f"Error processing StandardSubjectDataMapper: {e}")
            # Continue with original data if standard subject mapping fails
            df_output_data = df_input_data.copy()

        try:
            # Sub subject mapping
            if sentence_service.df_sub_subject_map_data is not None:
                sub_subject_similarity_mapper = SubSubjectSimilarityMapper(
                    cached_embedding_helper=sentence_service.sub_subject_cached_embedding_helper,
                    df_map_data=sentence_service.df_sub_subject_map_data,
                )
                sub_subject_similarity_mapper.predict_input(
                    df_input_data=df_output_data
                )
                df_output_data = df_output_data.fillna("")

        except Exception as e:
            print(f"Error processing SubSubjectSimilarityMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        try:
            # Name mapping
            if sentence_service.df_name_map_data is not None:
                name_sentence_mapper = NameSimilarityMapper(
                    cached_embedding_helper=sentence_service.name_cached_embedding_helper,
                    df_map_data=sentence_service.df_name_map_data,
                )
                name_sentence_mapper.predict_input(df_input_data=df_output_data)

        except Exception as e:
            print(f"Error processing NameSimilarityMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        try:
            # Sub subject and name mapping
            if sentence_service.df_sub_subject_and_name_map_data is not None:
                sub_subject_and_name_mapper = SubSubjectAndNameDataMapper(
                    df_map_data=sentence_service.df_sub_subject_and_name_map_data
                )
                sub_subject_and_name_mapper.map_data(df_input_data=df_output_data)

        except Exception as e:
            print(f"Error processing SubSubjectAndNameDataMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        try:
            # Abstract mapping
            if sentence_service.df_abstract_map_data is not None:
                # Ensure required columns exist before AbstractSimilarityMapper
                required_columns_for_abstract = {
                    "標準科目": "",
                    "摘要グループ": "",
                    "確定": "未確定",
                    "摘要": "",
                    "備考": "",
                }

                # Add missing columns with appropriate defaults
                for col, default_val in required_columns_for_abstract.items():
                    if col not in df_output_data.columns:
                        df_output_data[col] = default_val
                        print(
                            f"DEBUG: Added missing column '{col}' with default value '{default_val}'"
                        )

                # Ensure data types are correct (convert to string to avoid type issues)
                for col in ["標準科目", "摘要グループ", "確定", "摘要", "備考"]:
                    if col in df_output_data.columns:
                        df_output_data[col] = df_output_data[col].astype(str).fillna("")

                abstract_similarity_mapper = AbstractSimilarityMapper(
                    cached_embedding_helper=sentence_service.abstract_cached_embedding_helper,
                    df_map_data=sentence_service.df_abstract_map_data,
                )
                abstract_similarity_mapper.predict_input(df_input_data=df_output_data)

                print(f"DEBUG: AbstractSimilarityMapper completed successfully")

        except Exception as e:
            print(f"Error processing AbstractSimilarityMapper: {e}")
            traceback.print_exc()  # uses the module-level traceback import
            # Don't raise: abstract mapping is optional here, so continue processing
            print("DEBUG: Continuing without AbstractSimilarityMapper...")

        try:
            # Name and abstract mapping
            if sentence_service.df_name_and_subject_map_data is not None:
                name_and_abstract_mapper = NameAndAbstractDataMapper(
                    df_map_data=sentence_service.df_name_and_subject_map_data
                )
                df_output_data = name_and_abstract_mapper.map_data(df_output_data)

        except Exception as e:
            print(f"Error processing NameAndAbstractDataMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        try:
            # Unit mapping
            if sentence_service.df_unit_map_data is not None:
                unit_mapper = UnitSimilarityMapper(
                    cached_embedding_helper=sentence_service.unit_cached_embedding_helper,
                    df_map_data=sentence_service.df_unit_map_data,
                )
                unit_mapper.predict_input(df_input_data=df_output_data)

        except Exception as e:
            print(f"Error processing UnitMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        try:
            # Standard name mapping
            if sentence_service.df_standard_name_map_data is not None:
                standard_name_mapper = StandardNameMapper(
                    df_map_data=sentence_service.df_standard_name_map_data
                )
                df_output_data = standard_name_mapper.map_data(df_output_data)

        except Exception as e:
            print(f"Error processing StandardNameMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        # Create output columns and ensure they have proper values
        # Add ID column if not exists
        if "ID" not in df_output_data.columns:
            df_output_data.reset_index(drop=False, inplace=True)
            df_output_data.rename(columns={"index": "ID"}, inplace=True)
            df_output_data["ID"] = df_output_data["ID"] + 1  # Start from 1

        # Ensure required columns exist with default values
        required_columns = {
            "シート名": "",
            "行": "",
            "科目": "",
            "中科目": "",
            "分類": "",
            "名称": "",
            "単位": "",
            "摘要": "",
            "備考": "",
            "出力_科目": "",
            "出力_中科目": "",
            "出力_項目名": "",
            "出力_標準単位": "",
            "出力_集計用単位": "",
            "出力_確率度": 0.0,
        }

        for col, default_value in required_columns.items():
            if col not in df_output_data.columns:
                df_output_data[col] = default_value

        # Map output columns to match Excel structure
        # 出力_中科目 mapping - use the standard sub-subject from sub-subject mapper
        if "出力_基準中科目" in df_output_data.columns:
            df_output_data["出力_中科目"] = df_output_data["出力_基準中科目"]
        elif "標準中科目" in df_output_data.columns:
            df_output_data["出力_中科目"] = df_output_data["標準中科目"]

        # 出力_項目名 mapping - use the final item name from name and abstract mapper
        if (
            "出力_項目名" in df_output_data.columns
            and not df_output_data["出力_項目名"].isna().all()
        ):
            # Keep existing 出力_項目名 if it exists and has values
            pass
        elif "出力_標準名称" in df_output_data.columns:
            df_output_data["出力_項目名"] = df_output_data["出力_標準名称"]
        elif "出力_基準名称" in df_output_data.columns:
            df_output_data["出力_項目名"] = df_output_data["出力_基準名称"]

        # 出力_標準単位 / 出力_集計用単位 - already populated by the unit
        # mapper (or by the required-column defaults above), so no further
        # mapping is needed here.

        # 出力_確率度 mapping - use the name similarity as main probability
        if "出力_名称類似度" in df_output_data.columns:
            df_output_data["出力_確率度"] = df_output_data["出力_名称類似度"]
        elif "出力_中科目類似度" in df_output_data.columns:
            df_output_data["出力_確率度"] = df_output_data["出力_中科目類似度"]
        elif "出力_摘要類似度" in df_output_data.columns:
            df_output_data["出力_確率度"] = df_output_data["出力_摘要類似度"]
        elif "出力_単位類似度" in df_output_data.columns:
            df_output_data["出力_確率度"] = df_output_data["出力_単位類似度"]
        else:
            df_output_data["出力_確率度"] = 0.0

        # Fill NaN values and ensure all output columns have proper values
        df_output_data = df_output_data.fillna("")

        # Debug: Print available columns to see what we have
        print(f"Available columns after processing: {list(df_output_data.columns)}")

        # Final check and fallback for missing output columns
        if (
            "出力_中科目" not in df_output_data.columns
            or df_output_data["出力_中科目"].eq("").all()
        ):
            df_output_data["出力_中科目"] = df_output_data.get("中科目", "")

        if (
            "出力_項目名" not in df_output_data.columns
            or df_output_data["出力_項目名"].eq("").all()
        ):
            df_output_data["出力_項目名"] = df_output_data.get("名称", "")

        if (
            "出力_単位" not in df_output_data.columns
            or df_output_data["出力_単位"].eq("").all()
        ):
            df_output_data["出力_単位"] = df_output_data.get("単位", "")

        if "出力_確率度" not in df_output_data.columns:
            df_output_data["出力_確率度"] = 0  # Default confidence score

        # Define output columns in exact order as shown in Excel
        output_columns = [
            "ID",
            "シート名",
            "行",
            "科目",
            "中科目",
            "分類",
            "名称",
            "単位",
            "摘要",
            "備考",
            "出力_科目",
            "出力_中科目",
            "出力_項目名",
            "出力_確率度",
            "出力_標準単位",
            "出力_集計用単位",
        ]

        # Save with utf_8_sig encoding for Japanese Excel compatibility
        df_output_data[output_columns].to_csv(
            output_file_path, index=False, encoding="utf_8_sig"
        )

        # Save all caches
        sentence_service.save_all_caches()

        end_time = time.time()
        execution_time = end_time - start_time
        print(f"Execution time: {execution_time} seconds")

        return FileResponse(
            path=output_file_path,
            filename=f"output_{Path(file.filename).stem}.csv",
            media_type="text/csv",
            headers={
                "Content-Disposition": f'attachment; filename="output_{Path(file.filename).stem}.csv"',
            },
        )

    except Exception as e:
        print(f"Error processing file: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/embeddings")
async def create_embeddings(
    request: EmbeddingRequest,
    current_user=Depends(get_current_user_from_token),
    sentence_service: SentenceTransformerService = Depends(
        lambda: sentence_transformer_service
    ),
):
    """
    Create embeddings for a list of input sentences (requires authentication)
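
    Example (a sketch, with the same mounting/auth assumptions as /predict;
    EmbeddingRequest carries the input as a JSON list under "sentences"):

        curl -X POST http://localhost:8000/embeddings \
            -H "Authorization: Bearer <token>" \
            -H "Content-Type: application/json" \
            -d '{"sentences": ["足場", "コンクリート"]}'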
    """
    try:
        start_time = time.time()
        embeddings = sentence_service.sentenceTransformerHelper.create_embeddings(
            request.sentences
        )
        end_time = time.time()
        execution_time = end_time - start_time
        print(f"Execution time: {execution_time} seconds")
        # Convert numpy array to list for JSON serialization
        embeddings_list = embeddings.tolist()
        return {"embeddings": embeddings_list}
    except Exception as e:
        print(f"Error creating embeddings: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/predict-raw", response_model=PredictRawResponse)
async def predict_raw(
    request: PredictRawRequest,
    current_user=Depends(get_current_user_from_token),
    sentence_service: SentenceTransformerService = Depends(
        lambda: sentence_transformer_service
    ),
):
    """
    Process raw input records and return standardized names (requires authentication)
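
    Example request body (a sketch; the field names follow how PredictRecord
    is read below, with abstract and memo optional):

        {
            "records": [
                {
                    "subject": "<科目>",
                    "sub_subject": "<中科目>",
                    "name_category": "<分類>",
                    "name": "<名称>",
                    "abstract": "",
                    "memo": ""
                }
            ]
        }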
    """
    try:
        # Convert input records to DataFrame
        records_dict = {
            "科目": [],
            "中科目": [],
            "分類": [],
            "名称": [],
            "単位": [],
            "摘要": [],
            "備考": [],
            "シート名": [],  # Required by BaseNameData but not used
            "行": [],  # Required by BaseNameData but not used
        }

        for record in request.records:
            records_dict["科目"].append(record.subject)
            records_dict["中科目"].append(record.sub_subject)
            records_dict["分類"].append(record.name_category)
            records_dict["名称"].append(record.name)
            records_dict["単位"].append("")  # Default empty
            records_dict["摘要"].append(record.abstract or "")
            records_dict["備考"].append(record.memo or "")
            records_dict["シート名"].append("")  # Placeholder
            records_dict["行"].append("")  # Placeholder

        df_input_data = pd.DataFrame(records_dict)

        # Process data similar to the main predict function
        try:
            # Subject mapping
            if sentence_service.df_subject_map_data is not None:
                subject_similarity_mapper = SubjectSimilarityMapper(
                    cached_embedding_helper=sentence_service.subject_cached_embedding_helper,
                    df_map_data=sentence_service.df_subject_map_data,
                )

                list_input_subject = df_input_data["科目"].unique()
                df_subject_data = pd.DataFrame({"科目": list_input_subject})

                subject_similarity_mapper.predict_input(df_input_data=df_subject_data)

                output_subject_map = dict(
                    zip(df_subject_data["科目"], df_subject_data["出力_科目"])
                )
                df_input_data["標準科目"] = df_input_data["科目"].map(
                    output_subject_map
                )
                df_input_data["出力_科目"] = df_input_data["科目"].map(
                    output_subject_map
                )
            else:
                df_input_data["標準科目"] = df_input_data["科目"]
                df_input_data["出力_科目"] = df_input_data["科目"]

        except Exception as e:
            print(f"Error processing SubjectSimilarityMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        try:
            # Name mapping (simplified for raw predict)
            if sentence_service.df_name_map_data is not None:
                name_sentence_mapper = NameSimilarityMapper(
                    cached_embedding_helper=sentence_service.name_cached_embedding_helper,
                    df_map_data=sentence_service.df_name_map_data,
                )
                name_sentence_mapper.predict_input(df_input_data=df_input_data)

        except Exception as e:
            print(f"Error processing NameSimilarityMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        try:
            # Unit mapping
            if sentence_service.df_unit_map_data is not None:
                unit_mapper = UnitSimilarityMapper(
                    cached_embedding_helper=sentence_service.unit_cached_embedding_helper,
                    df_map_data=sentence_service.df_unit_map_data,
                )
                unit_mapper.predict_input(df_input_data=df_input_data)

        except Exception as e:
            print(f"Error processing UnitSimilarityMapper: {e}")
            raise HTTPException(status_code=500, detail=str(e))

        # Ensure required columns exist
        for col in [
            "確定",
            "出力_標準名称",
            "出力_名称類似度",
            "出力_標準単位",
            "出力_単位類似度",
        ]:
            if col not in df_input_data.columns:
                if col in ["出力_名称類似度", "出力_単位類似度"]:
                    df_input_data[col] = 0.0
                else:
                    df_input_data[col] = ""

        # Convert results to response format
        results = []
        for _, row in df_input_data.iterrows():
            result = PredictResult(
                subject=row["科目"],
                sub_subject=row["中科目"],
                name_category=row["分類"],
                name=row["名称"],
                abstract=row["摘要"],
                memo=row["備考"],
                confirmed=row.get("確定", ""),
                standard_subject=row.get("出力_科目", row["科目"]),
                standard_name=row.get("出力_標準名称", ""),
                similarity_score=float(row.get("出力_名称類似度", 0.0)),
            )
            results.append(result)

        # Save all caches
        sentence_service.save_all_caches()

        return PredictRawResponse(results=results)

    except Exception as e:
        print(f"Error processing records: {e}")
        raise HTTPException(status_code=500, detail=str(e))