# Source: meisaicheck-api/services/sentence_transformer_service.py
# Commit 887cb19 ("change service"), vumichien; 1.88 kB
import pickle
from config import (
MODEL_NAME,
SENTENCE_EMBEDDING_FILE,
STANDARD_NAME_MAP_DATA_FILE, SUBJECT_DATA_FILE
)
from sentence_transformer_lib.sentence_transformer_helper import SentenceTransformerHelper
from data_lib.subject_data import SubjectData
from data_lib.standard_name_map_data import StandardNameMapData
class SentenceTransformerService:
    """Hold the sentence-transformer helper and the data loaded alongside it.

    Every attribute starts as ``None``; :meth:`load_model_data` fills them in
    and is intended to be called once at startup.
    """

    def __init__(self):
        # Helper wrapping the model. Also acts as the "already loaded" marker
        # checked by load_model_data().
        self.sentenceTransformerHelper = None
        # Value returned by SubjectData.create_standard_subject_dic_from_file().
        self.dic_standard_subject = None
        # Object unpickled from SENTENCE_EMBEDDING_FILE.
        self.anchor_name_sentence_embeddings = None
        # Not assigned anywhere in the visible code; kept so that any external
        # reader of this attribute keeps working.
        self.sampleData = None
        # Declared up front so that reading it before load_model_data() yields
        # None (like the other attributes) instead of raising AttributeError.
        self.standardNameMapData = None

    def load_model_data(self):
        """Load model and data only once at startup.

        If the helper is already set, the call prints a notice and returns
        without doing anything. Otherwise everything is loaded into local
        variables first and attached to ``self`` only after every step has
        succeeded, so a failure part-way through leaves the service unloaded
        and a later call retries from scratch.

        Raises:
            Whatever the underlying loaders raise (for example ``OSError``
            from ``open`` when the embedding file cannot be read).
        """
        if self.sentenceTransformerHelper is not None:
            print("Model already loaded. Skipping reload.")
            return  # Do not reload if the model is already present.

        print("Loading models and data...")

        # Load sentence transformer model.
        helper = SentenceTransformerHelper(
            convert_to_zenkaku_flag=True, replace_words=None, keywords=None
        )
        helper.load_model_by_name(MODEL_NAME)

        # Load standard subject dictionary.
        dic_standard_subject = SubjectData.create_standard_subject_dic_from_file(
            SUBJECT_DATA_FILE
        )

        # Load pre-computed embeddings.
        # NOTE(security): pickle.load can execute arbitrary code from the file
        # it reads. SENTENCE_EMBEDDING_FILE must be a trusted, locally
        # produced artifact -- never a user-supplied path.
        with open(SENTENCE_EMBEDDING_FILE, "rb") as f:
            anchor_name_sentence_embeddings = pickle.load(f)

        # Load and process the standard-name map data.
        standard_name_map_data = StandardNameMapData()
        standard_name_map_data.load_data_from_csv(STANDARD_NAME_MAP_DATA_FILE)
        standard_name_map_data.process_data(anchor_name_sentence_embeddings)

        # Publish only after every step succeeded. The helper goes last
        # because it is the attribute the early-return guard above checks.
        self.dic_standard_subject = dic_standard_subject
        self.anchor_name_sentence_embeddings = anchor_name_sentence_embeddings
        self.standardNameMapData = standard_name_map_data
        self.sentenceTransformerHelper = helper

        print("Models and data loaded successfully")
# Global instance, intended to be shared as a singleton (the class itself does
# not enforce this). Constructing it at import time only sets attributes to
# None; nothing is loaded until load_model_data() is called on it.
sentence_transformer_service = SentenceTransformerService()