from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter

from backend.utils import logger

logger = logger.get_logger()

# Load the embedding model once at import time so it is reused across calls.
model = SentenceTransformer("all-MiniLM-L6-v2")


def get_text_embedding(text):
    """Encode text into a dense embedding and return it as a plain Python list."""
    try:
        return model.encode(text, convert_to_tensor=True).cpu().numpy().tolist()
    except Exception as e:
        logger.error(f"Error generating embedding: {e}")
        raise


def chunk_text(text, chunk_size=500, chunk_overlap=100):
    """Split text into overlapping chunks suitable for embedding and retrieval."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_text(text)
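

# Minimal usage sketch (not part of the original module): chunk a piece of text
# and embed each chunk. The sample string and the smaller chunk_size/chunk_overlap
# values below are illustrative assumptions, not values used elsewhere in the repo.
if __name__ == "__main__":
    sample = (
        "Retrieval-augmented generation splits documents into chunks, embeds each "
        "chunk, and retrieves the most relevant pieces at query time."
    )
    chunks = chunk_text(sample, chunk_size=80, chunk_overlap=20)
    embeddings = [get_text_embedding(c) for c in chunks]
    logger.info(
        f"Created {len(chunks)} chunks; embedding dimension = {len(embeddings[0])}"
    )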