Spaces:
Sleeping
Sleeping
File size: 644 Bytes
c899329 8853856 c899329 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from backend.utils import logger
# Rebind `logger` from the imported `backend.utils.logger` module to the
# object returned by its get_logger(); after this line the module itself is
# no longer reachable under this name in this file.
logger = logger.get_logger()
# Single SentenceTransformer instance, constructed once at import time and
# reused by get_text_embedding() on every call.
model = SentenceTransformer("all-MiniLM-L6-v2")
def get_text_embedding(text):
    """Encode *text* with the module-level model and return a plain list.

    The encoder is asked for a tensor, which is then moved to the CPU,
    converted to a NumPy array and finally to (possibly nested) Python lists.

    Args:
        text: Input forwarded unchanged to ``model.encode``.

    Returns:
        The embedding as built-in Python lists/floats (``ndarray.tolist()``).

    Raises:
        Exception: Any error raised during encoding or conversion is logged
            and then re-raised unchanged.
    """
    try:
        embedding_tensor = model.encode(text, convert_to_tensor=True)
        # Move to host memory first; .numpy() is called on the CPU copy.
        host_array = embedding_tensor.cpu().numpy()
        return host_array.tolist()
    except Exception as e:
        logger.error(f"Error generating embedding: {e}")
        raise
def chunk_text(text, chunk_size=500, chunk_overlap=100):
    """Split *text* into chunks with a RecursiveCharacterTextSplitter.

    Args:
        text: The text to split.
        chunk_size: Passed to the splitter as ``chunk_size`` (default 500).
        chunk_overlap: Passed to the splitter as ``chunk_overlap``
            (default 100).

    Returns:
        Whatever ``split_text`` returns for *text* (a list of string chunks
        according to the LangChain documentation).
    """
    # A fresh splitter is built per call so the size/overlap arguments apply.
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_text(text)