sagar008's picture
Create embedder.py
8d078f8 verified
raw
history blame contribute delete
934 Bytes
from sentence_transformers import SentenceTransformer, models
# Transformer backbone: the 'law-ai/InLegalBERT' checkpoint, with max_seq_length set to 512.
word_embedding_model = models.Transformer('law-ai/InLegalBERT', max_seq_length=512)
# Pooling layer sized to the backbone's word-embedding dimension, with mean-token pooling enabled.
pooling_model = models.Pooling(
word_embedding_model.get_word_embedding_dimension(),
pooling_mode_mean_tokens=True
)
# Module-level encoder used by the embedding helpers in this file.
# NOTE: it is constructed when the module is imported, not lazily on first use.
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
def embed_clauses(clause_reference):
    """Attach an embedding to every clause in *clause_reference*.

    Args:
        clause_reference: A sized sequence of dicts, each with a ``'text'``
            key holding the string to embed.

    Returns:
        The same ``clause_reference`` object. Each dict is mutated in place
        to gain an ``'embedding'`` key holding that clause's vector,
        converted with ``.tolist()``.

    Raises:
        KeyError: If a clause has no ``'text'`` key.
    """
    # Nothing to encode: return early instead of sending an empty batch to
    # the encoder. len() is used rather than truthiness so array-like
    # inputs are handled too.
    if len(clause_reference) == 0:
        return clause_reference

    texts = [clause['text'] for clause in clause_reference]
    # All texts are encoded in one call so the encoder can batch them.
    embeddings = model.encode(texts)
    for clause, embedding in zip(clause_reference, embeddings):
        clause['embedding'] = embedding.tolist()
    return clause_reference
def embed_chunks(chunks):
    """Embed each chunk and pair it with its vector.

    Args:
        chunks: A sized sequence of inputs accepted by ``model.encode``
            (presumably text strings; confirm against callers).

    Returns:
        A new list with one dict per chunk, in input order, of the form
        ``{"chunk": <original chunk>, "embedding": <vector as a list>}``.
        An empty input yields an empty list.
    """
    # Nothing to encode: return early instead of sending an empty batch to
    # the encoder. len() is used rather than truthiness so array-like
    # inputs are handled too.
    if len(chunks) == 0:
        return []

    # All chunks are encoded in one call so the encoder can batch them.
    embeddings = model.encode(chunks)
    return [
        {"chunk": chunk, "embedding": embedding.tolist()}
        for chunk, embedding in zip(chunks, embeddings)
    ]