Spaces:
Sleeping
Sleeping
File size: 934 Bytes
8d078f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
from sentence_transformers import SentenceTransformer, models
# Build the sentence-embedding pipeline once, at import time; the embedding
# functions in this module reuse the resulting `model`.
# Transformer module wrapping the 'law-ai/InLegalBERT' checkpoint, configured
# with max_seq_length=512.
word_embedding_model = models.Transformer('law-ai/InLegalBERT', max_seq_length=512)
# Pooling module sized to the transformer's word-embedding dimension, with
# mean-token pooling enabled.
pooling_model = models.Pooling(
word_embedding_model.get_word_embedding_dimension(),
pooling_mode_mean_tokens=True
)
# Chain the two modules (transformer, then pooling) into the
# SentenceTransformer used for all encoding below.
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
def embed_clauses(clause_reference):
    """Attach an embedding to every clause in *clause_reference*.

    Each clause must be a mapping with a ``'text'`` entry. All texts are
    encoded in a single ``model.encode`` call and the resulting vector for
    each clause is stored, converted with ``.tolist()``, under the clause's
    ``'embedding'`` key.

    Args:
        clause_reference: Sequence of clause dicts, each holding ``'text'``.

    Returns:
        The same ``clause_reference`` object; its clauses are mutated in
        place rather than copied.

    Raises:
        KeyError: If a clause has no ``'text'`` entry.
    """
    texts = [clause['text'] for clause in clause_reference]
    if not texts:
        # Nothing to encode: skip the model call entirely.
        return clause_reference

    embeddings = model.encode(texts)
    # encode() yields one vector per input text, in input order, so the
    # clauses and their embeddings can be walked in lockstep.
    for clause, embedding in zip(clause_reference, embeddings):
        clause['embedding'] = embedding.tolist()
    return clause_reference
def embed_chunks(chunks):
    """Encode *chunks* and pair each chunk with its embedding.

    All chunks are encoded in a single ``model.encode`` call.

    Args:
        chunks: Sequence of chunks to embed (presumably text strings;
            confirm against the caller).

    Returns:
        A new list with one dict per chunk, in input order, of the form
        ``{"chunk": <chunk>, "embedding": <vector converted with .tolist()>}``.
        An empty input yields an empty list.
    """
    # len() rather than truthiness so that a None argument still raises
    # TypeError, as it did before, instead of silently returning [].
    if len(chunks) == 0:
        # Nothing to encode: skip the model call entirely.
        return []

    embeddings = model.encode(chunks)
    # encode() yields one vector per input chunk, in input order.
    return [
        {
            "chunk": chunk,
            "embedding": embedding.tolist(),
        }
        for chunk, embedding in zip(chunks, embeddings)
    ]