# NOTE: page-capture residue ("Spaces: Sleeping"); not part of the program.
from sentence_transformers import SentenceTransformer, models

# Checkpoint identifier and token limit handed to the transformer module.
# Kept as named constants so they can be found and changed in one place.
MODEL_NAME = "law-ai/InLegalBERT"
MAX_SEQ_LENGTH = 512

# Transformer module wrapping the checkpoint named above.
word_embedding_model = models.Transformer(MODEL_NAME, max_seq_length=MAX_SEQ_LENGTH)

# Mean pooling over token embeddings, sized from the transformer's
# reported word-embedding dimension.
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode_mean_tokens=True,
)

# Shared encoder (transformer followed by pooling) used by the embedding
# helpers in this module.
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
def embed_clauses(clause_reference):
    """Attach an embedding to every clause in *clause_reference*.

    The ``'text'`` value of each clause is encoded with the module-level
    ``model`` in a single ``encode`` call, and the resulting vector is
    stored on the clause under the ``'embedding'`` key after conversion
    with ``.tolist()``.

    Args:
        clause_reference: Sized, ordered collection of dict-like clauses,
            each with a ``'text'`` entry. The clauses are modified in
            place.

    Returns:
        The same ``clause_reference`` object, with ``'embedding'`` set on
        every clause.

    Raises:
        KeyError: If a dict clause has no ``'text'`` entry.
    """
    # Nothing to encode: skip the model call entirely. len() is used rather
    # than truthiness so array-like inputs are accepted as well.
    if len(clause_reference) == 0:
        return clause_reference

    texts = [clause['text'] for clause in clause_reference]
    embeddings = model.encode(texts)

    # Embeddings are paired with clauses positionally, in input order.
    for clause, embedding in zip(clause_reference, embeddings):
        clause['embedding'] = embedding.tolist()
    return clause_reference
def embed_chunks(chunks):
    """Encode *chunks* and pair each chunk with its embedding.

    All chunks are passed to the module-level ``model`` in a single
    ``encode`` call; each embedding is converted with ``.tolist()``.

    Args:
        chunks: Sized, ordered collection of chunks to encode
            (presumably strings; confirm against the caller).

    Returns:
        A new list with one dict per chunk, in input order, of the form
        ``{"chunk": <chunk>, "embedding": <list>}``. Empty input yields an
        empty list.
    """
    # Nothing to encode: skip the model call entirely. len() is used rather
    # than truthiness so array-like inputs are accepted as well.
    if len(chunks) == 0:
        return []

    embeddings = model.encode(chunks)

    # Embeddings are paired with chunks positionally, in input order.
    return [
        {"chunk": chunk, "embedding": embedding.tolist()}
        for chunk, embedding in zip(chunks, embeddings)
    ]