File size: 934 Bytes
8d078f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from sentence_transformers import SentenceTransformer, models

# Build the embedding pipeline once, at import time, so every call below
# reuses the same loaded model instead of reloading it per request.

# Transformer backbone loaded from the 'law-ai/InLegalBERT' checkpoint, with
# the maximum sequence length set to 512.
word_embedding_model = models.Transformer('law-ai/InLegalBERT', max_seq_length=512)
# Pooling layer sized to the backbone's embedding dimension; mean-token mode
# is enabled so the per-token outputs are combined into one vector per input.
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode_mean_tokens=True
)
# Final encoder: transformer followed by pooling. `embed_clauses` and
# `embed_chunks` call `model.encode(...)` on this object.
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

def embed_clauses(clause_reference):
    """Attach an embedding to every clause in ``clause_reference``.

    The ``'text'`` value of each clause is collected and encoded in a single
    call to the module-level ``model``; each resulting vector is then stored
    back on its clause under the ``'embedding'`` key as a plain list.

    Args:
        clause_reference: Sequence of dict-like clauses, each providing a
            ``'text'`` entry. The clauses are modified in place.

    Returns:
        The same ``clause_reference`` object that was passed in, with
        ``'embedding'`` set on every clause.

    Raises:
        KeyError: If a clause has no ``'text'`` entry.
    """
    texts = [clause['text'] for clause in clause_reference]

    # One batched call for all texts; relies on encode() returning one vector
    # per input text, in input order.
    embeddings = model.encode(texts)

    for clause, embedding in zip(clause_reference, embeddings):
        # .tolist() converts the vector into a plain Python list so the
        # clause no longer holds an array object.
        clause['embedding'] = embedding.tolist()

    return clause_reference

def embed_chunks(chunks):
    """Encode text chunks and pair each chunk with its embedding.

    All chunks are encoded in a single call to the module-level ``model``.

    Args:
        chunks: Sequence of chunks to embed, passed as-is to
            ``model.encode``. NOTE(review): presumably a list of strings;
            confirm against the caller.

    Returns:
        A new list with one dict per chunk, in input order, of the form
        ``{"chunk": <chunk>, "embedding": <vector as a plain list>}``.
    """
    # Relies on encode() returning one vector per chunk, in input order.
    embeddings = model.encode(chunks)

    return [
        {
            "chunk": chunk,
            # .tolist() converts the vector into a plain Python list.
            "embedding": embedding.tolist(),
        }
        for chunk, embedding in zip(chunks, embeddings)
    ]