"""Embedding helpers built on the InLegalBERT transformer with mean pooling."""

from sentence_transformers import SentenceTransformer, models

# Transformer backbone; inputs are capped at 512 tokens via max_seq_length.
word_embedding_model = models.Transformer('law-ai/InLegalBERT', max_seq_length=512)

# Mean-pool the token embeddings into one fixed-size vector per input text.
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode_mean_tokens=True,
)

# Shared encoder used by every helper in this module (loaded once at import).
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])


def embed_clauses(clause_reference):
    """Attach an embedding to every clause in ``clause_reference``.

    Args:
        clause_reference: Sequence of dict-like clauses. Each clause must
            provide a ``'text'`` entry, which is the string that gets encoded.

    Returns:
        The same ``clause_reference`` object. Each clause is modified in
        place: its ``'embedding'`` entry is set to the encoded vector
        converted to a plain Python list via ``.tolist()``.

    Raises:
        KeyError: If a clause has no ``'text'`` entry.
    """
    texts = [clause['text'] for clause in clause_reference]

    # Encode all texts in a single call rather than one call per clause.
    embeddings = model.encode(texts)

    # Embeddings are paired with clauses by position.
    for clause, embedding in zip(clause_reference, embeddings):
        clause['embedding'] = embedding.tolist()
    return clause_reference


def embed_chunks(chunks):
    """Encode ``chunks`` and pair each chunk with its embedding.

    Args:
        chunks: Sequence of texts passed as-is to ``model.encode``.

    Returns:
        A new list with one dict per chunk, in input order, of the form
        ``{"chunk": <original chunk>, "embedding": <vector as a list>}``.
    """
    embeddings = model.encode(chunks)
    return [
        {"chunk": chunk, "embedding": embedding.tolist()}
        for chunk, embedding in zip(chunks, embeddings)
    ]