=
Add new embeddings and update data processing scripts; remove MiniEncoder
05a2a0c
raw
history blame contribute delete
768 Bytes
## Mini Encoder
from sentence_transformers import SentenceTransformer
## Model 1: mini-encoder
# Instantiated once at import time; get_embeddings() below reuses this instance.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
def get_embeddings(sentences):
    """Encode ``sentences`` with the module-level ``model`` (all-MiniLM-L6-v2).

    Args:
        sentences: Input passed unchanged to ``model.encode``.

    Returns:
        The value produced by ``model.encode(sentences)``.
    """
    return model.encode(sentences)
## Model 2: sentence-transformers/gtr-t5-large
# NOTE(review): this heading previously named intfloat/multilingual-e5-large, but
# the checkpoint actually loaded below is gtr-t5-large; confirm which is intended.
# The import repeats the one at the top of the file; it is redundant but harmless.
from sentence_transformers import SentenceTransformer
# Instantiated once at import time; get_embeddings_big() reuses this instance.
modelbig = SentenceTransformer("sentence-transformers/gtr-t5-large")
# Disabled usage example, kept as an unassigned string literal so it never runs.
# NOTE(review): the snippet reads `embeddings` without computing it first; an
# `embeddings = model.encode(sentences)` step appears to be missing.
"""
sentences = [
"That is a happy person",
"That is a happy dog",
"That is a very happy person",
"Today is a sunny day"
]
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]
"""
def get_embeddings_big(sentences):
    """Encode ``sentences`` with the module-level ``modelbig`` (gtr-t5-large).

    Args:
        sentences: Input passed unchanged to ``modelbig.encode``.

    Returns:
        The value produced by ``modelbig.encode(sentences)``.
    """
    return modelbig.encode(sentences)