from sklearn.preprocessing import StandardScaler

# NOTE: this must be the StandardScaler fitted on the training features
# (e.g., loaded from disk), not a fresh, unfitted instance; a fresh
# instance here is only a placeholder.
scaler = StandardScaler()
def encode_sentences(tokenizer, sentence1, sentence2):
    # `tokenizer` is a SentenceTransformer-style embedding model (it is
    # used via `.encode`, not as a tokenizer, despite the parameter name).
    # Encode the sentences into embeddings
    embedding1 = tokenizer.encode(sentence1, convert_to_tensor=True).cpu()
    embedding2 = tokenizer.encode(sentence2, convert_to_tensor=True).cpu()
    # Use the element-wise absolute difference of the embeddings as the feature
    feature = abs(embedding1 - embedding2).numpy().reshape(1, -1)
    # Scale with the same scaler fitted during training. Calling
    # fit_transform here would refit on this single sample, which
    # zeroes out every feature.
    feature_scaled = scaler.transform(feature)
    return feature_scaled
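
# Minimal usage sketch. Assumptions not in the original file: the embedding
# model is a sentence-transformers model (the "all-MiniLM-L6-v2" checkpoint
# is an illustrative choice), and the scaler is fitted on a tiny sample
# feature matrix so that transform() has statistics to use; in the real
# pipeline the scaler would be fitted on the full training features.
if __name__ == "__main__":
    import numpy as np
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("all-MiniLM-L6-v2")  # illustrative model

    # Build |emb(a) - emb(b)| features for a few sample pairs and fit the
    # scaler, standing in for the original training step.
    train_pairs = [
        ("A cat sits.", "A cat is sitting."),
        ("It is raining.", "The sun is out."),
    ]
    train_features = np.vstack([
        abs(model.encode(a, convert_to_tensor=True).cpu()
            - model.encode(b, convert_to_tensor=True).cpu()).numpy()
        for a, b in train_pairs
    ])
    scaler.fit(train_features)

    # Now the helper can scale a new pair with the fitted scaler.
    features = encode_sentences(model, "A cat sits.", "A dog barks.")
    print(features.shape)  # (1, embedding_dim)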