from sklearn.preprocessing import StandardScaler

# NOTE(review): this scaler is created UNFITTED here. For correct inference it
# must be the same scaler that was fitted on the training features (e.g.
# restored with joblib.load). Replace this placeholder with the fitted
# instance, otherwise transform() below raises NotFittedError.
scaler = StandardScaler()


def encode_sentences(tokenizer, sentence1, sentence2):
    """Encode a sentence pair into one scaled feature row.

    Parameters
    ----------
    tokenizer : object
        An encoder exposing ``encode(text, convert_to_tensor=True)`` returning
        a tensor (the name says "tokenizer", but it is used as an embedding
        model — presumably a SentenceTransformer; confirm against the caller).
    sentence1, sentence2 : str
        The two sentences to compare.

    Returns
    -------
    numpy.ndarray of shape (1, n_features)
        Scaled absolute difference of the two sentence embeddings.
    """
    # Embed both sentences; .cpu() so .numpy() works even when the model
    # computed the embeddings on a GPU.
    embedding1 = tokenizer.encode(sentence1, convert_to_tensor=True).cpu()
    embedding2 = tokenizer.encode(sentence2, convert_to_tensor=True).cpu()

    # |e1 - e2| reshaped to a single-row matrix, the 2-D layout sklearn expects.
    feature = abs(embedding1 - embedding2).numpy().reshape(1, -1)

    # BUG FIX: the original called scaler.fit_transform(feature), which
    # re-fits the scaler on this single sample. StandardScaler fitted on one
    # row maps it to all zeros, destroying the features and ignoring the
    # training-time statistics. At inference, only apply the already-fitted
    # scaler.
    feature_scaled = scaler.transform(feature)
    return feature_scaled