# Page header captured along with the code (not part of the program):
# Spaces: Sleeping
from sentence_transformers import SentenceTransformer | |
import hdbscan | |
from sklearn.metrics import silhouette_score, davies_bouldin_score | |
import numpy as np | |
# Name of the embedding model handed to SentenceTransformer.
EMBEDDING_MODEL_NAME = "shibing624/text2vec-bge-large-chinese"

# Built once at import time; cluster_sentences() reads this module-level
# instance to encode its input.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
def cluster_sentences(sentences, *, min_cluster_size=2):
    """Embed sentences, cluster them with HDBSCAN, and score the clustering.

    Args:
        sentences: Sequence of strings passed to ``model.encode``.
        min_cluster_size: Forwarded to ``hdbscan.HDBSCAN``. Defaults to 2,
            the value that was previously hard-coded.

    Returns:
        A ``(labels, embeddings, metrics)`` tuple:

        * ``labels`` -- one cluster id per sentence; ``-1`` marks noise.
        * ``embeddings`` -- whatever ``model.encode`` returned.
        * ``metrics`` -- ``{"silhouette": ..., "db": ...}`` computed over
          the non-noise points only. Both values are ``-1`` when the
          scores are undefined (fewer than two clusters). Note that ``-1``
          is also inside the silhouette coefficient's valid range, so
          callers that need to tell "undefined" from "worst possible"
          should inspect ``labels`` as well.
    """
    embeddings = model.encode(sentences)
    n_sentences = len(embeddings)

    if n_sentences < min_cluster_size:
        # Not enough points to form even one cluster: treat everything as
        # noise instead of handing HDBSCAN an input it cannot cluster.
        labels = np.full(n_sentences, -1, dtype=int)
    else:
        clusterer = hdbscan.HDBSCAN(
            min_cluster_size=min_cluster_size,
            metric='euclidean',
        )
        labels = clusterer.fit_predict(embeddings)

    clustered = labels != -1
    n_clustered = int(np.count_nonzero(clustered))
    n_clusters = int(np.unique(labels[clustered]).size)

    # Both sklearn scores raise ValueError unless
    # 2 <= n_labels <= n_samples - 1, so gate on the number of distinct
    # clusters rather than on the number of clustered points.
    if 2 <= n_clusters < n_clustered:
        silhouette = silhouette_score(embeddings[clustered], labels[clustered])
        db = davies_bouldin_score(embeddings[clustered], labels[clustered])
    else:
        silhouette, db = -1, -1

    return labels, embeddings, {"silhouette": silhouette, "db": db}