File size: 899 Bytes
68e42b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
from sentence_transformers import SentenceTransformer
import hdbscan
# Shared sentence-embedding model, instantiated once when this module is
# imported; cluster_items() calls model.encode() on it to embed titles.
model = SentenceTransformer("all-MiniLM-L6-v2")
def cluster_items(items, min_cluster_size: int = 2):
    """Cluster item titles by embedding similarity and render a text report.

    Every title is embedded with the module-level ``model`` and the
    embeddings are clustered with HDBSCAN using the euclidean metric.
    Titles that HDBSCAN labels ``-1`` (noise) are omitted from the report.

    Args:
        items: Iterable of mappings, each providing a ``"title"`` key.
        min_cluster_size: Forwarded to ``hdbscan.HDBSCAN`` as its
            ``min_cluster_size`` argument.

    Returns:
        ``[]`` when ``items`` yields no titles (kept as-is for backward
        compatibility with existing callers). Otherwise a string: one
        ``"🔸 Cluster <label> (<n> items):"`` header per cluster followed
        by ``"- <title>"`` lines and a blank line, or
        ``"No meaningful clusters found."`` when no cluster was formed.

    Raises:
        KeyError: If an item mapping has no ``"title"`` key.
    """
    texts = [item["title"] for item in items]
    if not texts:
        return []

    # With fewer titles than min_cluster_size no cluster can exist, so skip
    # the embedding and clustering work entirely. This also avoids handing
    # HDBSCAN a degenerate (e.g. single-sample) input, which it may reject.
    if len(texts) < min_cluster_size:
        return "No meaningful clusters found."

    embeddings = model.encode(texts)
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=min_cluster_size,
        metric="euclidean",
    )
    labels = clusterer.fit_predict(embeddings)

    # Group titles by cluster label, keeping first-seen label order and
    # dropping noise points (label -1).
    clusters = {}
    for label, text in zip(labels, texts):
        if label == -1:
            continue
        clusters.setdefault(label, []).append(text)

    report_lines = []
    for label, titles in clusters.items():
        report_lines.append(f"🔸 Cluster {label} ({len(titles)} items):")
        report_lines.extend(f"- {title}" for title in titles)
        report_lines.append("")  # blank line between clusters

    # An empty report joins to "", which is falsy, so the fallback applies.
    return "\n".join(report_lines) or "No meaningful clusters found."
|