from sentence_transformers import SentenceTransformer
import hdbscan

# Embedding model shared by every call; constructed once at import time so
# repeated calls to cluster_items() do not rebuild it.
model = SentenceTransformer("all-MiniLM-L6-v2")


def cluster_items(items, min_cluster_size=2):
    """Group items by the similarity of their titles and render a text report.

    Each item's ``"title"`` is embedded with the module-level ``model`` and
    the embeddings are clustered with HDBSCAN (euclidean metric). Titles
    labelled ``-1`` (noise) are left out of the report.

    Args:
        items: Iterable of mappings, each providing a ``"title"`` key.
        min_cluster_size: Smallest group HDBSCAN may report as a cluster.

    Returns:
        ``[]`` when ``items`` is empty (kept for backward compatibility);
        otherwise a string. The string is either the formatted cluster
        listing or ``"No meaningful clusters found."`` when nothing
        clustered.

    Raises:
        KeyError: If an item has no ``"title"`` key.
    """
    texts = [item["title"] for item in items]
    if not texts:
        return []

    # Fewer titles than the minimum cluster size can never form a cluster,
    # and HDBSCAN may reject such tiny inputs outright; short-circuit with
    # the same message an all-noise result produces.
    if len(texts) < min_cluster_size:
        return "No meaningful clusters found."

    embeddings = model.encode(texts)
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=min_cluster_size,
        metric="euclidean",
    )
    labels = clusterer.fit_predict(embeddings)

    # Bucket titles by cluster label, preserving first-appearance order.
    clusters = {}
    for label, text in zip(labels, texts):
        if label == -1:  # noise point: belongs to no cluster
            continue
        clusters.setdefault(label, []).append(text)

    cluster_text = []
    for i, titles in clusters.items():
        cluster_text.append(f"🔸 Cluster {i} ({len(titles)} items):")
        cluster_text.extend(f"- {t}" for t in titles)
        cluster_text.append("")  # add space between clusters

    # An empty report joins to "", which is falsy, so the fallback applies.
    return "\n".join(cluster_text) or "No meaningful clusters found."