File size: 899 Bytes
68e42b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from sentence_transformers import SentenceTransformer
import hdbscan

# Load the embedding model once at import time; cluster_items() reuses this
# shared instance on every call instead of constructing a new one.
model = SentenceTransformer("all-MiniLM-L6-v2")

def cluster_items(items, min_cluster_size=2):
    """Group item titles into clusters and render them as a text report.

    Titles are embedded with the module-level ``model`` and the embeddings
    are clustered with HDBSCAN (euclidean metric). Titles that HDBSCAN labels
    ``-1`` (noise) are left out of the report.

    Args:
        items: Iterable of mappings, each providing a ``"title"`` key.
        min_cluster_size: Forwarded to ``hdbscan.HDBSCAN`` as the smallest
            group size that counts as a cluster.

    Returns:
        ``[]`` when ``items`` yields no titles (kept as-is for backward
        compatibility with existing callers). Otherwise a string with one
        ``"🔸 Cluster <label> (<n> items):"`` header per cluster followed by
        ``"- <title>"`` lines and a blank separator line, or
        ``"No meaningful clusters found."`` when no cluster was formed.

    Raises:
        KeyError: If an item has no ``"title"`` key.
    """
    texts = [item["title"] for item in items]
    if not texts:
        # NOTE: returns a list here but a string everywhere else; preserved
        # so callers relying on the empty-list result keep working.
        return []

    # With fewer titles than min_cluster_size no cluster can possibly form,
    # so skip the embedding and clustering work (which may also fail on such
    # tiny inputs) and report the same result the normal path would give.
    if len(texts) < min_cluster_size:
        return "No meaningful clusters found."

    embeddings = model.encode(texts)
    clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size, metric="euclidean")
    labels = clusterer.fit_predict(embeddings)

    # Group titles by cluster label, keeping first-seen order of the labels.
    clusters = {}
    for label, text in zip(labels, texts):
        if label == -1:  # -1 marks noise points that belong to no cluster
            continue
        clusters.setdefault(label, []).append(text)

    cluster_text = []
    for label, titles in clusters.items():
        cluster_text.append(f"🔸 Cluster {label} ({len(titles)} items):")
        cluster_text.extend(f"- {title}" for title in titles)
        cluster_text.append("")  # blank line between clusters

    # An empty join result means every title was classified as noise.
    return "\n".join(cluster_text) or "No meaningful clusters found."