File size: 2,030 Bytes
5ddcb1d
 
 
 
 
 
 
eb5d45e
 
 
 
 
 
 
5ddcb1d
 
 
 
 
eb5d45e
 
5ddcb1d
 
eb5d45e
 
5ddcb1d
 
eb5d45e
 
5ddcb1d
eb5d45e
5ddcb1d
 
 
eb5d45e
 
 
5ddcb1d
 
 
 
 
 
eb5d45e
5ddcb1d
 
 
eb5d45e
5ddcb1d
 
eb5d45e
5ddcb1d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
import umap
from sklearn.preprocessing import MinMaxScaler
from collections import defaultdict
import random

def color_for_label(label):
    """Return a deterministic CSS ``rgb(...)`` color string for a cluster label.

    Args:
        label: Cluster label. Anything ``int()`` accepts is allowed; values
            that cannot be converted are treated the same as a negative label.

    Returns:
        ``"rgb(150,150,150)"`` (gray) for negative or unconvertible labels,
        otherwise an ``"rgb(r, g, b)"`` string whose channels are each in
        the range 50..200 and depend only on the integer label value.
    """
    try:
        label_int = int(label)
    except (TypeError, ValueError, OverflowError):
        # Not readable as an integer (None, free text, inf/nan, ...):
        # fall back to the noise label instead of failing.
        label_int = -1
    if label_int < 0:
        return "rgb(150,150,150)"  # noise points (-1) are drawn in gray
    # A private generator seeded per label yields the same channel values as
    # seeding the module-level RNG would, but leaves the global random state
    # untouched for every other user of ``random`` in the process.
    rng = random.Random(label_int + 1000)
    red = rng.randint(50, 200)
    green = rng.randint(50, 200)
    blue = rng.randint(50, 200)
    return f"rgb({red}, {green}, {blue})"

def generate_force_graph(sentences, labels, *, max_edges_per_node=10):
    """Build a force-directed graph description linking same-label sentences.

    One node is produced per ``(sentence, label)`` pair. Nodes that share a
    label are linked, but each node only starts links to the next
    ``max_edges_per_node`` members of its group (in input order) so that
    large groups do not produce a quadratic number of edges.

    Args:
        sentences: Indexable sequence of node names.
        labels: Numeric cluster labels, paired with ``sentences`` by
            position. Pairing stops at the shorter of the two inputs.
        max_edges_per_node: Upper bound on the links started from a single
            node. Values of zero or less produce no links.

    Returns:
        ``{"type": "force", "nodes": [...], "links": [...]}``. Links refer to
        nodes by sentence text, not by index.

    Note:
        Negative labels are given category ``0``, the same category as
        cluster ``0``; only the node color tells the two apart. All items
        sharing a negative label are also linked to each other like any
        other group.
    """
    nodes = []
    label_map = defaultdict(list)

    for index, (sentence, label) in enumerate(zip(sentences, labels)):
        color = color_for_label(label)
        nodes.append({
            "name": sentence,
            "symbolSize": 10,
            "category": int(label) if label >= 0 else 0,
            "itemStyle": {"color": color},
        })
        label_map[label].append(index)

    # Clamp so that a negative cap cannot turn into a from-the-end slice bound.
    edge_cap = max(0, max_edges_per_node)

    links = []
    for group in label_map.values():
        # Indices were appended in ascending order, so "the later members of
        # the group" is simply the slice after the current position; no need
        # to rescan the whole group for every node.
        for position, source in enumerate(group):
            successors = group[position + 1:position + 1 + edge_cap]
            for target in successors:
                links.append({"source": sentences[source], "target": sentences[target]})
    return {"type": "force", "nodes": nodes, "links": links}

def generate_bubble_chart(sentences, labels):
    """Summarize cluster sizes as a bubble-chart description.

    Args:
        sentences: Accepted for signature parity with the other chart
            builders; not read by this function.
        labels: Iterable of numeric cluster labels, one per item.

    Returns:
        ``{"type": "bubble", "series": [{"type": "scatter", "data": [...]}]}``
        with one data entry per distinct label, in first-seen order. Each
        entry carries a display name (a cluster name for non-negative
        labels, a fixed noise name otherwise), the member count as
        ``value``, and the label's color.
    """
    cluster_sizes = {}
    for label in labels:
        cluster_sizes[label] = cluster_sizes.get(label, 0) + 1

    bubbles = []
    for label, size in cluster_sizes.items():
        if label >= 0:
            title = f"簇{label}"
        else:
            title = "噪声"
        bubbles.append({
            "name": title,
            "value": size,
            "itemStyle": {"color": color_for_label(label)},
        })

    scatter_series = {"type": "scatter", "data": bubbles}
    return {"type": "bubble", "series": [scatter_series]}

def generate_umap_plot(embeddings, labels):
    """Project embeddings to 2-D with UMAP and describe them as a scatter plot.

    Args:
        embeddings: Sized collection of embedding vectors, one per item
            (assumed to be a 2-D array-like accepted by UMAP; confirm
            against the caller).
        labels: Cluster labels convertible with ``int()``, paired with the
            embeddings by position.

    Returns:
        ``{"type": "scatter", "series": [{"data": [...]}]}`` where every
        point has min-max scaled ``x``/``y`` coordinates, its integer
        ``label`` and the label's color. An empty input yields an empty
        ``data`` list.
    """
    # With nothing to project, return an empty chart (as the other builders
    # do for empty input) instead of letting the reducer fail on zero samples.
    if len(embeddings) == 0:
        return {"type": "scatter", "series": [{"data": []}]}

    # Fixed random_state keeps the layout reproducible between calls.
    reducer = umap.UMAP(n_components=2, random_state=42)
    projected = reducer.fit_transform(embeddings)
    # Rescale both axes so the front end can use a fixed coordinate range.
    scaled = MinMaxScaler().fit_transform(projected)

    data = [
        {
            "x": float(x),
            "y": float(y),
            "label": int(label),
            "itemStyle": {"color": color_for_label(label)},
        }
        for (x, y), label in zip(scaled, labels)
    ]
    return {"type": "scatter", "series": [{"data": data}]}