Spaces:
Sleeping
Sleeping
File size: 2,030 Bytes
5ddcb1d eb5d45e 5ddcb1d eb5d45e 5ddcb1d eb5d45e 5ddcb1d eb5d45e 5ddcb1d eb5d45e 5ddcb1d eb5d45e 5ddcb1d eb5d45e 5ddcb1d eb5d45e 5ddcb1d eb5d45e 5ddcb1d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import numpy as np
import umap
from sklearn.preprocessing import MinMaxScaler
from collections import defaultdict
import random
def color_for_label(label):
    """Return a deterministic CSS ``rgb(...)`` color string for a cluster label.

    Args:
        label: Cluster label; anything ``int()`` accepts. Values that cannot
            be converted are treated like the noise label.

    Returns:
        ``"rgb(150,150,150)"`` for negative (noise) or unconvertible labels,
        otherwise an ``"rgb(r, g, b)"`` string whose channels lie in 50..200
        and depend only on the integer value of the label.
    """
    try:
        label_int = int(label)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric text, NaN or infinity: fall back to the noise color.
        label_int = -1
    if label_int < 0:
        return "rgb(150,150,150)"  # noise points (-1) are drawn in grey
    # Use a private generator instead of reseeding the module-level one: the
    # color sequence for a given seed is the same, but the global random
    # state used by the rest of the program is left untouched.
    rng = random.Random(label_int + 1000)
    return f"rgb({rng.randint(50, 200)}, {rng.randint(50, 200)}, {rng.randint(50, 200)})"
def generate_force_graph(sentences, labels, *, max_edges_per_node=10):
    """Build a force-directed graph spec linking sentences that share a label.

    Args:
        sentences: Indexable sequence of sentences; each one becomes a node
            name and is also used as the link endpoint identifier.
        labels: Cluster labels, paired positionally with ``sentences``.
        max_edges_per_node: Upper bound on the links emitted from one node to
            later members of its own group. Defaults to 10; negative values
            are treated as 0.

    Returns:
        A dict ``{"type": "force", "nodes": [...], "links": [...]}``.
    """
    nodes = []
    members_by_label = defaultdict(list)
    for index, (sentence, label) in enumerate(zip(sentences, labels)):
        color = color_for_label(label)
        nodes.append({
            "name": sentence,
            "symbolSize": 10,
            # NOTE: negative (noise) labels share category 0 with cluster 0.
            "category": int(label) if label >= 0 else 0,
            "itemStyle": {"color": color},
        })
        members_by_label[label].append(index)

    # Cap the number of edges so large clusters do not produce too many links.
    edge_cap = max(0, max_edges_per_node)
    links = []
    for members in members_by_label.values():
        # Indices were appended in ascending order, so the entries after
        # position `pos` are exactly the members with a larger index. Slicing
        # picks the first `edge_cap` of them without rescanning the group.
        for pos, src in enumerate(members):
            for dst in members[pos + 1:pos + 1 + edge_cap]:
                links.append({"source": sentences[src], "target": sentences[dst]})
    return {"type": "force", "nodes": nodes, "links": links}
def generate_bubble_chart(sentences, labels):
    """Summarise how many items carry each label as a bubble-chart spec.

    Args:
        sentences: Accepted for signature symmetry; not read by this function.
        labels: Iterable of hashable cluster labels that support ``>= 0``.

    Returns:
        A dict ``{"type": "bubble", "series": [{"type": "scatter", "data": [...]}]}``
        with one data entry per distinct label, in first-seen order. Each
        entry holds the display name, the item count and the label's color.
    """
    cluster_sizes = {}
    for label in labels:
        cluster_sizes[label] = cluster_sizes.get(label, 0) + 1

    bubbles = []
    for label, size in cluster_sizes.items():
        if label >= 0:
            bubble_name = f"簇{label}"  # "cluster <label>"
        else:
            bubble_name = "噪声"  # "noise"
        bubbles.append({
            "name": bubble_name,
            "value": size,
            "itemStyle": {"color": color_for_label(label)},
        })

    scatter_series = {"type": "scatter", "data": bubbles}
    return {"type": "bubble", "series": [scatter_series]}
def generate_umap_plot(embeddings, labels):
    """Project embeddings to 2-D with UMAP and return a scatter-plot spec.

    Args:
        embeddings: Feature matrix handed directly to ``UMAP.fit_transform``.
        labels: Labels paired positionally with the rows of ``embeddings``;
            each must be convertible with ``int()``.

    Returns:
        A dict ``{"type": "scatter", "series": [{"data": [...]}]}`` where each
        point carries its min-max-scaled ``x``/``y`` coordinates, its integer
        label and the label's color.
    """
    # A fixed random_state keeps the layout reproducible between runs.
    projector = umap.UMAP(n_components=2, random_state=42)
    coords_2d = projector.fit_transform(embeddings)
    # Rescale each axis with MinMaxScaler's default settings.
    normalized = MinMaxScaler().fit_transform(coords_2d)

    points = []
    for (x, y), label in zip(normalized, labels):
        points.append({
            "x": float(x),
            "y": float(y),
            "label": int(label),
            "itemStyle": {"color": color_for_label(label)},
        })
    return {"type": "scatter", "series": [{"data": points}]}
|