Spaces:
Sleeping
Sleeping
import numpy as np | |
import umap | |
from sklearn.preprocessing import MinMaxScaler | |
from collections import defaultdict | |
import random | |
def color_for_label(label):
    """Return a deterministic CSS ``rgb(...)`` color string for a cluster label.

    Args:
        label: Cluster label; anything ``int()`` accepts. Values that cannot
            be converted to an integer, and negative values, are treated as
            noise.

    Returns:
        ``"rgb(150,150,150)"`` for noise, otherwise an ``"rgb(r, g, b)"``
        string whose channels each lie in 50..200 and depend only on the
        integer value of the label.
    """
    try:
        label_int = int(label)
    except (TypeError, ValueError, OverflowError):
        # Unparseable labels (None, text, NaN, infinity) use the noise color.
        label_int = -1
    if label_int < 0:
        return "rgb(150,150,150)"  # noise points (-1) are drawn in gray

    # A private generator seeded from the label keeps the color stable per
    # label without reseeding the module-level random state, which other code
    # in the process may depend on.
    rng = random.Random(label_int + 1000)
    red = rng.randint(50, 200)
    green = rng.randint(50, 200)
    blue = rng.randint(50, 200)
    return f"rgb({red}, {green}, {blue})"
def generate_force_graph(sentences, labels, *, max_edges_per_node=10):
    """Build a force-graph payload that links sentences sharing a label.

    Args:
        sentences: Node names, one per item.
        labels: Cluster label for each sentence, paired positionally with
            ``sentences`` (extra items on the longer input are ignored).
            Labels must support ``>= 0`` and ``int()``.
        max_edges_per_node: Upper bound on the links emitted from each node
            to later members of its own label group; caps the edge count so
            large clusters do not produce a quadratic number of links.

    Returns:
        A dict ``{"type": "force", "nodes": [...], "links": [...]}``. Each
        node carries its sentence as ``name``, a fixed ``symbolSize`` of 10,
        a ``category`` (the integer label, with negative labels mapped to 0)
        and a per-label color. Each link references its endpoints by
        sentence text.

    Raises:
        ValueError: If ``max_edges_per_node`` is negative.
    """
    if max_edges_per_node < 0:
        raise ValueError("max_edges_per_node must be non-negative")

    nodes = []
    members_by_label = defaultdict(list)
    for index, (sentence, label) in enumerate(zip(sentences, labels)):
        nodes.append({
            "name": sentence,
            "symbolSize": 10,
            "category": int(label) if label >= 0 else 0,
            "itemStyle": {"color": color_for_label(label)},
        })
        members_by_label[label].append(index)

    links = []
    for members in members_by_label.values():
        for position, source_index in enumerate(members):
            # Members were appended in ascending index order, so the entries
            # after this position are exactly the nodes with a larger index.
            # Slicing them avoids rescanning the whole group for every node.
            first = position + 1
            for target_index in members[first:first + max_edges_per_node]:
                links.append({
                    "source": sentences[source_index],
                    "target": sentences[target_index],
                })
    return {"type": "force", "nodes": nodes, "links": links}
def generate_bubble_chart(sentences, labels):
    """Summarize cluster sizes as a bubble-chart payload.

    Args:
        sentences: Accepted for signature parity with the other generators;
            not read by this function.
        labels: Iterable of cluster labels; each must support ``>= 0``.

    Returns:
        A dict ``{"type": "bubble", "series": [...]}`` holding one scatter
        series with one entry per distinct label, in first-seen order. Each
        entry has a display name, the number of occurrences of the label as
        ``value``, and the label's color.
    """
    tally = {}
    for label in labels:
        tally[label] = tally.get(label, 0) + 1

    bubbles = []
    for label, size in tally.items():
        # Negative labels are shown under the noise caption.
        if label >= 0:
            title = f"簇{label}"
        else:
            title = "噪声"
        bubbles.append({
            "name": title,
            "value": size,
            "itemStyle": {"color": color_for_label(label)},
        })

    scatter_series = {"type": "scatter", "data": bubbles}
    return {"type": "bubble", "series": [scatter_series]}
def generate_umap_plot(embeddings, labels):
    """Project embeddings to 2-D with UMAP and return a scatter payload.

    Args:
        embeddings: Sized collection of embedding vectors, passed unchanged
            to ``umap.UMAP.fit_transform``.
            NOTE(review): assumed to be array-like of shape
            (n_samples, n_features) -- confirm against the caller.
        labels: Cluster label for each embedding, paired positionally with
            the projected points; each must be convertible with ``int()``.

    Returns:
        A dict ``{"type": "scatter", "series": [{"data": [...]}]}`` where
        each point has min-max scaled ``x`` and ``y`` coordinates, its
        integer ``label`` and the label's color. Empty ``embeddings`` yield
        an empty ``data`` list.
    """
    if len(embeddings) == 0:
        # Neither UMAP nor MinMaxScaler can be fitted on zero samples, so
        # return an empty plot instead of letting them raise.
        # NOTE(review): very small non-empty inputs may still be rejected by
        # UMAP itself -- confirm the minimum size callers can send.
        return {"type": "scatter", "series": [{"data": []}]}

    # Fixed random_state keeps the layout reproducible between calls.
    reducer = umap.UMAP(n_components=2, random_state=42)
    projected = reducer.fit_transform(embeddings)
    scaled = MinMaxScaler().fit_transform(projected)
    data = [
        {
            "x": float(x),
            "y": float(y),
            "label": int(label),
            "itemStyle": {"color": color_for_label(label)},
        }
        for (x, y), label in zip(scaled, labels)
    ]
    return {"type": "scatter", "series": [{"data": data}]}