Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
from clustering_utils import cluster_sentences
|
3 |
from viz_utils import generate_force_graph, generate_bubble_chart, generate_umap_plot
|
4 |
import pandas as pd
|
|
|
5 |
|
6 |
def process_input(sentences, file_obj):
|
7 |
if file_obj:
|
@@ -10,7 +11,7 @@ def process_input(sentences, file_obj):
|
|
10 |
sentences = sentences.strip().splitlines()
|
11 |
sentences = [s.strip() for s in sentences if s.strip()]
|
12 |
if not sentences:
|
13 |
-
return "请输入句子或上传文件", None, None, None, None
|
14 |
labels, embeddings, scores = cluster_sentences(sentences)
|
15 |
df = pd.DataFrame({
|
16 |
"句子": sentences,
|
@@ -19,7 +20,16 @@ def process_input(sentences, file_obj):
|
|
19 |
force_data = generate_force_graph(sentences, labels)
|
20 |
bubble_data = generate_bubble_chart(sentences, labels)
|
21 |
umap_data = generate_umap_plot(embeddings, labels)
|
22 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
with gr.Blocks(title="Text2Vec 语义聚类") as demo:
|
25 |
gr.Markdown("## 🧠 中文句子语义聚类(HDBSCAN + BGE)")
|
@@ -31,11 +41,14 @@ with gr.Blocks(title="Text2Vec 语义聚类") as demo:
|
|
31 |
df_output = gr.Dataframe(label="聚类结果", interactive=False)
|
32 |
with gr.Tabs():
|
33 |
with gr.Tab("力导图"):
|
34 |
-
force_plot = gr.JSON(label="
|
|
|
35 |
with gr.Tab("气泡图"):
|
36 |
-
bubble_plot = gr.JSON(label="
|
|
|
37 |
with gr.Tab("UMAP分布图"):
|
38 |
-
umap_plot = gr.JSON(label="
|
|
|
39 |
csv_btn = gr.File(label="下载 CSV", visible=False)
|
40 |
|
41 |
def export_csv(df):
|
@@ -43,7 +56,13 @@ with gr.Blocks(title="Text2Vec 语义聚类") as demo:
|
|
43 |
return "cluster_result.csv"
|
44 |
|
45 |
run_btn.click(process_input, inputs=[txt_input, file_input],
|
46 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
df_output.change(export_csv, inputs=[df_output], outputs=[csv_btn])
|
48 |
|
49 |
demo.launch()
|
|
|
2 |
from clustering_utils import cluster_sentences
|
3 |
from viz_utils import generate_force_graph, generate_bubble_chart, generate_umap_plot
|
4 |
import pandas as pd
|
5 |
+
import json
|
6 |
|
7 |
def process_input(sentences, file_obj):
|
8 |
if file_obj:
|
|
|
11 |
sentences = sentences.strip().splitlines()
|
12 |
sentences = [s.strip() for s in sentences if s.strip()]
|
13 |
if not sentences:
|
14 |
+
return "请输入句子或上传文件", None, None, None, None, None, None, None
|
15 |
labels, embeddings, scores = cluster_sentences(sentences)
|
16 |
df = pd.DataFrame({
|
17 |
"句子": sentences,
|
|
|
20 |
force_data = generate_force_graph(sentences, labels)
|
21 |
bubble_data = generate_bubble_chart(sentences, labels)
|
22 |
umap_data = generate_umap_plot(embeddings, labels)
|
23 |
+
return (
|
24 |
+
f"✅ Silhouette: {scores['silhouette']:.4f}, DB: {scores['db']:.4f}",
|
25 |
+
df,
|
26 |
+
force_data,
|
27 |
+
json.dumps(force_data, ensure_ascii=False, indent=2),
|
28 |
+
bubble_data,
|
29 |
+
json.dumps(bubble_data, ensure_ascii=False, indent=2),
|
30 |
+
umap_data,
|
31 |
+
json.dumps(umap_data, ensure_ascii=False, indent=2)
|
32 |
+
)
|
33 |
|
34 |
with gr.Blocks(title="Text2Vec 语义聚类") as demo:
|
35 |
gr.Markdown("## 🧠 中文句子语义聚类(HDBSCAN + BGE)")
|
|
|
41 |
df_output = gr.Dataframe(label="聚类结果", interactive=False)
|
42 |
with gr.Tabs():
|
43 |
with gr.Tab("力导图"):
|
44 |
+
force_plot = gr.JSON(label="力导图结构化数据")
|
45 |
+
force_raw = gr.Textbox(label="力导图 JSON 原文", lines=20)
|
46 |
with gr.Tab("气泡图"):
|
47 |
+
bubble_plot = gr.JSON(label="气泡图结构化数据")
|
48 |
+
bubble_raw = gr.Textbox(label="气泡图 JSON 原文", lines=20)
|
49 |
with gr.Tab("UMAP分布图"):
|
50 |
+
umap_plot = gr.JSON(label="UMAP结构化数据")
|
51 |
+
umap_raw = gr.Textbox(label="UMAP JSON 原文", lines=20)
|
52 |
csv_btn = gr.File(label="下载 CSV", visible=False)
|
53 |
|
54 |
def export_csv(df):
|
|
|
56 |
return "cluster_result.csv"
|
57 |
|
58 |
run_btn.click(process_input, inputs=[txt_input, file_input],
|
59 |
+
outputs=[
|
60 |
+
status,
|
61 |
+
df_output,
|
62 |
+
force_plot, force_raw,
|
63 |
+
bubble_plot, bubble_raw,
|
64 |
+
umap_plot, umap_raw
|
65 |
+
])
|
66 |
df_output.change(export_csv, inputs=[df_output], outputs=[csv_btn])
|
67 |
|
68 |
demo.launch()
|