strongeryongchao committed on
Commit
8a11c8b
·
verified ·
1 Parent(s): 4cb843b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -6
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from clustering_utils import cluster_sentences
3
  from viz_utils import generate_force_graph, generate_bubble_chart, generate_umap_plot
4
  import pandas as pd
 
5
 
6
  def process_input(sentences, file_obj):
7
  if file_obj:
@@ -10,7 +11,7 @@ def process_input(sentences, file_obj):
10
  sentences = sentences.strip().splitlines()
11
  sentences = [s.strip() for s in sentences if s.strip()]
12
  if not sentences:
13
- return "请输入句子或上传文件", None, None, None, None
14
  labels, embeddings, scores = cluster_sentences(sentences)
15
  df = pd.DataFrame({
16
  "句子": sentences,
@@ -19,7 +20,16 @@ def process_input(sentences, file_obj):
19
  force_data = generate_force_graph(sentences, labels)
20
  bubble_data = generate_bubble_chart(sentences, labels)
21
  umap_data = generate_umap_plot(embeddings, labels)
22
- return f"✅ Silhouette: {scores['silhouette']:.4f}, DB: {scores['db']:.4f}", df, force_data, bubble_data, umap_data
 
 
 
 
 
 
 
 
 
23
 
24
  with gr.Blocks(title="Text2Vec 语义聚类") as demo:
25
  gr.Markdown("## 🧠 中文句子语义聚类(HDBSCAN + BGE)")
@@ -31,11 +41,14 @@ with gr.Blocks(title="Text2Vec 语义聚类") as demo:
31
  df_output = gr.Dataframe(label="聚类结果", interactive=False)
32
  with gr.Tabs():
33
  with gr.Tab("力导图"):
34
- force_plot = gr.JSON(label="Echarts 数据")
 
35
  with gr.Tab("气泡图"):
36
- bubble_plot = gr.JSON(label="Echarts 数据")
 
37
  with gr.Tab("UMAP分布图"):
38
- umap_plot = gr.JSON(label="Echarts 数据")
 
39
  csv_btn = gr.File(label="下载 CSV", visible=False)
40
 
41
  def export_csv(df):
@@ -43,7 +56,13 @@ with gr.Blocks(title="Text2Vec 语义聚类") as demo:
43
  return "cluster_result.csv"
44
 
45
  run_btn.click(process_input, inputs=[txt_input, file_input],
46
- outputs=[status, df_output, force_plot, bubble_plot, umap_plot])
 
 
 
 
 
 
47
  df_output.change(export_csv, inputs=[df_output], outputs=[csv_btn])
48
 
49
  demo.launch()
 
2
  from clustering_utils import cluster_sentences
3
  from viz_utils import generate_force_graph, generate_bubble_chart, generate_umap_plot
4
  import pandas as pd
5
+ import json
6
 
7
  def process_input(sentences, file_obj):
8
  if file_obj:
 
11
  sentences = sentences.strip().splitlines()
12
  sentences = [s.strip() for s in sentences if s.strip()]
13
  if not sentences:
14
+ return "请输入句子或上传文件", None, None, None, None, None, None, None, None
15
  labels, embeddings, scores = cluster_sentences(sentences)
16
  df = pd.DataFrame({
17
  "句子": sentences,
 
20
  force_data = generate_force_graph(sentences, labels)
21
  bubble_data = generate_bubble_chart(sentences, labels)
22
  umap_data = generate_umap_plot(embeddings, labels)
23
+ return (
24
+ f"✅ Silhouette: {scores['silhouette']:.4f}, DB: {scores['db']:.4f}",
25
+ df,
26
+ force_data,
27
+ json.dumps(force_data, ensure_ascii=False, indent=2),
28
+ bubble_data,
29
+ json.dumps(bubble_data, ensure_ascii=False, indent=2),
30
+ umap_data,
31
+ json.dumps(umap_data, ensure_ascii=False, indent=2)
32
+ )
33
 
34
  with gr.Blocks(title="Text2Vec 语义聚类") as demo:
35
  gr.Markdown("## 🧠 中文句子语义聚类(HDBSCAN + BGE)")
 
41
  df_output = gr.Dataframe(label="聚类结果", interactive=False)
42
  with gr.Tabs():
43
  with gr.Tab("力导图"):
44
+ force_plot = gr.JSON(label="力导图结构化数据")
45
+ force_raw = gr.Textbox(label="力导图 JSON 原文", lines=20)
46
  with gr.Tab("气泡图"):
47
+ bubble_plot = gr.JSON(label="气泡图结构化数据")
48
+ bubble_raw = gr.Textbox(label="气泡图 JSON 原文", lines=20)
49
  with gr.Tab("UMAP分布图"):
50
+ umap_plot = gr.JSON(label="UMAP结构化数据")
51
+ umap_raw = gr.Textbox(label="UMAP JSON 原文", lines=20)
52
  csv_btn = gr.File(label="下载 CSV", visible=False)
53
 
54
  def export_csv(df):
 
56
  return "cluster_result.csv"
57
 
58
  run_btn.click(process_input, inputs=[txt_input, file_input],
59
+ outputs=[
60
+ status,
61
+ df_output,
62
+ force_plot, force_raw,
63
+ bubble_plot, bubble_raw,
64
+ umap_plot, umap_raw
65
+ ])
66
  df_output.change(export_csv, inputs=[df_output], outputs=[csv_btn])
67
 
68
  demo.launch()