Spaces:

strongeryongchao
/

sentcluster

Sleeping

App Files Files Community

strongeryongchao committed on Jun 14

Commit

8a11c8b

verified ·

1 Parent(s): 4cb843b

Upload app.py

Browse files

Files changed (1) hide show

app.py +25 -6

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 from clustering_utils import cluster_sentences
 from viz_utils import generate_force_graph, generate_bubble_chart, generate_umap_plot
 import pandas as pd
 def process_input(sentences, file_obj):
     if file_obj:
@@ -10,7 +11,7 @@ def process_input(sentences, file_obj):
         sentences = sentences.strip().splitlines()
     sentences = [s.strip() for s in sentences if s.strip()]
     if not sentences:
-        return "请输入句子或上传文件", None, None, None, None
     labels, embeddings, scores = cluster_sentences(sentences)
     df = pd.DataFrame({
         "句子": sentences,
@@ -19,7 +20,16 @@ def process_input(sentences, file_obj):
     force_data = generate_force_graph(sentences, labels)
     bubble_data = generate_bubble_chart(sentences, labels)
     umap_data = generate_umap_plot(embeddings, labels)
-    return f"✅ Silhouette: {scores['silhouette']:.4f}, DB: {scores['db']:.4f}", df, force_data, bubble_data, umap_data
 with gr.Blocks(title="Text2Vec 语义聚类") as demo:
     gr.Markdown("## 🧠 中文句子语义聚类（HDBSCAN + BGE）")
@@ -31,11 +41,14 @@ with gr.Blocks(title="Text2Vec 语义聚类") as demo:
     df_output = gr.Dataframe(label="聚类结果", interactive=False)
     with gr.Tabs():
         with gr.Tab("力导图"):
-            force_plot = gr.JSON(label="Echarts 数据")
         with gr.Tab("气泡图"):
-            bubble_plot = gr.JSON(label="Echarts 数据")
         with gr.Tab("UMAP分布图"):
-            umap_plot = gr.JSON(label="Echarts 数据")
     csv_btn = gr.File(label="下载 CSV", visible=False)
     def export_csv(df):
@@ -43,7 +56,13 @@ with gr.Blocks(title="Text2Vec 语义聚类") as demo:
         return "cluster_result.csv"
     run_btn.click(process_input, inputs=[txt_input, file_input],
-                  outputs=[status, df_output, force_plot, bubble_plot, umap_plot])
     df_output.change(export_csv, inputs=[df_output], outputs=[csv_btn])
 demo.launch()

 from clustering_utils import cluster_sentences
 from viz_utils import generate_force_graph, generate_bubble_chart, generate_umap_plot
 import pandas as pd
+import json
 def process_input(sentences, file_obj):
     if file_obj:
         sentences = sentences.strip().splitlines()
     sentences = [s.strip() for s in sentences if s.strip()]
     if not sentences:
+        return "请输入句子或上传文件", None, None, None, None, None, None, None, None
     labels, embeddings, scores = cluster_sentences(sentences)
     df = pd.DataFrame({
         "句子": sentences,
     force_data = generate_force_graph(sentences, labels)
     bubble_data = generate_bubble_chart(sentences, labels)
     umap_data = generate_umap_plot(embeddings, labels)
+    return (
+        f"✅ Silhouette: {scores['silhouette']:.4f}, DB: {scores['db']:.4f}",
+        df,
+        force_data,
+        json.dumps(force_data, ensure_ascii=False, indent=2),
+        bubble_data,
+        json.dumps(bubble_data, ensure_ascii=False, indent=2),
+        umap_data,
+        json.dumps(umap_data, ensure_ascii=False, indent=2)
+    )
 with gr.Blocks(title="Text2Vec 语义聚类") as demo:
     gr.Markdown("## 🧠 中文句子语义聚类（HDBSCAN + BGE）")
     df_output = gr.Dataframe(label="聚类结果", interactive=False)
     with gr.Tabs():
         with gr.Tab("力导图"):
+            force_plot = gr.JSON(label="力导图结构化数据")
+            force_raw = gr.Textbox(label="力导图 JSON 原文", lines=20)
         with gr.Tab("气泡图"):
+            bubble_plot = gr.JSON(label="气泡图结构化数据")
+            bubble_raw = gr.Textbox(label="气泡图 JSON 原文", lines=20)
         with gr.Tab("UMAP分布图"):
+            umap_plot = gr.JSON(label="UMAP结构化数据")
+            umap_raw = gr.Textbox(label="UMAP JSON 原文", lines=20)
     csv_btn = gr.File(label="下载 CSV", visible=False)
     def export_csv(df):
         return "cluster_result.csv"
     run_btn.click(process_input, inputs=[txt_input, file_input],
+                  outputs=[
+                      status,
+                      df_output,
+                      force_plot, force_raw,
+                      bubble_plot, bubble_raw,
+                      umap_plot, umap_raw
+                  ])
     df_output.change(export_csv, inputs=[df_output], outputs=[csv_btn])
 demo.launch()