Update ui/callbacks.py

ui/callbacks.py · +105 −34 · CHANGED
```diff
@@ -2,35 +2,50 @@
 
 # -*- coding: utf-8 -*-
 #
-# PROJECT: CognitiveEDA v5.
+# PROJECT: CognitiveEDA v5.2 - The QuantumLeap Intelligence Platform
 #
-# DESCRIPTION:
-#
-#
+# DESCRIPTION: The "Controller" logic of the application. This module contains
+#              the Python functions that execute when Gradio events are triggered.
+#              It is designed to be completely decoupled from the UI definition
+#              and event attachment process.
 
 import gradio as gr
 import pandas as pd
 import logging
 from threading import Thread
 
+import plotly.graph_objects as go
+import plotly.express as px
+
 from core.analyzer import DataAnalyzer
 from core.llm import GeminiNarrativeGenerator
 from core.config import settings
 from core.exceptions import DataProcessingError
 from modules.clustering import perform_clustering
-
+from modules.text import generate_word_cloud
+from modules.timeseries import analyze_time_series
+
+
+# --- Primary Analysis Chain ---
 
-
-def run_full_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
+def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
     """
-
-
+    Phase 1 of the analysis: Fast, synchronous tasks.
+    Validates inputs, loads data, and creates the core DataAnalyzer object.
+
+    Args:
+        file_obj: The uploaded file object from Gradio.
+        progress: The Gradio progress tracker.
+
+    Returns:
+        The instantiated DataAnalyzer object, which will update the gr.State.
+        Returns None if any validation or processing fails.
     """
-    # 1. Input Validation
+    # 1. Input Validation
     if file_obj is None:
         raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")
 
-    # 2. Runtime Configuration Validation
+    # 2. Runtime Configuration Validation
     progress(0, desc="Validating configuration...")
     if not settings.GOOGLE_API_KEY:
         logging.error("Analysis attempted without GOOGLE_API_KEY set.")
```
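The new docstring says the returned `DataAnalyzer` "will update the gr.State" and that this module is decoupled from the UI definition. The layout and event-attachment modules are not part of this commit, so the following is only a minimal sketch of how the two phases might be chained; every component name (`demo`, `file_input`, `analyzer_state`, `ai_report_output`) is an assumption. Note that when a Gradio callback yields a dict, Gradio expects component objects as keys, so the string-keyed dicts produced by `generate_reports_and_visuals` presumably pass through a small name-to-component adapter like the one sketched here.

```python
import gradio as gr

from ui.callbacks import run_initial_analysis, generate_reports_and_visuals

# Hypothetical wiring -- the real layout module is not shown in this commit.
with gr.Blocks() as demo:
    analyzer_state = gr.State()  # receives the DataAnalyzer returned by Phase 1
    file_input = gr.File(label="Upload CSV or Excel")
    ai_report_output = gr.Markdown()

    def as_component_updates(analyzer):
        # Adapter (assumed): map the string keys used in callbacks.py onto
        # the actual component objects, which is what Gradio requires.
        components = {"ai_report_output": ai_report_output}
        for updates in generate_reports_and_visuals(analyzer):
            yield {components[k]: v for k, v in updates.items() if k in components}

    # Phase 1 (fast, synchronous) fills the gr.State; .then() starts Phase 2,
    # whose yields stream updates into the dashboard as they become ready.
    file_input.upload(
        run_initial_analysis, inputs=file_input, outputs=analyzer_state
    ).then(
        as_component_updates, inputs=analyzer_state, outputs=[ai_report_output]
    )
```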
```diff
@@ -40,37 +55,47 @@ def run_full_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
         )
 
     try:
-        # 3. Data Loading
-        progress(0.
+        # 3. Data Loading
+        progress(0.2, desc="Loading and parsing data file...")
         df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)
         if len(df) > settings.MAX_UI_ROWS:
             df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)
+            logging.info(f"DataFrame sampled down to {settings.MAX_UI_ROWS} rows.")
 
-
+        # 4. Core Analyzer Instantiation
+        progress(0.7, desc="Instantiating analysis engine...")
         analyzer = DataAnalyzer(df)
-
+        progress(1.0, desc="Initial analysis complete.")
+        return analyzer
 
     except DataProcessingError as e:
         logging.error(f"User-facing data processing error: {e}", exc_info=True)
         raise gr.Error(str(e))
     except Exception as e:
-        logging.error(f"A critical unhandled error occurred: {e}", exc_info=True)
+        logging.error(f"A critical unhandled error occurred during initial analysis: {e}", exc_info=True)
        raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}")
 
 
 def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
     """
-
+    Phase 2 of the analysis: Slower, multi-stage tasks.
+    This generator function yields UI updates as they become available.
+
+    Args:
+        analyzer: The DataAnalyzer object from the gr.State.
+        progress: The Gradio progress tracker.
+
+    Yields:
+        A dictionary of Gradio updates to populate the dashboard.
     """
-    if
-
-
-
-
-
-
-
-    ai_report_queue = [""]
+    # Guard clause: Do nothing if the initial analysis failed.
+    if not isinstance(analyzer, DataAnalyzer):
+        logging.warning("generate_reports_and_visuals called without a valid analyzer. Aborting.")
+        return {}
+
+    # 1. Start AI narrative generation in a background thread
+    progress(0, desc="Spawning AI report thread...")
+    ai_report_queue = [""]  # Use a mutable list to pass string by reference
     def generate_ai_report_threaded(analyzer_instance):
         narrative_generator = GeminiNarrativeGenerator(api_key=settings.GOOGLE_API_KEY)
         ai_report_queue[0] = narrative_generator.generate_narrative(analyzer_instance)
```
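Phase 1 above leans on two `settings` values, `GOOGLE_API_KEY` and `MAX_UI_ROWS`, from `core/config.py`, which is not included in this commit. A minimal sketch of what that module plausibly looks like, assuming a pydantic-settings based config; the library choice and default values are guesses, only the two field names are taken from the code above:

```python
# Hypothetical core/config.py -- not part of this commit; the field names come
# from the callbacks above, everything else is assumed.
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env")

    GOOGLE_API_KEY: str = ""   # empty string trips the guard in run_initial_analysis
    MAX_UI_ROWS: int = 50_000  # rows above this cap are down-sampled for the UI

settings = Settings()
```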
```diff
@@ -78,16 +103,16 @@ def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)
     thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
     thread.start()
 
-    # 2. Generate standard reports
-    progress(0.4, desc="Generating data profiles...")
+    # 2. Generate standard reports and visuals (this is fast)
+    progress(0.4, desc="Generating data profiles and visuals...")
     meta = analyzer.metadata
     missing_df, num_df, cat_df = analyzer.get_profiling_reports()
     fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
 
-    # 3. Yield
+    # 3. Yield the first set of updates to populate the main dashboard immediately
     progress(0.8, desc="Building initial dashboard...")
     initial_updates = {
-        "ai_report_output": gr.update(value="⏳ Generating AI report...
+        "ai_report_output": gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."),
         "profile_missing_df": gr.update(value=missing_df),
         "profile_numeric_df": gr.update(value=num_df),
         "profile_categorical_df": gr.update(value=cat_df),
```
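The single-element list `ai_report_queue` acts as a mutable cell the worker thread can write into while the generator keeps yielding. An equivalent formulation with `concurrent.futures` makes the hand-off explicit and also re-raises worker exceptions, which the bare `Thread` + `join()` pattern silently drops; this is only an alternative sketch, not what the commit uses:

```python
from concurrent.futures import ThreadPoolExecutor

import gradio as gr

from core.config import settings
from core.llm import GeminiNarrativeGenerator

def generate_reports_and_visuals_alt(analyzer):
    """Alternative sketch of Phase 2 using a Future instead of a shared list."""
    with ThreadPoolExecutor(max_workers=1) as pool:
        generator = GeminiNarrativeGenerator(api_key=settings.GOOGLE_API_KEY)
        future = pool.submit(generator.generate_narrative, analyzer)

        # First yield: the dashboard is ready, the AI report is still pending.
        yield {"ai_report_output": gr.update(value="⏳ Generating AI report...")}

        # future.result() blocks like thread.join(), but re-raises any
        # exception from the worker instead of swallowing it.
        yield {"ai_report_output": future.result()}
```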
```diff
@@ -104,15 +129,61 @@ def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)
     }
     yield initial_updates
 
-    # 4. Wait for
+    # 4. Wait for the AI thread to complete
     thread.join()
     progress(1.0, desc="AI Report complete!")
+
+    # 5. Yield the final update, now including the AI-generated report
     final_updates = initial_updates.copy()
     final_updates["ai_report_output"] = ai_report_queue[0]
     yield final_updates
 
-
+
+# --- Interactive Explorer Callbacks ---
+
+def create_histogram(analyzer, col):
+    """Generates a histogram for a selected numeric column."""
+    if not isinstance(analyzer, DataAnalyzer) or not col:
+        return go.Figure().update_layout(title="Select a column to generate a histogram")
+    return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
+
+def create_scatterplot(analyzer, x_col, y_col, color_col):
+    """Generates a scatter plot for selected X, Y, and optional color columns."""
+    if not isinstance(analyzer, DataAnalyzer) or not x_col or not y_col:
+        return go.Figure().update_layout(title="Select X and Y axes to generate a scatter plot")
+
+    # Use a subset for performance on large datasets
+    df_sample = analyzer.df
+    if len(analyzer.df) > 10000:
+        df_sample = analyzer.df.sample(n=10000, random_state=42)
+
+    return px.scatter(
+        df_sample, x=x_col, y=y_col, color=color_col if color_col else None,
+        title=f"<b>Scatter Plot: {x_col} vs. {y_col}</b>", template="plotly_white"
+    )
+
+
+# --- Specialized Module Callbacks ---
+
 def update_clustering(analyzer, k):
-
+    """Callback for the clustering module."""
+    if not isinstance(analyzer, DataAnalyzer):
+        return gr.update(), gr.update(), gr.update(value="Run analysis first.")
+
+    # Delegate the heavy lifting to the specialized module
     fig_cluster, fig_elbow, summary = perform_clustering(analyzer.df, analyzer.metadata['numeric_cols'], k)
-    return fig_cluster, fig_elbow, summary
+    return fig_cluster, fig_elbow, summary
+
+# Add other specialized callbacks for text and time-series here if needed.
+# For example, if you add the dropdowns and plots to the layout:
+#
+# def update_timeseries(analyzer, date_col, value_col):
+#     if not isinstance(analyzer, DataAnalyzer):
+#         return gr.update(), gr.update(value="Run analysis first.")
+#     fig, md = analyze_time_series(analyzer.df, date_col, value_col)
+#     return fig, md
+#
+# def update_text(analyzer, text_col):
+#     if not isinstance(analyzer, DataAnalyzer):
+#         return gr.update()
+#     return generate_word_cloud(analyzer.df, text_col)
```
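The commented-out `update_timeseries` and `update_text` stubs show how further modules would slot in. For completeness, here is a sketch of how the clustering callback might be attached to a control, with its `isinstance` guard covering the pre-analysis state; the component names (`k_slider`, `cluster_plot`, and so on) are assumptions, since the layout is defined elsewhere:

```python
import gradio as gr

from ui.callbacks import update_clustering  # the function defined above

# Hypothetical layout fragment -- component names are illustrative only.
with gr.Blocks() as demo:
    analyzer_state = gr.State()
    k_slider = gr.Slider(minimum=2, maximum=10, step=1, value=4, label="Clusters (k)")
    cluster_plot = gr.Plot()
    elbow_plot = gr.Plot()
    cluster_summary = gr.Markdown()

    # Re-run clustering whenever k changes; before any file is analyzed,
    # update_clustering's guard returns a "Run analysis first." message.
    k_slider.change(
        update_clustering,
        inputs=[analyzer_state, k_slider],
        outputs=[cluster_plot, elbow_plot, cluster_summary],
    )
```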