Commit 7d40c30 · verified · committed by mgbam · 1 Parent(s): 86695d8

Update ui/callbacks.py

Files changed (1)
  1. ui/callbacks.py +105 -34
ui/callbacks.py CHANGED
@@ -2,35 +2,50 @@
 
 # -*- coding: utf-8 -*-
 #
-# PROJECT: CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
+# PROJECT: CognitiveEDA v5.2 - The QuantumLeap Intelligence Platform
 #
-# DESCRIPTION: This module now contains only the CORE LOGIC for the Gradio
-#              event handlers. It exports these functions to be attached to
-#              listeners within the main application context.
+# DESCRIPTION: The "Controller" logic of the application. This module contains
+#              the Python functions that execute when Gradio events are triggered.
+#              It is designed to be completely decoupled from the UI definition
+#              and event attachment process.
 
 import gradio as gr
 import pandas as pd
 import logging
 from threading import Thread
 
+import plotly.graph_objects as go
+import plotly.express as px
+
 from core.analyzer import DataAnalyzer
 from core.llm import GeminiNarrativeGenerator
 from core.config import settings
 from core.exceptions import DataProcessingError
 from modules.clustering import perform_clustering
-# ... other module imports
+from modules.text import generate_word_cloud
+from modules.timeseries import analyze_time_series
+
+
+# --- Primary Analysis Chain ---
 
-# --- Main Analysis Logic ---
-def run_full_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
+def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
     """
-    The primary orchestration function. This is the logic that will be called
-    by the 'analyze_button.click' event.
+    Phase 1 of the analysis: Fast, synchronous tasks.
+    Validates inputs, loads data, and creates the core DataAnalyzer object.
+
+    Args:
+        file_obj: The uploaded file object from Gradio.
+        progress: The Gradio progress tracker.
+
+    Returns:
+        The instantiated DataAnalyzer object, which will update the gr.State.
+        Returns None if any validation or processing fails.
     """
-    # 1. Input Validation (File)
+    # 1. Input Validation
     if file_obj is None:
         raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")
 
-    # 2. Runtime Configuration Validation (API Key)
+    # 2. Runtime Configuration Validation
     progress(0, desc="Validating configuration...")
     if not settings.GOOGLE_API_KEY:
         logging.error("Analysis attempted without GOOGLE_API_KEY set.")
@@ -40,37 +55,47 @@ def run_full_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
     )
 
     try:
-        # 3. Data Loading & Core Analysis
-        progress(0.1, desc="Loading and parsing data...")
+        # 3. Data Loading
+        progress(0.2, desc="Loading and parsing data file...")
         df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)
         if len(df) > settings.MAX_UI_ROWS:
             df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)
+            logging.info(f"DataFrame sampled down to {settings.MAX_UI_ROWS} rows.")
 
-        progress(0.3, desc="Instantiating analysis engine...")
+        # 4. Core Analyzer Instantiation
+        progress(0.7, desc="Instantiating analysis engine...")
         analyzer = DataAnalyzer(df)
-        return analyzer # We will return the analyzer and handle the rest in a subsequent step
+        progress(1.0, desc="Initial analysis complete.")
+        return analyzer
 
     except DataProcessingError as e:
         logging.error(f"User-facing data processing error: {e}", exc_info=True)
         raise gr.Error(str(e))
     except Exception as e:
-        logging.error(f"A critical unhandled error occurred: {e}", exc_info=True)
+        logging.error(f"A critical unhandled error occurred during initial analysis: {e}", exc_info=True)
         raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}")
 
 
 def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
     """
-    A generator function that yields UI updates. Triggered after the analyzer is created.
+    Phase 2 of the analysis: Slower, multi-stage tasks.
+    This generator function yields UI updates as they become available.
+
+    Args:
+        analyzer: The DataAnalyzer object from the gr.State.
+        progress: The Gradio progress tracker.
+
+    Yields:
+        A dictionary of Gradio updates to populate the dashboard.
     """
-    if not analyzer:
-        # This prevents errors if the initial analysis failed.
-        # Create an empty dictionary that matches the structure of `updates`
-        # so Gradio has something to unpack.
-        return { "state_analyzer": None }
-
-    # 1. Start AI thread
-    progress(0.1, desc="Spawning AI report thread...")
-    ai_report_queue = [""]
+    # Guard clause: Do nothing if the initial analysis failed.
+    if not isinstance(analyzer, DataAnalyzer):
+        logging.warning("generate_reports_and_visuals called without a valid analyzer. Aborting.")
+        return {}
+
+    # 1. Start AI narrative generation in a background thread
+    progress(0, desc="Spawning AI report thread...")
+    ai_report_queue = [""]  # Use a mutable list to pass string by reference
     def generate_ai_report_threaded(analyzer_instance):
         narrative_generator = GeminiNarrativeGenerator(api_key=settings.GOOGLE_API_KEY)
         ai_report_queue[0] = narrative_generator.generate_narrative(analyzer_instance)
@@ -78,16 +103,16 @@ def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)
     thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
     thread.start()
 
-    # 2. Generate standard reports
-    progress(0.4, desc="Generating data profiles...")
+    # 2. Generate standard reports and visuals (this is fast)
+    progress(0.4, desc="Generating data profiles and visuals...")
     meta = analyzer.metadata
     missing_df, num_df, cat_df = analyzer.get_profiling_reports()
     fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
 
-    # 3. Yield initial updates
+    # 3. Yield the first set of updates to populate the main dashboard immediately
    progress(0.8, desc="Building initial dashboard...")
     initial_updates = {
-        "ai_report_output": gr.update(value="⏳ Generating AI report... Main dashboard is ready."),
+        "ai_report_output": gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."),
         "profile_missing_df": gr.update(value=missing_df),
         "profile_numeric_df": gr.update(value=num_df),
         "profile_categorical_df": gr.update(value=cat_df),
@@ -104,15 +129,61 @@ def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)
     }
     yield initial_updates
 
-    # 4. Wait for thread and yield final AI report
+    # 4. Wait for the AI thread to complete
     thread.join()
     progress(1.0, desc="AI Report complete!")
+
+    # 5. Yield the final update, now including the AI-generated report
     final_updates = initial_updates.copy()
     final_updates["ai_report_output"] = ai_report_queue[0]
     yield final_updates
 
-# --- Other Interactive Callback Logic ---
+
+# --- Interactive Explorer Callbacks ---
+
+def create_histogram(analyzer, col):
+    """Generates a histogram for a selected numeric column."""
+    if not isinstance(analyzer, DataAnalyzer) or not col:
+        return go.Figure().update_layout(title="Select a column to generate a histogram")
+    return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
+
+def create_scatterplot(analyzer, x_col, y_col, color_col):
+    """Generates a scatter plot for selected X, Y, and optional color columns."""
+    if not isinstance(analyzer, DataAnalyzer) or not x_col or not y_col:
+        return go.Figure().update_layout(title="Select X and Y axes to generate a scatter plot")
+
+    # Use a subset for performance on large datasets
+    df_sample = analyzer.df
+    if len(analyzer.df) > 10000:
+        df_sample = analyzer.df.sample(n=10000, random_state=42)
+
+    return px.scatter(
+        df_sample, x=x_col, y=y_col, color=color_col if color_col else None,
+        title=f"<b>Scatter Plot: {x_col} vs. {y_col}</b>", template="plotly_white"
+    )
+
+
+# --- Specialized Module Callbacks ---
+
 def update_clustering(analyzer, k):
-    if not analyzer: return gr.update(), gr.update(), gr.update()
+    """Callback for the clustering module."""
+    if not isinstance(analyzer, DataAnalyzer):
+        return gr.update(), gr.update(), gr.update(value="Run analysis first.")
+
+    # Delegate the heavy lifting to the specialized module
     fig_cluster, fig_elbow, summary = perform_clustering(analyzer.df, analyzer.metadata['numeric_cols'], k)
-    return fig_cluster, fig_elbow, summary
+    return fig_cluster, fig_elbow, summary
+
+# Add other specialized callbacks for text and time-series here if needed.
+# For example, if you add the dropdowns and plots to the layout:
+#
+# def update_timeseries(analyzer, date_col, value_col):
+#     if not isinstance(analyzer, DataAnalyzer):
+#         return gr.update(), gr.update(value="Run analysis first.")
#     fig, md = analyze_time_series(analyzer.df, date_col, value_col)
#     return fig, md
#
# def update_text(analyzer, text_col):
#     if not isinstance(analyzer, DataAnalyzer):
#         return gr.update()
#     return generate_word_cloud(analyzer.df, text_col)
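
Note: the docstrings above describe a two-phase event chain (Phase 1 fills a gr.State, Phase 2 is a generator chained off it), with the actual attachment living outside this module. A minimal wiring sketch under assumed names — `file_input`, `analyze_button`, `ai_report_output`, and `state_analyzer` are illustrative, not necessarily the identifiers used in this repo's layout module:

# Hypothetical wiring sketch for app.py. Component names are assumptions;
# only run_initial_analysis and generate_reports_and_visuals come from this commit.
import gradio as gr
from ui.callbacks import run_initial_analysis, generate_reports_and_visuals

with gr.Blocks() as demo:
    state_analyzer = gr.State()  # holds the DataAnalyzer object between phases

    file_input = gr.File(label="Upload CSV or Excel")
    analyze_button = gr.Button("Analyze")
    ai_report_output = gr.Markdown()

    # Phase 1 returns the DataAnalyzer, which Gradio stores in state_analyzer;
    # .then() fires Phase 2 only after that first step completes.
    analyze_button.click(
        fn=run_initial_analysis,
        inputs=[file_input],
        outputs=[state_analyzer],
    ).then(
        fn=generate_reports_and_visuals,
        inputs=[state_analyzer],
        outputs=[ai_report_output],  # plus the profiling and overview components
    )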
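
The comment "Use a mutable list to pass string by reference" in generate_reports_and_visuals refers to a standard CPython threading pattern: rebinding a plain string inside the worker would be invisible to the caller, so the worker mutates a shared one-element list instead. A stripped-down, runnable sketch of the same pattern (slow_worker and the sleep are stand-ins for the Gemini narrative call):

# Stand-alone sketch of the mutable-container result pattern used above.
from threading import Thread
import time

result = [""]  # one-element list: the worker writes into index 0 in place

def slow_worker():
    time.sleep(0.1)          # stands in for the slow narrative-generation call
    result[0] = "AI report"  # visible to the main thread after join()

t = Thread(target=slow_worker)
t.start()
# ... fast work happens here, e.g. building the initial dashboard ...
t.join()                     # block until the background result is ready
print(result[0])             # -> "AI report"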