Spaces:
Sleeping
Sleeping
File size: 5,215 Bytes
d1943e0 12fa967 fcc261b 12fa967 d1943e0 12fa967 d1943e0 fcc261b d1943e0 fcc261b 12fa967 fcc261b 12fa967 fcc261b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# ui/callbacks.py
# -*- coding: utf-8 -*-
#
# PROJECT: CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION: This module now contains only the CORE LOGIC for the Gradio
# event handlers. It exports these functions to be attached to
# listeners within the main application context.
import gradio as gr
import pandas as pd
import logging
from threading import Thread
from core.analyzer import DataAnalyzer
from core.llm import GeminiNarrativeGenerator
from core.config import settings
from core.exceptions import DataProcessingError
from modules.clustering import perform_clustering
# ... other module imports
# --- Main Analysis Logic ---
def run_full_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
    """
    The primary orchestration function. This is the logic that will be called
    by the 'analyze_button.click' event.

    Validates the upload and the runtime configuration, loads the file into a
    DataFrame (down-sampling it when it exceeds ``settings.MAX_UI_ROWS``) and
    wraps it in a ``DataAnalyzer``.

    Args:
        file_obj: The uploaded file. Either an object exposing the file path
            as ``.name`` or a plain path string.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        The ``DataAnalyzer`` built from the loaded DataFrame. Reports and
        visuals are produced from it in a subsequent step.

    Raises:
        gr.Error: If no file was uploaded, if ``GOOGLE_API_KEY`` is not
            configured, or if loading/analysis fails for any reason.
    """
    # 1. Input Validation (File)
    if file_obj is None:
        raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")

    # 2. Runtime Configuration Validation (API Key)
    progress(0, desc="Validating configuration...")
    if not settings.GOOGLE_API_KEY:
        logging.error("Analysis attempted without GOOGLE_API_KEY set.")
        raise gr.Error(
            "CRITICAL: GOOGLE_API_KEY is not configured. "
            "Please add it to your .env file or as a platform secret and restart."
        )

    try:
        # 3. Data Loading & Core Analysis
        progress(0.1, desc="Loading and parsing data...")
        # Accept both a file wrapper (path in `.name`) and a bare path string.
        file_path = getattr(file_obj, "name", file_obj)
        # Compare the extension case-insensitively so that e.g. "DATA.CSV"
        # is not mistakenly handed to the Excel reader.
        if str(file_path).lower().endswith('.csv'):
            df = pd.read_csv(file_path)
        else:
            df = pd.read_excel(file_path)

        # Cap the number of rows; the fixed seed keeps the sample reproducible.
        if len(df) > settings.MAX_UI_ROWS:
            df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)

        progress(0.3, desc="Instantiating analysis engine...")
        analyzer = DataAnalyzer(df)
        return analyzer  # We will return the analyzer and handle the rest in a subsequent step
    except DataProcessingError as e:
        logging.error(f"User-facing data processing error: {e}", exc_info=True)
        raise gr.Error(str(e)) from e
    except Exception as e:
        # Top-level UI boundary: log the full traceback and surface a
        # readable message instead of an unhandled exception.
        logging.error(f"A critical unhandled error occurred: {e}", exc_info=True)
        raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}") from e
def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
    """
    A generator function that yields UI updates. Triggered after the analyzer is created.

    The AI narrative is produced in a background thread while the profiling
    tables and overview figures are computed here. The dashboard updates are
    yielded as soon as they are ready; once the thread finishes, the same
    updates are yielded again with the finished AI report filled in.

    Args:
        analyzer: The analysis engine produced by the previous step. It must
            provide ``metadata``, ``get_profiling_reports()`` and
            ``get_overview_visuals()``. When falsy, a single placeholder
            update is yielded and the generator stops.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Yields:
        dict: Updates keyed by string names (presumably mapped to the actual
        components by the caller -- confirm against the main application).
    """
    if not analyzer:
        # This prevents errors if the initial analysis failed.
        # Because this function is a generator, a `return <value>` would emit
        # nothing at all; the placeholder has to be yielded so that the
        # consumer has something to unpack.
        yield {"state_analyzer": None}
        return

    # 1. Start AI thread
    progress(0.1, desc="Spawning AI report thread...")
    ai_report_queue = [""]  # one-slot mailbox written by the worker thread

    def generate_ai_report_threaded(analyzer_instance):
        try:
            narrative_generator = GeminiNarrativeGenerator(api_key=settings.GOOGLE_API_KEY)
            ai_report_queue[0] = narrative_generator.generate_narrative(analyzer_instance)
        except Exception as e:
            # An exception raised inside the thread would otherwise be lost
            # and leave the report empty; log it and tell the user instead.
            logging.error(f"AI report generation failed: {e}", exc_info=True)
            ai_report_queue[0] = f"❌ AI report generation failed: {e}"

    thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
    thread.start()

    # 2. Generate standard reports
    progress(0.4, desc="Generating data profiles...")
    meta = analyzer.metadata
    missing_df, num_df, cat_df = analyzer.get_profiling_reports()
    fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()

    # 3. Yield initial updates
    progress(0.8, desc="Building initial dashboard...")
    numeric_cols = meta['numeric_cols']
    first_numeric = numeric_cols[0] if numeric_cols else None
    second_numeric = numeric_cols[1] if len(numeric_cols) > 1 else None
    initial_updates = {
        "ai_report_output": gr.update(value="⏳ Generating AI report... Main dashboard is ready."),
        "profile_missing_df": gr.update(value=missing_df),
        "profile_numeric_df": gr.update(value=num_df),
        "profile_categorical_df": gr.update(value=cat_df),
        "plot_types": gr.update(value=fig_types),
        "plot_missing": gr.update(value=fig_missing),
        "plot_correlation": gr.update(value=fig_corr),
        "dd_hist_col": gr.update(choices=numeric_cols, value=first_numeric),
        "dd_scatter_x": gr.update(choices=numeric_cols, value=first_numeric),
        "dd_scatter_y": gr.update(choices=numeric_cols, value=second_numeric),
        "dd_scatter_color": gr.update(choices=meta['columns']),
        # Optional tabs are only shown when the data supports them.
        "tab_timeseries": gr.update(visible=bool(meta['datetime_cols'])),
        "tab_text": gr.update(visible=bool(meta['text_cols'])),
        "tab_cluster": gr.update(visible=len(numeric_cols) > 1),
    }
    yield initial_updates

    # 4. Wait for thread and yield final AI report
    thread.join()
    progress(1.0, desc="AI Report complete!")
    final_updates = initial_updates.copy()
    final_updates["ai_report_output"] = ai_report_queue[0]
    yield final_updates
# --- Other Interactive Callback Logic ---
def update_clustering(analyzer, k):
    """
    Re-run clustering with the requested ``k`` and return the refreshed outputs.

    Args:
        analyzer: The analysis engine; its ``df`` and
            ``metadata['numeric_cols']`` are passed to ``perform_clustering``.
        k: Passed through unchanged as the third argument of
            ``perform_clustering``.

    Returns:
        A 3-tuple ``(fig_cluster, fig_elbow, summary)`` from
        ``perform_clustering``, or three no-op ``gr.update()`` objects when
        no analyzer is available.
    """
    if analyzer:
        frame = analyzer.df
        numeric_columns = analyzer.metadata['numeric_cols']
        cluster_plot, elbow_plot, cluster_summary = perform_clustering(frame, numeric_columns, k)
        return cluster_plot, elbow_plot, cluster_summary

    # Nothing to cluster yet: leave all three outputs untouched.
    return tuple(gr.update() for _ in range(3))