# Source: Hugging Face Space NTU-Peak-2/Singtel_Use_Case1
# Space status at capture time: Runtime error
# File size: 8,314 bytes
# Revision: 5269c7e

import base64
import io
import os
import warnings

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from PIL import Image
from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonCodeTool
from smolagents.models import OpenAIServerModel

# Path of the CSV file to analyze. Set the CSV_FILE_PATH environment variable
# to point a deployment at its own copy of the data; when it is unset the
# original developer-machine location is used as the default.
CSV_FILE_PATH = os.environ.get(
    "CSV_FILE_PATH",
    "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv",
)

class DataAnalysisAgent:
    """Load the configured CSV into a DataFrame and expose analysis helpers.

    Attributes set by ``__init__``:
        python_tool / search_tool: tool instances handed to the agent.
        agent: a ``CodeAgent`` when a model was supplied and construction
            succeeded, otherwise ``None``.
        df: the loaded ``pandas.DataFrame``, or ``None`` if loading failed.
    """

    def __init__(self, model=None):
        """Create the tools, the optional SmoLagent, and load the CSV.

        Args:
            model: Optional model object to drive the ``CodeAgent``, e.g.
                ``OpenAIServerModel(model_id="gpt-4", api_key="your-api-key")``.
                When omitted, ``agent`` stays ``None`` and
                ``analyze_with_smolagent`` reports that it is not configured.
        """
        # NOTE(review): confirm that the installed smolagents release exports
        # `PythonCodeTool`; recent releases appear to name the equivalent tool
        # `PythonInterpreterTool`.
        self.python_tool = PythonCodeTool()
        self.search_tool = DuckDuckGoSearchTool()

        # Always define the attribute so analyze_with_smolagent can test it
        # even when no model was configured.
        self.agent = None
        if model is not None:
            try:
                self.agent = CodeAgent(
                    tools=[self.python_tool, self.search_tool],
                    model=model,
                )
            except Exception as exc:
                # Best effort: the non-AI tabs must keep working without an agent.
                warnings.warn(
                    f"SmoLagent could not be configured: {exc}",
                    RuntimeWarning,
                    stacklevel=2,
                )

        self.df = None
        self.load_data()

    def load_data(self):
        """Read ``CSV_FILE_PATH`` into ``self.df``.

        Returns:
            A human-readable status string; failures are reported in the
            string rather than raised.
        """
        try:
            self.df = pd.read_csv(CSV_FILE_PATH)
            return f"Data loaded successfully! Shape: {self.df.shape}"
        except Exception as e:
            return f"Error loading data: {str(e)}"

    def get_data_overview(self):
        """Summarize the dataset.

        Returns:
            ``"No data loaded"`` when there is no DataFrame, otherwise a dict
            with the shape, column names, dtypes, per-column missing-value
            counts and deep memory usage formatted in MB.
        """
        if self.df is None:
            return "No data loaded"

        return {
            "shape": self.df.shape,
            "columns": list(self.df.columns),
            "dtypes": self.df.dtypes.to_dict(),
            "missing_values": self.df.isnull().sum().to_dict(),
            "memory_usage": f"{self.df.memory_usage(deep=True).sum() / 1024**2:.2f} MB",
        }

    def generate_basic_stats(self):
        """Return ``describe(include='all')`` as an HTML table.

        Returns ``"No data loaded"`` when there is no DataFrame.
        """
        if self.df is None:
            return "No data loaded"

        return self.df.describe(include='all').to_html()

    @staticmethod
    def _figure_to_png_buffer(fig):
        """Render ``fig`` to an in-memory PNG, close it, and return the rewound buffer."""
        img_buffer = io.BytesIO()
        try:
            fig.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        finally:
            # Close even if rendering fails so figures do not accumulate.
            plt.close(fig)
        img_buffer.seek(0)
        return img_buffer

    def create_correlation_heatmap(self):
        """Plot the correlation matrix of the numerical columns.

        Returns:
            ``None`` when no data is loaded, an explanatory string when fewer
            than two numerical columns exist, otherwise a ``BytesIO``
            containing the PNG image.
        """
        if self.df is None:
            return None

        numeric_cols = self.df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) < 2:
            return "Not enough numerical columns for correlation analysis"

        fig, ax = plt.subplots(figsize=(12, 8))
        correlation_matrix = self.df[numeric_cols].corr()
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
        ax.set_title('Correlation Heatmap')
        fig.tight_layout()

        return self._figure_to_png_buffer(fig)

    def create_distribution_plots(self):
        """Plot a histogram for every numerical column on a grid of up to 3 columns.

        Returns:
            ``None`` when no data is loaded, an explanatory string when there
            are no numerical columns, otherwise a ``BytesIO`` containing the
            PNG image.
        """
        if self.df is None:
            return None

        numeric_cols = self.df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) == 0:
            return "No numerical columns found"

        n_cols = min(3, len(numeric_cols))
        n_rows = (len(numeric_cols) + n_cols - 1) // n_cols  # ceiling division

        # squeeze=False always yields a 2-D array, so one code path covers
        # the 1x1, single-row and multi-row layouts.
        fig, axes = plt.subplots(
            n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False
        )
        axes = axes.ravel()

        for ax, col in zip(axes, numeric_cols):
            self.df[col].hist(bins=30, ax=ax, alpha=0.7)
            ax.set_title(f'Distribution of {col}')
            ax.set_xlabel(col)
            ax.set_ylabel('Frequency')

        # Hide the unused cells of the last grid row.
        for ax in axes[len(numeric_cols):]:
            ax.set_visible(False)

        fig.tight_layout()

        return self._figure_to_png_buffer(fig)

    def analyze_with_smolagent(self, query):
        """Ask the configured agent to analyze the dataset.

        Args:
            query: The user's free-text question.

        Returns:
            The agent's response, or a message string when the agent is not
            configured, no data is loaded, or the agent call raises.
        """
        if self.agent is None:
            return "SmoLagent not configured. Please set up your LLM model."
        if self.df is None:
            # The prompt below is built from the DataFrame.
            return "No data loaded"

        # Prepare context about the dataset
        data_context = f"""
        Dataset shape: {self.df.shape}
        Columns: {list(self.df.columns)}
        Data types: {self.df.dtypes.to_dict()}
        First few rows: {self.df.head().to_string()}
        """

        prompt = f"""
        You have access to a pandas DataFrame with the following information:
        {data_context}
        
        User query: {query}
        
        Please analyze the data and provide insights. Use the PythonCodeTool to write and execute code for analysis.
        """

        try:
            return self.agent.run(prompt)
        except Exception as e:
            return f"Error in SmoLagent analysis: {str(e)}"

# Module-level agent instance used by the Gradio callback functions below.
# Constructing it also attempts to load the CSV (DataAnalysisAgent.__init__
# calls load_data()).
data_agent = DataAnalysisAgent()

def analyze_data_overview():
    """Gradio callback: return the dataset overview rendered as text."""
    return str(data_agent.get_data_overview())

def generate_statistics():
    """Gradio callback: return the statistical summary produced by the agent."""
    stats_html = data_agent.generate_basic_stats()
    return stats_html

def create_correlation_plot():
    """Gradio callback: return the correlation heatmap as a PIL image.

    Returns:
        The rendered image, or ``None`` when the agent produced no image:
        it returns ``None`` if no data is loaded and an explanatory string
        if there are too few numerical columns.
    """
    img_buffer = data_agent.create_correlation_heatmap()
    # Image.open cannot handle None, so treat it like the string case.
    if img_buffer is None or isinstance(img_buffer, str):
        return None
    return Image.open(img_buffer)

def create_distribution_plot():
    """Gradio callback: return the distribution plots as a PIL image.

    Returns:
        The rendered image, or ``None`` when the agent produced no image:
        it returns ``None`` if no data is loaded and an explanatory string
        if there are no numerical columns.
    """
    img_buffer = data_agent.create_distribution_plots()
    # Image.open cannot handle None, so treat it like the string case.
    if img_buffer is None or isinstance(img_buffer, str):
        return None
    return Image.open(img_buffer)

def smolagent_analysis(query):
    """Gradio callback: forward the user's question to the agent and return its reply."""
    answer = data_agent.analyze_with_smolagent(query)
    return answer

# Create Gradio interface: four tabs, each wiring a button to one of the
# callback functions defined above. Components are laid out in the order they
# are created inside each context manager.
with gr.Blocks(title="AI Data Analysis with SmoLagent") as demo:
    gr.Markdown("# AI Data Analysis Dashboard")
    gr.Markdown("Analyze your CSV data using AI-powered insights with SmoLagent")
    
    # Tab 1: textual overview of the dataset.
    with gr.Tab("Data Overview"):
        gr.Markdown("## Dataset Overview")
        overview_btn = gr.Button("Get Data Overview")
        overview_output = gr.Textbox(label="Dataset Information", lines=10)
        overview_btn.click(analyze_data_overview, outputs=overview_output)
    
    # Tab 2: statistical summary; the callback's result goes to an HTML component.
    with gr.Tab("Basic Statistics"):
        gr.Markdown("## Statistical Summary")
        stats_btn = gr.Button("Generate Statistics")
        stats_output = gr.HTML(label="Statistical Summary")
        stats_btn.click(generate_statistics, outputs=stats_output)
    
    # Tab 3: two plots, with the buttons in one row and the images in the next.
    with gr.Tab("Visualizations"):
        gr.Markdown("## Data Visualizations")
        
        with gr.Row():
            corr_btn = gr.Button("Generate Correlation Heatmap")
            dist_btn = gr.Button("Generate Distribution Plots")
        
        with gr.Row():
            corr_plot = gr.Image(label="Correlation Heatmap")
            dist_plot = gr.Image(label="Distribution Plots")
        
        corr_btn.click(create_correlation_plot, outputs=corr_plot)
        dist_btn.click(create_distribution_plot, outputs=dist_plot)
    
    # Tab 4: free-text question passed to smolagent_analysis.
    with gr.Tab("AI Analysis"):
        gr.Markdown("## SmoLagent AI Analysis")
        gr.Markdown("Ask questions about your data and get AI-powered insights")
        
        query_input = gr.Textbox(
            label="Enter your analysis question",
            placeholder="e.g., 'What are the main trends in this data?' or 'Find outliers and anomalies'",
            lines=3
        )
        analyze_btn = gr.Button("Analyze with AI")
        ai_output = gr.Textbox(label="AI Analysis Results", lines=15)
        
        analyze_btn.click(smolagent_analysis, inputs=query_input, outputs=ai_output)

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()