File size: 4,690 Bytes
5269c7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
Example script demonstrating SmoLagent data analysis
===================================================

This script shows how to use SmoLagent for automated data analysis
"""

import pandas as pd
from smolagents import CodeAgent, PythonCodeTool
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration
# Path of the CSV file that this script loads with pandas.read_csv.
# NOTE(review): this is an absolute, machine-specific Windows path - edit it
# to point at your own copy of the data before running the script.
CSV_FILE_PATH = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"

def _plot_distributions(df, numeric_columns):
    """Save and show histograms for up to the first six numeric columns."""
    fig = plt.figure(figsize=(15, 10))
    try:
        # The 2x3 grid below has six slots, so only the first six columns fit.
        for i, col in enumerate(numeric_columns[:6]):
            plt.subplot(2, 3, i + 1)
            df[col].hist(bins=30, alpha=0.7)
            plt.title(f'Distribution of {col}')
            plt.xlabel(col)
            plt.ylabel('Frequency')

        plt.tight_layout()
        plt.savefig('distributions.png', dpi=300, bbox_inches='tight')
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    print("✅ Distribution plots saved as 'distributions.png'")


def _plot_correlation_heatmap(df, numeric_columns):
    """Save and show an annotated correlation heatmap of the numeric columns."""
    fig = plt.figure(figsize=(12, 8))
    try:
        correlation_matrix = df[numeric_columns].corr()
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
        plt.title('Correlation Heatmap')
        plt.tight_layout()
        plt.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    print("✅ Correlation heatmap saved as 'correlation_heatmap.png'")


def simple_data_analysis(csv_path=None):
    """Perform basic data analysis without AI agent.

    Loads a CSV file, prints its columns, dtypes, missing-value counts and
    ``describe()`` summary, then writes ``distributions.png`` (when there is
    at least one numeric column) and ``correlation_heatmap.png`` (when there
    are at least two) into the current working directory.

    Args:
        csv_path: Path of the CSV file to analyse. When omitted, the
            module-level ``CSV_FILE_PATH`` is used.

    Returns:
        The loaded DataFrame, or ``None`` if the file could not be read.
    """
    # Resolve the default at call time so a changed CSV_FILE_PATH is honoured.
    path = CSV_FILE_PATH if csv_path is None else csv_path

    print("=== LOADING DATA ===")
    try:
        df = pd.read_csv(path)
    except Exception as e:
        # Script-level boundary: report any load failure and stop the analysis.
        print(f"❌ Error loading data: {e}")
        return None
    print(f"✅ Data loaded successfully! Shape: {df.shape}")

    print("\n=== BASIC INFO ===")
    print(f"Columns: {list(df.columns)}")
    print(f"Data types:\n{df.dtypes}")
    print(f"\nMissing values:\n{df.isnull().sum()}")

    print("\n=== STATISTICAL SUMMARY ===")
    print(df.describe())

    # Plots only make sense for numeric data.
    numeric_columns = df.select_dtypes(include=['number']).columns

    if len(numeric_columns) > 0:
        print(f"\n=== CREATING PLOTS FOR {len(numeric_columns)} NUMERIC COLUMNS ===")
        _plot_distributions(df, numeric_columns)

        # A correlation matrix needs at least two variables.
        if len(numeric_columns) > 1:
            _plot_correlation_heatmap(df, numeric_columns)

    return df

def analyze_with_smolagent_example():
    """Example of how to use SmoLagent (requires model configuration).

    Prints a list of example queries and the steps needed to enable the
    agent. No model is created and no data is loaded until the commented
    template lines below are uncommented and configured.

    Returns:
        None. All output goes to stdout.
    """
    print("\n=== SMOLAGENT ANALYSIS EXAMPLE ===")
    print("Note: This requires proper model configuration in config.py")

    # This is a template - you need to configure your model.
    # The try/except is kept as a guard for the setup code you uncomment;
    # the statements that are active by default only print text.
    try:
        # Uncomment and configure based on your model choice:

        # For OpenAI:
        # from smolagents.models import OpenAIServerModel
        # model = OpenAIServerModel(model_id="gpt-4", api_key="your-api-key")

        # For local Ollama:
        # from smolagents.models import LiteLLMModel
        # model = LiteLLMModel(model_id="ollama/llama2", api_base="http://localhost:11434")

        # Create agent
        # python_tool = PythonCodeTool()
        # agent = CodeAgent(tools=[python_tool], model=model)

        # Example queries you could ask:
        example_queries = [
            "Analyze the distribution of numerical columns and identify any outliers",
            "Find correlations between variables and suggest interesting patterns",
            "Perform clustering analysis on the data",
            "Identify trends and seasonality in time-series data",
            "Suggest data quality improvements",
        ]

        print("Example queries you can ask SmoLagent:")
        for i, query in enumerate(example_queries, 1):
            print(f"{i}. {query}")

        print("\nTo use SmoLagent:")
        print("1. Configure your model in config.py")
        print("2. Uncomment the model initialization code above")
        print("3. Run the agent with your queries")

        # Example usage (commented out until model is configured).
        # The CSV is loaded here, next to its only consumer, so the default
        # run does not read the whole file just to discard it:
        # df = pd.read_csv(CSV_FILE_PATH)
        # response = agent.run(f"Analyze this dataset: {df.head().to_string()}")
        # print(f"AI Analysis: {response}")

    except Exception as e:
        print(f"SmoLagent setup needed: {e}")

if __name__ == "__main__":
    # Step 1: the plain pandas/matplotlib pass; keep the frame it returns.
    df = simple_data_analysis()

    # Step 2: print the SmoLagent usage template.
    analyze_with_smolagent_example()

    # Step 3: closing guidance, emitted as one newline-joined message.
    print(
        "\n".join(
            (
                "\n=== NEXT STEPS ===",
                "1. Configure your AI model in config.py",
                "2. Run 'python app.py' to start the Gradio interface",
                "3. Use the web interface for interactive analysis",
            )
        )
    )