# Singtel_Use_Case1 / analyze.py
# cosmoruler
# first draft
# 5269c7e
"""
Example script demonstrating SmoLagent data analysis
===================================================
This script runs a basic pandas/matplotlib/seaborn analysis of a CSV file and
prints a template showing how the same data could be analyzed with SmoLagent.
"""
import pandas as pd
from smolagents import CodeAgent, PythonCodeTool
import matplotlib.pyplot as plt
import seaborn as sns
# Configuration
# Absolute path of the CSV file that the analysis functions in this script
# load with pandas.read_csv.
# NOTE(review): this is a machine-specific Windows path — confirm the file
# exists at this location (or edit the path) before running elsewhere.
CSV_FILE_PATH = "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv"
def _plot_distributions(df, columns, output_path='distributions.png'):
    """Save and show a 2x3 grid of histograms for at most six columns."""
    fig = plt.figure(figsize=(15, 10))
    try:
        # The grid has six cells, so only the first six columns are drawn.
        for position, col in enumerate(columns[:6], start=1):
            plt.subplot(2, 3, position)
            df[col].hist(bins=30, alpha=0.7)
            plt.title(f'Distribution of {col}')
            plt.xlabel(col)
            plt.ylabel('Frequency')
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.show()
    finally:
        # Release the figure even if drawing or saving failed; otherwise it
        # stays registered with pyplot for the life of the process.
        plt.close(fig)


def _plot_correlation_heatmap(df, columns, output_path='correlation_heatmap.png'):
    """Save and show an annotated correlation heatmap of the given columns."""
    fig = plt.figure(figsize=(12, 8))
    try:
        correlation_matrix = df[columns].corr()
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
        plt.title('Correlation Heatmap')
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.show()
    finally:
        plt.close(fig)


def simple_data_analysis(csv_path=None):
    """Perform basic data analysis without an AI agent.

    Loads a CSV file, prints its columns, dtypes, missing-value counts and
    ``describe()`` summary, then saves histogram and correlation plots for
    the numeric columns to the current working directory.

    Args:
        csv_path: Location of the CSV to analyze (anything accepted by
            ``pandas.read_csv``). When omitted, the module-level
            ``CSV_FILE_PATH`` is used.

    Returns:
        The loaded DataFrame, or ``None`` if the file could not be read.
    """
    source = CSV_FILE_PATH if csv_path is None else csv_path

    print("=== LOADING DATA ===")
    try:
        df = pd.read_csv(source)
    except Exception as e:
        # Deliberately broad: this is a best-effort demo entry point, so any
        # read/parse failure is reported to the user instead of crashing.
        print(f"❌ Error loading data: {e}")
        return None
    else:
        # Kept out of the try body so a failure here is not misreported as a
        # data-loading error.
        print(f"✅ Data loaded successfully! Shape: {df.shape}")

    print("\n=== BASIC INFO ===")
    print(f"Columns: {list(df.columns)}")
    print(f"Data types:\n{df.dtypes}")
    print(f"\nMissing values:\n{df.isnull().sum()}")

    print("\n=== STATISTICAL SUMMARY ===")
    print(df.describe())

    # Plots are only meaningful for numeric data.
    numeric_columns = df.select_dtypes(include=['number']).columns
    if len(numeric_columns) == 0:
        return df

    print(f"\n=== CREATING PLOTS FOR {len(numeric_columns)} NUMERIC COLUMNS ===")
    _plot_distributions(df, numeric_columns)
    print("✅ Distribution plots saved as 'distributions.png'")

    # A correlation matrix needs at least two variables.
    if len(numeric_columns) > 1:
        _plot_correlation_heatmap(df, numeric_columns)
        print("✅ Correlation heatmap saved as 'correlation_heatmap.png'")

    return df
def analyze_with_smolagent_example():
    """Print a template walkthrough for analyzing the dataset with SmoLagent.

    No model is contacted: the agent set-up is left as commented-out template
    code. The function loads the CSV at ``CSV_FILE_PATH`` and prints sample
    queries followed by set-up steps. Any exception raised while doing so is
    caught and reported as a "SmoLagent setup needed" message.
    """
    print("\n=== SMOLAGENT ANALYSIS EXAMPLE ===")
    print("Note: This requires proper model configuration in config.py")

    # Prompts a configured agent could be given.
    sample_prompts = (
        "Analyze the distribution of numerical columns and identify any outliers",
        "Find correlations between variables and suggest interesting patterns",
        "Perform clustering analysis on the data",
        "Identify trends and seasonality in time-series data",
        "Suggest data quality improvements",
    )
    setup_steps = (
        "1. Configure your model in config.py",
        "2. Uncomment the model initialization code above",
        "3. Run the agent with your queries",
    )

    # Template only - a model has to be configured before an agent can run.
    try:
        # Pick one model and uncomment it.
        #   OpenAI:
        #     from smolagents.models import OpenAIServerModel
        #     model = OpenAIServerModel(model_id="gpt-4", api_key="your-api-key")
        #   Local Ollama:
        #     from smolagents.models import LiteLLMModel
        #     model = LiteLLMModel(model_id="ollama/llama2", api_base="http://localhost:11434")
        #
        # Then build the agent:
        #     python_tool = PythonCodeTool()
        #     agent = CodeAgent(tools=[python_tool], model=model)
        # NOTE(review): confirm PythonCodeTool is exported by the installed
        # smolagents version before enabling this.

        # The data the agent would be asked about.
        dataset = pd.read_csv(CSV_FILE_PATH)

        print("Example queries you can ask SmoLagent:")
        for number, prompt in enumerate(sample_prompts, start=1):
            print(f"{number}. {prompt}")

        print("\nTo use SmoLagent:")
        for step in setup_steps:
            print(step)

        # Once a model is configured, the agent can be run like this:
        #     response = agent.run(f"Analyze this dataset: {dataset.head().to_string()}")
        #     print(f"AI Analysis: {response}")
    except Exception as err:
        print(f"SmoLagent setup needed: {err}")
if __name__ == "__main__":
    # Baseline pass: summary statistics and plots, no AI involved.
    df = simple_data_analysis()

    # Print the SmoLagent usage template.
    analyze_with_smolagent_example()

    # Closing hints for the user.
    print("\n=== NEXT STEPS ===")
    for next_step in (
        "1. Configure your AI model in config.py",
        "2. Run 'python app.py' to start the Gradio interface",
        "3. Use the web interface for interactive analysis",
    ):
        print(next_step)