Spaces:

NTU-Peak-2
/

Singtel_Use_Case1

Runtime error

cosmoruler

first draft

5269c7e about 1 month ago

8.31 kB

	import base64
	import io
	import os

	import gradio as gr
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import seaborn as sns
	from PIL import Image
	from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonCodeTool
	from smolagents.models import OpenAIServerModel

	# Configure the CSV file path.
	# The default is the original absolute path on the author's machine; set the
	# CSV_FILE_PATH environment variable to point at the file on any other host
	# without editing the code.
	CSV_FILE_PATH = os.environ.get(
	    "CSV_FILE_PATH",
	    "C:/Users/Cosmo/Desktop/NTU Peak Singtel/outsystems_sample_logs_6months.csv",
	)

	class DataAnalysisAgent:
	    """Load a CSV into a DataFrame and expose summary, plotting and LLM helpers.

	    Attributes:
	        python_tool, search_tool: tool instances created in ``__init__``.
	        agent: a ``CodeAgent`` when a model was supplied and construction
	            succeeded, otherwise ``None``.
	        df: the loaded ``pandas.DataFrame``, or ``None`` if nothing was loaded.
	        load_status: message returned by the ``load_data`` call in ``__init__``.
	    """

	    def __init__(self, model=None, csv_path=None):
	        """Initialize the data analysis agent with SmoLagent.

	        Args:
	            model: optional LLM model object handed to ``CodeAgent``, e.g.
	                ``OpenAIServerModel(model_id="gpt-4", api_key="your-api-key")``.
	                When omitted, no agent is built and AI analysis reports that
	                it is not configured.
	            csv_path: optional path of the CSV to load; ``None`` means the
	                module-level ``CSV_FILE_PATH``.
	        """
	        # Initialize tools
	        # NOTE(review): confirm that `PythonCodeTool` is exported by the
	        # installed smolagents version.
	        self.python_tool = PythonCodeTool()
	        self.search_tool = DuckDuckGoSearchTool()

	        # Always define the attribute: analyze_with_smolagent reads
	        # self.agent and must see None rather than raise AttributeError.
	        self.agent = None
	        if model is not None:
	            try:
	                self.agent = CodeAgent(
	                    tools=[self.python_tool, self.search_tool],
	                    model=model,
	                )
	            except Exception:
	                # Best effort: the dashboard stays usable without the agent.
	                self.agent = None

	        self.df = None
	        # Keep the outcome so a failed load can be diagnosed later.
	        self.load_status = self.load_data(csv_path)

	    def load_data(self, csv_path=None):
	        """Load the CSV data into ``self.df``.

	        Args:
	            csv_path: path to read; ``None`` means ``CSV_FILE_PATH``.

	        Returns:
	            A success message containing the DataFrame shape, or an error
	            message. On failure ``self.df`` is left unchanged.
	        """
	        try:
	            path = CSV_FILE_PATH if csv_path is None else csv_path
	            self.df = pd.read_csv(path)
	        except Exception as e:
	            return f"Error loading data: {e}"
	        return f"Data loaded successfully! Shape: {self.df.shape}"

	    def get_data_overview(self):
	        """Get basic overview of the dataset.

	        Returns:
	            ``"No data loaded"`` when there is no DataFrame, otherwise a dict
	            with the keys ``shape``, ``columns``, ``dtypes``,
	            ``missing_values`` and ``memory_usage`` (formatted in MB).
	        """
	        if self.df is None:
	            return "No data loaded"

	        total_bytes = self.df.memory_usage(deep=True).sum()
	        return {
	            "shape": self.df.shape,
	            "columns": list(self.df.columns),
	            "dtypes": self.df.dtypes.to_dict(),
	            "missing_values": self.df.isnull().sum().to_dict(),
	            "memory_usage": f"{total_bytes / 1024**2:.2f} MB",
	        }

	    def generate_basic_stats(self):
	        """Generate basic statistical summary.

	        Returns:
	            ``"No data loaded"`` when there is no DataFrame, otherwise the
	            HTML table produced from ``describe(include='all')``.
	        """
	        if self.df is None:
	            return "No data loaded"

	        return self.df.describe(include='all').to_html()

	    @staticmethod
	    def _figure_to_png_buffer(fig):
	        """Render *fig* as PNG into a new ``io.BytesIO`` rewound to offset 0."""
	        img_buffer = io.BytesIO()
	        fig.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
	        img_buffer.seek(0)
	        return img_buffer

	    def create_correlation_heatmap(self):
	        """Create correlation heatmap for numerical columns.

	        Returns:
	            ``None`` when no data is loaded, an explanatory string when fewer
	            than two numerical columns exist, otherwise a ``BytesIO`` holding
	            the PNG image.
	        """
	        if self.df is None:
	            return None

	        numeric_cols = self.df.select_dtypes(include=[np.number]).columns
	        if len(numeric_cols) < 2:
	            return "Not enough numerical columns for correlation analysis"

	        correlation_matrix = self.df[numeric_cols].corr()
	        fig, ax = plt.subplots(figsize=(12, 8))
	        try:
	            sns.heatmap(
	                correlation_matrix,
	                annot=True,
	                cmap='coolwarm',
	                center=0,
	                ax=ax,
	            )
	            ax.set_title('Correlation Heatmap')
	            fig.tight_layout()
	            return self._figure_to_png_buffer(fig)
	        finally:
	            # Close even when drawing fails so figures do not accumulate.
	            plt.close(fig)

	    def create_distribution_plots(self):
	        """Create distribution plots for numerical columns.

	        Draws one 30-bin histogram per numerical column on a grid that is at
	        most three columns wide.

	        Returns:
	            ``None`` when no data is loaded, an explanatory string when there
	            are no numerical columns, otherwise a ``BytesIO`` holding the PNG.
	        """
	        if self.df is None:
	            return None

	        numeric_cols = self.df.select_dtypes(include=[np.number]).columns
	        if len(numeric_cols) == 0:
	            return "No numerical columns found"

	        n_cols = min(3, len(numeric_cols))
	        n_rows = (len(numeric_cols) + n_cols - 1) // n_cols  # ceiling division

	        # squeeze=False always yields a 2-D array of axes, so one flatten()
	        # covers the 1x1, single-row and multi-row layouts alike.
	        fig, axes = plt.subplots(
	            n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False
	        )
	        try:
	            axes = axes.flatten()
	            for ax, col in zip(axes, numeric_cols):
	                self.df[col].hist(bins=30, ax=ax, alpha=0.7)
	                ax.set_title(f'Distribution of {col}')
	                ax.set_xlabel(col)
	                ax.set_ylabel('Frequency')

	            # Hide empty subplots
	            for ax in axes[len(numeric_cols):]:
	                ax.set_visible(False)

	            fig.tight_layout()
	            return self._figure_to_png_buffer(fig)
	        finally:
	            # Close even when drawing fails so figures do not accumulate.
	            plt.close(fig)

	    def analyze_with_smolagent(self, query):
	        """Use SmoLagent to analyze data based on user query.

	        Args:
	            query: the user's free-text question about the dataset.

	        Returns:
	            The agent's response, or a message string when the agent is not
	            configured, no data is loaded, or the agent run raises.
	        """
	        if self.agent is None:
	            return "SmoLagent not configured. Please set up your LLM model."
	        # The context below dereferences self.df, so a failed load must be
	        # reported here instead of surfacing as an AttributeError.
	        if self.df is None:
	            return "No data loaded"

	        # Prepare context about the dataset
	        data_context = f"""
	Dataset shape: {self.df.shape}
	Columns: {list(self.df.columns)}
	Data types: {self.df.dtypes.to_dict()}
	First few rows: {self.df.head().to_string()}
	"""

	        prompt = f"""
	You have access to a pandas DataFrame with the following information:
	{data_context}

	User query: {query}

	Please analyze the data and provide insights. Use the PythonCodeTool to write and execute code for analysis.
	"""

	        try:
	            return self.agent.run(prompt)
	        except Exception as e:
	            return f"Error in SmoLagent analysis: {str(e)}"

	# Initialize the agent
	# Module-level instance shared by the Gradio callback functions below.
	# Constructing it runs __init__, which creates the tools and calls
	# load_data(), so the CSV read is attempted when this module is executed.
	data_agent = DataAnalysisAgent()

	def analyze_data_overview():
	    """Gradio callback: return the dataset overview as text.

	    Converts whatever ``data_agent.get_data_overview()`` returns (a dict or
	    a message string) to ``str`` for display in a textbox.
	    """
	    return str(data_agent.get_data_overview())

	def generate_statistics():
	    """Gradio callback: return the statistical summary from the shared agent.

	    The value is passed through unchanged from
	    ``data_agent.generate_basic_stats()``.
	    """
	    stats_html = data_agent.generate_basic_stats()
	    return stats_html

	def create_correlation_plot():
	    """Gradio function for correlation heatmap.

	    Returns:
	        A PIL image of the heatmap, or ``None`` when the agent produced no
	        image buffer.
	    """
	    img_buffer = data_agent.create_correlation_heatmap()
	    # The agent returns None when no data is loaded and a message string
	    # when there are too few numerical columns; neither can be opened as an
	    # image, so both map to "no image".
	    if img_buffer is None or isinstance(img_buffer, str):
	        return None
	    return Image.open(img_buffer)

	def create_distribution_plot():
	    """Gradio function for distribution plots.

	    Returns:
	        A PIL image of the histogram grid, or ``None`` when the agent
	        produced no image buffer.
	    """
	    img_buffer = data_agent.create_distribution_plots()
	    # The agent returns None when no data is loaded and a message string
	    # when there are no numerical columns; neither can be opened as an
	    # image, so both map to "no image".
	    if img_buffer is None or isinstance(img_buffer, str):
	        return None
	    return Image.open(img_buffer)

	def smolagent_analysis(query):
	    """Gradio callback: forward the user's question to the shared agent.

	    Returns whatever ``data_agent.analyze_with_smolagent(query)`` returns.
	    """
	    analysis_result = data_agent.analyze_with_smolagent(query)
	    return analysis_result

	# Create Gradio interface
	# Four tabs: overview text, statistics table, two plots, and the AI prompt.
	# Components are declared in display order; each button is wired to its
	# callback right after the output component it fills.
	with gr.Blocks(title="AI Data Analysis with SmoLagent") as demo:
	    gr.Markdown("# AI Data Analysis Dashboard")
	    gr.Markdown("Analyze your CSV data using AI-powered insights with SmoLagent")

	    # Tab 1: textual overview of the dataset
	    with gr.Tab("Data Overview"):
	        gr.Markdown("## Dataset Overview")
	        overview_btn = gr.Button("Get Data Overview")
	        overview_output = gr.Textbox(label="Dataset Information", lines=10)
	        overview_btn.click(fn=analyze_data_overview, outputs=overview_output)

	    # Tab 2: describe() summary rendered as HTML
	    with gr.Tab("Basic Statistics"):
	        gr.Markdown("## Statistical Summary")
	        stats_btn = gr.Button("Generate Statistics")
	        stats_output = gr.HTML(label="Statistical Summary")
	        stats_btn.click(fn=generate_statistics, outputs=stats_output)

	    # Tab 3: one row of buttons above one row of images
	    with gr.Tab("Visualizations"):
	        gr.Markdown("## Data Visualizations")

	        with gr.Row():
	            corr_btn = gr.Button("Generate Correlation Heatmap")
	            dist_btn = gr.Button("Generate Distribution Plots")

	        with gr.Row():
	            corr_plot = gr.Image(label="Correlation Heatmap")
	            dist_plot = gr.Image(label="Distribution Plots")

	        corr_btn.click(fn=create_correlation_plot, outputs=corr_plot)
	        dist_btn.click(fn=create_distribution_plot, outputs=dist_plot)

	    # Tab 4: free-text question answered by the agent
	    with gr.Tab("AI Analysis"):
	        gr.Markdown("## SmoLagent AI Analysis")
	        gr.Markdown("Ask questions about your data and get AI-powered insights")

	        query_input = gr.Textbox(
	            label="Enter your analysis question",
	            placeholder="e.g., 'What are the main trends in this data?' or 'Find outliers and anomalies'",
	            lines=3,
	        )
	        analyze_btn = gr.Button("Analyze with AI")
	        ai_output = gr.Textbox(label="AI Analysis Results", lines=15)

	        analyze_btn.click(
	            fn=smolagent_analysis,
	            inputs=query_input,
	            outputs=ai_output,
	        )

	# Start the web server only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()