Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
import numpy as np | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
# Page configuration handed to Streamlit: title, icon, wide layout, expanded sidebar.
_PAGE_SETTINGS = {
    "page_title": "Customer Churn Analysis",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
def load_data():
    """Read both churn CSV splits and return them together with their union.

    Each frame receives a ``Dataset`` column naming the split it came from
    before the two are concatenated with a fresh index.

    Returns:
        A ``(test_20, train_80, combined)`` tuple of DataFrames. If anything
        raises while reading or combining, the error is shown with
        ``st.error`` and ``(None, None, None)`` is returned instead.
    """
    try:
        holdout = pd.read_csv('churn-bigml-20.csv')
        training = pd.read_csv('churn-bigml-80.csv')
        # Tag every row with its source split so the combined frame stays traceable
        holdout['Dataset'] = 'Test (20%)'
        training['Dataset'] = 'Train (80%)'
        merged = pd.concat([holdout, training], ignore_index=True)
    except Exception as exc:
        st.error(f"Error loading data: {exc}")
        return None, None, None
    return holdout, training, merged
def main():
    """Draw the page title and sidebar, then render the selected analysis view.

    Loads the datasets, lets the user pick which split to inspect and which
    analysis to run, and hands the chosen DataFrame to the matching view
    function. Stops early with an error message when loading failed.
    """
    st.title("π Customer Churn Analysis - Exploratory Data Analysis")
    st.markdown("---")

    df_test, df_train, df_combined = load_data()
    # A single missing frame means loading failed; bail out before building the sidebar
    if any(frame is None for frame in (df_test, df_train, df_combined)):
        st.error("Failed to load data. Please check the CSV files exist and are properly formatted.")
        return

    st.sidebar.title("π Analysis Options")

    dataset_option = st.sidebar.selectbox(
        "Select Dataset:",
        ["Combined Dataset", "Training Set (80%)", "Test Set (20%)"]
    )
    # Named choices map to (frame, caption); any other value falls back to the test split
    named_choices = {
        "Combined Dataset": (df_combined, "π Total Records"),
        "Training Set (80%)": (df_train, "π Training Records"),
    }
    df, caption = named_choices.get(dataset_option, (df_test, "π Test Records"))
    st.sidebar.info(f"{caption}: {len(df):,}")

    # Menu label -> view function; the dict order is the order shown in the select box
    views = {
        "π Dataset Overview": dataset_overview,
        "π― Churn Analysis": churn_analysis,
        "π Geographic Analysis": geographic_analysis,
        "π Usage Patterns": usage_patterns,
        "π° Revenue Analysis": revenue_analysis,
        "π Correlation Analysis": correlation_analysis,
        "π Advanced Insights": advanced_insights,
    }
    analysis_type = st.sidebar.selectbox("Choose Analysis Type:", list(views))

    render_view = views.get(analysis_type)
    if render_view is not None:
        render_view(df)
def dataset_overview(df):
    """Render the overview page: description, headline metrics, structure and samples.

    Shows a static description of the dataset, four headline metrics, a pie of
    column dtypes, a missing-value report, the first ten rows and
    ``df.describe()``.

    Args:
        df: DataFrame to summarise. ``Churn`` and ``State`` are optional; their
            metrics show "N/A" when the column is absent.
    """
    st.header("π Dataset Overview")

    # Static description of the dataset, rendered as markdown
    about_text = """
    ### π About This Dataset
    This is a **telecommunications customer churn dataset** that contains information about customers of a telecom company and whether they churned (cancelled their service) or were retained. The dataset is commonly used for predictive modeling to identify customers at risk of churning.
    **Key Characteristics:**
    - **Domain**: Telecommunications industry
    - **Target Variable**: `Churn` (True/False) - indicates if customer cancelled service
    - **Time Period**: Historical customer data with usage patterns and service details
    - **Geographic Coverage**: Multiple US states (51 unique states)
    - **Use Case**: Customer retention analysis, churn prediction modeling, and business intelligence
    **Feature Categories:**
    - π **Demographics**: State, account length, area code
    - π **Service Plans**: International calling plan, voice mail plan
    - π **Usage Patterns**: Day/evening/night/international minutes, calls, and charges
    - π§ **Service Interactions**: Customer service call frequency
    - π° **Billing**: Detailed breakdown of charges by time period and service type
    This dataset enables analysis of customer behavior patterns, identification of churn risk factors, and development of retention strategies.
    """
    st.markdown(about_text)
    st.markdown("---")

    # Headline metrics, one per column
    records_cell, features_cell, churn_cell, states_cell = st.columns(4)
    with records_cell:
        st.metric("Total Records", f"{len(df):,}")
    with features_cell:
        st.metric("Total Features", len(df.columns))
    with churn_cell:
        churn_text = "N/A"
        if 'Churn' in df.columns:
            churned_share = df['Churn'].eq(True).mean() * 100
            churn_text = f"{churned_share:.1f}%"
        st.metric("Churn Rate", churn_text)
    with states_cell:
        state_value = df['State'].nunique() if 'State' in df.columns else "N/A"
        st.metric("Unique States", state_value)

    st.subheader("π Dataset Structure")
    dtype_cell, missing_cell = st.columns(2)
    with dtype_cell:
        st.write("**Data Types:**")
        dtype_counts = df.dtypes.value_counts()
        # dtype objects are stringified so the chart labels serialise cleanly
        dtype_pie = px.pie(
            values=dtype_counts.values,
            names=list(map(str, dtype_counts.index)),
            title="Distribution of Data Types"
        )
        st.plotly_chart(dtype_pie, use_container_width=True)
    with missing_cell:
        st.write("**Missing Values:**")
        null_counts = df.isnull().sum()
        if null_counts.sum() != 0:
            # Only list the columns that actually contain gaps
            st.write(null_counts[null_counts > 0])
        else:
            st.success("β No missing values found!")

    st.subheader("π Sample Data")
    st.dataframe(df.head(10), use_container_width=True)

    st.subheader("π Statistical Summary")
    st.dataframe(df.describe(), use_container_width=True)
def churn_analysis(df):
    """Render the churn page: overall split plus per-feature breakdowns.

    Draws a pie of retained vs churned customers, a churn-rate bar chart for
    a user-selected plan column, and a box plot and histogram of a
    user-selected numerical feature split by churn status.

    Args:
        df: Customer DataFrame. ``Churn`` is checked here (truthy values are
            treated as churned); the plan and numerical columns offered in the
            select boxes are used without a presence check.
    """
    st.header("π― Churn Analysis")

    if 'Churn' not in df.columns:
        st.error("'Churn' column not found in the dataset. Please check your data.")
        return

    # Explicit value -> colour maps keep "retained = green, churned = red"
    # regardless of which class happens to appear first in the data.
    churn_colors = {False: 'lightgreen', True: 'lightcoral'}
    status_colors = {'Retained': 'lightgreen', 'Churned': 'lightcoral'}

    col1, col2 = st.columns(2)
    with col1:
        churn_counts = df['Churn'].value_counts()
        # value_counts() orders by frequency, so each slice label is derived
        # from its own index value instead of assuming retained comes first.
        status_names = ['Churned' if flag else 'Retained' for flag in churn_counts.index]
        fig = px.pie(
            values=churn_counts.values,
            names=status_names,
            title="Overall Churn Distribution",
            color=status_names,
            color_discrete_map=status_colors
        )
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        # Churn rate (in percent) per level of the chosen plan column
        categorical_vars = ['International plan', 'Voice mail plan']
        selected_var = st.selectbox("Select categorical variable:", categorical_vars)
        churn_rate_cat = df.groupby(selected_var)['Churn'].mean() * 100
        fig = px.bar(
            x=churn_rate_cat.index,
            y=churn_rate_cat.values,
            title=f"Churn Rate by {selected_var}",
            labels={'x': selected_var, 'y': 'Churn Rate (%)'},
            color=churn_rate_cat.values,
            color_continuous_scale='Reds'
        )
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("π Churn Analysis by Numerical Features")
    numerical_vars = [
        'Account length', 'Total day minutes', 'Total day calls', 'Total day charge',
        'Total eve minutes', 'Total eve calls', 'Total eve charge',
        'Total night minutes', 'Total night calls', 'Total night charge',
        'Total intl minutes', 'Total intl calls', 'Total intl charge',
        'Customer service calls'
    ]
    selected_num_var = st.selectbox("Select numerical variable:", numerical_vars)

    col1, col2 = st.columns(2)
    with col1:
        fig = px.box(
            df, x='Churn', y=selected_num_var,
            title=f"{selected_num_var} Distribution by Churn Status",
            color='Churn',
            color_discrete_map=churn_colors
        )
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        fig = px.histogram(
            df, x=selected_num_var, color='Churn',
            title=f"{selected_num_var} Distribution",
            marginal="box",
            color_discrete_map=churn_colors
        )
        st.plotly_chart(fig, use_container_width=True)
def geographic_analysis(df):
    """Render the geographic page: per-state and per-area-code churn figures.

    Aggregates customer counts, churn counts/rates and mean charges by state,
    shows them on a US choropleth and a top-10 bar chart, then repeats a
    smaller aggregation by area code.

    Args:
        df: Customer DataFrame. ``State`` and ``Churn`` are checked here; the
            charge columns and ``Area code`` are used without a presence check.
    """
    st.header("π Geographic Analysis")

    absent = [name for name in ('State', 'Churn') if name not in df.columns]
    if absent:
        st.error(f"Required columns missing: {absent}")
        return

    # One row per state; Churn_Rate is the mean of Churn (a fraction, not a percentage)
    per_state = (
        df.groupby('State')
        .agg(
            Total_Customers=('Churn', 'count'),
            Churned_Customers=('Churn', 'sum'),
            Churn_Rate=('Churn', 'mean'),
            Avg_Day_Charge=('Total day charge', 'mean'),
            Avg_Eve_Charge=('Total eve charge', 'mean'),
            Avg_Night_Charge=('Total night charge', 'mean'),
            Avg_Intl_Charge=('Total intl charge', 'mean'),
        )
        .round(2)
        .reset_index()
    )

    map_cell, ranking_cell = st.columns(2)
    with map_cell:
        state_map = px.choropleth(
            per_state,
            locations='State',
            locationmode='USA-states',
            color='Churn_Rate',
            color_continuous_scale='Reds',
            hover_name='State',
            hover_data=['Total_Customers', 'Churned_Customers'],
            title="Churn Rate by State"
        )
        state_map.update_layout(geo_scope="usa")
        st.plotly_chart(state_map, use_container_width=True)
    with ranking_cell:
        # The ten states with the highest churn rate
        worst_states = per_state.nlargest(10, 'Churn_Rate')
        ranking_chart = px.bar(
            worst_states,
            x='Churn_Rate',
            y='State',
            orientation='h',
            color='Churn_Rate',
            color_continuous_scale='Reds',
            title="Top 10 States by Churn Rate"
        )
        st.plotly_chart(ranking_chart, use_container_width=True)

    st.subheader("π Area Code Analysis")
    per_area_code = (
        df.groupby('Area code')
        .agg(
            Total_Customers=('Churn', 'count'),
            Churn_Rate=('Churn', 'mean'),
            Avg_Day_Charge=('Total day charge', 'mean'),
        )
        .round(2)
        .reset_index()
    )
    area_chart = px.bar(
        per_area_code,
        x='Area code',
        y='Churn_Rate',
        color='Churn_Rate',
        color_continuous_scale='Reds',
        hover_data=['Total_Customers'],
        title="Churn Rate by Area Code"
    )
    st.plotly_chart(area_chart, use_container_width=True)
def _mean_by_churn_figure(df, metrics, suffix, title, yaxis_title):
    """Build a grouped bar chart of per-metric means for retained vs churned rows.

    Args:
        df: DataFrame containing ``Churn`` and every column in ``metrics``.
        metrics: Column names to average per churn status; one bar trace each.
        suffix: Text stripped (together with the ``'Total '`` prefix) from each
            column name to form its legend label.
        title: Figure title.
        yaxis_title: Y-axis label.

    Returns:
        The assembled ``go.Figure``.
    """
    # reindex guarantees a row for both statuses: a class missing from the
    # data becomes a NaN (empty) bar instead of raising KeyError on lookup.
    means = df.groupby('Churn')[metrics].mean().reindex([False, True])
    fig = go.Figure()
    for metric in metrics:
        fig.add_trace(go.Bar(
            name=metric.replace('Total ', '').replace(suffix, ''),
            x=['Retained', 'Churned'],
            y=means[metric].tolist()
        ))
    fig.update_layout(
        title=title,
        xaxis_title="Customer Status",
        yaxis_title=yaxis_title,
        barmode='group'
    )
    return fig


def usage_patterns(df):
    """Render the usage page: minutes, call counts and customer-service calls by churn.

    Args:
        df: Customer DataFrame. ``Churn`` is checked here; the minutes, calls
            and ``Customer service calls`` columns are used without a
            presence check.
    """
    st.header("π Usage Patterns Analysis")

    if 'Churn' not in df.columns:
        st.error("'Churn' column not found in the dataset.")
        return

    usage_metrics = ['Total day minutes', 'Total eve minutes', 'Total night minutes', 'Total intl minutes']
    call_metrics = ['Total day calls', 'Total eve calls', 'Total night calls', 'Total intl calls']

    col1, col2 = st.columns(2)
    with col1:
        fig = _mean_by_churn_figure(
            df, usage_metrics, ' minutes',
            title="Average Usage Patterns by Churn Status",
            yaxis_title="Average Minutes"
        )
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        fig = _mean_by_churn_figure(
            df, call_metrics, ' calls',
            title="Average Call Frequency by Churn Status",
            yaxis_title="Average Number of Calls"
        )
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("π§ Customer Service Analysis")
    col1, col2 = st.columns(2)
    with col1:
        # Per call-count: number of customers, number churned, and churn fraction
        cs_calls_churn = df.groupby('Customer service calls')['Churn'].agg(['count', 'sum', 'mean']).reset_index()
        cs_calls_churn['churn_rate'] = cs_calls_churn['mean'] * 100
        fig = px.bar(
            cs_calls_churn,
            x='Customer service calls',
            y='churn_rate',
            title="Churn Rate by Number of Customer Service Calls",
            color='churn_rate',
            color_continuous_scale='Reds',
            hover_data=['count']
        )
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        # Explicit colour map: green/red must not depend on which status appears first in df
        fig = px.histogram(
            df,
            x='Customer service calls',
            color='Churn',
            title="Distribution of Customer Service Calls",
            color_discrete_map={False: 'lightgreen', True: 'lightcoral'}
        )
        st.plotly_chart(fig, use_container_width=True)
def revenue_analysis(df):
    """Render the revenue page: total and per-component charges by churn status.

    Total revenue per customer is the sum of the day, evening, night and
    international charge columns. The caller's DataFrame is not modified.

    Args:
        df: Customer DataFrame. ``Churn`` is checked here; the four charge
            columns are used without a presence check.
    """
    st.header("π° Revenue Analysis")

    if 'Churn' not in df.columns:
        st.error("'Churn' column not found in the dataset.")
        return

    # assign() returns a new frame, so the derived column never leaks back
    # into the DataFrame owned by the caller.
    df = df.assign(
        Total_Revenue=(df['Total day charge'] + df['Total eve charge'] +
                       df['Total night charge'] + df['Total intl charge'])
    )

    status_labels = ['Retained', 'Churned']
    churn_colors = {False: 'lightgreen', True: 'lightcoral'}

    col1, col2 = st.columns(2)
    with col1:
        # reindex keeps both statuses present (NaN when a class has no rows)
        # so the lookup cannot raise KeyError on single-class data.
        mean_revenue = (
            df.groupby('Churn')['Total_Revenue'].mean().round(2).reindex([False, True])
        )
        fig = px.bar(
            x=status_labels,
            y=mean_revenue.tolist(),
            title="Average Revenue by Churn Status",
            color=status_labels,
            color_discrete_sequence=['lightgreen', 'lightcoral']
        )
        fig.update_layout(yaxis_title="Average Revenue ($)")
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        fig = px.box(
            df,
            x='Churn',
            y='Total_Revenue',
            title="Revenue Distribution by Churn Status",
            color='Churn',
            color_discrete_map=churn_colors
        )
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("π Revenue Breakdown")
    revenue_components = ['Total day charge', 'Total eve charge', 'Total night charge', 'Total intl charge']
    component_names = [comp.replace('Total ', '').replace(' charge', '') for comp in revenue_components]

    col1, col2 = st.columns(2)
    with col1:
        # Share of each charge type in the average bill
        avg_components = df[revenue_components].mean()
        fig = px.pie(
            values=avg_components.values,
            names=component_names,
            title="Average Revenue Composition"
        )
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        components_by_churn = df.groupby('Churn')[revenue_components].mean().reindex([False, True])
        fig = go.Figure()
        for component, label in zip(revenue_components, component_names):
            fig.add_trace(go.Bar(
                name=label,
                x=status_labels,
                y=components_by_churn[component].tolist()
            ))
        fig.update_layout(
            title="Revenue Components by Churn Status",
            xaxis_title="Customer Status",
            yaxis_title="Average Charge ($)",
            barmode='group'
        )
        st.plotly_chart(fig, use_container_width=True)
def correlation_analysis(df):
    """Render the correlation page: heatmap plus features ranked against churn.

    Works on a copy of ``df`` in which ``Churn`` is cast to int and the
    ``International plan`` / ``Voice mail plan`` columns (when present) are
    encoded as 1 for ``'Yes'`` and 0 otherwise. The correlation matrix covers
    the numeric columns plus those encoded columns.

    Args:
        df: Customer DataFrame. ``Churn`` is checked here; it must be castable
            to int.
    """
    st.header("π Correlation Analysis")

    if 'Churn' not in df.columns:
        st.error("'Churn' column not found in the dataset.")
        return

    # Start from the columns that are already numeric ('Dataset' is a split
    # label and never belongs in the matrix).
    correlation_cols = [
        col for col in df.select_dtypes(include=[np.number]).columns.tolist()
        if col != 'Dataset'
    ]

    df_corr = df.copy()
    df_corr['Churn'] = df_corr['Churn'].astype(int)
    if 'Churn' not in correlation_cols:
        correlation_cols.append('Churn')

    # Encode the Yes/No plan flags only when the column exists, so a dataset
    # without them still gets a correlation matrix instead of a KeyError.
    for plan_col in ('International plan', 'Voice mail plan'):
        if plan_col in df_corr.columns:
            df_corr[plan_col] = (df_corr[plan_col] == 'Yes').astype(int)
            if plan_col not in correlation_cols:
                correlation_cols.append(plan_col)

    corr_matrix = df_corr[correlation_cols].corr()
    # Drop the self-correlation by label; a positional slice would remove the
    # wrong row if another feature tied with Churn at |r| == 1.
    churn_corr_others = corr_matrix['Churn'].drop('Churn')

    col1, col2 = st.columns(2)
    with col1:
        fig = px.imshow(
            corr_matrix,
            title="Feature Correlation Heatmap",
            color_continuous_scale='RdBu_r',
            aspect="auto"
        )
        fig.update_layout(width=600, height=600)
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        # Rank by strength of association, ignoring sign
        churn_corr = churn_corr_others.abs().sort_values(ascending=False)
        fig = px.bar(
            x=churn_corr.values,
            y=churn_corr.index,
            orientation='h',
            title="Features Most Correlated with Churn",
            color=churn_corr.values,
            color_continuous_scale='Reds'
        )
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("π Key Correlations")
    col1, col2 = st.columns(2)
    with col1:
        st.write("**Top Positive Correlations with Churn:**")
        top_positive = churn_corr_others[churn_corr_others > 0].sort_values(ascending=False).head(5)
        if len(top_positive) > 0:
            for feature, corr in top_positive.items():
                st.write(f"β’ {feature}: {corr:.3f}")
        else:
            st.write("No positive correlations found.")
    with col2:
        st.write("**Top Negative Correlations with Churn:**")
        # Ascending sort lists the most negative (strongest) correlation first,
        # mirroring the strongest-first order of the positive list.
        top_negative = churn_corr_others[churn_corr_others < 0].sort_values().head(5)
        if len(top_negative) > 0:
            for feature, corr in top_negative.items():
                st.write(f"β’ {feature}: {corr:.3f}")
        else:
            st.write("No negative correlations found.")
def _collect_churn_insights(df):
    """Return the one-line summary strings shown under the key-insights heading.

    Expects ``Churn`` to be present. Every other column is optional: its
    insight is skipped when the column is missing or no row qualifies.
    """
    is_churned = df['Churn'] == True
    churned = df[is_churned]  # computed once and shared by the churned-only insights
    insights = []

    churn_rate = is_churned.mean() * 100
    insights.append(f"π Overall churn rate: {churn_rate:.1f}%")

    if 'Customer service calls' in df.columns:
        high_service_calls = df[df['Customer service calls'] >= 4]
        if not high_service_calls.empty:
            high_cs_churn_rate = high_service_calls['Churn'].mean() * 100
            insights.append(f"π Customers with 4+ service calls have {high_cs_churn_rate:.1f}% churn rate")

    if 'International plan' in df.columns:
        intl_customers = df[df['International plan'] == 'Yes']
        if not intl_customers.empty:
            intl_churn_rate = intl_customers['Churn'].mean() * 100
            insights.append(f"π International plan customers have {intl_churn_rate:.1f}% churn rate")

    if 'Voice mail plan' in df.columns:
        vm_customers = df[df['Voice mail plan'] == 'Yes']
        if not vm_customers.empty:
            vm_churn_rate = vm_customers['Churn'].mean() * 100
            insights.append(f"π§ Voice mail plan customers have {vm_churn_rate:.1f}% churn rate")

    if 'Total_Revenue' in df.columns and not churned.empty:
        avg_revenue_churned = churned['Total_Revenue'].mean()
        insights.append(f"π° Average revenue per churned customer: ${avg_revenue_churned:.2f}")

    if 'Account length' in df.columns and not churned.empty:
        avg_account_length = churned['Account length'].mean()
        insights.append(f"β° Average account length of churned customers: {avg_account_length:.0f} days")

    return insights


def advanced_insights(df):
    """Render the advanced page: segmentation, churned-customer profile, plans, summary.

    Total usage and total revenue are derived on a local copy, so the
    caller's DataFrame is not modified.

    Args:
        df: Customer DataFrame. ``Churn`` is checked here; the minutes, charge,
            plan, ``Customer service calls`` and ``Account length`` columns are
            used without a presence check by the charts.
    """
    st.header("π Advanced Insights")

    if 'Churn' not in df.columns:
        st.error("'Churn' column not found in the dataset.")
        return

    st.subheader("π₯ Customer Segmentation")
    # assign() builds a new frame; the derived columns stay local to this view
    df = df.assign(
        Total_Usage=(df['Total day minutes'] + df['Total eve minutes'] +
                     df['Total night minutes'] + df['Total intl minutes']),
        Total_Revenue=(df['Total day charge'] + df['Total eve charge'] +
                       df['Total night charge'] + df['Total intl charge'])
    )
    fig = px.scatter(
        df,
        x='Total_Usage',
        y='Total_Revenue',
        color='Churn',
        title="Customer Segmentation: Usage vs Revenue",
        labels={'Total_Usage': 'Total Usage (minutes)', 'Total_Revenue': 'Total Revenue ($)'},
        # Explicit map: colours must not depend on which status appears first in df
        color_discrete_map={False: 'lightgreen', True: 'lightcoral'},
        opacity=0.6
    )
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("β οΈ High-Risk Customer Analysis")
    churned_customers = df[df['Churn'] == True]
    col1, col2 = st.columns(2)
    with col1:
        # How many churned customers made 0, 1, 2, ... customer-service calls
        cs_calls_dist = churned_customers['Customer service calls'].value_counts().sort_index()
        fig = px.bar(
            x=cs_calls_dist.index,
            y=cs_calls_dist.values,
            title="Customer Service Calls Distribution (Churned Customers)",
            labels={'x': 'Number of CS Calls', 'y': 'Number of Customers'},
            color=cs_calls_dist.values,
            color_continuous_scale='Reds'
        )
        st.plotly_chart(fig, use_container_width=True)
    with col2:
        fig = px.histogram(
            churned_customers,
            x='Account length',
            title="Account Length Distribution (Churned Customers)",
            color_discrete_sequence=['lightcoral']
        )
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("π Plan Analysis")
    plan_analysis = (
        df.groupby(['International plan', 'Voice mail plan'])
        .agg(
            Total_Customers=('Churn', 'count'),
            Churned_Customers=('Churn', 'sum'),
            Churn_Rate=('Churn', 'mean'),
        )
        .round(3)
        .reset_index()
    )
    # Label such as "Yes Intl, No VM"; relies on both plan columns holding strings
    plan_analysis['Plan_Combination'] = (plan_analysis['International plan'] + ' Intl, ' +
                                         plan_analysis['Voice mail plan'] + ' VM')
    fig = px.bar(
        plan_analysis,
        x='Plan_Combination',
        y='Churn_Rate',
        title="Churn Rate by Plan Combinations",
        color='Churn_Rate',
        color_continuous_scale='Reds',
        hover_data=['Total_Customers']
    )
    fig.update_xaxes(tickangle=45)
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("π‘ Key Insights Summary")
    # Best-effort section: a failure here is reported on the page rather than
    # aborting the charts that were already drawn above.
    try:
        for insight in _collect_churn_insights(df):
            st.info(insight)
    except Exception as e:
        st.error(f"Error generating insights: {str(e)}")
        st.warning("Please check that all required columns are present in your dataset.")
# Launch the dashboard only when this file is executed as the entry script.
if __name__ == "__main__":
    main()