import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from statsmodels.datasets import get_rdataset
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster

# Set up the style for all plots
plt.style.use('default')
sns.set_theme(style="whitegrid", palette="husl")

def load_arrests_data():
    """Load and return the US Arrests dataset"""
    USArrests = get_rdataset('USArrests').data
    return USArrests

def create_categorical_plot(df, column, target='Survived'):
    """Create an interactive plot for categorical variables"""
    fig = px.bar(
        df.groupby(column)[target].mean().reset_index(),
        x=column,
        y=target,
        title=f'Survival Rate by {column}',
        labels={target: 'Survival Rate', column: column},
        color=target,
        color_continuous_scale='RdBu'
    )
    fig.update_layout(
        plot_bgcolor='rgb(30, 30, 30)',
        paper_bgcolor='rgb(30, 30, 30)',
        font=dict(color='white')
    )
    return fig

def create_numeric_plot(df, column, target='Survived'):
    """Create an interactive plot for numeric variables"""
    fig = px.box(
        df,
        x=target,
        y=column,
        title=f'{column} Distribution by Survival',
        labels={target: 'Survived', column: column},
        color=target,
        color_discrete_sequence=px.colors.qualitative.Set1
    )
    fig.update_layout(
        plot_bgcolor='rgb(30, 30, 30)',
        paper_bgcolor='rgb(30, 30, 30)',
        font=dict(color='white')
    )
    return fig

def show():
    st.title("Week 7: Clustering Lab - State Crime Pattern Analysis")
    
    # Code Example: Loading and Basic Data Exploration
    with st.expander("Code Example: Loading and Exploring Data"):
        st.code("""
# Load the data
from statsmodels.datasets import get_rdataset
USArrests = get_rdataset('USArrests').data

# Basic data exploration
print("Dataset shape:", USArrests.shape)
print("\\nVariables:", USArrests.columns.tolist())
print("\\nFirst 5 states:")
print(USArrests.head())

# Basic statistics
print("\\nData Summary:")
print(USArrests.describe())
        """, language="python")
    
    # Introduction Section with Learning Objectives
    st.header("Learning Objectives")
    st.markdown("""
    In this week, you'll master:
    1. **Unsupervised Learning**: Discover hidden patterns in crime data without predefined categories
    2. **K-Means Clustering**: Learn to divide states into distinct safety profiles
    3. **Hierarchical Clustering**: Create a "family tree" of state crime patterns
    4. **Data Preprocessing**: Understand why scaling is crucial for fair comparisons
    """)
    
    # Interactive Overview
    st.header("Lab Overview")
    st.write("""
    Welcome to your hands-on clustering lab! You'll be working as a policy analyst for the Department of Justice, 
    analyzing crime patterns across US states. Your mission: discover hidden safety profiles that could inform 
    federal resource allocation and crime prevention strategies.
    """)
    
    # Load Data
    st.header("Exercise 1: Data Detective Work")
    st.write("Let's start by understanding our dataset - the US Arrests data.")
    
    df = load_arrests_data()
    
    # Code Example: Data Visualization
    with st.expander("Code Example: Creating Visualizations"):
        st.code("""
# Create correlation heatmap
import plotly.express as px
fig = px.imshow(df.corr(), 
                labels=dict(color="Correlation"),
                color_continuous_scale="RdBu")
fig.show()

# Create box plots
fig = px.box(df, title="Data Distribution")
fig.show()
        """, language="python")
    
    # Interactive Data Exploration
    col1, col2 = st.columns(2)
    
    with col1:
        st.subheader("Dataset Overview")
        st.write(f"Number of states: {len(df)}")
        st.write(f"Number of variables: {len(df.columns)}")
        st.write("\nVariables:", df.columns.tolist())
        
        # Interactive data summary
        st.subheader("Data Summary")
        summary = df.describe()
        st.dataframe(summary)
    
    with col2:
        st.subheader("First 5 States")
        st.dataframe(df.head())
        
        # Interactive correlation heatmap
        st.subheader("Correlation Heatmap")
        fig = px.imshow(df.corr(), 
                       labels=dict(color="Correlation"),
                       color_continuous_scale="RdBu")
        st.plotly_chart(fig)
    
    # Exercise 2: Scaling Challenge
    st.header("Exercise 2: The Scaling Challenge")
    
    # Code Example: Data Scaling
    with st.expander("Code Example: Scaling Data"):
        st.code("""
# Import StandardScaler
from sklearn.preprocessing import StandardScaler

# Create and fit the scaler
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# Convert back to DataFrame
df_scaled = pd.DataFrame(df_scaled, 
                        columns=df.columns, 
                        index=df.index)

# Compare original vs scaled data
print("Original data ranges:")
print(df.describe())
print("\\nScaled data ranges:")
print(df_scaled.describe())
        """, language="python")
    
    # Explanation of scaling
    st.markdown("""
    ### Why Do We Need Scaling?
    
    In our crime data, we have variables measured in very different scales:
    - Murder rates: typically 0-20 per 100,000
    - Assault rates: typically 50-350 per 100,000
    - Urban population: 0-100 percentage
    - Rape rates: typically 0-50 per 100,000
    
    Without scaling, variables with larger numbers (like Assault) would dominate our analysis, 
    making smaller-scale variables (like Murder) less influential. This would be like comparing 
    dollars to cents - the cents would seem insignificant even if they were important!
    """)
    
    # Show original data ranges
    st.subheader("Original Data Ranges")
    col1, col2 = st.columns(2)
    
    with col1:
        # Create a bar chart of variances
        fig_var = px.bar(
            x=df.columns,
            y=df.var(),
            title="Variance of Each Variable (Before Scaling)",
            labels={'x': 'Crime Variables', 'y': 'Variance'},
            color=df.var(),
            color_continuous_scale='Viridis'
        )
        st.plotly_chart(fig_var)
        
        st.write("""
        Notice how Assault has a much larger variance (6,945) compared to Murder (19).
        This means Assault would dominate our clustering if we didn't scale the data!
        """)
    
    with col2:
        # Create box plots of original data
        fig_box = px.box(df, title="Original Data Distribution")
        fig_box.update_layout(
            xaxis_title="Crime Variables",
            yaxis_title="Rate per 100,000"
        )
        st.plotly_chart(fig_box)
    
    # Explain standardization
    st.markdown("""
    ### What is Standardization?
    
    Standardization (also called Z-score normalization) transforms our data so that:
    1. Each variable has a mean of 0
    2. Each variable has a standard deviation of 1
    
    The formula is: z = (x - μ) / σ
    - x is the original value
    - μ is the mean of the variable
    - σ is the standard deviation of the variable
    """)
    
    # Scale the data
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df)
    df_scaled = pd.DataFrame(df_scaled, columns=df.columns, index=df.index)
    
    # Show scaled data
    st.subheader("After Scaling")
    
    # Create box plots of scaled data
    fig_scaled = px.box(df_scaled, title="Scaled Data Distribution")
    fig_scaled.update_layout(
        xaxis_title="Crime Variables",
        yaxis_title="Standardized Values"
    )
    st.plotly_chart(fig_scaled)
    
    st.write("""
    After scaling, all variables are on the same scale:
    - Mean = 0
    - Standard Deviation = 1
    - Values typically range from -3 to +3
    """)
    
    # Show before/after comparison for a few states
    st.write("### Before vs After Scaling (Sample States)")
    comparison_df = pd.DataFrame({
        'State': df.index[:5],
        'Original Murder': df['Murder'][:5],
        'Scaled Murder': df_scaled['Murder'][:5],
        'Original Assault': df['Assault'][:5],
        'Scaled Assault': df_scaled['Assault'][:5]
    })
    st.dataframe(comparison_df)
    
    st.write("""
    Notice how the relative differences between states are preserved,
    but now all variables contribute equally to our analysis!
    """)
    
    # Why scaling matters for clustering
    st.markdown("""
    ### Why Scaling Matters for Clustering
    
    In clustering, we measure distances between data points. Without scaling:
    - States might be grouped together just because they have similar assault rates
    - Important differences in murder rates might be ignored
    
    With scaling:
    - All variables contribute equally to the distance calculations
    - We can find true patterns in the data, not just patterns in the largest numbers
    """)
    
    # Exercise 3: Finding Optimal Clusters
    st.header("Exercise 3: Finding the Right Number of Groups")
    
    # Code Example: Elbow Method
    with st.expander("Code Example: Finding Optimal K"):
        st.code("""
# Calculate inertias for different K values
inertias = []
K_values = range(1, 11)

for k in K_values:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=20)
    kmeans.fit(df_scaled)
    inertias.append(kmeans.inertia_)

# Create elbow plot
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=list(K_values),
    y=inertias,
    mode='lines+markers',
    name='Inertia'
))
fig.update_layout(
    title='Finding the Optimal Number of Clusters',
    xaxis_title='Number of Clusters (K)',
    yaxis_title='Within-Cluster Sum of Squares'
)
fig.show()
        """, language="python")
    
    st.markdown("""
    ### The Elbow Method Explained
    
    The elbow method helps us find the optimal number of clusters (K) by looking at how the "within-cluster sum of squares" 
    (WCSS) changes as we increase the number of clusters. Think of it like this:
    
    - **What is WCSS?** It's a measure of how spread out the points are within each cluster
    - **Lower WCSS** means points are closer to their cluster center (better clustering)
    - **Higher WCSS** means points are more spread out from their cluster center
    
    As we increase K:
    1. WCSS always decreases (more clusters = tighter groups)
    2. The rate of decrease slows down
    3. We look for the "elbow" - where adding more clusters doesn't help much anymore
    """)
    
    # Calculate inertias for different K values
    inertias = []
    K_values = range(1, 11)
    
    for k in K_values:
        kmeans = KMeans(n_clusters=k, random_state=42, n_init=20)
        kmeans.fit(df_scaled)
        inertias.append(kmeans.inertia_)
    
    # Create interactive elbow plot
    fig_elbow = go.Figure()
    fig_elbow.add_trace(go.Scatter(
        x=list(K_values),
        y=inertias,
        mode='lines+markers',
        name='Inertia'
    ))
    fig_elbow.update_layout(
        title='Finding the Optimal Number of State Crime Profiles',
        xaxis_title='Number of Clusters (K)',
        yaxis_title='Within-Cluster Sum of Squares',
        plot_bgcolor='rgb(30, 30, 30)',
        paper_bgcolor='rgb(30, 30, 30)',
        font=dict(color='white')
    )
    st.plotly_chart(fig_elbow)
    
    # Interpretation guide
    st.markdown("""
    ### How to Interpret the Elbow Plot
    
    Look at the plot above and ask yourself:
    1. **Where is the "elbow"?** 
       - The point where the line starts to level off
       - Adding more clusters doesn't give much improvement
       - In our case, it's around K=4
    
    2. **What do the numbers mean?**
       - K=1: All states in one group (not useful)
       - K=2: Basic high/low crime split
       - K=3: More nuanced grouping
       - K=4: Our "elbow" - good balance of detail and simplicity
       - K>4: Diminishing returns - more complexity without much benefit
    
    3. **Why not just use more clusters?**
       - More clusters = more complex to interpret
       - Small clusters might not be meaningful
       - Goal is to find the simplest model that captures the main patterns
    """)
    
    # Show the actual values
    st.write("### WCSS Values for Each K")
    wcss_df = pd.DataFrame({
        'Number of Clusters (K)': K_values,
        'Within-Cluster Sum of Squares': inertias,
        'Improvement from Previous K': [0] + [inertias[i-1] - inertias[i] for i in range(1, len(inertias))]
    })
    st.dataframe(wcss_df)
    
    st.markdown("""
    ### Making the Decision
    
    Based on our elbow plot and the numbers above:
    1. The biggest improvements happen from K=1 to K=4
    2. After K=4, the improvements get much smaller
    3. K=4 gives us a good balance of:
       - Capturing meaningful patterns
       - Keeping the model simple enough to interpret
       - Having enough states in each cluster to be meaningful
    
    This is why we'll use K=4 for our clustering analysis!
    """)
    
    # Exercise 4: K-Means Clustering
    st.header("Exercise 4: K-Means State Profiling")
    
    # Code Example: K-Means Clustering
    with st.expander("Code Example: K-Means Implementation"):
        st.code("""
# Perform K-means clustering
from sklearn.cluster import KMeans

# Create and fit the model
kmeans = KMeans(
    n_clusters=4,        # Number of clusters
    random_state=42,     # For reproducibility
    n_init=20           # Number of times to run with different centroids
)
cluster_labels = kmeans.fit_predict(df_scaled)

# Add cluster labels to original data
df_clustered = df.copy()
df_clustered['Cluster'] = cluster_labels

# Visualize the clusters
import plotly.express as px
fig = px.scatter(df_clustered, 
                x='Murder', 
                y='Assault',
                color='Cluster',
                hover_data=['UrbanPop', 'Rape'],
                title='State Crime Profiles')
fig.show()

# Show cluster centers
centers_df = pd.DataFrame(
    kmeans.cluster_centers_,
    columns=df.columns
)
print("Cluster Centers:")
print(centers_df)
        """, language="python")
    
    st.markdown("""
    ### What is K-Means Clustering?
    
    K-means is an unsupervised learning algorithm that groups similar data points together. Think of it like organizing 
    students into study groups based on their interests:
    
    1. **Initialization**: 
       - We randomly place K "centers" (centroids) in our data space
       - Each center represents the "average" of its cluster
       - In our case, each center represents a typical crime profile
    
    2. **Assignment**:
       - Each state is assigned to its nearest center
       - "Nearest" is measured by Euclidean distance
       - States with similar crime patterns end up in the same cluster
    
    3. **Update**:
       - Centers move to the average position of their assigned states
       - This process repeats until centers stop moving
       - The algorithm converges when states are optimally grouped
    """)
    
    # Visualize the process
    st.subheader("K-Means in Action")
    st.write("""
    Let's see how K-means works with our state crime data. We'll use K=4 clusters to find distinct crime profiles.
    """)
    
    # Let user choose number of clusters
    k = st.slider("Choose number of clusters (K)", 2, 6, 4)
    
    # Perform K-means clustering
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=20)
    cluster_labels = kmeans.fit_predict(df_scaled)
    
    # Add cluster labels to original data
    df_clustered = df.copy()
    df_clustered['Cluster'] = cluster_labels
    
    # Create interactive scatter plot
    fig = px.scatter(df_clustered, 
                    x='Murder', 
                    y='Assault',
                    color='Cluster',
                    hover_data=['UrbanPop', 'Rape'],
                    title='State Crime Profiles')
    st.plotly_chart(fig)
    
    # Explain hyperparameters
    st.markdown("""
    ### K-Means Hyperparameters Explained
    
    1. **n_clusters (K)**
       - The number of groups we want to create
       - We chose K=4 based on the elbow method
       - Each cluster represents a distinct crime profile
    
    2. **random_state**
       - Controls the random initialization of centroids
       - Setting it to 42 ensures reproducible results
       - Different values might give slightly different clusters
    
    3. **n_init**
       - Number of times to run the algorithm with different initial centroids
       - We use 20 to find the best possible clustering
       - Higher values give more reliable results but take longer
    
    4. **max_iter**
       - Maximum number of iterations for each run
       - Default is 300, which is usually enough
       - Algorithm stops earlier if it converges
    
    5. **algorithm**
       - 'auto': Automatically chooses the best algorithm
       - 'full': Traditional K-means
       - 'elkan': More efficient for well-separated clusters
    """)
    
    # Show cluster centers
    st.subheader("Cluster Centers (Typical Crime Profiles)")
    centers_df = pd.DataFrame(
        kmeans.cluster_centers_,
        columns=df.columns
    )
    st.dataframe(centers_df)
    
    st.write("""
    Each row represents the "average" crime profile for that cluster. For example:
    - High values in Murder and Assault indicate a high-crime cluster
    - High UrbanPop with low crime rates might indicate urban safety
    - Low values across all metrics might indicate rural safety
    """)
    
    # Display cluster analysis
    st.subheader("State Crime Profiles Analysis")
    
    for cluster_num in range(k):
        cluster_states = df_clustered[df_clustered['Cluster'] == cluster_num]
        st.write(f"\n**CLUSTER {cluster_num}: {len(cluster_states)} states**")
        st.write("States:", ", ".join(cluster_states.index.tolist()))
        st.write("Average characteristics:")
        avg_profile = cluster_states[['Murder', 'Assault', 'UrbanPop', 'Rape']].mean()
        st.write(avg_profile)
    
    # Explain the results
    st.markdown("""
    ### Interpreting the Results
    
    Each cluster represents a distinct crime profile:
    1. **Cluster Characteristics**
       - Look at the average values for each crime type
       - Compare urban population percentages
       - Identify the defining features of each cluster
    
    2. **State Groupings**
       - States in the same cluster have similar crime patterns
       - Geographic proximity doesn't always mean similar profiles
       - Some states might surprise you with their cluster membership
    
    3. **Policy Implications**
       - Clusters help identify states with similar challenges
       - Can guide resource allocation and policy development
       - Enables targeted interventions based on crime profiles
    """)
    
    # Exercise 5: Hierarchical Clustering
    st.header("Exercise 5: Hierarchical Clustering Exploration")
    
    # Code Example: Hierarchical Clustering
    with st.expander("Code Example: Hierarchical Clustering"):
        st.code("""
# Create hierarchical clustering
from scipy.cluster.hierarchy import linkage, dendrogram

# Create linkage matrix
linkage_matrix = linkage(df_scaled, method='complete')

# Plot dendrogram
import plotly.graph_objects as go
dendro = dendrogram(linkage_matrix, labels=df.index.tolist(), no_plot=True)

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=dendro['icoord'],
    y=dendro['dcoord'],
    mode='lines',
    line=dict(color='white')
))
fig.update_layout(
    title='State Crime Pattern Family Tree',
    xaxis_title='States',
    yaxis_title='Distance Between Groups'
)
fig.show()

# Cut the tree to get clusters
from scipy.cluster.hierarchy import fcluster
hierarchical_labels = fcluster(linkage_matrix, k, criterion='maxclust') - 1
        """, language="python")
    
    st.markdown("""
    ### What is Hierarchical Clustering?
    
    Hierarchical clustering creates a tree-like structure (dendrogram) that shows how data points are related at different levels. 
    Think of it like building a family tree for states based on their crime patterns:
    
    1. **Bottom-Up Approach (Agglomerative)**:
       - Start with each state as its own cluster
       - Find the two closest states and merge them
       - Continue merging until all states are in one cluster
       - Creates a complete hierarchy of relationships
    
    2. **Distance Measurement**:
       - Complete Linkage: Uses the maximum distance between states
       - Average Linkage: Uses the average distance between states
       - Single Linkage: Uses the minimum distance between states
       - We use complete linkage for more distinct clusters
    """)
    
    # Create hierarchical clustering
    linkage_matrix = linkage(df_scaled, method='complete')
    
    # Create interactive dendrogram
    fig_dendro = go.Figure()
    dendro = dendrogram(linkage_matrix, labels=df.index.tolist(), no_plot=True)
    
    fig_dendro.add_trace(go.Scatter(
        x=dendro['icoord'],
        y=dendro['dcoord'],
        mode='lines',
        line=dict(color='white')
    ))
    
    fig_dendro.update_layout(
        title='State Crime Pattern Family Tree',
        xaxis_title='States',
        yaxis_title='Distance Between Groups',
        plot_bgcolor='rgb(30, 30, 30)',
        paper_bgcolor='rgb(30, 30, 30)',
        font=dict(color='white')
    )
    st.plotly_chart(fig_dendro)
    
    # Explain how to read the dendrogram
    st.markdown("""
    ### How to Read the Dendrogram
    
    1. **Height of Connections**:
       - Higher connections = more different groups
       - Lower connections = more similar groups
       - The height shows how different two groups are
    
    2. **Cutting the Tree**:
       - Draw a horizontal line to create clusters
       - Where you cut determines the number of clusters
       - We'll cut at a height that gives us 4 clusters (like K-means)
    """)
    
    # Cut the tree to get clusters
    hierarchical_labels = fcluster(linkage_matrix, k, criterion='maxclust') - 1
    
    # Compare K-means and Hierarchical Clustering
    st.header("Comparing K-Means and Hierarchical Clustering")
    
    # Create side-by-side comparison
    col1, col2 = st.columns(2)
    
    with col1:
        st.subheader("K-Means Clustering")
        fig_kmeans = px.scatter(df_clustered, 
                              x='Murder', 
                              y='Assault',
                              color='Cluster',
                              title='K-Means Clustering (K=4)',
                              hover_data=['UrbanPop', 'Rape'])
        st.plotly_chart(fig_kmeans)
        
        st.markdown("""
        **K-Means Characteristics**:
        - Requires specifying number of clusters upfront
        - Creates clusters of similar size
        - Works well with spherical clusters
        - Faster for large datasets
        - Can be sensitive to outliers
        """)
    
    with col2:
        st.subheader("Hierarchical Clustering")
        df_hierarchical = df.copy()
        df_hierarchical['Cluster'] = hierarchical_labels
        fig_hierarchical = px.scatter(df_hierarchical, 
                                    x='Murder', 
                                    y='Assault',
                                    color='Cluster',
                                    title='Hierarchical Clustering (4 clusters)',
                                    hover_data=['UrbanPop', 'Rape'])
        st.plotly_chart(fig_hierarchical)
        
        st.markdown("""
        **Hierarchical Clustering Characteristics**:
        - Creates a complete hierarchy of clusters
        - Can handle non-spherical clusters
        - More flexible in cluster shapes
        - Slower for large datasets
        - Less sensitive to outliers
        """)
    
    # Show agreement between methods
    st.subheader("Comparing the Results")
    
    # Create comparison dataframe
    comparison_df = pd.DataFrame({
        'State': df.index,
        'K-Means Cluster': cluster_labels,
        'Hierarchical Cluster': hierarchical_labels
    })
    
    # Count agreements
    agreements = sum(comparison_df['K-Means Cluster'] == comparison_df['Hierarchical Cluster'])
    agreement_percentage = (agreements / len(comparison_df)) * 100
    
    st.write(f"Methods agreed on {agreements} out of {len(comparison_df)} states ({agreement_percentage:.1f}%)")
    
    # Show states where methods disagree
    disagreements = comparison_df[comparison_df['K-Means Cluster'] != comparison_df['Hierarchical Cluster']]
    if not disagreements.empty:
        st.write("States where the methods disagreed:")
        st.dataframe(disagreements)
    
    st.markdown("""
    ### When to Use Each Method
    
    1. **Use K-Means when**:
       - You know the number of clusters
       - Your data has spherical clusters
       - You need fast computation
       - You want clusters of similar size
    
    2. **Use Hierarchical Clustering when**:
       - You don't know the number of clusters
       - You want to explore the hierarchy
       - Your clusters might be non-spherical
       - You need to handle outliers carefully
    
    In our case, both methods found similar patterns, suggesting our clusters are robust!
    """)
    
    # Exercise 6: Policy Brief
    st.header("Exercise 6: Policy Brief Creation")
    
    # Code Example: Creating Final Visualizations
    with st.expander("Code Example: Creating Policy Brief Visualizations"):
        st.code("""
# Create a comprehensive visualization
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create subplots
fig = make_subplots(rows=2, cols=2)

# Plot 1: Murder vs Assault by cluster
for i in range(k):
    cluster_data = df_clustered[df_clustered['Cluster'] == i]
    fig.add_trace(
        go.Scatter(
            x=cluster_data['Murder'],
            y=cluster_data['Assault'],
            mode='markers',
            name=f'Cluster {i}'
        ),
        row=1, col=1
    )

# Plot 2: Urban Population vs Rape by cluster
for i in range(k):
    cluster_data = df_clustered[df_clustered['Cluster'] == i]
    fig.add_trace(
        go.Scatter(
            x=cluster_data['UrbanPop'],
            y=cluster_data['Rape'],
            mode='markers',
            name=f'Cluster {i}'
        ),
        row=1, col=2
    )

# Update layout
fig.update_layout(
    title_text="State Crime Profile Analysis",
    showlegend=True
)
fig.show()
        """, language="python")
    
    st.write("""
    Based on our analysis, here's a summary of findings and recommendations:
    
    **Key Findings:**
    - We identified distinct crime profiles among US states
    - Each cluster represents a unique pattern of crime rates and urban population
    - Some states show surprising similarities despite geographic distance
    
    **Policy Recommendations:**
    1. High-Priority States: Focus on states in high-crime clusters
    2. Resource Allocation: Distribute federal crime prevention funds based on cluster profiles
    3. Best Practice Sharing: Encourage states within the same cluster to share successful strategies
    """)
    
    # Additional Resources
    st.header("Additional Resources")
    st.write("""
    - [Scikit-learn Clustering Documentation](https://scikit-learn.org/stable/modules/clustering.html)
    - [KNN Documentation](https://scikit-learn.org/stable/modules/neighbors.html)
    """)