Spaces:

sunbal7
/

PublicWiFiAnomalyDetection

Sleeping

File size: 7,032 Bytes

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.ocsvm import OCSVM
from pyod.models.combination import aom, moa, average
from pyod.utils.utility import standardizer
from sklearn.decomposition import PCA
from sklearn.metrics import precision_score, recall_score
import base64
from datetime import datetime

# Configuration
# Older Streamlit releases warn whenever st.pyplot() is called without an
# explicit figure; this option silences that warning. Newer releases no longer
# recognise the option, so a rejection must not abort the whole app.
from streamlit.errors import StreamlitAPIException

try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except StreamlitAPIException:
    pass  # Option unknown to this Streamlit version; nothing to silence.

def generate_report(data, predictions, model_names, metrics):
    """Build the plain-text anomaly detection report.

    Args:
        data: DataFrame of the analysed features (only its length and column
            labels are used).
        predictions: Iterable of 0/1 labels, where 1 marks an anomaly.
        model_names: Names of the detectors that took part in the ensemble.
        metrics: DataFrame of per-model metrics embedded as a table.

    Returns:
        The report as a single multi-line string.
    """
    total_points = len(data)
    anomaly_count = int(sum(predictions))
    # An empty data set has no anomalies; avoid dividing by zero.
    anomaly_ratio = anomaly_count / total_points if total_points else 0.0

    try:
        metrics_table = metrics.to_markdown()
    except ImportError:
        # to_markdown() needs the optional "tabulate" package; fall back to
        # pandas' built-in text rendering when it is not installed.
        metrics_table = metrics.to_string()
    # Indent continuation lines so the whole table lines up with the report
    # body instead of only its first row.
    metrics_table = metrics_table.replace("\n", "\n    ")

    report = f"""
    Network Anomaly Detection Report
    Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
    -----------------------------------------------
    Total Data Points: {total_points}
    Features Analyzed: {', '.join(map(str, data.columns))}
    
    Detection Results:
    - Total Anomalies Detected: {anomaly_count}
    - Anomaly Percentage: {anomaly_ratio:.2%}
    
    Model Performance:
    {metrics_table}
    
    Conclusion:
    The system detected {anomaly_count} potential anomalies using ensemble of {len(model_names)} models.
    Recommended actions: Investigate flagged points, check network equipment logs, and verify traffic patterns.
    """
    return report

def plot_3d_projections(data, predictions):
    """Scatter-plot the data in the space of its first three principal components.

    Args:
        data: 2-D array-like of samples by features.
        predictions: 0/1 labels aligned with the rows of ``data``; rows
            labelled 1 are drawn as anomalies.

    Returns:
        The matplotlib figure containing the 3D scatter plot.
    """
    data = np.asarray(data)
    # Coerce to an array so the boolean masks below also work for plain lists.
    labels = np.asarray(predictions)

    # PCA cannot extract more components than there are features or samples,
    # so cap the request and pad the missing axes with zeros.
    n_components = min(3, *data.shape)
    projections = PCA(n_components=n_components).fit_transform(data)
    if n_components < 3:
        padding = np.zeros((projections.shape[0], 3 - n_components))
        projections = np.hstack([projections, padding])

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')

    normal = projections[labels == 0]
    anomalies = projections[labels == 1]

    ax.scatter(normal[:, 0], normal[:, 1], normal[:, 2], c='b', label='Normal')
    ax.scatter(anomalies[:, 0], anomalies[:, 1], anomalies[:, 2],
               c='r', marker='x', label='Anomaly')

    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    # Address the axes directly rather than pyplot's implicit "current" axes.
    ax.set_title('3D PCA Projection of Network Data')
    ax.legend()
    return fig

def _make_synthetic_data(n_samples=500, n_anomalies=50):
    """Create the demo network data set together with its ground-truth labels.

    Returns:
        A ``(data, y_true)`` pair: a DataFrame with ``n_samples`` rows and an
        array holding 1 for each injected anomaly and 0 otherwise.
    """
    np.random.seed(42)
    data = pd.DataFrame({
        "traffic": np.random.normal(100, 15, n_samples),
        "latency": np.random.normal(50, 8, n_samples),
        "packet_loss": np.random.normal(0.5, 0.2, n_samples),
        "error_rate": np.random.normal(0.1, 0.05, n_samples),
    })
    # Inject anomalies
    anomaly_idx = np.random.choice(n_samples, n_anomalies, replace=False)
    data.loc[anomaly_idx, 'traffic'] *= 2.5
    data.loc[anomaly_idx, 'latency'] += 100
    data.loc[anomaly_idx, 'packet_loss'] *= 4

    y_true = np.zeros(n_samples)
    y_true[anomaly_idx] = 1
    return data, y_true


def _combine_scores(scores, method):
    """Merge per-detector score columns into one score per sample.

    Args:
        scores: 2-D array of samples by detectors.
        method: One of "Average", "MOA" or "AOM".

    Returns:
        1-D array with the combined score of every sample.
    """
    if scores.shape[1] == 1 or method == "Average":
        return average(scores)

    combiner = moa if method == "MOA" else aom
    try:
        return combiner(scores)
    except ValueError:
        # pyod's bucketed combiners raise ValueError when their bucket count
        # does not fit the number of detectors, which is the case for the
        # handful of detectors offered here. Fall back to one detector per
        # bucket: maximum-of-averages then reduces to the row maximum and
        # average-of-maximums to the row mean.
        if method == "MOA":
            return scores.max(axis=1)
        return scores.mean(axis=1)


def main():
    """Run the Streamlit app: load data, fit detectors, show and export results."""
    st.title("🛜 AI Network Anomaly Detection with Multi-Model Ensemble")

    # Sidebar configuration
    st.sidebar.header("Model Configuration")
    models = st.sidebar.multiselect(
        "Select Detection Models",
        ["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
        default=["Isolation Forest", "Local Outlier Factor"]
    )

    contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1)
    ensemble_method = st.sidebar.selectbox("Ensemble Method", ["Average", "MOA", "AOM"])

    # Data input section
    uploaded_file = st.file_uploader("Upload network data (CSV)", type=["csv"])

    if uploaded_file:
        data = pd.read_csv(uploaded_file)
        y_true = None  # Uploaded data carries no ground-truth labels.
        st.success("Uploaded data loaded successfully!")
    else:
        data, y_true = _make_synthetic_data()
        st.info("Using synthetic network data. Upload a CSV to use your own.")

    # Data preprocessing
    numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
    if not numeric_cols:
        st.error("No numeric columns found in the data!")
        return

    features = data[numeric_cols]
    # The detectors cannot be fitted on missing values, so only rows that are
    # complete across all numeric columns are analysed.
    complete_rows = features.notna().all(axis=1).to_numpy()
    n_dropped = int((~complete_rows).sum())
    if n_dropped:
        st.warning(f"Ignoring {n_dropped} rows with missing values.")
        features = features[complete_rows]
        if y_true is not None:
            y_true = y_true[complete_rows]
    if features.empty:
        st.error("No complete rows left to analyse!")
        return

    X_norm = standardizer(features.values)

    # Model initialization
    model_dict = {
        "Isolation Forest": IForest(contamination=contamination, n_jobs=-1),
        "Local Outlier Factor": LOF(contamination=contamination, n_jobs=-1),
        "One-Class SVM": OCSVM(contamination=contamination)
    }

    selected_models = [model_dict[m] for m in models]
    if not selected_models:
        st.error("Please select at least one detection model!")
        return

    # Training and prediction
    st.subheader("Model Training Progress")
    progress_bar = st.progress(0)
    train_scores = np.zeros([len(X_norm), len(selected_models)])

    for i, model in enumerate(selected_models):
        model.fit(X_norm)
        train_scores[:, i] = model.decision_function(X_norm)
        progress_bar.progress((i + 1) / len(selected_models))

    # Ensemble prediction. Each detector scores on its own scale, so bring the
    # columns to a common scale first; otherwise the detector with the largest
    # raw values would dominate the combination.
    combined_scores = _combine_scores(standardizer(train_scores), ensemble_method)

    threshold = np.percentile(combined_scores, 100 * (1 - contamination))
    predictions = (combined_scores > threshold).astype(int)

    # Performance metrics
    no_truth = "N/A (No ground truth)"
    if y_true is not None:  # Use synthetic ground truth
        precision = precision_score(y_true, predictions)
        recall = recall_score(y_true, predictions)
        # Score every detector on its own training labels so the table really
        # shows precision and recall.
        model_precision = [precision_score(y_true, m.labels_) for m in selected_models]
        model_recall = [recall_score(y_true, m.labels_) for m in selected_models]
    else:
        precision = recall = no_truth
        model_precision = [no_truth] * len(selected_models)
        model_recall = [no_truth] * len(selected_models)

    metrics_df = pd.DataFrame({
        "Model": list(models) + ["Ensemble"],
        "Precision": model_precision + [precision],
        "Recall": model_recall + [recall]
    })

    # Display results
    # Convert to a built-in int: st.metric is only documented for plain
    # int/float/str values.
    anomaly_count = int(predictions.sum())

    st.subheader("Detection Results")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Anomalies", anomaly_count)
        st.metric("Anomaly Ratio", f"{anomaly_count / len(features):.2%}")
    with col2:
        st.metric("Ensemble Precision", f"{precision:.2%}" if isinstance(precision, float) else precision)
        st.metric("Ensemble Recall", f"{recall:.2%}" if isinstance(recall, float) else recall)

    # Visualization
    st.subheader("Data Visualization")

    tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])
    with tab1:
        viz_data = PCA(n_components=2).fit_transform(X_norm)
        fig_2d, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(viz_data[predictions == 0, 0], viz_data[predictions == 0, 1],
                   c='blue', label='Normal', alpha=0.6)
        ax.scatter(viz_data[predictions == 1, 0], viz_data[predictions == 1, 1],
                   c='red', marker='x', label='Anomaly')
        ax.set_xlabel("Principal Component 1")
        ax.set_ylabel("Principal Component 2")
        ax.set_title("PCA Projection of Network Data")
        ax.legend()
        # Pass the figure explicitly instead of relying on pyplot's global
        # figure, then close it so figures do not pile up across reruns.
        st.pyplot(fig_2d)
        plt.close(fig_2d)

    with tab2:
        fig_3d = plot_3d_projections(X_norm, predictions)
        st.pyplot(fig_3d)
        plt.close(fig_3d)

    # Generate report
    st.subheader("Analysis Report")
    report = generate_report(features, predictions, models, metrics_df)
    st.code(report, language='text')

    # Report download
    st.download_button(
        label="Download Full Report",
        data=report,
        file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
        mime="text/plain"
    )

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()