File size: 7,032 Bytes
b2b5a52
 
 
d7bc36b
 
 
f91b527
 
 
 
 
 
 
b2b5a52
f91b527
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b5a52
f91b527
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b5a52
f91b527
 
 
 
 
 
 
 
 
 
 
 
 
b2b5a52
f91b527
 
 
 
d7bc36b
f91b527
d7bc36b
f91b527
d7bc36b
f91b527
d7bc36b
f91b527
 
 
 
d7bc36b
f91b527
 
 
 
 
 
b2b5a52
f91b527
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7bc36b
588f02b
f91b527
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b5a52
d7bc36b
f91b527
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.ocsvm import OCSVM
from pyod.models.combination import aom, moa, average
from pyod.utils.utility import standardizer
from sklearn.decomposition import PCA
from sklearn.metrics import precision_score, recall_score
import base64
from datetime import datetime

# Configuration
# Switch off Streamlit's "pyplot global use" deprecation notice, i.e. the
# warning tied to st.pyplot() being called without an explicit figure.
# NOTE(review): newer Streamlit releases may no longer recognise this option
# key — confirm against the Streamlit version this app is pinned to.
st.set_option('deprecation.showPyplotGlobalUse', False)

def generate_report(data, predictions, model_names, metrics):
    """Build the plain-text anomaly detection report.

    Args:
        data: Table of analysed features. Must support ``len()`` and expose a
            ``columns`` iterable (e.g. a pandas DataFrame).
        predictions: Iterable of 0/1 flags, one per row; 1 marks an anomaly.
        model_names: Names of the models that took part in the ensemble.
        metrics: Object offering ``to_markdown()`` (e.g. a pandas DataFrame)
            holding the per-model performance table.

    Returns:
        The report as a single string. Every line keeps the four-space
        indentation and the leading/trailing newline layout of the template.
    """
    total_points = len(data)
    # Count once; int() keeps NumPy integers from leaking into the text.
    anomaly_count = int(sum(predictions))
    # An empty data set must yield 0% rather than a ZeroDivisionError.
    anomaly_ratio = anomaly_count / total_points if total_points else 0.0
    # Column labels are not guaranteed to be strings (e.g. positional ints).
    feature_names = ', '.join(str(column) for column in data.columns)

    try:
        metrics_table = metrics.to_markdown()
    except ImportError:
        # pandas needs the optional "tabulate" package for to_markdown();
        # fall back to the plain-text rendering when it is not installed.
        metrics_table = metrics.to_string()

    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    lines = [
        "",
        "    Network Anomaly Detection Report",
        f"    Generated: {generated_at}",
        "    -----------------------------------------------",
        f"    Total Data Points: {total_points}",
        f"    Features Analyzed: {feature_names}",
        "    ",
        "    Detection Results:",
        f"    - Total Anomalies Detected: {anomaly_count}",
        f"    - Anomaly Percentage: {anomaly_ratio:.2%}",
        "    ",
        "    Model Performance:",
        f"    {metrics_table}",
        "    ",
        "    Conclusion:",
        f"    The system detected {anomaly_count} potential anomalies using ensemble of {len(model_names)} models.",
        "    Recommended actions: Investigate flagged points, check network equipment logs, and verify traffic patterns.",
        "    ",
    ]
    return "\n".join(lines)

def plot_3d_projections(data, predictions):
    """Draw a 3D PCA scatter plot with normal points and anomalies.

    Args:
        data: 2-D array-like of shape (n_samples, n_features) to project.
        predictions: Array-like of 0/1 flags, one per row; 1 marks an anomaly.

    Returns:
        The matplotlib Figure holding the 3D scatter plot.
    """
    features = np.asarray(data)
    # Coerce to an array so the boolean masks below also work for plain lists.
    labels = np.asarray(predictions)

    # PCA cannot produce more components than min(n_samples, n_features);
    # project onto as many as are available and zero-fill the missing axes so
    # narrow data sets still render instead of raising.
    n_components = min(3, *features.shape)
    projections = PCA(n_components=n_components).fit_transform(features)
    if n_components < 3:
        padding = np.zeros((projections.shape[0], 3 - n_components))
        projections = np.hstack([projections, padding])

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')

    normal = projections[labels == 0]
    anomalies = projections[labels == 1]

    ax.scatter(normal[:, 0], normal[:, 1], normal[:, 2], c='b', label='Normal')
    ax.scatter(anomalies[:, 0], anomalies[:, 1], anomalies[:, 2],
               c='r', marker='x', label='Anomaly')

    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    # Address the axes directly rather than pyplot's global "current axes".
    ax.set_title('3D PCA Projection of Network Data')
    ax.legend()
    return fig

def _load_network_data(uploaded_file):
    """Return the data set to analyse together with its ground truth, if any.

    Args:
        uploaded_file: Value returned by ``st.file_uploader``; falsy when
            nothing has been uploaded.

    Returns:
        A ``(data, y_true)`` tuple. ``data`` is a DataFrame. ``y_true`` is a
        0/1 integer array flagging the injected anomalies of the synthetic
        data set, or ``None`` for uploaded data, whose labels are unknown.
    """
    if uploaded_file:
        data = pd.read_csv(uploaded_file)
        st.success("Uploaded data loaded successfully!")
        return data, None

    # Generate synthetic network data (fixed seed keeps reruns reproducible)
    np.random.seed(42)
    n_samples = 500
    data = pd.DataFrame({
        "traffic": np.random.normal(100, 15, n_samples),
        "latency": np.random.normal(50, 8, n_samples),
        "packet_loss": np.random.normal(0.5, 0.2, n_samples),
        "error_rate": np.random.normal(0.1, 0.05, n_samples)
    })
    # Inject anomalies
    anomaly_idx = np.random.choice(n_samples, 50, replace=False)
    data.loc[anomaly_idx, 'traffic'] *= 2.5
    data.loc[anomaly_idx, 'latency'] += 100
    data.loc[anomaly_idx, 'packet_loss'] *= 4
    st.info("Using synthetic network data. Upload a CSV to use your own.")

    y_true = np.zeros(n_samples, dtype=int)
    y_true[anomaly_idx] = 1
    return data, y_true


def _combine_scores(scores, method):
    """Merge per-model outlier scores into one score per sample.

    Args:
        scores: Array of shape (n_samples, n_models) with comparable
            (standardised) outlier scores.
        method: One of "Average", "MOA" or "AOM".

    Returns:
        1-D array with one combined score per sample.
    """
    if method == "Average" or scores.shape[1] < 2:
        return average(scores)

    combiner = moa if method == "MOA" else aom
    try:
        return combiner(scores)
    except ValueError:
        # PyOD's bucketed combiners reject ensembles that cannot be split
        # into their default bucket layout, which is the case for the handful
        # of models offered here. Fall back to one model per bucket, where
        # "maximum of averages" reduces to the row-wise maximum and "average
        # of maxima" to the row-wise mean.
        if method == "MOA":
            return scores.max(axis=1)
        return scores.mean(axis=1)


def main():
    """Run the Streamlit app: configure, detect, evaluate, plot and report.

    Reads the model selection, contamination ratio and ensemble method from
    the sidebar, loads an uploaded CSV (or a synthetic data set), fits the
    selected PyOD detectors, combines their scores, and renders metrics,
    PCA projections and a downloadable text report.
    """
    st.title("🛜 AI Network Anomaly Detection with Multi-Model Ensemble")

    # Sidebar configuration
    st.sidebar.header("Model Configuration")
    models = st.sidebar.multiselect(
        "Select Detection Models",
        ["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
        default=["Isolation Forest", "Local Outlier Factor"]
    )

    contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1)
    ensemble_method = st.sidebar.selectbox("Ensemble Method", ["Average", "MOA", "AOM"])

    # Data input section
    uploaded_file = st.file_uploader("Upload network data (CSV)", type=["csv"])
    data, y_true = _load_network_data(uploaded_file)

    # Data preprocessing
    numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
    if not numeric_cols:
        st.error("The data contains no numeric columns to analyze.")
        return

    # The detectors cannot handle missing values, so analyse complete rows only.
    complete_rows = data[numeric_cols].notna().all(axis=1).to_numpy()
    if not complete_rows.all():
        st.warning(
            f"Ignoring {int((~complete_rows).sum())} row(s) with missing numeric values."
        )
        data = data.loc[complete_rows]
        if y_true is not None:
            y_true = y_true[complete_rows]
    if data.empty:
        st.error("The data contains no complete rows to analyze.")
        return

    X_norm = standardizer(data[numeric_cols].values)

    # Model initialization: factories so only the selected models are built
    if not models:
        st.error("Please select at least one detection model!")
        return

    model_factories = {
        "Isolation Forest": lambda: IForest(contamination=contamination, n_jobs=-1),
        "Local Outlier Factor": lambda: LOF(contamination=contamination, n_jobs=-1),
        "One-Class SVM": lambda: OCSVM(contamination=contamination),
    }
    selected_models = [model_factories[name]() for name in models]

    # Training and prediction
    st.subheader("Model Training Progress")
    progress_bar = st.progress(0)
    train_scores = np.zeros([len(X_norm), len(selected_models)])

    for i, model in enumerate(selected_models):
        model.fit(X_norm)
        train_scores[:, i] = model.decision_function(X_norm)
        progress_bar.progress((i + 1) / len(selected_models))

    # Ensemble prediction. Each detector scores on its own scale, so the
    # columns are standardised first; otherwise one model dominates the result.
    combined_scores = _combine_scores(standardizer(train_scores), ensemble_method)

    threshold = np.percentile(combined_scores, 100 * (1 - contamination))
    predictions = (combined_scores > threshold).astype(int)
    anomaly_count = int(predictions.sum())

    # Performance metrics (only computable when ground truth is known)
    if y_true is not None:
        precision = float(precision_score(y_true, predictions, zero_division=0))
        recall = float(recall_score(y_true, predictions, zero_division=0))
        # Per-model figures come from each detector's own fitted labels.
        model_precisions = [
            float(precision_score(y_true, m.labels_, zero_division=0))
            for m in selected_models
        ]
        model_recalls = [
            float(recall_score(y_true, m.labels_, zero_division=0))
            for m in selected_models
        ]
    else:
        precision = recall = "N/A (No ground truth)"
        model_precisions = [precision] * len(selected_models)
        model_recalls = [recall] * len(selected_models)

    metrics_df = pd.DataFrame({
        "Model": list(models) + ["Ensemble"],
        "Precision": model_precisions + [precision],
        "Recall": model_recalls + [recall]
    })

    # Display results
    st.subheader("Detection Results")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Anomalies", anomaly_count)
        st.metric("Anomaly Ratio", f"{anomaly_count / len(data):.2%}")
    with col2:
        st.metric("Ensemble Precision", f"{precision:.2%}" if isinstance(precision, float) else precision)
        st.metric("Ensemble Recall", f"{recall:.2%}" if isinstance(recall, float) else recall)

    # Visualization
    st.subheader("Data Visualization")

    tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])
    with tab1:
        viz_data = PCA(n_components=2).fit_transform(X_norm)
        is_anomaly = predictions == 1
        # Build an explicit figure: calling st.pyplot() without one relies on
        # pyplot's global state, which Streamlit has deprecated.
        fig_2d, ax_2d = plt.subplots(figsize=(10, 6))
        ax_2d.scatter(viz_data[~is_anomaly, 0], viz_data[~is_anomaly, 1],
                      c='blue', label='Normal', alpha=0.6)
        ax_2d.scatter(viz_data[is_anomaly, 0], viz_data[is_anomaly, 1],
                      c='red', marker='x', label='Anomaly')
        ax_2d.set_xlabel("Principal Component 1")
        ax_2d.set_ylabel("Principal Component 2")
        ax_2d.set_title("PCA Projection of Network Data")
        ax_2d.legend()
        st.pyplot(fig_2d)
        # pyplot keeps every figure alive until closed; release it so reruns
        # of the script do not accumulate figures in memory.
        plt.close(fig_2d)

    with tab2:
        fig_3d = plot_3d_projections(X_norm, predictions)
        st.pyplot(fig_3d)
        plt.close(fig_3d)

    # Generate report
    st.subheader("Analysis Report")
    report = generate_report(data[numeric_cols], predictions, models, metrics_df)
    st.code(report, language='text')

    # Report download
    st.download_button(
        label="Download Full Report",
        data=report,
        file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
        mime="text/plain"
    )

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()