import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.ocsvm import OCSVM
from pyod.models.combination import aom, moa, average
from pyod.utils.utility import standardizer
from sklearn.decomposition import PCA
from sklearn.metrics import precision_score, recall_score
from datetime import datetime


def generate_report(data, predictions, model_names, metrics):
    report = f"""
Network Anomaly Detection Report
Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
-----------------------------------------------

Total Data Points: {len(data)}
Features Analyzed: {', '.join(data.columns)}

Detection Results:
- Total Anomalies Detected: {int(predictions.sum())}
- Anomaly Percentage: {predictions.sum() / len(data):.2%}

Model Performance:
{metrics.to_markdown()}

Conclusion:
The system detected {int(predictions.sum())} potential anomalies using an ensemble of {len(model_names)} models.
Recommended actions: Investigate flagged points, check network equipment logs, and verify traffic patterns.
"""
    return report


def plot_3d_projections(data, predictions):
    # Project the standardized features onto the first three principal components
    pca = PCA(n_components=3)
    projections = pca.fit_transform(data)

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')
    normal = projections[predictions == 0]
    anomalies = projections[predictions == 1]
    ax.scatter(normal[:, 0], normal[:, 1], normal[:, 2], c='b', label='Normal')
    ax.scatter(anomalies[:, 0], anomalies[:, 1], anomalies[:, 2],
               c='r', marker='x', label='Anomaly')
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    ax.set_title('3D PCA Projection of Network Data')
    ax.legend()
    return fig


def main():
    st.title("🛜 AI Network Anomaly Detection with Multi-Model Ensemble")

    # Sidebar configuration
    st.sidebar.header("Model Configuration")
    models = st.sidebar.multiselect(
        "Select Detection Models",
        ["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
        default=["Isolation Forest", "Local Outlier Factor"]
    )
    contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1)
    ensemble_method = st.sidebar.selectbox("Ensemble Method", ["Average", "MOA", "AOM"])

    # Data input section
    uploaded_file = st.file_uploader("Upload network data (CSV)", type=["csv"])

    if uploaded_file:
        data = pd.read_csv(uploaded_file)
        st.success("Uploaded data loaded successfully!")
    else:
        # Generate synthetic network data
        np.random.seed(42)
        n_samples = 500
        data = pd.DataFrame({
            "traffic": np.random.normal(100, 15, n_samples),
            "latency": np.random.normal(50, 8, n_samples),
            "packet_loss": np.random.normal(0.5, 0.2, n_samples),
            "error_rate": np.random.normal(0.1, 0.05, n_samples)
        })

        # Inject anomalies into 10% of the samples
        anomaly_idx = np.random.choice(n_samples, 50, replace=False)
        data.loc[anomaly_idx, 'traffic'] *= 2.5
        data.loc[anomaly_idx, 'latency'] += 100
        data.loc[anomaly_idx, 'packet_loss'] *= 4
        st.info("Using synthetic network data. Upload a CSV to use your own.")

    # Data preprocessing: keep numeric columns and standardize them
    numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
    X = data[numeric_cols].values
    X_norm = standardizer(X)

    # Model initialization
    model_dict = {
        "Isolation Forest": IForest(contamination=contamination, n_jobs=-1),
        "Local Outlier Factor": LOF(contamination=contamination, n_jobs=-1),
        "One-Class SVM": OCSVM(contamination=contamination)
    }
    selected_models = [model_dict[m] for m in models]

    if not selected_models:
        st.error("Please select at least one detection model!")
        return

    # Training and prediction
    st.subheader("Model Training Progress")
    progress_bar = st.progress(0)
    train_scores = np.zeros([len(X), len(selected_models)])

    for i, model in enumerate(selected_models):
        model.fit(X_norm)
        train_scores[:, i] = model.decision_function(X_norm)
        progress_bar.progress((i + 1) / len(selected_models))

    # Ensemble prediction. Scores from different detectors live on different
    # scales, so standardize them before combining. AOM/MOA split the detectors
    # into buckets, so n_buckets must not exceed the number of detectors.
    train_scores_norm = standardizer(train_scores)
    if ensemble_method == "Average":
        combined_scores = average(train_scores_norm)
    elif ensemble_method == "MOA":
        combined_scores = moa(train_scores_norm, n_buckets=len(selected_models))
    else:
        combined_scores = aom(train_scores_norm, n_buckets=len(selected_models))

    # Flag the top `contamination` fraction of combined scores as anomalies
    threshold = np.percentile(combined_scores, 100 * (1 - contamination))
    predictions = (combined_scores > threshold).astype(int)

    # Performance metrics (only computable against the synthetic ground truth)
    if uploaded_file is None:
        y_true = np.zeros(n_samples)
        y_true[anomaly_idx] = 1
        precision = precision_score(y_true, predictions)
        recall = recall_score(y_true, predictions)
        model_precisions = [precision_score(y_true, m.labels_) for m in selected_models]
        model_recalls = [recall_score(y_true, m.labels_) for m in selected_models]
    else:
        precision = recall = "N/A (No ground truth)"
        model_precisions = [precision] * len(selected_models)
        model_recalls = [recall] * len(selected_models)

    metrics_df = pd.DataFrame({
        "Model": models + ["Ensemble"],
        "Precision": model_precisions + [precision],
        "Recall": model_recalls + [recall]
    })

    # Display results
    st.subheader("Detection Results")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Anomalies", int(predictions.sum()))
        st.metric("Anomaly Ratio", f"{predictions.sum() / len(data):.2%}")
    with col2:
        st.metric("Ensemble Precision",
                  f"{precision:.2%}" if isinstance(precision, float) else precision)
        st.metric("Ensemble Recall",
                  f"{recall:.2%}" if isinstance(recall, float) else recall)

    # Visualization
    st.subheader("Data Visualization")
    tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])

    with tab1:
        pca = PCA(n_components=2)
        viz_data = pca.fit_transform(X_norm)

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(viz_data[predictions == 0, 0], viz_data[predictions == 0, 1],
                   c='blue', label='Normal', alpha=0.6)
        ax.scatter(viz_data[predictions == 1, 0], viz_data[predictions == 1, 1],
                   c='red', marker='x', label='Anomaly')
        ax.set_xlabel("Principal Component 1")
        ax.set_ylabel("Principal Component 2")
        ax.set_title("PCA Projection of Network Data")
        ax.legend()
        st.pyplot(fig)

    with tab2:
        st.pyplot(plot_3d_projections(X_norm, predictions))

    # Generate report
    st.subheader("Analysis Report")
    report = generate_report(data[numeric_cols], predictions, models, metrics_df)
    st.code(report, language='text')

    # Report download
    st.download_button(
        label="Download Full Report",
        data=report,
        file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
        mime="text/plain"
    )


if __name__ == "__main__":
    main()
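
# Usage sketch (assumes this script is saved as app.py -- the filename is not
# given in the source -- and that the dependencies below are installed; note
# that metrics.to_markdown() in generate_report() additionally needs tabulate):
#
#   pip install streamlit pyod scikit-learn matplotlib pandas tabulate
#   streamlit run app.py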