Spaces:

sunbal7
/

PublicWiFiAnomalyDetection

Sleeping

App Files Files Community

sunbal7 committed on Feb 28

Commit

f91b527

verified ·

1 Parent(s): 3f6c91c

Update app.py

Browse files

Files changed (1) hide show

app.py +174 -71

app.py CHANGED Viewed

@@ -4,88 +4,191 @@ import numpy as np
 import matplotlib.pyplot as plt
 from pyod.models.iforest import IForest
 from pyod.models.lof import LOF
-def main():
-    st.title("AI-Based Network Anomaly Detection (Predictive Maintenance)")
-    st.markdown(
-        """
-        This application uses AI to detect unusual behavior in a network before it leads to failure.
-        By leveraging open source models and PyOD, it predicts potential issues, enabling proactive maintenance.
-        """
-    )
-    # Sidebar settings for model and parameters
-    st.sidebar.header("Settings")
-    model_choice = st.sidebar.selectbox("Select Anomaly Detection Model", ("Isolation Forest", "Local Outlier Factor"))
-    contamination = st.sidebar.slider("Contamination (Expected anomaly ratio)", 0.0, 0.5, 0.1)
-    uploaded_file = st.file_uploader("Upload CSV file with network data", type=["csv"])
-    if uploaded_file is not None:
         data = pd.read_csv(uploaded_file)
-        st.write("### Data Preview")
-        st.dataframe(data.head())
     else:
-        st.info("No file uploaded. Generating synthetic network data for demonstration.")
-        # Generate synthetic data with features like traffic, latency, and packet_loss
         np.random.seed(42)
-        n_samples = 300
-        traffic = np.random.normal(100, 10, n_samples)
-        latency = np.random.normal(50, 5, n_samples)
-        packet_loss = np.random.normal(0.5, 0.1, n_samples)
-        # Introduce anomalies by modifying a subset of data points
-        anomaly_indices = np.random.choice(n_samples, size=20, replace=False)
-        traffic[anomaly_indices] *= 1.5
-        latency[anomaly_indices] *= 2
-        packet_loss[anomaly_indices] *= 5
         data = pd.DataFrame({
-            "traffic": traffic,
-            "latency": latency,
-            "packet_loss": packet_loss
         })
-        st.write("### Synthetic Data")
-        st.dataframe(data.head())
-    # Use only numeric features for anomaly detection
-    features = data.select_dtypes(include=[np.number]).columns.tolist()
-    if not features:
-        st.error("No numeric columns found in the data for anomaly detection.")
         return
-    X = data[features].values
-    # Initialize the selected model from PyOD
-    if model_choice == "Isolation Forest":
-        model = IForest(contamination=contamination)
-    elif model_choice == "Local Outlier Factor":
-        model = LOF(contamination=contamination)
-    # Fit the model and predict anomalies (0: normal, 1: anomaly)
-    model.fit(X)
-    predictions = model.labels_
-    data["anomaly"] = predictions
-    st.subheader("Anomaly Detection Results")
-    st.write(data.head())
-    n_anomalies = np.sum(predictions)
-    st.write(f"Detected **{n_anomalies}** anomalies out of **{len(data)}** data points.")
-    # Visualization (if at least 2 numeric features are available)
-    if len(features) >= 2:
-        st.subheader("Visualization")
-        fig, ax = plt.subplots()
-        # Plot using the first two numeric features
-        x_feature = features[0]
-        y_feature = features[1]
-        normal_data = data[data["anomaly"] == 0]
-        anomaly_data = data[data["anomaly"] == 1]
-        ax.scatter(normal_data[x_feature], normal_data[y_feature], label="Normal", color="blue", alpha=0.5)
-        ax.scatter(anomaly_data[x_feature], anomaly_data[y_feature], label="Anomaly", color="red", marker="x")
-        ax.set_xlabel(x_feature)
-        ax.set_ylabel(y_feature)
-        ax.legend()
-        st.pyplot(fig)
 if __name__ == "__main__":
-    main()

 import matplotlib.pyplot as plt
 from pyod.models.iforest import IForest
 from pyod.models.lof import LOF
+from pyod.models.ocsvm import OCSVM
+from pyod.models.combination import aom, moa, average
+from pyod.utils.utility import standardizer
+from sklearn.decomposition import PCA
+from sklearn.metrics import precision_score, recall_score
+import base64
+from datetime import datetime
# Configuration
try:
    # Presumably silences the warning Streamlit shows when st.pyplot() is
    # called without an explicit figure -- inferred from the option name.
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    # NOTE(review): newer Streamlit releases appear to have removed this
    # option, in which case set_option rejects the unknown key and would abort
    # the whole app at import time. The option is purely cosmetic, so skipping
    # it is safe -- confirm against the pinned Streamlit version.
    pass
def generate_report(data, predictions, model_names, metrics):
    """Build the plain-text anomaly detection report shown and downloaded by the app.

    Args:
        data: DataFrame of the analyzed features; only its length and column
            labels are read.
        predictions: Iterable of 0/1 labels, where 1 marks an anomaly.
        model_names: Names of the models in the ensemble; only the count is used.
        metrics: DataFrame rendered into the "Model Performance" section.

    Returns:
        The report as a single multi-line string.
    """
    total_points = len(data)
    anomaly_count = int(sum(predictions))
    # An empty dataset would otherwise raise ZeroDivisionError.
    anomaly_share = anomaly_count / total_points if total_points else 0.0
    # Column labels are not guaranteed to be strings, and str.join rejects
    # anything else.
    feature_list = ', '.join(str(column) for column in data.columns)
    try:
        metrics_table = metrics.to_markdown()
    except ImportError:
        # DataFrame.to_markdown depends on the optional "tabulate" package;
        # fall back to the plain-text rendering when it is not installed.
        metrics_table = metrics.to_string()
    # Indent continuation rows so the whole table lines up with the report
    # body instead of only its first row.
    metrics_table = metrics_table.replace("\n", "\n    ")
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    report = f"""
    Network Anomaly Detection Report
    Generated: {generated_at}
    -----------------------------------------------
    Total Data Points: {total_points}
    Features Analyzed: {feature_list}
    Detection Results:
    - Total Anomalies Detected: {anomaly_count}
    - Anomaly Percentage: {anomaly_share:.2%}
    Model Performance:
    {metrics_table}
    Conclusion:
    The system detected {anomaly_count} potential anomalies using ensemble of {len(model_names)} models.
    Recommended actions: Investigate flagged points, check network equipment logs, and verify traffic patterns.
    """
    return report
def plot_3d_projections(data, predictions):
    """Draw a 3D PCA scatter plot with normal points and anomalies separated.

    Args:
        data: 2D array-like of samples by features to project.
        predictions: Array-like of 0/1 labels aligned with ``data``; 1 marks
            an anomaly.

    Returns:
        The matplotlib figure containing the 3D scatter plot.
    """
    data = np.asarray(data)
    # Comparing a plain list with == yields a single bool, not a mask.
    labels = np.asarray(predictions)

    # PCA rejects n_components larger than min(n_samples, n_features), so
    # request only what the data supports and pad the missing axes with zeros.
    n_components = min(3, *data.shape)
    projections = PCA(n_components=n_components).fit_transform(data)
    if n_components < 3:
        padding = np.zeros((projections.shape[0], 3 - n_components))
        projections = np.hstack([projections, padding])

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')
    normal = projections[labels == 0]
    anomalies = projections[labels == 1]
    ax.scatter(normal[:, 0], normal[:, 1], normal[:, 2], c='b', label='Normal')
    ax.scatter(anomalies[:, 0], anomalies[:, 1], anomalies[:, 2],
               c='r', marker='x', label='Anomaly')
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    # Address the axes directly rather than pyplot's implicit "current" axes.
    ax.set_title('3D PCA Projection of Network Data')
    ax.legend()
    return fig
def _synthetic_network_data(n_samples=500, n_anomalies=50):
    """Generate the reproducible demo dataset with injected anomalies.

    Returns:
        A ``(data, anomaly_idx)`` tuple: the DataFrame of simulated metrics and
        the row positions whose values were distorted.
    """
    np.random.seed(42)
    data = pd.DataFrame({
        "traffic": np.random.normal(100, 15, n_samples),
        "latency": np.random.normal(50, 8, n_samples),
        "packet_loss": np.random.normal(0.5, 0.2, n_samples),
        "error_rate": np.random.normal(0.1, 0.05, n_samples),
    })
    anomaly_idx = np.random.choice(n_samples, n_anomalies, replace=False)
    data.loc[anomaly_idx, 'traffic'] *= 2.5
    data.loc[anomaly_idx, 'latency'] += 100
    data.loc[anomaly_idx, 'packet_loss'] *= 4
    return data, anomaly_idx


def _combine_scores(scores, method):
    """Fuse per-model score columns into a single score per row.

    ``scores`` holds one column per model; ``method`` is one of "Average",
    "MOA" or "AOM".
    """
    if method == "Average" or scores.shape[1] == 1:
        return average(scores)
    combiner = moa if method == "MOA" else aom
    try:
        return combiner(scores)
    except ValueError:
        # NOTE(review): pyod's aom/moa validate their default n_buckets=5
        # against the number of score columns and appear to reject the 2-3
        # columns this app can produce -- confirm against the installed pyod.
        # With one model per bucket, MOA (maximum of bucket averages) is the
        # row-wise maximum and AOM (average of bucket maxima) is the row-wise
        # average, so compute those directly.
        st.warning(
            f"{method} bucketing is not available for {scores.shape[1]} models; "
            "using one model per bucket."
        )
        return scores.max(axis=1) if method == "MOA" else average(scores)


def main():
    """Run the Streamlit app.

    Loads an uploaded CSV (or generates synthetic data), fits the selected
    PyOD detectors on the standardized numeric columns, combines their scores
    into an ensemble prediction, and renders metrics, PCA plots and a
    downloadable text report.
    """
    st.title("🛜 AI Network Anomaly Detection with Multi-Model Ensemble")

    # Sidebar configuration
    st.sidebar.header("Model Configuration")
    models = st.sidebar.multiselect(
        "Select Detection Models",
        ["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
        default=["Isolation Forest", "Local Outlier Factor"]
    )
    contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1)
    ensemble_method = st.sidebar.selectbox("Ensemble Method", ["Average", "MOA", "AOM"])

    # Data input section. Ground truth exists only for the synthetic data.
    uploaded_file = st.file_uploader("Upload network data (CSV)", type=["csv"])
    has_ground_truth = uploaded_file is None
    if has_ground_truth:
        data, anomaly_idx = _synthetic_network_data()
        st.info("Using synthetic network data. Upload a CSV to use your own.")
    else:
        data = pd.read_csv(uploaded_file)
        st.success("Uploaded data loaded successfully!")

    if not models:
        st.error("Please select at least one detection model!")
        return

    # Data preprocessing
    numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
    if not numeric_cols:
        st.error("No numeric columns found in the data for anomaly detection.")
        return
    # The detectors cannot be fitted on missing values, so keep complete rows only.
    complete_rows = data[numeric_cols].notna().all(axis=1)
    if not complete_rows.all():
        st.warning(f"Dropped {int((~complete_rows).sum())} row(s) with missing numeric values.")
        data = data[complete_rows].reset_index(drop=True)
    if data.empty:
        st.error("No complete rows left to analyze.")
        return
    X = data[numeric_cols].values
    X_norm = standardizer(X)

    # Model initialization: factories so only the selected detectors are built.
    model_factories = {
        "Isolation Forest": lambda: IForest(contamination=contamination, n_jobs=-1),
        "Local Outlier Factor": lambda: LOF(contamination=contamination, n_jobs=-1),
        "One-Class SVM": lambda: OCSVM(contamination=contamination),
    }
    selected_models = [model_factories[name]() for name in models]

    # Training and prediction
    st.subheader("Model Training Progress")
    progress_bar = st.progress(0)
    train_scores = np.zeros([len(X), len(selected_models)])
    for i, model in enumerate(selected_models):
        model.fit(X_norm)
        train_scores[:, i] = model.decision_function(X_norm)
        progress_bar.progress((i + 1) / len(selected_models))

    # Ensemble prediction. Each detector scores on its own scale, so put the
    # columns on a common scale first; otherwise one model dominates the mix.
    combined_scores = _combine_scores(standardizer(train_scores), ensemble_method)
    threshold = np.percentile(combined_scores, 100 * (1 - contamination))
    predictions = (combined_scores > threshold).astype(int)

    # Performance metrics: real precision/recall per model (from each model's
    # own training labels) and for the ensemble, when ground truth is known.
    if has_ground_truth:
        y_true = np.zeros(len(data), dtype=int)
        y_true[anomaly_idx] = 1
        precision = precision_score(y_true, predictions)
        recall = recall_score(y_true, predictions)
        model_precisions = [precision_score(y_true, m.labels_) for m in selected_models]
        model_recalls = [recall_score(y_true, m.labels_) for m in selected_models]
    else:
        precision = recall = "N/A (No ground truth)"
        model_precisions = [precision] * len(selected_models)
        model_recalls = [recall] * len(selected_models)
    metrics_df = pd.DataFrame({
        "Model": models + ["Ensemble"],
        "Precision": model_precisions + [precision],
        "Recall": model_recalls + [recall],
    })

    # Display results
    st.subheader("Detection Results")
    n_flagged = int(predictions.sum())
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Anomalies", n_flagged)
        st.metric("Anomaly Ratio", f"{n_flagged / len(data):.2%}")
    with col2:
        st.metric("Ensemble Precision", f"{precision:.2%}" if isinstance(precision, float) else precision)
        st.metric("Ensemble Recall", f"{recall:.2%}" if isinstance(recall, float) else recall)

    # Visualization
    st.subheader("Data Visualization")
    tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])
    with tab1:
        if min(X_norm.shape) < 2:
            # PCA cannot produce two components from fewer rows or features.
            st.info("At least two rows and two numeric features are needed for a 2D projection.")
        else:
            viz_data = PCA(n_components=2).fit_transform(X_norm)
            flagged = predictions == 1
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.scatter(viz_data[~flagged, 0], viz_data[~flagged, 1],
                       c='blue', label='Normal', alpha=0.6)
            ax.scatter(viz_data[flagged, 0], viz_data[flagged, 1],
                       c='red', marker='x', label='Anomaly')
            ax.set_xlabel("Principal Component 1")
            ax.set_ylabel("Principal Component 2")
            ax.set_title("PCA Projection of Network Data")
            ax.legend()
            # Pass the figure explicitly and release it so reruns do not
            # accumulate open figures.
            st.pyplot(fig)
            plt.close(fig)
    with tab2:
        try:
            fig_3d = plot_3d_projections(X_norm, predictions)
        except ValueError as exc:
            # Raised by PCA when the data cannot support the projection.
            st.info(f"3D projection unavailable: {exc}")
        else:
            st.pyplot(fig_3d)
            plt.close(fig_3d)

    # Generate report
    st.subheader("Analysis Report")
    report = generate_report(data[numeric_cols], predictions, models, metrics_df)
    st.code(report, language='text')

    # Report download
    st.download_button(
        label="Download Full Report",
        data=report,
        file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
        mime="text/plain"
    )


if __name__ == "__main__":
    main()