sunbal7 committed on
Commit
f91b527
·
verified ·
1 Parent(s): 3f6c91c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -71
app.py CHANGED
@@ -4,88 +4,191 @@ import numpy as np
4
  import matplotlib.pyplot as plt
5
  from pyod.models.iforest import IForest
6
  from pyod.models.lof import LOF
 
 
 
 
 
 
 
7
 
8
- def main():
9
- st.title("AI-Based Network Anomaly Detection (Predictive Maintenance)")
10
- st.markdown(
11
- """
12
- This application uses AI to detect unusual behavior in a network before it leads to failure.
13
- By leveraging open source models and PyOD, it predicts potential issues, enabling proactive maintenance.
14
- """
15
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- # Sidebar settings for model and parameters
18
- st.sidebar.header("Settings")
19
- model_choice = st.sidebar.selectbox("Select Anomaly Detection Model", ("Isolation Forest", "Local Outlier Factor"))
20
- contamination = st.sidebar.slider("Contamination (Expected anomaly ratio)", 0.0, 0.5, 0.1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- uploaded_file = st.file_uploader("Upload CSV file with network data", type=["csv"])
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- if uploaded_file is not None:
 
 
 
25
  data = pd.read_csv(uploaded_file)
26
- st.write("### Data Preview")
27
- st.dataframe(data.head())
28
  else:
29
- st.info("No file uploaded. Generating synthetic network data for demonstration.")
30
- # Generate synthetic data with features like traffic, latency, and packet_loss
31
  np.random.seed(42)
32
- n_samples = 300
33
- traffic = np.random.normal(100, 10, n_samples)
34
- latency = np.random.normal(50, 5, n_samples)
35
- packet_loss = np.random.normal(0.5, 0.1, n_samples)
36
- # Introduce anomalies by modifying a subset of data points
37
- anomaly_indices = np.random.choice(n_samples, size=20, replace=False)
38
- traffic[anomaly_indices] *= 1.5
39
- latency[anomaly_indices] *= 2
40
- packet_loss[anomaly_indices] *= 5
41
-
42
  data = pd.DataFrame({
43
- "traffic": traffic,
44
- "latency": latency,
45
- "packet_loss": packet_loss
 
46
  })
47
- st.write("### Synthetic Data")
48
- st.dataframe(data.head())
 
 
 
 
49
 
50
- # Use only numeric features for anomaly detection
51
- features = data.select_dtypes(include=[np.number]).columns.tolist()
52
- if not features:
53
- st.error("No numeric columns found in the data for anomaly detection.")
 
 
 
 
 
 
 
 
 
 
 
54
  return
55
 
56
- X = data[features].values
57
-
58
- # Initialize the selected model from PyOD
59
- if model_choice == "Isolation Forest":
60
- model = IForest(contamination=contamination)
61
- elif model_choice == "Local Outlier Factor":
62
- model = LOF(contamination=contamination)
63
-
64
- # Fit the model and predict anomalies (0: normal, 1: anomaly)
65
- model.fit(X)
66
- predictions = model.labels_
67
- data["anomaly"] = predictions
68
-
69
- st.subheader("Anomaly Detection Results")
70
- st.write(data.head())
71
- n_anomalies = np.sum(predictions)
72
- st.write(f"Detected **{n_anomalies}** anomalies out of **{len(data)}** data points.")
73
-
74
- # Visualization (if at least 2 numeric features are available)
75
- if len(features) >= 2:
76
- st.subheader("Visualization")
77
- fig, ax = plt.subplots()
78
- # Plot using the first two numeric features
79
- x_feature = features[0]
80
- y_feature = features[1]
81
- normal_data = data[data["anomaly"] == 0]
82
- anomaly_data = data[data["anomaly"] == 1]
83
- ax.scatter(normal_data[x_feature], normal_data[y_feature], label="Normal", color="blue", alpha=0.5)
84
- ax.scatter(anomaly_data[x_feature], anomaly_data[y_feature], label="Anomaly", color="red", marker="x")
85
- ax.set_xlabel(x_feature)
86
- ax.set_ylabel(y_feature)
87
- ax.legend()
88
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  if __name__ == "__main__":
91
- main()
 
4
  import matplotlib.pyplot as plt
5
  from pyod.models.iforest import IForest
6
  from pyod.models.lof import LOF
7
+ from pyod.models.ocsvm import OCSVM
8
+ from pyod.models.combination import aom, moa, average
9
+ from pyod.utils.utility import standardizer
10
+ from sklearn.decomposition import PCA
11
+ from sklearn.metrics import precision_score, recall_score
12
+ import base64
13
+ from datetime import datetime
14
 
15
+ # Configuration
16
+ st.set_option('deprecation.showPyplotGlobalUse', False)
17
+
18
def generate_report(data, predictions, model_names, metrics):
    """Build the plain-text analysis report shown in the app and offered for download.

    Args:
        data: DataFrame of the analysed feature columns. Only its row count
            and column labels are read.
        predictions: Per-row labels where 1 marks an anomaly and 0 a normal
            point (list or array).
        model_names: Names of the detectors that fed the ensemble. Only the
            number of names is reported.
        metrics: DataFrame of model metrics, rendered as a table.

    Returns:
        The report as a single newline-joined string.
    """
    total_points = len(data)
    # Count once instead of re-summing the labels for every placeholder.
    anomaly_count = int(np.sum(predictions))
    # An empty dataset reports 0% rather than raising ZeroDivisionError.
    anomaly_ratio = anomaly_count / total_points if total_points else 0.0

    try:
        metrics_table = metrics.to_markdown()
    except ImportError:
        # DataFrame.to_markdown depends on the optional `tabulate` package;
        # fall back to pandas' built-in text rendering when it is missing.
        metrics_table = metrics.to_string()

    # Column labels are not guaranteed to be strings, so convert before joining.
    feature_names = ', '.join(str(column) for column in data.columns)

    lines = [
        "",
        "Network Anomaly Detection Report",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "-----------------------------------------------",
        f"Total Data Points: {total_points}",
        f"Features Analyzed: {feature_names}",
        "",
        "Detection Results:",
        f"- Total Anomalies Detected: {anomaly_count}",
        f"- Anomaly Percentage: {anomaly_ratio:.2%}",
        "",
        "Model Performance:",
        metrics_table,
        "",
        "Conclusion:",
        f"The system detected {anomaly_count} potential anomalies "
        f"using ensemble of {len(model_names)} models.",
        "Recommended actions: Investigate flagged points, check network "
        "equipment logs, and verify traffic patterns.",
        "",
    ]
    return "\n".join(lines)
38
 
39
def plot_3d_projections(data, predictions):
    """Scatter-plot the data in 3D PCA space, split by predicted label.

    Args:
        data: 2-D numeric array-like (rows are samples, columns are features).
        predictions: Per-row labels; rows labelled 0 are drawn as "Normal",
            rows labelled 1 as "Anomaly".

    Returns:
        The matplotlib figure holding the 3D scatter plot. The caller owns
        the figure and is responsible for displaying and closing it.
    """
    features = np.asarray(data, dtype=float)
    # Boolean masking below needs an array; a plain list would not broadcast.
    labels = np.asarray(predictions).ravel()

    # PCA cannot extract more components than min(n_samples, n_features), so
    # cap the request and zero-fill the axes that cannot be computed instead
    # of failing on narrow or very short inputs.
    n_components = min(3, *features.shape)
    if n_components >= 1:
        projections = PCA(n_components=n_components).fit_transform(features)
    else:
        projections = np.zeros((features.shape[0], 0))
    if projections.shape[1] < 3:
        padding = np.zeros((projections.shape[0], 3 - projections.shape[1]))
        projections = np.hstack([projections, padding])

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')

    normal = projections[labels == 0]
    anomalies = projections[labels == 1]

    ax.scatter(normal[:, 0], normal[:, 1], normal[:, 2], c='b', label='Normal')
    ax.scatter(anomalies[:, 0], anomalies[:, 1], anomalies[:, 2],
               c='r', marker='x', label='Anomaly')

    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    # Configure this axes object directly rather than whatever pyplot
    # currently considers the "current" axes.
    ax.set_title('3D PCA Projection of Network Data')
    ax.legend()
    return fig
58
 
59
def _load_network_data(uploaded_file):
    """Return ``(data, y_true)`` from the uploaded CSV or a synthetic data set.

    ``y_true`` is a 0/1 array marking the injected anomalies for synthetic
    data, and ``None`` for uploaded files, which carry no ground truth.
    """
    if uploaded_file is not None:
        data = pd.read_csv(uploaded_file)
        st.success("Uploaded data loaded successfully!")
        return data, None

    # Generate synthetic network data
    np.random.seed(42)
    n_samples = 500
    data = pd.DataFrame({
        "traffic": np.random.normal(100, 15, n_samples),
        "latency": np.random.normal(50, 8, n_samples),
        "packet_loss": np.random.normal(0.5, 0.2, n_samples),
        "error_rate": np.random.normal(0.1, 0.05, n_samples),
    })
    # Inject anomalies
    anomaly_idx = np.random.choice(n_samples, 50, replace=False)
    data.loc[anomaly_idx, 'traffic'] *= 2.5
    data.loc[anomaly_idx, 'latency'] += 100
    data.loc[anomaly_idx, 'packet_loss'] *= 4
    st.info("Using synthetic network data. Upload a CSV to use your own.")

    y_true = np.zeros(n_samples, dtype=int)
    y_true[anomaly_idx] = 1
    return data, y_true


def _combine_scores(scores, method):
    """Merge per-detector score columns into one score per row.

    ``method`` is "Average", "MOA" (maximum of averages) or anything else
    for AOM (average of maximums), mirroring the sidebar choices.
    """
    if method == "Average":
        return average(scores)

    combiner = moa if method == "MOA" else aom
    try:
        return combiner(scores)
    except ValueError:
        # PyOD's bucketed combiners validate n_buckets (default 5) against
        # the number of detectors and reject the handful available here.
        # Fall back to one detector per bucket, where MOA reduces to the
        # row-wise maximum and AOM to the row-wise mean.
        if method == "MOA":
            return scores.max(axis=1)
        return scores.mean(axis=1)


def main():
    """Run the Streamlit app: load data, fit the selected detectors, combine
    their scores into ensemble predictions, and render metrics, projections
    and a downloadable report.
    """
    st.title("🛜 AI Network Anomaly Detection with Multi-Model Ensemble")

    # Sidebar configuration
    st.sidebar.header("Model Configuration")
    models = st.sidebar.multiselect(
        "Select Detection Models",
        ["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
        default=["Isolation Forest", "Local Outlier Factor"]
    )

    contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1)
    ensemble_method = st.sidebar.selectbox("Ensemble Method", ["Average", "MOA", "AOM"])

    # Data input section
    uploaded_file = st.file_uploader("Upload network data (CSV)", type=["csv"])
    data, y_true = _load_network_data(uploaded_file)

    if not models:
        st.error("Please select at least one detection model!")
        return

    # Data preprocessing
    numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
    if not numeric_cols:
        st.error("No numeric columns found in the data for anomaly detection.")
        return

    # The detectors cannot fit on missing or infinite values, so drop those
    # rows up front and keep the ground truth (if any) aligned with them.
    finite = data[numeric_cols].replace([np.inf, -np.inf], np.nan)
    valid_rows = finite.notna().all(axis=1).to_numpy()
    if not valid_rows.all():
        st.warning(
            f"Dropped {int((~valid_rows).sum())} rows with missing or non-finite values."
        )
        data = data.loc[valid_rows]
        if y_true is not None:
            y_true = y_true[valid_rows]
    if len(data) < 2:
        st.error("Not enough valid rows for anomaly detection.")
        return

    X = data[numeric_cols].to_numpy(dtype=float)
    X_norm = standardizer(X)

    # Model initialization: factories so only the selected detectors are built.
    detector_factories = {
        "Isolation Forest": lambda: IForest(contamination=contamination, n_jobs=-1),
        "Local Outlier Factor": lambda: LOF(contamination=contamination, n_jobs=-1),
        "One-Class SVM": lambda: OCSVM(contamination=contamination),
    }
    selected_models = [detector_factories[name]() for name in models]

    # Training and prediction
    st.subheader("Model Training Progress")
    progress_bar = st.progress(0)
    train_scores = np.zeros((len(X_norm), len(selected_models)))

    for i, model in enumerate(selected_models):
        model.fit(X_norm)
        # PyOD keeps the training-set scores on the fitted detector; reuse
        # them instead of re-scoring the same rows.
        train_scores[:, i] = model.decision_scores_
        progress_bar.progress((i + 1) / len(selected_models))

    # Ensemble prediction. Detectors score on different scales, so bring the
    # columns to a common scale before combining to stop one from dominating.
    combined_scores = _combine_scores(standardizer(train_scores), ensemble_method)

    threshold = np.percentile(combined_scores, 100 * (1 - contamination))
    predictions = (combined_scores > threshold).astype(int)

    # Performance metrics (only computable against the synthetic ground truth)
    no_truth = "N/A (No ground truth)"
    if y_true is not None:
        precision = precision_score(y_true, predictions, zero_division=0)
        recall = recall_score(y_true, predictions, zero_division=0)
        model_precision = [
            precision_score(y_true, m.labels_, zero_division=0) for m in selected_models
        ]
        model_recall = [
            recall_score(y_true, m.labels_, zero_division=0) for m in selected_models
        ]
    else:
        precision = recall = no_truth
        model_precision = [no_truth] * len(selected_models)
        model_recall = [no_truth] * len(selected_models)

    metrics_df = pd.DataFrame({
        "Model": list(models) + ["Ensemble"],
        "Precision": model_precision + [precision],
        "Recall": model_recall + [recall],
    })

    # Display results
    st.subheader("Detection Results")
    # Plain int so the metric widget never receives a numpy scalar.
    n_anomalies = int(predictions.sum())
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Anomalies", n_anomalies)
        st.metric("Anomaly Ratio", f"{n_anomalies / len(data):.2%}")
    with col2:
        st.metric("Ensemble Precision", f"{precision:.2%}" if isinstance(precision, float) else precision)
        st.metric("Ensemble Recall", f"{recall:.2%}" if isinstance(recall, float) else recall)

    # Visualization
    st.subheader("Data Visualization")
    # PCA can produce at most min(rows, features) components.
    max_components = min(X_norm.shape)

    tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])
    with tab1:
        if max_components >= 2:
            viz_data = PCA(n_components=2).fit_transform(X_norm)
            is_anomaly = predictions == 1
            # Draw on an explicit figure instead of pyplot's global one, and
            # close it after rendering so reruns do not accumulate figures.
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.scatter(viz_data[~is_anomaly, 0], viz_data[~is_anomaly, 1],
                       c='blue', label='Normal', alpha=0.6)
            ax.scatter(viz_data[is_anomaly, 0], viz_data[is_anomaly, 1],
                       c='red', marker='x', label='Anomaly')
            ax.set_xlabel("Principal Component 1")
            ax.set_ylabel("Principal Component 2")
            ax.set_title("PCA Projection of Network Data")
            ax.legend()
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("At least two numeric features are needed for the 2D projection.")

    with tab2:
        if max_components >= 3:
            fig_3d = plot_3d_projections(X_norm, predictions)
            st.pyplot(fig_3d)
            plt.close(fig_3d)
        else:
            st.info("At least three numeric features are needed for the 3D projection.")

    # Generate report
    st.subheader("Analysis Report")
    report = generate_report(data[numeric_cols], predictions, models, metrics_df)
    st.code(report, language='text')

    # Report download
    st.download_button(
        label="Download Full Report",
        data=report,
        file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
        mime="text/plain"
    )
192
 
193
  if __name__ == "__main__":
194
+ main()