Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,88 +4,191 @@ import numpy as np
|
|
4 |
import matplotlib.pyplot as plt
|
5 |
from pyod.models.iforest import IForest
|
6 |
from pyod.models.lof import LOF
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
|
|
|
|
|
|
|
25 |
data = pd.read_csv(uploaded_file)
|
26 |
-
st.
|
27 |
-
st.dataframe(data.head())
|
28 |
else:
|
29 |
-
|
30 |
-
# Generate synthetic data with features like traffic, latency, and packet_loss
|
31 |
np.random.seed(42)
|
32 |
-
n_samples =
|
33 |
-
traffic = np.random.normal(100, 10, n_samples)
|
34 |
-
latency = np.random.normal(50, 5, n_samples)
|
35 |
-
packet_loss = np.random.normal(0.5, 0.1, n_samples)
|
36 |
-
# Introduce anomalies by modifying a subset of data points
|
37 |
-
anomaly_indices = np.random.choice(n_samples, size=20, replace=False)
|
38 |
-
traffic[anomaly_indices] *= 1.5
|
39 |
-
latency[anomaly_indices] *= 2
|
40 |
-
packet_loss[anomaly_indices] *= 5
|
41 |
-
|
42 |
data = pd.DataFrame({
|
43 |
-
"traffic":
|
44 |
-
"latency":
|
45 |
-
"packet_loss":
|
|
|
46 |
})
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
#
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
return
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
model
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
if __name__ == "__main__":
|
91 |
-
main()
|
|
|
4 |
import matplotlib.pyplot as plt
|
5 |
from pyod.models.iforest import IForest
|
6 |
from pyod.models.lof import LOF
|
7 |
+
from pyod.models.ocsvm import OCSVM
|
8 |
+
from pyod.models.combination import aom, moa, average
|
9 |
+
from pyod.utils.utility import standardizer
|
10 |
+
from sklearn.decomposition import PCA
|
11 |
+
from sklearn.metrics import precision_score, recall_score
|
12 |
+
import base64
|
13 |
+
from datetime import datetime
|
14 |
|
15 |
+
# Configuration
# Silence the warning Streamlit shows when st.pyplot() is called without an
# explicit figure. Newer Streamlit releases no longer recognise this option
# and raise on unknown keys, so a failure here must not stop the app.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    # Option removed in this Streamlit version; there is nothing to silence.
    pass
|
17 |
+
|
18 |
+
def generate_report(data, predictions, model_names, metrics):
    """Build the plain-text summary of an anomaly-detection run.

    Args:
        data: DataFrame of the analysed feature columns; its length and
            column names are quoted in the report.
        predictions: Sequence of 0/1 flags, one per row of ``data``
            (1 marks an anomaly).
        model_names: Names of the detectors in the ensemble; only their
            count is reported.
        metrics: DataFrame of per-model metrics, embedded as a table.

    Returns:
        The report as a single string. Lines carry no leading indentation so
        the embedded multi-line metrics table stays aligned with the rest.
    """
    total_points = len(data)
    anomaly_count = sum(predictions)
    # An empty data set would otherwise divide by zero.
    anomaly_ratio = anomaly_count / total_points if total_points else 0.0

    # DataFrame.to_markdown needs the optional "tabulate" package; fall back
    # to pandas' own plain-text table when it is not installed.
    try:
        metrics_table = metrics.to_markdown()
    except ImportError:
        metrics_table = metrics.to_string()

    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Column labels are not guaranteed to be strings (e.g. integer headers).
    feature_names = ', '.join(map(str, data.columns))

    lines = [
        "",
        "Network Anomaly Detection Report",
        f"Generated: {generated_at}",
        "-----------------------------------------------",
        f"Total Data Points: {total_points}",
        f"Features Analyzed: {feature_names}",
        "",
        "Detection Results:",
        f"- Total Anomalies Detected: {anomaly_count}",
        f"- Anomaly Percentage: {anomaly_ratio:.2%}",
        "",
        "Model Performance:",
        metrics_table,
        "",
        "Conclusion:",
        f"The system detected {anomaly_count} potential anomalies using ensemble of {len(model_names)} models.",
        "Recommended actions: Investigate flagged points, check network equipment logs, and verify traffic patterns.",
        "",
    ]
    return "\n".join(lines)
|
38 |
|
39 |
+
def plot_3d_projections(data, predictions):
    """Draw a 3-D PCA scatter plot of normal points versus anomalies.

    Args:
        data: 2-D array-like of numeric features, one row per sample.
        predictions: 0/1 flags aligned with the rows of ``data``
            (1 marks an anomaly).

    Returns:
        The matplotlib figure holding the 3-D scatter plot.
    """
    data = np.asarray(data)
    # A plain list would turn ``predictions == 0`` into a scalar comparison.
    predictions = np.asarray(predictions)

    # PCA cannot extract more components than min(n_samples, n_features), so
    # narrow inputs get fewer components and the missing axes are zero-filled.
    n_components = min(3, *data.shape)
    projections = PCA(n_components=n_components).fit_transform(data)
    if n_components < 3:
        padding = np.zeros((projections.shape[0], 3 - n_components))
        projections = np.hstack([projections, padding])

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')

    normal = projections[predictions == 0]
    anomalies = projections[predictions == 1]

    ax.scatter(normal[:, 0], normal[:, 1], normal[:, 2], c='b', label='Normal')
    ax.scatter(anomalies[:, 0], anomalies[:, 1], anomalies[:, 2],
               c='r', marker='x', label='Anomaly')

    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    # Use the axes' own methods rather than pyplot's implicit "current figure"
    # state, so the title and legend always land on this figure.
    ax.set_title('3D PCA Projection of Network Data')
    ax.legend()
    return fig
|
58 |
|
59 |
+
def _load_network_data(uploaded_file):
    """Return ``(data, y_true)`` from the upload or the synthetic generator.

    ``y_true`` holds the injected 0/1 anomaly labels for synthetic data and is
    ``None`` for uploads, where no ground truth is available.
    """
    if uploaded_file:
        data = pd.read_csv(uploaded_file)
        st.success("Uploaded data loaded successfully!")
        return data, None

    # Generate synthetic network data
    np.random.seed(42)
    n_samples = 500
    data = pd.DataFrame({
        "traffic": np.random.normal(100, 15, n_samples),
        "latency": np.random.normal(50, 8, n_samples),
        "packet_loss": np.random.normal(0.5, 0.2, n_samples),
        "error_rate": np.random.normal(0.1, 0.05, n_samples),
    })
    # Inject anomalies
    anomaly_idx = np.random.choice(n_samples, 50, replace=False)
    data.loc[anomaly_idx, 'traffic'] *= 2.5
    data.loc[anomaly_idx, 'latency'] += 100
    data.loc[anomaly_idx, 'packet_loss'] *= 4
    st.info("Using synthetic network data. Upload a CSV to use your own.")

    y_true = np.zeros(n_samples, dtype=int)
    y_true[anomaly_idx] = 1
    return data, y_true


def _combine_scores(scores, method):
    """Collapse per-model score columns into one ensemble score per sample.

    pyod's ``aom``/``moa`` use five buckets by default and raise when the
    detector columns cannot be divided into that many buckets, which is
    always the case with the one to three models offered here. The bucketed
    combinations are therefore computed directly, grouping detectors into
    buckets of at most two columns.
    """
    if method == "Average":
        return average(scores)

    n_buckets = max(1, (scores.shape[1] + 1) // 2)
    buckets = np.array_split(scores, n_buckets, axis=1)
    if method == "MOA":
        # Maximum of the per-bucket averages.
        return np.max([bucket.mean(axis=1) for bucket in buckets], axis=0)
    # AOM: average of the per-bucket maxima.
    return np.mean([bucket.max(axis=1) for bucket in buckets], axis=0)


def _plot_2d_projection(X_norm, predictions):
    """Return a figure with the 2-D PCA scatter of normal vs. anomalous points."""
    # PCA cannot extract more components than min(n_samples, n_features);
    # a single-feature input is padded with a zero second axis.
    n_components = min(2, *X_norm.shape)
    viz_data = PCA(n_components=n_components).fit_transform(X_norm)
    if n_components < 2:
        padding = np.zeros((viz_data.shape[0], 2 - n_components))
        viz_data = np.hstack([viz_data, padding])

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(viz_data[predictions == 0, 0], viz_data[predictions == 0, 1],
               c='blue', label='Normal', alpha=0.6)
    ax.scatter(viz_data[predictions == 1, 0], viz_data[predictions == 1, 1],
               c='red', marker='x', label='Anomaly')
    ax.set_xlabel("Principal Component 1")
    ax.set_ylabel("Principal Component 2")
    ax.set_title("PCA Projection of Network Data")
    ax.legend()
    return fig


def main():
    """Run the Streamlit app: configure, load data, detect, visualise, report."""
    st.title("🛜 AI Network Anomaly Detection with Multi-Model Ensemble")

    # Sidebar configuration
    st.sidebar.header("Model Configuration")
    models = st.sidebar.multiselect(
        "Select Detection Models",
        ["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
        default=["Isolation Forest", "Local Outlier Factor"]
    )

    contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1)
    ensemble_method = st.sidebar.selectbox("Ensemble Method", ["Average", "MOA", "AOM"])

    # Data input section
    uploaded_file = st.file_uploader("Upload network data (CSV)", type=["csv"])
    data, y_true = _load_network_data(uploaded_file)

    if not models:
        st.error("Please select at least one detection model!")
        return

    # Data preprocessing
    numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
    if not numeric_cols:
        st.error("The data contains no numeric columns to analyze.")
        return

    features = data[numeric_cols]
    # The detectors cannot be fitted on missing values, so incomplete rows
    # are left out (and reported) instead of crashing the run.
    complete_rows = features.notna().all(axis=1).to_numpy()
    if not complete_rows.all():
        st.warning(f"Ignoring {int((~complete_rows).sum())} row(s) with missing values.")
        features = features[complete_rows]
        if y_true is not None:
            y_true = y_true[complete_rows]
    if features.empty:
        st.error("No complete numeric rows are available to analyze.")
        return

    X = features.values
    X_norm = standardizer(X)

    # Model initialization
    model_dict = {
        "Isolation Forest": IForest(contamination=contamination, n_jobs=-1),
        "Local Outlier Factor": LOF(contamination=contamination, n_jobs=-1),
        "One-Class SVM": OCSVM(contamination=contamination)
    }
    selected_models = [model_dict[name] for name in models]

    # Training and prediction
    st.subheader("Model Training Progress")
    progress_bar = st.progress(0)
    train_scores = np.zeros([len(X), len(selected_models)])

    for i, model in enumerate(selected_models):
        model.fit(X_norm)
        train_scores[:, i] = model.decision_function(X_norm)
        progress_bar.progress((i + 1) / len(selected_models))

    # Ensemble prediction. The detectors score on different scales, so each
    # column is standardised first to keep one model from dominating.
    combined_scores = _combine_scores(standardizer(train_scores), ensemble_method)

    threshold = np.percentile(combined_scores, 100 * (1 - contamination))
    predictions = (combined_scores > threshold).astype(int)

    # Performance metrics
    if y_true is not None:  # Use synthetic ground truth
        precision = precision_score(y_true, predictions)
        recall = recall_score(y_true, predictions)
        # Per-model figures come from each detector's own training labels,
        # so the table columns really are precision and recall.
        model_precisions = [precision_score(y_true, m.labels_) for m in selected_models]
        model_recalls = [recall_score(y_true, m.labels_) for m in selected_models]
    else:
        precision = recall = "N/A (No ground truth)"
        model_precisions = [precision] * len(selected_models)
        model_recalls = [recall] * len(selected_models)

    metrics_df = pd.DataFrame({
        "Model": models + ["Ensemble"],
        "Precision": model_precisions + [precision],
        "Recall": model_recalls + [recall]
    })

    # Display results
    st.subheader("Detection Results")
    # Plain int: some Streamlit versions reject numpy integers in st.metric.
    total_anomalies = int(predictions.sum())
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Anomalies", total_anomalies)
        st.metric("Anomaly Ratio", f"{total_anomalies / len(features):.2%}")
    with col2:
        st.metric("Ensemble Precision", f"{precision:.2%}" if isinstance(precision, float) else precision)
        st.metric("Ensemble Recall", f"{recall:.2%}" if isinstance(recall, float) else recall)

    # Visualization
    st.subheader("Data Visualization")

    tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])
    with tab1:
        # Pass the figure explicitly; st.pyplot() without one relies on
        # deprecated global pyplot state.
        st.pyplot(_plot_2d_projection(X_norm, predictions))

    with tab2:
        st.pyplot(plot_3d_projections(X_norm, predictions))

    # Generate report
    st.subheader("Analysis Report")
    report = generate_report(features, predictions, models, metrics_df)
    st.code(report, language='text')

    # Report download
    st.download_button(
        label="Download Full Report",
        data=report,
        file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
        mime="text/plain"
    )
|
192 |
|
193 |
# Entry point: run the app when this file is executed as a script.
if __name__ == "__main__":
    main()
|