Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,214 +1,196 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
-
import
|
5 |
-
from
|
6 |
-
|
7 |
-
from
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
plt.legend()
|
61 |
-
return fig
|
62 |
-
|
63 |
-
def main():
|
64 |
-
st.title("π AI Network Anomaly Detection with Multi-Model Ensemble")
|
65 |
|
66 |
-
#
|
67 |
-
|
68 |
-
|
69 |
-
"Select Detection Models",
|
70 |
-
["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
|
71 |
-
default=["Isolation Forest", "Local Outlier Factor"]
|
72 |
-
)
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
-
if
|
81 |
-
|
82 |
-
|
83 |
-
st.
|
84 |
-
except Exception as e:
|
85 |
-
st.error(f"Error reading file: {str(e)}")
|
86 |
-
return
|
87 |
-
else:
|
88 |
-
# Generate synthetic network data
|
89 |
-
np.random.seed(42)
|
90 |
-
n_samples = 500
|
91 |
-
data = pd.DataFrame({
|
92 |
-
"traffic": np.random.normal(100, 15, n_samples),
|
93 |
-
"latency": np.random.normal(50, 8, n_samples),
|
94 |
-
"packet_loss": np.random.normal(0.5, 0.2, n_samples),
|
95 |
-
"error_rate": np.random.normal(0.1, 0.05, n_samples)
|
96 |
-
})
|
97 |
-
# Inject anomalies
|
98 |
-
anomaly_idx = np.random.choice(n_samples, 50, replace=False)
|
99 |
-
data.loc[anomaly_idx, 'traffic'] *= 2.5
|
100 |
-
data.loc[anomaly_idx, 'latency'] += 100
|
101 |
-
data.loc[anomaly_idx, 'packet_loss'] *= 4
|
102 |
-
st.info("Using synthetic network data. Upload a CSV to use your own.")
|
103 |
-
|
104 |
-
# Data preprocessing
|
105 |
-
numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
|
106 |
-
if not numeric_cols:
|
107 |
-
st.error("No numeric columns found for analysis!")
|
108 |
-
return
|
109 |
-
|
110 |
-
X = data[numeric_cols].values
|
111 |
-
X_norm = standardizer(X)
|
112 |
-
|
113 |
-
# Model initialization
|
114 |
-
model_dict = {
|
115 |
-
"Isolation Forest": IForest(contamination=contamination, n_jobs=-1),
|
116 |
-
"Local Outlier Factor": LOF(contamination=contamination, n_jobs=-1),
|
117 |
-
"One-Class SVM": OCSVM(contamination=contamination)
|
118 |
-
}
|
119 |
-
|
120 |
-
selected_models = [model_dict[m] for m in models if m in model_dict]
|
121 |
-
if not selected_models:
|
122 |
-
st.error("Please select at least one detection model!")
|
123 |
-
return
|
124 |
-
|
125 |
-
# Model training
|
126 |
-
try:
|
127 |
-
st.subheader("Model Training Progress")
|
128 |
-
progress_bar = st.progress(0)
|
129 |
-
train_scores = np.zeros([len(X), len(selected_models)])
|
130 |
-
|
131 |
-
for i, model in enumerate(selected_models):
|
132 |
-
model.fit(X_norm)
|
133 |
-
train_scores[:, i] = model.decision_function(X_norm)
|
134 |
-
progress_bar.progress((i+1)/len(selected_models))
|
135 |
-
except Exception as e:
|
136 |
-
st.error(f"Model training failed: {str(e)}")
|
137 |
-
return
|
138 |
-
|
139 |
-
# Ensemble prediction
|
140 |
-
try:
|
141 |
-
if ensemble_method == "Average":
|
142 |
-
combined_scores = average(train_scores)
|
143 |
-
elif ensemble_method == "MOA":
|
144 |
-
combined_scores = moa(train_scores)
|
145 |
else:
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
# Visualization
|
180 |
-
st.subheader("Data Visualization")
|
181 |
-
tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])
|
182 |
-
|
183 |
-
with tab1:
|
184 |
-
fig, ax = plt.subplots(figsize=(10, 6))
|
185 |
-
pca = PCA(n_components=2)
|
186 |
-
viz_data = pca.fit_transform(X_norm)
|
187 |
-
ax.scatter(viz_data[predictions==0, 0], viz_data[predictions==0, 1],
|
188 |
-
c='blue', label='Normal', alpha=0.6)
|
189 |
-
ax.scatter(viz_data[predictions==1, 0], viz_data[predictions==1, 1],
|
190 |
-
c='red', marker='x', label='Anomaly')
|
191 |
-
ax.set_xlabel("Principal Component 1")
|
192 |
-
ax.set_ylabel("Principal Component 2")
|
193 |
-
ax.set_title("PCA Projection of Network Data")
|
194 |
-
ax.legend()
|
195 |
-
st.pyplot(fig)
|
196 |
-
|
197 |
-
with tab2:
|
198 |
-
st.pyplot(plot_3d_projections(X_norm, predictions))
|
199 |
-
|
200 |
-
# Report generation
|
201 |
-
st.subheader("Analysis Report")
|
202 |
-
report = generate_report(data[numeric_cols], predictions, models, metrics_df)
|
203 |
-
st.code(report, language='text')
|
204 |
-
|
205 |
-
# Download report
|
206 |
-
st.download_button(
|
207 |
-
label="Download Full Report",
|
208 |
-
data=report,
|
209 |
-
file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
|
210 |
-
mime="text/plain"
|
211 |
-
)
|
212 |
-
|
213 |
-
if __name__ == "__main__":
|
214 |
-
main()
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
+
import plotly.express as px
|
5 |
+
from sklearn.ensemble import IsolationForest
|
6 |
+
import io
|
7 |
+
from fpdf import FPDF
|
8 |
+
import requests
|
9 |
+
import PyPDF2
|
10 |
+
import tempfile
|
11 |
+
import os
|
12 |
+
|
13 |
+
# Page-level configuration: browser-tab title and wide layout.
st.set_page_config(page_title="π WiFi Anomaly Detection", layout="wide")

# -------------------------------
# WiFi Anomaly Detection Overview
# -------------------------------
# Static introduction rendered at the top of the page on every run.
st.title("π WiFi Anomaly Detection Overview")
st.markdown("""
**Detect anomalies in Public Wi-Fi Systems**:
Identify suspicious spikes that may indicate hacking attempts, ensuring proactive maintenance and reliable network performance.
""")
st.markdown("### How it Works:")
st.markdown("""
- **Data Collection:** Upload network logs in CSV, TXT, or PDF format.
- **Anomaly Detection:** Use AI algorithms to automatically spot unusual patterns.
- **Visualization:** Review data in 2D and 3D interactive charts.
- **Report Generation:** Download a comprehensive PDF report with summaries and visuals.
""")

# -------------------------------
# Sidebar: File Upload & Options
# -------------------------------
st.sidebar.header("π Upload Data File")
# `uploaded_file` is compared against None by the main workflow below.
uploaded_file = st.sidebar.file_uploader("Choose a file", type=["csv", "txt", "pdf"])
st.sidebar.markdown("---")
# `model_option` selects between run_local_anomaly_detection ("Local Model")
# and call_groq_api (any other choice) in the main workflow.
model_option = st.sidebar.radio("Select Anomaly Detection Model", ("Local Model", "Groq API"))
|
38 |
+
|
39 |
+
# -------------------------------
|
40 |
+
# Helper Functions
|
41 |
+
# -------------------------------
|
42 |
+
|
43 |
+
def load_data(uploaded_file):
    """Parse an uploaded CSV, TXT, or PDF file into a DataFrame.

    The format is chosen from the file-name extension (case-insensitive).

    Args:
        uploaded_file: Seekable file-like object with a ``name`` attribute,
            such as the value returned by ``st.sidebar.file_uploader``.

    Returns:
        A ``(df, kind)`` tuple where ``kind`` is ``"csv"``, ``"txt"`` or
        ``"pdf"``. For PDFs the frame has one ``"text"`` column holding all
        extracted text in a single row. On any failure an error is shown via
        ``st.error`` and ``(None, None)`` is returned.
    """
    file_type = uploaded_file.name.rsplit('.', 1)[-1].lower()

    if file_type == 'csv':
        try:
            return pd.read_csv(uploaded_file), "csv"
        except Exception:
            st.error("Error reading CSV file.")
            return None, None

    if file_type == 'txt':
        try:
            return _read_delimited_text(uploaded_file), "txt"
        except Exception:
            st.error("Error reading TXT file.")
            return None, None

    if file_type == 'pdf':
        try:
            pdf_reader = PyPDF2.PdfReader(uploaded_file)
            # Guard against a page yielding no text (e.g. image-only pages),
            # which would otherwise break the concatenation.
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
            # For demonstration, create a DataFrame with one text column
            return pd.DataFrame({"text": [text]}), "pdf"
        except Exception:
            st.error("Error reading PDF file.")
            return None, None

    st.error("Unsupported file type.")
    return None, None


def _read_delimited_text(uploaded_file):
    """Read a TXT upload as comma-separated, else as whitespace-separated.

    A comma parse of whitespace-separated data does not fail; it just yields
    a single column. So the whitespace parse is also tried whenever the comma
    parse fails or produces only one column, and is preferred only when it
    yields more columns.
    """
    try:
        comma_df = pd.read_csv(uploaded_file, sep=",")
    except ValueError:  # pandas ParserError/EmptyDataError and decode errors
        comma_df = None

    if comma_df is not None and comma_df.shape[1] > 1:
        return comma_df

    # The first attempt consumed the stream; rewind before parsing again.
    uploaded_file.seek(0)
    try:
        space_df = pd.read_csv(uploaded_file, sep=r"\s+")
    except ValueError:
        if comma_df is None:
            raise
        return comma_df

    if comma_df is None or space_df.shape[1] > comma_df.shape[1]:
        return space_df
    return comma_df
|
78 |
+
|
79 |
+
def run_local_anomaly_detection(df):
    """Flag outlier rows with an Isolation Forest fitted on the numeric columns.

    Args:
        df: Input DataFrame. It is left unmodified.

    Returns:
        A copy of ``df`` with two added columns: ``anomaly`` (-1 for
        anomalies, 1 for normal records) and ``anomaly_flag`` (a display
        label). If fewer than two numeric columns exist, a warning is shown
        and ``df`` itself is returned without those columns.
    """
    # Use IsolationForest for numeric data anomaly detection.
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) < 2:
        st.warning("Not enough numeric columns for anomaly detection. (Need at least 2 numeric columns)")
        return df

    features = df[numeric_cols].fillna(0)
    model = IsolationForest(contamination=0.1, random_state=42)
    model.fit(features)

    # Work on a copy so the caller's frame does not silently gain columns.
    result = df.copy()
    # Model returns -1 for anomalies, 1 for normal records
    result['anomaly'] = model.predict(features)
    # Labels use escapes (police-light and check-mark emoji) so the literals
    # survive re-encoding of the source file.
    result['anomaly_flag'] = np.where(
        result['anomaly'] == -1,
        "\U0001F6A8 Anomaly",
        "\u2705 Normal",
    )
    return result
|
92 |
+
|
93 |
+
def call_groq_api(df):
    """Stand-in for a Groq-backed detector; currently runs the local model.

    No network request is made. A real integration would send the data via a
    POST request such as
    ``requests.post("https://api.groq.ai/detect", json=df.to_dict(orient="records"))``
    and then process the JSON response. For demo purposes the call is simply
    forwarded to ``run_local_anomaly_detection``.
    """
    return run_local_anomaly_detection(df)
|
102 |
+
|
103 |
+
def generate_plots(df):
    """Build 2D and 3D Plotly scatter plots from the first numeric feature columns.

    Args:
        df: DataFrame of records, normally the detector output carrying the
            ``anomaly`` and ``anomaly_flag`` columns.

    Returns:
        ``(fig2d, fig3d)``. ``fig2d`` is ``None`` with fewer than two numeric
        feature columns; ``fig3d`` is ``None`` with fewer than three.
    """
    # 'anomaly' is the detector's -1/1 output, not a measured feature. Left
    # in, it would be used as a plot axis (the z axis for two-feature data).
    feature_cols = [
        col
        for col in df.select_dtypes(include=[np.number]).columns
        if col != 'anomaly'
    ]
    # Colour by label only when the label column is present.
    color = 'anomaly_flag' if 'anomaly_flag' in df.columns else None

    fig2d, fig3d = None, None
    if len(feature_cols) >= 2:
        fig2d = px.scatter(df, x=feature_cols[0], y=feature_cols[1],
                           color=color,
                           title="π 2D Anomaly Detection Plot")
    if len(feature_cols) >= 3:
        fig3d = px.scatter_3d(df, x=feature_cols[0], y=feature_cols[1], z=feature_cols[2],
                              color=color,
                              title="π 3D Anomaly Detection Plot")
    return fig2d, fig3d
|
116 |
+
|
117 |
+
def generate_pdf_report(summary_text, fig2d, fig3d):
    """Render the summary text and any available figures into a PDF.

    Args:
        summary_text: Text written below the report title.
        fig2d: Plotly figure to embed, or ``None`` to skip it.
        fig3d: Plotly figure to embed, or ``None`` to skip it.

    Returns:
        The PDF document as bytes.

    Raises:
        Whatever ``write_image`` or the FPDF calls raise; temporary image
        files are removed in that case too.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", 'B', 16)
    pdf.cell(0, 10, "WiFi Anomaly Detection Report", ln=True)
    pdf.ln(10)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary_text)
    pdf.ln(10)

    image_files = []
    try:
        # Save figures as temporary image files using Kaleido (Plotly's image export engine)
        for fig in (fig2d, fig3d):
            if fig is None:
                continue
            # Close our handle before exporting: writing by name to a file
            # that is still open is not portable across platforms.
            handle, path = tempfile.mkstemp(suffix=".png")
            os.close(handle)
            image_files.append(path)
            fig.write_image(path)

        # Add each image to the PDF
        for image in image_files:
            pdf.image(image, w=pdf.w - 40)
            pdf.ln(10)
    finally:
        # Clean up temporary image files, even when export or embedding failed.
        for image in image_files:
            try:
                os.remove(image)
            except OSError:
                pass

    pdf_output = io.BytesIO()
    pdf.output(pdf_output)
    pdf_data = pdf_output.getvalue()
    pdf_output.close()
    return pdf_data
|
152 |
+
|
153 |
+
# -------------------------------
|
154 |
+
# Main Workflow
|
155 |
+
# -------------------------------
|
156 |
+
# Top-level Streamlit flow: load the upload, preview it, and on request run
# detection, show the summary and plots, and offer the PDF report.
if uploaded_file is not None:
    df, file_type = load_data(uploaded_file)

    if df is not None:
        if file_type == "pdf":
            st.subheader("π Extracted Text from PDF:")
            st.text_area("PDF Content", df["text"][0], height=300)
        else:
            st.subheader("π Data Preview:")
            st.dataframe(df.head())

        if st.button("\u25B6\uFE0F Check Data Visualization & Summary"):
            if file_type in ["csv", "txt"]:
                # Run the selected anomaly detection method
                if model_option == "Local Model":
                    df_result = run_local_anomaly_detection(df)
                else:
                    df_result = call_groq_api(df)

                # Detection is skipped (with its own warning) when there are
                # too few numeric columns; then there is nothing to summarise.
                if 'anomaly' in df_result.columns:
                    st.subheader("π Anomaly Detection Summary:")
                    anomaly_count = (df_result['anomaly'] == -1).sum()
                    total_count = df_result.shape[0]
                    summary_text = f"Total records: {total_count}\nDetected anomalies: {anomaly_count}"
                    st.text(summary_text)
                    st.dataframe(df_result.head())

                    fig2d, fig3d = generate_plots(df_result)
                    if fig2d is not None:
                        st.plotly_chart(fig2d, use_container_width=True)
                    if fig3d is not None:
                        st.plotly_chart(fig3d, use_container_width=True)

                    # Offer the download directly. A second st.button nested
                    # here could never fire: clicking it reruns the script
                    # with the outer button reset to False.
                    try:
                        pdf_data = generate_pdf_report(summary_text, fig2d, fig3d)
                    except Exception as exc:
                        st.error(f"Could not build the PDF report: {exc}")
                    else:
                        st.download_button("\u2B07\uFE0F Download Report as PDF", data=pdf_data,
                                           file_name="wifi_anomaly_report.pdf",
                                           mime="application/pdf")
            else:
                st.info("Anomaly detection is available only for CSV/TXT data.")
else:
    st.info("Please upload a CSV, TXT, or PDF file to begin. π")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|