sunbal7 committed · Commit c6655cf · verified · 1 Parent(s): c3de3ec

Update app.py

Files changed (1)
  1. app.py +187 -205
app.py CHANGED
@@ -1,214 +1,196 @@
import streamlit as st
import pandas as pd
import numpy as np
- import matplotlib.pyplot as plt
- from pyod.models.iforest import IForest
- from pyod.models.lof import LOF
- from pyod.models.ocsvm import OCSVM
- from pyod.models.combination import aom, moa, average
- from pyod.utils.utility import standardizer
- from sklearn.decomposition import PCA
- from sklearn.metrics import precision_score, recall_score
- from datetime import datetime
-
- def generate_report(data, predictions, model_names, metrics):
-     # Create markdown table manually
-     metrics_table = "\n".join([
-         "| Model | Precision | Recall |",
-         "|-------|-----------|--------|"
-     ] + [
-         f"| {row['Model']} | {row['Precision']} | {row['Recall']} |"
-         for _, row in metrics.iterrows()
-     ])
-
-     report = f"""
- Network Anomaly Detection Report
- Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
- -----------------------------------------------
- Total Data Points: {len(data)}
- Features Analyzed: {', '.join(data.columns)}
-
- Detection Results:
- - Total Anomalies Detected: {sum(predictions)}
- - Anomaly Percentage: {sum(predictions)/len(data):.2%}
-
- Model Performance:
- {metrics_table}
-
- Conclusion:
- The system detected {sum(predictions)} potential anomalies using ensemble of {len(model_names)} models.
- Recommended actions: Investigate flagged points, check network equipment logs, and verify traffic patterns.
- """
-     return report
-
- def plot_3d_projections(data, predictions):
-     fig = plt.figure(figsize=(10, 7))
-     ax = fig.add_subplot(111, projection='3d')
-     pca = PCA(n_components=3)
-     projections = pca.fit_transform(data)
-
-     normal = projections[predictions == 0]
-     anomalies = projections[predictions == 1]
-
-     ax.scatter(normal[:,0], normal[:,1], normal[:,2], c='b', label='Normal')
-     ax.scatter(anomalies[:,0], anomalies[:,1], anomalies[:,2], c='r', marker='x', label='Anomaly')
-
-     ax.set_xlabel('PC1')
-     ax.set_ylabel('PC2')
-     ax.set_zlabel('PC3')
-     plt.title('3D PCA Projection of Network Data')
-     plt.legend()
-     return fig
-
- def main():
-     st.title("🛜 AI Network Anomaly Detection with Multi-Model Ensemble")
-
-     # Sidebar configuration
-     st.sidebar.header("Model Configuration")
-     models = st.sidebar.multiselect(
-         "Select Detection Models",
-         ["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
-         default=["Isolation Forest", "Local Outlier Factor"]
-     )
-
-     contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1)
-     ensemble_method = st.sidebar.selectbox("Ensemble Method", ["Average", "MOA", "AOM"])
-
-     # Data handling
-     uploaded_file = st.file_uploader("Upload network data (CSV)", type=["csv"])
-
-     if uploaded_file:
-         try:
-             data = pd.read_csv(uploaded_file)
-             st.success("Uploaded data loaded successfully!")
-         except Exception as e:
-             st.error(f"Error reading file: {str(e)}")
-             return
-     else:
-         # Generate synthetic network data
-         np.random.seed(42)
-         n_samples = 500
-         data = pd.DataFrame({
-             "traffic": np.random.normal(100, 15, n_samples),
-             "latency": np.random.normal(50, 8, n_samples),
-             "packet_loss": np.random.normal(0.5, 0.2, n_samples),
-             "error_rate": np.random.normal(0.1, 0.05, n_samples)
-         })
-         # Inject anomalies
-         anomaly_idx = np.random.choice(n_samples, 50, replace=False)
-         data.loc[anomaly_idx, 'traffic'] *= 2.5
-         data.loc[anomaly_idx, 'latency'] += 100
-         data.loc[anomaly_idx, 'packet_loss'] *= 4
-         st.info("Using synthetic network data. Upload a CSV to use your own.")
-
-     # Data preprocessing
-     numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
-     if not numeric_cols:
-         st.error("No numeric columns found for analysis!")
-         return
-
-     X = data[numeric_cols].values
-     X_norm = standardizer(X)
-
-     # Model initialization
-     model_dict = {
-         "Isolation Forest": IForest(contamination=contamination, n_jobs=-1),
-         "Local Outlier Factor": LOF(contamination=contamination, n_jobs=-1),
-         "One-Class SVM": OCSVM(contamination=contamination)
-     }
-
-     selected_models = [model_dict[m] for m in models if m in model_dict]
-     if not selected_models:
-         st.error("Please select at least one detection model!")
-         return
-
-     # Model training
-     try:
-         st.subheader("Model Training Progress")
-         progress_bar = st.progress(0)
-         train_scores = np.zeros([len(X), len(selected_models)])
-
-         for i, model in enumerate(selected_models):
-             model.fit(X_norm)
-             train_scores[:, i] = model.decision_function(X_norm)
-             progress_bar.progress((i+1)/len(selected_models))
-     except Exception as e:
-         st.error(f"Model training failed: {str(e)}")
-         return
-
-     # Ensemble prediction
-     try:
-         if ensemble_method == "Average":
-             combined_scores = average(train_scores)
-         elif ensemble_method == "MOA":
-             combined_scores = moa(train_scores)
        else:
-             combined_scores = aom(train_scores)
-     except Exception as e:
-         st.error(f"Ensemble method failed: {str(e)}")
-         return
-
-     threshold = np.percentile(combined_scores, 100*(1-contamination))
-     predictions = (combined_scores > threshold).astype(int)
-
-     # Performance metrics
-     if uploaded_file is None:
-         y_true = np.zeros(n_samples)
-         y_true[anomaly_idx] = 1
-         precision = precision_score(y_true, predictions)
-         recall = recall_score(y_true, predictions)
-     else:
-         precision = recall = "N/A (No ground truth)"
-
-     metrics_df = pd.DataFrame({
-         "Model": models + ["Ensemble"],
-         "Precision": list([m.decision_scores_.mean() for m in selected_models]) + [precision],
-         "Recall": list([m.decision_scores_.std() for m in selected_models]) + [recall]
-     })
-
-     # Display results
-     st.subheader("Detection Results")
-     col1, col2 = st.columns(2)
-     with col1:
-         st.metric("Total Anomalies", sum(predictions))
-         st.metric("Anomaly Ratio", f"{sum(predictions)/len(data):.2%}")
-     with col2:
-         st.metric("Ensemble Precision", f"{precision:.2%}" if isinstance(precision, float) else precision)
-         st.metric("Ensemble Recall", f"{recall:.2%}" if isinstance(recall, float) else recall)
-
-     # Visualization
-     st.subheader("Data Visualization")
-     tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])
-
-     with tab1:
-         fig, ax = plt.subplots(figsize=(10, 6))
-         pca = PCA(n_components=2)
-         viz_data = pca.fit_transform(X_norm)
-         ax.scatter(viz_data[predictions==0, 0], viz_data[predictions==0, 1],
-                    c='blue', label='Normal', alpha=0.6)
-         ax.scatter(viz_data[predictions==1, 0], viz_data[predictions==1, 1],
-                    c='red', marker='x', label='Anomaly')
-         ax.set_xlabel("Principal Component 1")
-         ax.set_ylabel("Principal Component 2")
-         ax.set_title("PCA Projection of Network Data")
-         ax.legend()
-         st.pyplot(fig)
-
-     with tab2:
-         st.pyplot(plot_3d_projections(X_norm, predictions))
-
-     # Report generation
-     st.subheader("Analysis Report")
-     report = generate_report(data[numeric_cols], predictions, models, metrics_df)
-     st.code(report, language='text')
-
-     # Download report
-     st.download_button(
-         label="Download Full Report",
-         data=report,
-         file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
-         mime="text/plain"
-     )
-
- if __name__ == "__main__":
-     main()
 
import streamlit as st
import pandas as pd
import numpy as np
+ import plotly.express as px
+ from sklearn.ensemble import IsolationForest
+ import io
+ from fpdf import FPDF
+ import requests
+ import PyPDF2
+ import tempfile
+ import os
+
+ st.set_page_config(page_title="🚀 WiFi Anomaly Detection", layout="wide")
+
+ # -------------------------------
+ # WiFi Anomaly Detection Overview
+ # -------------------------------
+ st.title("🚀 WiFi Anomaly Detection Overview")
+ st.markdown("""
+ **Detect anomalies in public Wi-Fi systems**:
+ Identify suspicious spikes that may indicate hacking attempts, supporting proactive maintenance and reliable network performance.
+ """)
+ st.markdown("### How it Works:")
+ st.markdown("""
+ - **Data Collection:** Upload network logs in CSV, TXT, or PDF format.
+ - **Anomaly Detection:** Use AI algorithms to automatically spot unusual patterns.
+ - **Visualization:** Review data in 2D and 3D interactive charts.
+ - **Report Generation:** Download a comprehensive PDF report with summaries and visuals.
+ """)
+
+ # -------------------------------
+ # Sidebar: File Upload & Options
+ # -------------------------------
+ st.sidebar.header("📁 Upload Data File")
+ uploaded_file = st.sidebar.file_uploader("Choose a file", type=["csv", "txt", "pdf"])
+ st.sidebar.markdown("---")
+ model_option = st.sidebar.radio("Select Anomaly Detection Model", ("Local Model", "Groq API"))
+
+ # -------------------------------
+ # Helper Functions
+ # -------------------------------
+
+ def load_data(uploaded_file):
+     file_type = uploaded_file.name.split('.')[-1].lower()
+     if file_type == 'csv':
+         try:
+             df = pd.read_csv(uploaded_file)
+             return df, "csv"
+         except Exception as e:
+             st.error(f"Error reading CSV file: {e}")
+             return None, None
+     elif file_type == 'txt':
+         try:
+             # Try comma-separated first; fall back to whitespace separation
+             try:
+                 df = pd.read_csv(uploaded_file, sep=",")
+             except Exception:
+                 uploaded_file.seek(0)  # rewind before retrying with another separator
+                 df = pd.read_csv(uploaded_file, sep=r"\s+")
+             return df, "txt"
+         except Exception as e:
+             st.error(f"Error reading TXT file: {e}")
+             return None, None
+     elif file_type == 'pdf':
+         try:
+             pdf_reader = PyPDF2.PdfReader(uploaded_file)
+             text = ""
+             for page in pdf_reader.pages:
+                 text += page.extract_text() or ""  # extract_text() may return None
+             # For demonstration, create a DataFrame with one text column
+             df = pd.DataFrame({"text": [text]})
+             return df, "pdf"
+         except Exception as e:
+             st.error(f"Error reading PDF file: {e}")
+             return None, None
+     else:
+         st.error("Unsupported file type.")
+         return None, None
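+
+ # A possible extension (a sketch, not wired in): parse numeric rows out of the
+ # extracted PDF text so anomaly detection could also run on PDF logs, assuming
+ # the PDF contains a simple whitespace-delimited table with a header row:
+ #
+ #     rows = [line.split() for line in text.splitlines() if line.strip()]
+ #     df_numeric = pd.DataFrame(rows[1:], columns=rows[0]).apply(
+ #         pd.to_numeric, errors="coerce")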
+
+ def run_local_anomaly_detection(df):
+     # Use IsolationForest for numeric data anomaly detection.
+     numeric_cols = df.select_dtypes(include=[np.number]).columns
+     if len(numeric_cols) < 2:
+         st.warning("Not enough numeric columns for anomaly detection. (Need at least 2 numeric columns)")
+         return df
+     X = df[numeric_cols].fillna(0)
+     model = IsolationForest(contamination=0.1, random_state=42)
+     model.fit(X)
+     # Model returns -1 for anomalies, 1 for normal records
+     df['anomaly'] = model.predict(X)
+     df['anomaly_flag'] = df['anomaly'].apply(lambda x: "🚨 Anomaly" if x == -1 else "✅ Normal")
+     return df
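+
+ # Note: contamination=0.1 hard-codes an expected ~10% anomaly rate. The previous
+ # revision exposed this as a sidebar control; the same idea would work here, e.g.
+ # contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1).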
+
+ def call_groq_api(df):
+     # ----- Dummy Groq API integration -----
+     # In a real implementation, you would send your data via a POST request like:
+     #   response = requests.post("https://api.groq.ai/detect", json=df.to_dict(orient="records"))
+     # and then process the JSON response.
+     # For demo purposes, we simply call the local model.
+     # ----------------------------------------
+     df = run_local_anomaly_detection(df)
+     return df
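+
+ # A live integration might look like the following sketch (the endpoint and the
+ # response shape are hypothetical, echoing the dummy comment above; they are not
+ # a documented Groq API):
+ #
+ #     resp = requests.post("https://api.groq.ai/detect",
+ #                          json=df.to_dict(orient="records"), timeout=30)
+ #     resp.raise_for_status()
+ #     df["anomaly"] = [r["label"] for r in resp.json()]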
+
+ def generate_plots(df):
+     # Create 2D and 3D scatter plots from the first feature columns,
+     # excluding the model's own numeric output column 'anomaly'
+     numeric_cols = [c for c in df.select_dtypes(include=[np.number]).columns
+                     if c != 'anomaly']
+     fig2d, fig3d = None, None
+     if len(numeric_cols) >= 2:
+         fig2d = px.scatter(df, x=numeric_cols[0], y=numeric_cols[1],
+                            color='anomaly_flag',
+                            title="📈 2D Anomaly Detection Plot")
+     if len(numeric_cols) >= 3:
+         fig3d = px.scatter_3d(df, x=numeric_cols[0], y=numeric_cols[1], z=numeric_cols[2],
+                               color='anomaly_flag',
+                               title="📊 3D Anomaly Detection Plot")
+     return fig2d, fig3d
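+
+ # Plotting the first feature columns directly assumes they are the most
+ # informative ones; the previous revision projected with PCA instead, which
+ # remains an option when there are many correlated features.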
+
+ def generate_pdf_report(summary_text, fig2d, fig3d):
+     pdf = FPDF()
+     pdf.add_page()
+     pdf.set_font("Arial", 'B', 16)
+     pdf.cell(0, 10, "WiFi Anomaly Detection Report", ln=True)
+     pdf.ln(10)
+     pdf.set_font("Arial", size=12)
+     pdf.multi_cell(0, 10, summary_text)
+     pdf.ln(10)
+
+     # Save figures as temporary image files using Kaleido (Plotly's image export engine)
+     image_files = []
+     if fig2d is not None:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
+             fig2d.write_image(tmpfile.name)
+             image_files.append(tmpfile.name)
+     if fig3d is not None:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
+             fig3d.write_image(tmpfile.name)
+             image_files.append(tmpfile.name)
+
+     # Add each image to the PDF
+     for image in image_files:
+         pdf.image(image, w=pdf.w - 40)
+         pdf.ln(10)
+
+     # Clean up temporary image files
+     for image in image_files:
+         os.remove(image)
+
+     pdf_output = io.BytesIO()
+     pdf.output(pdf_output)
+     pdf_data = pdf_output.getvalue()
+     pdf_output.close()
+     return pdf_data
+
153
+ # -------------------------------
154
+ # Main Workflow
155
+ # -------------------------------
156
+ if uploaded_file is not None:
157
+ df, file_type = load_data(uploaded_file)
158
 
159
+ if df is not None:
160
+ if file_type == "pdf":
161
+ st.subheader("πŸ“„ Extracted Text from PDF:")
162
+ st.text_area("PDF Content", df["text"][0], height=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  else:
164
+ st.subheader("πŸ“Š Data Preview:")
165
+ st.dataframe(df.head())
166
+
167
+ if st.button("▢️ Check Data Visualization & Summary"):
168
+ if file_type in ["csv", "txt"]:
169
+ # Run the selected anomaly detection method
170
+ if model_option == "Local Model":
171
+ df_result = run_local_anomaly_detection(df)
172
+ else:
173
+ df_result = call_groq_api(df)
174
+
175
+ st.subheader("πŸ“ Anomaly Detection Summary:")
176
+ anomaly_count = (df_result['anomaly'] == -1).sum()
177
+ total_count = df_result.shape[0]
178
+ summary_text = f"Total records: {total_count}\nDetected anomalies: {anomaly_count}"
179
+ st.text(summary_text)
180
+ st.dataframe(df_result.head())
181
+
182
+ fig2d, fig3d = generate_plots(df_result)
183
+ if fig2d:
184
+ st.plotly_chart(fig2d, use_container_width=True)
185
+ if fig3d:
186
+ st.plotly_chart(fig3d, use_container_width=True)
187
+
188
+ if st.button("⬇️ Download Report as PDF"):
189
+ pdf_data = generate_pdf_report(summary_text, fig2d, fig3d)
190
+ st.download_button("Download PDF", data=pdf_data,
191
+ file_name="wifi_anomaly_report.pdf",
192
+ mime="application/pdf")
193
+ else:
194
+ st.info("Anomaly detection is available only for CSV/TXT data.")
195
+ else:
196
+ st.info("Please upload a CSV, TXT, or PDF file to begin. 😊")