Spaces:

sunbal7
/

PublicWiFiAnomalyDetection

Sleeping

App Files Files Community

sunbal7 committed on Mar 1

Commit

c6655cf

verified ·

1 Parent(s): c3de3ec

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -205

app.py CHANGED Viewed

@@ -1,214 +1,196 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
-import matplotlib.pyplot as plt
-from pyod.models.iforest import IForest
-from pyod.models.lof import LOF
-from pyod.models.ocsvm import OCSVM
-from pyod.models.combination import aom, moa, average
-from pyod.utils.utility import standardizer
-from sklearn.decomposition import PCA
-from sklearn.metrics import precision_score, recall_score
-from datetime import datetime
-def generate_report(data, predictions, model_names, metrics):
-    # Create markdown table manually
-    metrics_table = "\n".join([
-        "| Model | Precision | Recall |",
-        "|-------|-----------|--------|"
-    ] + [
-        f"| {row['Model']} | {row['Precision']} | {row['Recall']} |"
-        for _, row in metrics.iterrows()
-    ])
-    report = f"""
-    Network Anomaly Detection Report
-    Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
-    -----------------------------------------------
-    Total Data Points: {len(data)}
-    Features Analyzed: {', '.join(data.columns)}
-    Detection Results:
-    - Total Anomalies Detected: {sum(predictions)}
-    - Anomaly Percentage: {sum(predictions)/len(data):.2%}
-    Model Performance:
-    {metrics_table}
-    Conclusion:
-    The system detected {sum(predictions)} potential anomalies using ensemble of {len(model_names)} models.
-    Recommended actions: Investigate flagged points, check network equipment logs, and verify traffic patterns.
-    """
-    return report
-def plot_3d_projections(data, predictions):
-    fig = plt.figure(figsize=(10, 7))
-    ax = fig.add_subplot(111, projection='3d')
-    pca = PCA(n_components=3)
-    projections = pca.fit_transform(data)
-    normal = projections[predictions == 0]
-    anomalies = projections[predictions == 1]
-    ax.scatter(normal[:,0], normal[:,1], normal[:,2], c='b', label='Normal')
-    ax.scatter(anomalies[:,0], anomalies[:,1], anomalies[:,2], c='r', marker='x', label='Anomaly')
-    ax.set_xlabel('PC1')
-    ax.set_ylabel('PC2')
-    ax.set_zlabel('PC3')
-    plt.title('3D PCA Projection of Network Data')
-    plt.legend()
-    return fig
-def main():
-    st.title("🛜 AI Network Anomaly Detection with Multi-Model Ensemble")
-    # Sidebar configuration
-    st.sidebar.header("Model Configuration")
-    models = st.sidebar.multiselect(
-        "Select Detection Models",
-        ["Isolation Forest", "Local Outlier Factor", "One-Class SVM"],
-        default=["Isolation Forest", "Local Outlier Factor"]
-    )
-    contamination = st.sidebar.slider("Expected Anomaly Ratio", 0.01, 0.5, 0.1)
-    ensemble_method = st.sidebar.selectbox("Ensemble Method", ["Average", "MOA", "AOM"])
-    # Data handling
-    uploaded_file = st.file_uploader("Upload network data (CSV)", type=["csv"])
-    if uploaded_file:
-        try:
-            data = pd.read_csv(uploaded_file)
-            st.success("Uploaded data loaded successfully!")
-        except Exception as e:
-            st.error(f"Error reading file: {str(e)}")
-            return
-    else:
-        # Generate synthetic network data
-        np.random.seed(42)
-        n_samples = 500
-        data = pd.DataFrame({
-            "traffic": np.random.normal(100, 15, n_samples),
-            "latency": np.random.normal(50, 8, n_samples),
-            "packet_loss": np.random.normal(0.5, 0.2, n_samples),
-            "error_rate": np.random.normal(0.1, 0.05, n_samples)
-        })
-        # Inject anomalies
-        anomaly_idx = np.random.choice(n_samples, 50, replace=False)
-        data.loc[anomaly_idx, 'traffic'] *= 2.5
-        data.loc[anomaly_idx, 'latency'] += 100
-        data.loc[anomaly_idx, 'packet_loss'] *= 4
-        st.info("Using synthetic network data. Upload a CSV to use your own.")
-    # Data preprocessing
-    numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
-    if not numeric_cols:
-        st.error("No numeric columns found for analysis!")
-        return
-    X = data[numeric_cols].values
-    X_norm = standardizer(X)
-    # Model initialization
-    model_dict = {
-        "Isolation Forest": IForest(contamination=contamination, n_jobs=-1),
-        "Local Outlier Factor": LOF(contamination=contamination, n_jobs=-1),
-        "One-Class SVM": OCSVM(contamination=contamination)
-    }
-    selected_models = [model_dict[m] for m in models if m in model_dict]
-    if not selected_models:
-        st.error("Please select at least one detection model!")
-        return
-    # Model training
-    try:
-        st.subheader("Model Training Progress")
-        progress_bar = st.progress(0)
-        train_scores = np.zeros([len(X), len(selected_models)])
-        for i, model in enumerate(selected_models):
-            model.fit(X_norm)
-            train_scores[:, i] = model.decision_function(X_norm)
-            progress_bar.progress((i+1)/len(selected_models))
-    except Exception as e:
-        st.error(f"Model training failed: {str(e)}")
-        return
-    # Ensemble prediction
-    try:
-        if ensemble_method == "Average":
-            combined_scores = average(train_scores)
-        elif ensemble_method == "MOA":
-            combined_scores = moa(train_scores)
         else:
-            combined_scores = aom(train_scores)
-    except Exception as e:
-        st.error(f"Ensemble method failed: {str(e)}")
-        return
-    threshold = np.percentile(combined_scores, 100*(1-contamination))
-    predictions = (combined_scores > threshold).astype(int)
-    # Performance metrics
-    if uploaded_file is None:
-        y_true = np.zeros(n_samples)
-        y_true[anomaly_idx] = 1
-        precision = precision_score(y_true, predictions)
-        recall = recall_score(y_true, predictions)
-    else:
-        precision = recall = "N/A (No ground truth)"
-    metrics_df = pd.DataFrame({
-        "Model": models + ["Ensemble"],
-        "Precision": list([m.decision_scores_.mean() for m in selected_models]) + [precision],
-        "Recall": list([m.decision_scores_.std() for m in selected_models]) + [recall]
-    })
-    # Display results
-    st.subheader("Detection Results")
-    col1, col2 = st.columns(2)
-    with col1:
-        st.metric("Total Anomalies", sum(predictions))
-        st.metric("Anomaly Ratio", f"{sum(predictions)/len(data):.2%}")
-    with col2:
-        st.metric("Ensemble Precision", f"{precision:.2%}" if isinstance(precision, float) else precision)
-        st.metric("Ensemble Recall", f"{recall:.2%}" if isinstance(recall, float) else recall)
-    # Visualization
-    st.subheader("Data Visualization")
-    tab1, tab2 = st.tabs(["2D Projection", "3D Projection"])
-    with tab1:
-        fig, ax = plt.subplots(figsize=(10, 6))
-        pca = PCA(n_components=2)
-        viz_data = pca.fit_transform(X_norm)
-        ax.scatter(viz_data[predictions==0, 0], viz_data[predictions==0, 1],
-                    c='blue', label='Normal', alpha=0.6)
-        ax.scatter(viz_data[predictions==1, 0], viz_data[predictions==1, 1],
-                    c='red', marker='x', label='Anomaly')
-        ax.set_xlabel("Principal Component 1")
-        ax.set_ylabel("Principal Component 2")
-        ax.set_title("PCA Projection of Network Data")
-        ax.legend()
-        st.pyplot(fig)
-    with tab2:
-        st.pyplot(plot_3d_projections(X_norm, predictions))
-    # Report generation
-    st.subheader("Analysis Report")
-    report = generate_report(data[numeric_cols], predictions, models, metrics_df)
-    st.code(report, language='text')
-    # Download report
-    st.download_button(
-        label="Download Full Report",
-        data=report,
-        file_name=f"network_anomaly_report_{datetime.now().strftime('%Y%m%d')}.txt",
-        mime="text/plain"
-    )
-if __name__ == "__main__":
-    main()

 import streamlit as st
 import pandas as pd
 import numpy as np
+import plotly.express as px
+from sklearn.ensemble import IsolationForest
+import io
+from fpdf import FPDF
+import requests
+import PyPDF2
+import tempfile
+import os
+st.set_page_config(page_title="🚀 WiFi Anomaly Detection", layout="wide")
+# -------------------------------
+# WiFi Anomaly Detection Overview
+# -------------------------------
+st.title("🚀 WiFi Anomaly Detection Overview")
+st.markdown("""
+**Detect anomalies in Public Wi-Fi Systems**:
+Identify suspicious spikes that may indicate hacking attempts, ensuring proactive maintenance and reliable network performance.
+""")
+st.markdown("### How it Works:")
+st.markdown("""
+- **Data Collection:** Upload network logs in CSV, TXT, or PDF format.
+- **Anomaly Detection:** Use AI algorithms to automatically spot unusual patterns.
+- **Visualization:** Review data in 2D and 3D interactive charts.
+- **Report Generation:** Download a comprehensive PDF report with summaries and visuals.
+""")
+# -------------------------------
+# Sidebar: File Upload & Options
+# -------------------------------
+st.sidebar.header("📁 Upload Data File")
+uploaded_file = st.sidebar.file_uploader("Choose a file", type=["csv", "txt", "pdf"])
+st.sidebar.markdown("---")
+model_option = st.sidebar.radio("Select Anomaly Detection Model", ("Local Model", "Groq API"))
+# -------------------------------
+# Helper Functions
+# -------------------------------
+def load_data(uploaded_file):
+    file_type = uploaded_file.name.split('.')[-1].lower()
+    if file_type == 'csv':
+        try:
+            df = pd.read_csv(uploaded_file)
+            return df, "csv"
+        except Exception as e:
+            st.error("Error reading CSV file.")
+            return None, None
+    elif file_type == 'txt':
+        try:
+            # Try comma separated first; if not, try whitespace separation
+            try:
+                df = pd.read_csv(uploaded_file, sep=",")
+            except:
+                df = pd.read_csv(uploaded_file, sep="\s+")
+            return df, "txt"
+        except Exception as e:
+            st.error("Error reading TXT file.")
+            return None, None
+    elif file_type == 'pdf':
+        try:
+            pdf_reader = PyPDF2.PdfReader(uploaded_file)
+            text = ""
+            for page in pdf_reader.pages:
+                text += page.extract_text()
+            # For demonstration, create a DataFrame with one text column
+            df = pd.DataFrame({"text": [text]})
+            return df, "pdf"
+        except Exception as e:
+            st.error("Error reading PDF file.")
+            return None, None
+    else:
+        st.error("Unsupported file type.")
+        return None, None
+def run_local_anomaly_detection(df):
+    # Use IsolationForest for numeric data anomaly detection.
+    numeric_cols = df.select_dtypes(include=[np.number]).columns
+    if len(numeric_cols) < 2:
+        st.warning("Not enough numeric columns for anomaly detection. (Need at least 2 numeric columns)")
+        return df
+    X = df[numeric_cols].fillna(0)
+    model = IsolationForest(contamination=0.1, random_state=42)
+    model.fit(X)
+    # Model returns -1 for anomalies, 1 for normal records
+    df['anomaly'] = model.predict(X)
+    df['anomaly_flag'] = df['anomaly'].apply(lambda x: "🚨 Anomaly" if x == -1 else "✅ Normal")
+    return df
+def call_groq_api(df):
+    # ----- Dummy Groq API integration -----
+    # In a real implementation, you would send your data via a POST request like:
+    #   response = requests.post("https://api.groq.ai/detect", json=df.to_dict(orient="records"))
+    # and then process the JSON response.
+    # For demo purposes, we simply call the local model.
+    # ----------------------------------------
+    df = run_local_anomaly_detection(df)
+    return df
+def generate_plots(df):
+    # Create 2D and 3D scatter plots based on the first numeric columns
+    numeric_cols = df.select_dtypes(include=[np.number]).columns
+    fig2d, fig3d = None, None
+    if len(numeric_cols) >= 2:
+        fig2d = px.scatter(df, x=numeric_cols[0], y=numeric_cols[1],
+                           color='anomaly_flag',
+                           title="📈 2D Anomaly Detection Plot")
+    if len(numeric_cols) >= 3:
+        fig3d = px.scatter_3d(df, x=numeric_cols[0], y=numeric_cols[1], z=numeric_cols[2],
+                              color='anomaly_flag',
+                              title="📊 3D Anomaly Detection Plot")
+    return fig2d, fig3d
+def generate_pdf_report(summary_text, fig2d, fig3d):
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_font("Arial", 'B', 16)
+    pdf.cell(0, 10, "WiFi Anomaly Detection Report", ln=True)
+    pdf.ln(10)
+    pdf.set_font("Arial", size=12)
+    pdf.multi_cell(0, 10, summary_text)
+    pdf.ln(10)
+    # Save figures as temporary image files using Kaleido (Plotly's image export engine)
+    image_files = []
+    if fig2d is not None:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
+            fig2d.write_image(tmpfile.name)
+            image_files.append(tmpfile.name)
+    if fig3d is not None:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmpfile:
+            fig3d.write_image(tmpfile.name)
+            image_files.append(tmpfile.name)
+    # Add each image to the PDF
+    for image in image_files:
+        pdf.image(image, w=pdf.w - 40)
+        pdf.ln(10)
+    # Clean up temporary image files
+    for image in image_files:
+        os.remove(image)
+    pdf_output = io.BytesIO()
+    pdf.output(pdf_output)
+    pdf_data = pdf_output.getvalue()
+    pdf_output.close()
+    return pdf_data
+# -------------------------------
+# Main Workflow
+# -------------------------------
+if uploaded_file is not None:
+    df, file_type = load_data(uploaded_file)
+    if df is not None:
+        if file_type == "pdf":
+            st.subheader("📄 Extracted Text from PDF:")
+            st.text_area("PDF Content", df["text"][0], height=300)
         else:
+            st.subheader("📊 Data Preview:")
+            st.dataframe(df.head())
+        if st.button("▶️ Check Data Visualization & Summary"):
+            if file_type in ["csv", "txt"]:
+                # Run the selected anomaly detection method
+                if model_option == "Local Model":
+                    df_result = run_local_anomaly_detection(df)
+                else:
+                    df_result = call_groq_api(df)
+                st.subheader("📝 Anomaly Detection Summary:")
+                anomaly_count = (df_result['anomaly'] == -1).sum()
+                total_count = df_result.shape[0]
+                summary_text = f"Total records: {total_count}\nDetected anomalies: {anomaly_count}"
+                st.text(summary_text)
+                st.dataframe(df_result.head())
+                fig2d, fig3d = generate_plots(df_result)
+                if fig2d:
+                    st.plotly_chart(fig2d, use_container_width=True)
+                if fig3d:
+                    st.plotly_chart(fig3d, use_container_width=True)
+                if st.button("⬇️ Download Report as PDF"):
+                    pdf_data = generate_pdf_report(summary_text, fig2d, fig3d)
+                    st.download_button("Download PDF", data=pdf_data,
+                                       file_name="wifi_anomaly_report.pdf",
+                                       mime="application/pdf")
+            else:
+                st.info("Anomaly detection is available only for CSV/TXT data.")
+else:
+    st.info("Please upload a CSV, TXT, or PDF file to begin. 😊")