File size: 4,635 Bytes
c4cf758
 
 
 
 
 
a32c351
873bd97
c4cf758
 
 
 
1867a74
 
 
 
3154606
1867a74
a32c351
 
 
c4cf758
1867a74
a32c351
 
c4cf758
1867a74
a32c351
 
 
1867a74
a32c351
 
 
1867a74
a32c351
 
 
873bd97
 
a32c351
1867a74
 
 
 
a32c351
 
 
 
 
 
 
 
 
c4cf758
1867a74
 
 
c4cf758
1867a74
 
c4cf758
1867a74
a32c351
 
 
1867a74
a32c351
 
 
 
 
 
 
c4cf758
 
 
 
 
a32c351
c4cf758
 
 
 
1867a74
a32c351
1867a74
a32c351
 
1867a74
 
a32c351
 
 
c4cf758
a32c351
 
1867a74
 
a32c351
c4cf758
1867a74
c4cf758
1867a74
 
c4cf758
 
 
 
a32c351
c4cf758
1867a74
c4cf758
a32c351
 
1867a74
 
 
c4cf758
 
1867a74
c4cf758
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# Read the raw traffic table from disk
webtraffic_data = pd.read_csv("webtraffic.csv")

# Replace the numeric 'Hour Index' column with a real timestamp column:
# each value is treated as a number of hours elapsed since 2024-01-01 00:00
start_date = pd.Timestamp("2024-01-01 00:00:00")
webtraffic_data['Datetime'] = start_date + pd.to_timedelta(webtraffic_data['Hour Index'], unit='h')
webtraffic_data = webtraffic_data.drop(columns=['Hour Index'])

# Positional 80/20 split: the first 80% of rows train, the remainder test
train_size = int(len(webtraffic_data) * 0.8)
train_data, test_data = webtraffic_data.iloc[:train_size], webtraffic_data.iloc[train_size:]

# Restore the already-fitted models from disk.
# NOTE(review): joblib.load unpickles its input, so only trusted files should be loaded here.
sarima_model = joblib.load("sarima_model.pkl")  # SARIMA model
lstm_model = tf.keras.models.load_model("lstm_model.keras")  # LSTM model

# Two independent 0..1 MinMax scalers: one for the LSTM input, one for its target
scaler_X = MinMaxScaler(feature_range=(0, 1))
scaler_y = MinMaxScaler(feature_range=(0, 1))

# 'Sessions' as (n, 1) column vectors, the 2-D layout the scalers expect
train_sessions = train_data['Sessions'].values.reshape(-1, 1)
test_sessions = test_data['Sessions'].values.reshape(-1, 1)

# Scaling ranges are learned from the training split only
X_train_scaled = scaler_X.fit_transform(train_sessions)
y_train_scaled = scaler_y.fit_transform(train_sessions)

# The test split is transformed with the ranges learned above
# NOTE(review): input and target are both the same 'Sessions' values here —
# confirm this matches how the LSTM was trained.
X_test_scaled = scaler_X.transform(test_sessions)
y_test_scaled = scaler_y.transform(test_sessions)

# LSTM input layout: (samples, time_steps, features) with one step and one feature
X_test_lstm = X_test_scaled.reshape((len(X_test_scaled), 1, 1))

# SARIMA forecast over the positions occupied by the test split
# (from the first row after the training data through the last row of the dataset)
sarima_predictions = sarima_model.predict(start=len(train_data), end=len(webtraffic_data) - 1)

# LSTM forecast: predict in scaled space, then map back to session counts
lstm_predictions_scaled = lstm_model.predict(X_test_lstm)
lstm_predictions = scaler_y.inverse_transform(lstm_predictions_scaled).flatten()

# Combine predictions into a DataFrame for visualization.
# The SARIMA output may be a pandas Series carrying its own index; pandas aligns
# Series by label when building a DataFrame from a dict, which would misalign rows
# (or fail on length) if that index differs from test_data's. Converting to a plain
# array inserts the values positionally, so the frame keeps test_data's index.
future_predictions = pd.DataFrame({
    "Datetime": test_data['Datetime'],
    "SARIMA_Predicted": np.asarray(sarima_predictions),
    "LSTM_Predicted": lstm_predictions,
})

# Calculate metrics on the test split.
# RMSE is computed as sqrt(MSE) rather than via `squared=False`: that keyword was
# deprecated in scikit-learn 1.4 and later removed, so passing it raises a
# TypeError on current releases. The square-root form works on every version.
mae_sarima = mean_absolute_error(test_data['Sessions'], sarima_predictions)
rmse_sarima = np.sqrt(mean_squared_error(test_data['Sessions'], sarima_predictions))

mae_lstm = mean_absolute_error(test_data['Sessions'], lstm_predictions)
rmse_lstm = np.sqrt(mean_squared_error(test_data['Sessions'], lstm_predictions))

# Function to generate plots
def generate_plot(model):
    """Plot actual test traffic against one model's predictions and save it as a PNG.

    The actual 'Sessions' series from ``test_data`` is always drawn. The matching
    prediction column of ``future_predictions`` is overlaid when ``model`` is
    "SARIMA" or "LSTM"; for any other value only the actual series is drawn.

    Args:
        model: Name of the model to overlay; also used in the title and file name.

    Returns:
        Path of the written image, ``"<model lower-cased>_plot.png"``.
    """
    # Build a standalone Figure instead of going through pyplot. pyplot keeps a
    # global "current figure" and may try to start a GUI backend, neither of which
    # is safe when this function runs off the main thread (as UI callbacks may).
    # A standalone Figure is also not registered with pyplot, so nothing is leaked
    # if saving fails part-way.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(15, 6))
    ax = fig.subplots()
    ax.plot(test_data['Datetime'], test_data['Sessions'], label='Actual Traffic', color='black', linestyle='dotted', linewidth=2)

    if model == "SARIMA":
        ax.plot(future_predictions['Datetime'], future_predictions['SARIMA_Predicted'], label='SARIMA Predicted', color='blue', linewidth=2)
    elif model == "LSTM":
        ax.plot(future_predictions['Datetime'], future_predictions['LSTM_Predicted'], label='LSTM Predicted', color='green', linewidth=2)

    ax.set_title(f"{model} Predictions vs Actual Traffic", fontsize=16)
    ax.set_xlabel("Datetime", fontsize=12)
    ax.set_ylabel("Sessions", fontsize=12)
    ax.legend(loc="upper left")
    ax.grid(True)
    fig.tight_layout()

    plot_path = f"{model.lower()}_plot.png"
    fig.savefig(plot_path)
    return plot_path

# Function to display metrics
def display_metrics():
    """Return a two-row table with the MAE and RMSE of the SARIMA and LSTM models.

    The values are read from the module-level ``mae_*`` / ``rmse_*`` variables.
    """
    model_names = ["SARIMA", "LSTM"]
    mae_values = [mae_sarima, mae_lstm]
    rmse_values = [rmse_sarima, rmse_lstm]
    return pd.DataFrame({
        "Model": model_names,
        "Mean Absolute Error (MAE)": mae_values,
        "Root Mean Squared Error (RMSE)": rmse_values,
    })

# Gradio interface function
def dashboard_interface(model="SARIMA"):
    """Produce the dashboard outputs for the chosen model.

    Returns a pair: the path of the saved prediction plot for ``model`` and the
    metrics table for both models rendered as plain text.
    """
    # The plot is rendered first, then the metrics table is built and stringified
    return generate_plot(model), display_metrics().to_string()

# Assemble the Gradio UI
with gr.Blocks() as dashboard:
    gr.Markdown("## Web Traffic Prediction Dashboard")
    gr.Markdown("Select a model to view its predictions and performance metrics.")

    # Model chooser, pre-set to SARIMA
    model_selection = gr.Dropdown(choices=["SARIMA", "LSTM"], label="Select Model", value="SARIMA")

    # Output widgets: the saved plot image and the metrics table as text
    plot_output = gr.Image(label="Prediction Plot")
    metrics_output = gr.Textbox(label="Metrics", lines=10)

    # Clicking the button re-runs dashboard_interface with the current selection
    update_button = gr.Button("Update Dashboard")
    update_button.click(
        dashboard_interface,
        inputs=[model_selection],
        outputs=[plot_output, metrics_output],
    )

# Start serving the dashboard
dashboard.launch()