import os
import re
import sys
import pickle
import sqlite3
import tempfile

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import streamlit as st
import sweetviz as sv
from sklearn.metrics import r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode

from Data_prep_functions import plot_actual_vs_predicted
from utilities import set_header, load_local_css, load_authenticator, update_db
sys.setrecursionlimit(10**6)

# Truncate temp_stdout.txt (opening in "w" mode clears it), then restore the
# original stdout handle.
original_stdout = sys.stdout
sys.stdout = open("temp_stdout.txt", "w")
sys.stdout.close()
sys.stdout = original_stdout

st.set_page_config(layout="wide")
load_local_css("styles.css")
set_header()
# TODO:
## 1. Add non panel model support
## 2. EDA Function
# Re-assign each session-state entry to itself so Streamlit keeps the values
# alive across page switches (widget-backed keys are otherwise dropped).
for k, v in st.session_state.items():
    if k not in ["logout", "login", "config"] and not k.startswith("FormSubmitter"):
        st.session_state[k] = v
authenticator = st.session_state.get("authenticator")
if authenticator is None:
    authenticator = load_authenticator()

name, authentication_status, username = authenticator.login("Login", "main")
auth_status = st.session_state.get("authentication_status")
if auth_status is True:
    is_state_initialized = st.session_state.get("initialized", False)
    if not is_state_initialized:
        if "session_name" not in st.session_state:
            st.session_state["session_name"] = None

    if "project_dct" not in st.session_state:
        st.error("Please load a project from Home page")
        st.stop()

    conn = sqlite3.connect(r"DB/User.db", check_same_thread=False)  # connection with SQL db
    c = conn.cursor()

    if not os.path.exists(
        os.path.join(st.session_state["project_path"], "tuned_model.pkl")
    ):
        st.error("Please save a tuned model")
        st.stop()
    if (
        "session_state_saved" in st.session_state["project_dct"]["model_tuning"]
        and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] != []
    ):
        for key in ["used_response_metrics", "media_data", "bin_dict"]:
            if key not in st.session_state:
                st.session_state[key] = st.session_state["project_dct"][
                    "model_tuning"
                ]["session_state_saved"][key]
        st.session_state["bin_dict"] = st.session_state["project_dct"]["model_build"][
            "session_state_saved"
        ]["bin_dict"]
    media_data = st.session_state["media_data"]
    st.write(media_data.columns)  # debug: show available columns

    # Set the panel column: normalise the first "Panel Level 1" column name the
    # same way the modelling pages do.
    panel_col = [
        col.lower()
        .replace(".", "_")
        .replace("@", "_")
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
        for col in st.session_state["bin_dict"]["Panel Level 1"]
    ][0]
    is_panel = len(panel_col) > 0

    date_col = "date"
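    # ---- Residual diagnostics helpers ----
    # The three helpers below standardise residuals as (r - mean(r)) / std(r),
    # so the +/-2 reference lines on the scatter mark roughly the 95% band
    # under normality, and the QQ plot compares them against theoretical
    # normal quantiles.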
    def plot_residual_predicted(actual, predicted, df_):
        """Scatter of standardized residuals against predicted values."""
        df_["Residuals"] = actual - pd.Series(predicted)
        df_["StdResidual"] = (df_["Residuals"] - df_["Residuals"].mean()) / df_[
            "Residuals"
        ].std()

        # Create a Plotly scatter plot
        fig = px.scatter(
            df_,
            x=predicted,
            y="StdResidual",
            opacity=0.5,
            color_discrete_sequence=["#11B6BD"],
        )

        # Add horizontal reference lines
        fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
        fig.add_hline(y=2, line_color="red")
        fig.add_hline(y=-2, line_color="red")

        fig.update_xaxes(title="Predicted")
        fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")

        # Set the same width and height for both figures
        fig.update_layout(
            title="Residuals over Predicted Values",
            autosize=False,
            width=600,
            height=400,
        )
        return fig
    def residual_distribution(actual, predicted):
        """Histogram (with KDE) of raw residuals."""
        Residuals = actual - pd.Series(predicted)

        # Create a Seaborn distribution plot
        sns.set(style="whitegrid")
        plt.figure(figsize=(6, 4))
        sns.histplot(Residuals, kde=True, color="#11B6BD")

        plt.title("Distribution of Residuals")
        plt.xlabel("Residuals")
        plt.ylabel("Probability Density")
        return plt
    def qqplot(actual, predicted):
        """QQ plot of standardized residuals against normal quantiles."""
        Residuals = pd.Series(actual - pd.Series(predicted))
        Resud_std = (Residuals - Residuals.mean()) / Residuals.std()

        # Create a QQ plot using Plotly with custom colors
        probplot = sm.ProbPlot(Resud_std)
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=probplot.theoretical_quantiles,
                y=probplot.sample_quantiles,
                mode="markers",
                marker=dict(size=5, color="#11B6BD"),
                name="QQ Plot",
            )
        )

        # Add the 45-degree reference line
        diagonal_line = go.Scatter(
            x=[-2, 2],  # Adjust the x values as needed to fit the range of your data
            y=[-2, 2],  # Adjust the y values accordingly
            mode="lines",
            line=dict(color="red"),
            name=" ",
        )
        fig.add_trace(diagonal_line)

        # Customize the layout
        fig.update_layout(
            title="QQ Plot of Residuals",
            title_x=0.5,
            autosize=False,
            width=600,
            height=400,
            xaxis_title="Theoretical Quantiles",
            yaxis_title="Sample Quantiles",
        )
        return fig
    def get_random_effects(media_data, panel_col, mdf):
        """Collect the fitted random intercept of every panel into a DataFrame."""
        random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
        for i, market in enumerate(media_data[panel_col].unique()):
            print(i, end="\r")
            intercept = mdf.random_effects[market].values[0]
            random_eff_df.loc[i, "random_effect"] = intercept
            random_eff_df.loc[i, panel_col] = market
        return random_eff_df
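    # Illustrative shape of the returned frame (values are made up):
    #        <panel_col>  random_effect
    #   0    market_a              0.12
    #   1    market_b             -0.05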
    def mdf_predict(X_df, mdf, random_eff_df):
        """Mixed-effects prediction: fixed-effect prediction plus the panel's random intercept."""
        X = X_df.copy()
        X = pd.merge(
            X,
            random_eff_df[[panel_col, "random_effect"]],
            on=panel_col,
            how="left",
        )
        X["pred_fixed_effect"] = mdf.predict(X)
        X["pred"] = X["pred_fixed_effect"] + X["random_effect"]
        X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True)
        return X
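    # For a random-intercept model the prediction decomposes as
    #   y_hat_ij = X_ij @ beta + u_j
    # where beta are the fixed-effect coefficients and u_j is panel j's
    # random intercept; mdf_predict reassembles exactly that sum.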
    def metrics_df_panel(model_dict):
        """Build a table of R2, adjusted R2, and train/test MAPE for every tuned model."""
        metrics_df = pd.DataFrame(
            columns=[
                "Model",
                "R2",
                "ADJR2",
                "Train Mape",
                "Test Mape",
                "Summary",
                "Model_object",
            ]
        )
        i = 0
        for key in model_dict.keys():
            target = key.split("__")[1]
            metrics_df.at[i, "Model"] = target

            y = model_dict[key]["X_train_tuned"][target]
            random_df = get_random_effects(
                media_data, panel_col, model_dict[key]["Model_object"]
            )
            pred = mdf_predict(
                model_dict[key]["X_train_tuned"],
                model_dict[key]["Model_object"],
                random_df,
            )["pred"]

            ytest = model_dict[key]["X_test_tuned"][target]
            predtest = mdf_predict(
                model_dict[key]["X_test_tuned"],
                model_dict[key]["Model_object"],
                random_df,
            )["pred"]

            metrics_df.at[i, "R2"] = r2_score(y, pred)
            # Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1),
            # with n train rows and p features.
            metrics_df.at[i, "ADJR2"] = 1 - (1 - metrics_df.loc[i, "R2"]) * (
                len(y) - 1
            ) / (len(y) - len(model_dict[key]["feature_set"]) - 1)
            metrics_df.at[i, "Train Mape"] = mean_absolute_percentage_error(y, pred)
            metrics_df.at[i, "Test Mape"] = mean_absolute_percentage_error(
                ytest, predtest
            )
            metrics_df.at[i, "Summary"] = model_dict[key]["Model_object"].summary()
            metrics_df.at[i, "Model_object"] = model_dict[key]["Model_object"]
            i += 1

        metrics_df = np.round(metrics_df, 2)
        return metrics_df
    with open(
        os.path.join(st.session_state["project_path"], "final_df_transformed.pkl"),
        "rb",
    ) as f:
        data = pickle.load(f)
    transformed_data = data["final_df_transformed"]

    with open(
        os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb"
    ) as f:
        data = pickle.load(f)
    st.session_state["bin_dict"] = data["bin_dict"]

    with open(
        os.path.join(st.session_state["project_path"], "tuned_model.pkl"), "rb"
    ) as file:
        tuned_model_dict = pickle.load(file)
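    # Expected structure of tuned_model_dict, judging from the accesses below:
    # {"<model>__<target>": {"Model_object": <fitted mixed-effects result>,
    #                        "feature_set": [...],
    #                        "X_train_tuned": <DataFrame>,
    #                        "X_test_tuned": <DataFrame>}, ...}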
    feature_set_dct = {
        key.split("__")[1]: key_dict["feature_set"]
        for key, key_dict in tuned_model_dict.items()
    }
    # TODO: the above part should be modified so that we are fetching the
    # feature set from the saved model.
    def contributions(X, model, target):
        """Percentage contribution of each feature, from coefficient * feature sums."""
        X1 = X.copy()
        for j, col in enumerate(X1.columns):
            X1[col] = X1[col] * model.params.values[j]

        contributions = np.round(
            (X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2
        )
        contributions = (
            pd.DataFrame(contributions, columns=[target])
            .reset_index()
            .rename(columns={"index": "Channel"})
        )
        contributions["Channel"] = [
            re.split(r"_imp|_cli", col)[0] for col in contributions["Channel"]
        ]
        return contributions
if "contribution_df" not in st.session_state: | |
st.session_state["contribution_df"] = None | |
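    # contributions_panel decomposes every fitted value into
    #   panel_effect (fixed intercept + panel random intercept)
    #   + coef * feature for each regressor,
    # then folds panel_effect plus the trend/seasonality terms into a single
    # "base" bucket and rescales each target's contributions to sum to 100%.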
    def contributions_panel(model_dict):
        media_data = st.session_state["media_data"]
        contribution_df = pd.DataFrame(columns=["Channel"])
        for key in model_dict.keys():
            best_feature_set = model_dict[key]["feature_set"]
            model = model_dict[key]["Model_object"]
            target = key.split("__")[1]
            X_train = model_dict[key]["X_train_tuned"]
            contri_df = pd.DataFrame()

            y = []
            y_pred = []

            random_eff_df = get_random_effects(media_data, panel_col, model)
            random_eff_df["fixed_effect"] = model.fe_params["Intercept"]
            random_eff_df["panel_effect"] = (
                random_eff_df["random_effect"] + random_eff_df["fixed_effect"]
            )

            coef_df = pd.DataFrame(model.fe_params)
            coef_df.reset_index(inplace=True)
            coef_df.columns = ["feature", "coef"]

            x_train_contribution = X_train.copy()
            x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df)
            x_train_contribution = pd.merge(
                x_train_contribution,
                random_eff_df[[panel_col, "panel_effect"]],
                on=panel_col,
                how="left",
            )

            # Skip row 0 (the intercept); multiply each remaining coefficient
            # into its feature column.
            for i in range(1, len(coef_df)):
                coef = coef_df.loc[i, "coef"]
                col = coef_df.loc[i, "feature"]
                x_train_contribution[str(col) + "_contr"] = (
                    coef * x_train_contribution[col]
                )

            # Fold the panel effect and the trend/seasonality terms into "base".
            base_cols = ["panel_effect"] + [
                c
                for c in x_train_contribution.filter(regex="contr").columns
                if c
                in [
                    "Week_number_contr",
                    "Trend_contr",
                    "sine_wave_contr",
                    "cosine_wave_contr",
                ]
            ]
            x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(
                axis=1
            )
            x_train_contribution.drop(columns=base_cols, inplace=True)

            contri_df = pd.DataFrame(
                x_train_contribution.filter(regex="contr").sum(axis=0)
            )
            contri_df.reset_index(inplace=True)
            contri_df.columns = ["Channel", target]
            contri_df["Channel"] = (
                contri_df["Channel"]
                .str.split("(_impres|_clicks)")
                .apply(lambda c: c[0])
            )
            contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
            contri_df["Channel"] = contri_df["Channel"].replace("base_contr", "base")

            contribution_df = pd.merge(
                contribution_df, contri_df, on="Channel", how="outer"
            )
        return contribution_df
    metrics_table = metrics_df_panel(tuned_model_dict)

    st.title("AI Model Results")
    st.header("Contribution Overview")

    options = st.session_state["used_response_metrics"]
    st.write(options)  # debug: show raw response-metric names

    # Normalise the metric names the same way the model pages do, so they
    # match the contribution_df column names.
    options = [
        opt.lower()
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
        for opt in options
    ]
    default_options = (
        st.session_state["project_dct"]["saved_model_results"].get("selected_options")
        if st.session_state["project_dct"]["saved_model_results"].get(
            "selected_options"
        )
        is not None
        else [options[-1]]
    )
    # Drop saved defaults that are no longer valid options (don't mutate the
    # list while iterating over it).
    default_options = [opt for opt in default_options if opt in options]
    def format_display(inp):
        return inp.title().replace("_", " ").strip()

    contribution_selections = st.multiselect(
        "Select the Response Metrics to compare contributions",
        options,
        default=default_options,
        format_func=format_display,
    )

    st.session_state["contribution_df"] = contributions_panel(tuned_model_dict)
    st.write(st.session_state["contribution_df"].columns)  # debug
    def create_grouped_bar_plot(contribution_df, contribution_selections):
        """Grouped bar chart of average channel contributions, with base sales first."""
        # Extract the 'Channel' names
        channel_names = contribution_df["Channel"].tolist()

        # Dictionary to store all contributions except 'const' and 'base'
        all_contributions = {
            name: [] for name in channel_names if name not in ["const", "base"]
        }

        # Dictionary to store base sales for each selection
        base_sales_dict = {}

        # Accumulate contributions for each channel from each selection
        for selection in contribution_selections:
            contributions = contribution_df[selection].values.astype(float)
            base_sales = 0  # Initialize base sales for the current selection
            for channel_name, contribution in zip(channel_names, contributions):
                if channel_name in all_contributions:
                    all_contributions[channel_name].append(contribution)
                elif channel_name == "base":
                    base_sales = contribution  # Capture base sales for this selection
            # Store base sales for each selection
            base_sales_dict[selection] = base_sales

        # Calculate the average of contributions and sort by this average
        sorted_channels = sorted(all_contributions.items(), key=lambda x: -np.mean(x[1]))
        sorted_channel_names = [name for name, _ in sorted_channels]
        sorted_channel_names = ["Base Sales"] + sorted_channel_names  # 'Base Sales' first

        trace_data = []
        max_value = 0  # Track the highest bar for the y-axis range

        # Create traces for the grouped bar chart
        for selection in contribution_selections:
            display_name = sorted_channel_names
            display_contribution = [base_sales_dict[selection]] + [
                np.mean(all_contributions[name]) for name in sorted_channel_names[1:]
            ]  # Start with base sales for the current selection

            # Generate text labels for each bar
            text_values = [
                f"{val}%" for val in np.round(display_contribution, 0).astype(int)
            ]

            # Track the max value for the y-axis range
            max_contribution = max(display_contribution)
            if max_contribution > max_value:
                max_value = max_contribution

            # Create a bar trace for each selection
            trace = go.Bar(
                x=display_name,
                y=display_contribution,
                name=selection,
                text=text_values,
                textposition="outside",
            )
            trace_data.append(trace)

        # Define layout for the bar chart
        layout = go.Layout(
            title="Metrics Contribution by Channel",
            xaxis=dict(title="Channel Name"),
            yaxis=dict(
                title="Metrics Contribution", range=[0, max_value * 1.2]
            ),  # Set y-axis 20% higher than the max bar
            barmode="group",
            plot_bgcolor="white",
        )

        # Create the figure with trace data and layout
        fig = go.Figure(data=trace_data, layout=layout)
        return fig
    # Display the grouped bar chart in Streamlit
    st.plotly_chart(
        create_grouped_bar_plot(
            st.session_state["contribution_df"], contribution_selections
        ),
        use_container_width=True,
    )
    ############################################ Waterfall Chart ############################################
    def preprocess_and_plot(contribution_df, contribution_selections):
        """Waterfall chart of average channel contributions, with base sales first."""
        # Extract the 'Channel' names
        channel_names = contribution_df["Channel"].tolist()

        # Dictionary to store all contributions except 'const' and 'base'
        all_contributions = {
            name: [] for name in channel_names if name not in ["const", "base"]
        }

        # Dictionary to store base sales for each selection
        base_sales_dict = {}

        # Accumulate contributions for each channel from each selection
        for selection in contribution_selections:
            contributions = contribution_df[selection].values.astype(float)
            base_sales = 0  # Initialize base sales for the current selection
            for channel_name, contribution in zip(channel_names, contributions):
                if channel_name in all_contributions:
                    all_contributions[channel_name].append(contribution)
                elif channel_name == "base":
                    base_sales = contribution  # Capture base sales for this selection
            # Store base sales for each selection
            base_sales_dict[selection] = base_sales

        # Calculate the average of contributions and sort by this average
        sorted_channels = sorted(all_contributions.items(), key=lambda x: -np.mean(x[1]))
        sorted_channel_names = [name for name, _ in sorted_channels]
        sorted_channel_names = ["Base Sales"] + sorted_channel_names  # 'Base Sales' first

        # Initialize a Plotly figure
        fig = go.Figure()

        for selection in contribution_selections:
            display_name = ["Base Sales"] + sorted_channel_names[1:]  # Channel names for the plot
            display_contribution = [
                base_sales_dict[selection]
            ]  # Start with base sales for the current selection

            # Append average contributions for other channels
            for name in sorted_channel_names[1:]:
                display_contribution.append(np.mean(all_contributions[name]))

            # Generate text labels for each bar
            text_values = [
                f"{val}%" for val in np.round(display_contribution, 0).astype(int)
            ]

            # Add a waterfall trace for each selection
            fig.add_trace(
                go.Waterfall(
                    orientation="v",
                    measure=["relative"] * len(display_contribution),
                    x=display_name,
                    text=text_values,
                    textposition="outside",
                    y=display_contribution,
                    increasing={"marker": {"color": "green"}},
                    decreasing={"marker": {"color": "red"}},
                    totals={"marker": {"color": "blue"}},
                    name=selection,
                )
            )

        # Update layout of the figure
        fig.update_layout(
            title="Metrics Contribution by Channel",
            xaxis={"title": "Channel Name"},
            yaxis=dict(title="Metrics Contribution", range=[0, 120]),
        )
        return fig
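    # Note: preprocess_and_plot repeats the aggregation done in
    # create_grouped_bar_plot; the shared accumulation logic could be factored
    # into a single helper.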
    # Display the waterfall chart
    st.plotly_chart(
        preprocess_and_plot(
            st.session_state["contribution_df"], contribution_selections
        ),
        use_container_width=True,
    )
    ############################################ Waterfall Chart ############################################
st.header("Analysis of Models Result") | |
# st.markdown() | |
previous_selection = st.session_state["project_dct"]["saved_model_results"].get( | |
"model_grid_sel", [1] | |
) | |
# st.write(np.round(metrics_table, 2)) | |
gd_table = metrics_table.iloc[:, :-2] | |
gd = GridOptionsBuilder.from_dataframe(gd_table) | |
# gd.configure_pagination(enabled=True) | |
gd.configure_selection( | |
use_checkbox=True, | |
selection_mode="single", | |
pre_select_all_rows=False, | |
pre_selected_rows=previous_selection, | |
) | |
gridoptions = gd.build() | |
table = AgGrid( | |
gd_table, | |
gridOptions=gridoptions, | |
fit_columns_on_grid_load=True, | |
height=200, | |
) | |
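    # Note: the selected_rows handling below assumes streamlit-aggrid versions
    # where AgGrid returns selected_rows as a list of dicts (including
    # "_selectedRowNodeInfo"); newer releases return a DataFrame instead, so
    # this would need adapting there.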
    if len(table.selected_rows) > 0:
        st.session_state["project_dct"]["saved_model_results"]["model_grid_sel"] = (
            table.selected_rows[0]["_selectedRowNodeInfo"]["nodeRowIndex"]
        )

    if len(table.selected_rows) == 0:
        st.warning(
            "Click on the checkbox to view comprehensive results of the selected model."
        )
        st.stop()
    else:
        target_column = table.selected_rows[0]["Model"]
        feature_set = feature_set_dct[target_column]
    model = metrics_table[metrics_table["Model"] == target_column]["Model_object"].iloc[0]
    target = metrics_table[metrics_table["Model"] == target_column]["Model"].iloc[0]

    st.header("Model Summary")
    st.write(model.summary())

    # Pull the tuned-model entry whose key ends with the selected target.
    sel_dict = tuned_model_dict[
        [k for k in tuned_model_dict.keys() if k.split("__")[1] == target][0]
    ]

    X_train = sel_dict["X_train_tuned"]
    y_train = X_train[target]
    random_effects = get_random_effects(media_data, panel_col, model)
    pred = mdf_predict(X_train, model, random_effects)["pred"]

    X_test = sel_dict["X_test_tuned"]
    y_test = X_test[target]
    predtest = mdf_predict(X_test, model, random_effects)["pred"]
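    # Everything below renders diagnostics for the selected model: actual vs
    # predicted on train and test, then the residual scatter, QQ plot, and
    # residual histogram.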
    metrics_table_train, _, fig_train = plot_actual_vs_predicted(
        X_train[date_col],
        y_train,
        pred,
        model,
        target_column=target_column,
        flag=None,
        repeat_all_years=False,
        is_panel=is_panel,
    )
    metrics_table_test, _, fig_test = plot_actual_vs_predicted(
        X_test[date_col],
        y_test,
        predtest,
        model,
        target_column=target_column,
        flag=None,
        repeat_all_years=False,
        is_panel=is_panel,
    )

    metrics_table_train = metrics_table_train.set_index("Metric").transpose()
    metrics_table_train.index = ["Train"]
    metrics_table_test = metrics_table_test.set_index("Metric").transpose()
    metrics_table_test.index = ["Test"]
    metrics_table = np.round(pd.concat([metrics_table_train, metrics_table_test]), 2)

    st.markdown("Result Overview")
    st.dataframe(metrics_table, use_container_width=True)  # already rounded above

    st.subheader("Actual vs Predicted Plot Train")
    st.plotly_chart(fig_train, use_container_width=True)
    st.subheader("Actual vs Predicted Plot Test")
    st.plotly_chart(fig_test, use_container_width=True)
st.markdown("## Residual Analysis") | |
columns = st.columns(2) | |
Xtrain1 = X_train.copy() | |
with columns[0]: | |
fig = plot_residual_predicted(y_train, model.predict(Xtrain1), Xtrain1) | |
st.plotly_chart(fig) | |
with columns[1]: | |
st.empty() | |
fig = qqplot(y_train, model.predict(X_train)) | |
st.plotly_chart(fig) | |
with columns[0]: | |
fig = residual_distribution(y_train, model.predict(X_train)) | |
st.pyplot(fig) | |
update_db("6_AI_Model_Result.py") | |
elif auth_status is False:
    st.error("Username/Password is incorrect")

    try:
        username_forgot_pw, email_forgot_password, random_password = (
            authenticator.forgot_password("Forgot password")
        )
        if username_forgot_pw:
            st.success("New password sent securely")
            # Random password to be transferred to the user securely
        elif username_forgot_pw is False:
            st.error("Username not found")
    except Exception as e:
        st.error(e)