import os
import pickle
import sqlite3
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import statsmodels.api as sm
import streamlit as st
from sklearn.metrics import mean_absolute_percentage_error, r2_score

from Data_prep_functions import plot_actual_vs_predicted
from utilities import load_local_css, project_selection, set_header, update_db

sys.setrecursionlimit(10**6)

# Create (or truncate) temp_stdout.txt, presumably used to capture verbose output elsewhere
original_stdout = sys.stdout
sys.stdout = open("temp_stdout.txt", "w")
sys.stdout.close()
sys.stdout = original_stdout

st.set_page_config(layout="wide")
load_local_css("styles.css")
set_header()

if "username" not in st.session_state:
    st.session_state["username"] = None

if "project_name" not in st.session_state:
    st.session_state["project_name"] = None

if "project_dct" not in st.session_state:
    project_selection()
    st.stop()

try:
    with open(os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb") as f:
        data = pickle.load(f)
    st.session_state["bin_dict"] = data["bin_dict"]
except Exception as e:
    st.warning("Save at least one tuned model to proceed")
    st.stop()

st.session_state["bin_dict"]["Panel Level 1"] = st.session_state["bin_dict"].get("Panel Level 1", [])

if "gd_table" not in st.session_state:
    st.session_state["gd_table"] = pd.DataFrame()

if "username" in st.session_state and st.session_state["username"] is not None:

    conn = sqlite3.connect(r"DB/User.db", check_same_thread=False)
    c = conn.cursor()

    if not os.path.exists(os.path.join(st.session_state["project_path"], "tuned_model.pkl")):
        st.error("Please save a tuned model")
        st.stop()

    if (
        "session_state_saved" in st.session_state["project_dct"]["model_tuning"].keys()
        and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] != []
    ):
        for key in ["used_response_metrics", "media_data", "bin_dict"]:
            if key not in st.session_state:
                st.session_state[key] = st.session_state["project_dct"]["model_tuning"][
                    "session_state_saved"
                ][key]

media_data = st.session_state["media_data"]

# Panel (mixed-effects) models are used when a panel column was selected during data import
is_panel = len(st.session_state["bin_dict"]["Panel Level 1"]) > 0

if is_panel:
    # Standardize the first panel column name the same way columns were renamed upstream
    panel_col = (
        st.session_state["bin_dict"]["Panel Level 1"][0]
        .lower()
        .replace(".", "_")
        .replace("@", "_")
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
    )

date_col = "date"


def plot_residual_predicted(actual, predicted, df_):
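    """Scatter standardized residuals against predicted values; the red lines at +/-2 flag potential outliers."""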
    df_["Residuals"] = actual - pd.Series(predicted)
    df_["StdResidual"] = (df_["Residuals"] - df_["Residuals"].mean()) / df_["Residuals"].std()

    fig = px.scatter(
        df_,
        x=predicted,
        y="StdResidual",
        opacity=0.5,
        color_discrete_sequence=["#11B6BD"],
    )

    fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
    fig.add_hline(y=2, line_color="red")
    fig.add_hline(y=-2, line_color="red")

    fig.update_xaxes(title="Predicted")
    fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")

    fig.update_layout(
        title="Residuals over Predicted Values",
        autosize=False,
        width=600,
        height=400,
    )

    return fig


def residual_distribution(actual, predicted):
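    """Histogram (with KDE) of residuals; returns matplotlib's pyplot for st.pyplot."""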
    residuals = actual - pd.Series(predicted)

    sns.set(style="whitegrid")
    plt.figure(figsize=(6, 4))
    sns.histplot(residuals, kde=True, color="#11B6BD")

    plt.title("Distribution of Residuals")
    plt.xlabel("Residuals")
    plt.ylabel("Probability Density")

    return plt


def qqplot(actual, predicted):
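    """QQ plot of standardized residuals against theoretical normal quantiles."""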
    residuals = pd.Series(actual - pd.Series(predicted))
    resid_std = (residuals - residuals.mean()) / residuals.std()
    prob_plot = sm.ProbPlot(resid_std)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=prob_plot.theoretical_quantiles,
            y=prob_plot.sample_quantiles,
            mode="markers",
            marker=dict(size=5, color="#11B6BD"),
            name="QQ Plot",
        )
    )

    # 45-degree reference line
    diagonal_line = go.Scatter(
        x=[-2, 2],
        y=[-2, 2],
        mode="lines",
        line=dict(color="red"),
        name=" ",
    )
    fig.add_trace(diagonal_line)

    fig.update_layout(
        title="QQ Plot of Residuals",
        title_x=0.5,
        autosize=False,
        width=600,
        height=400,
        xaxis_title="Theoretical Quantiles",
        yaxis_title="Sample Quantiles",
    )

    return fig


def get_random_effects(media_data, panel_col, mdf):
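    """Collect each panel member's random intercept from a fitted mixed-effects model into a DataFrame."""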
    random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
    for i, market in enumerate(media_data[panel_col].unique()):
        print(i, end="\r")
        intercept = mdf.random_effects[market].values[0]
        random_eff_df.loc[i, "random_effect"] = intercept
        random_eff_df.loc[i, panel_col] = market

    return random_eff_df


def mdf_predict(X_df, mdf, random_eff_df):
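    """Predict with a mixed-effects model: fixed-effects prediction plus each panel's random intercept."""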
    X = X_df.copy()
    X = pd.merge(
        X,
        random_eff_df[[panel_col, "random_effect"]],
        on=panel_col,
        how="left",
    )
    X["pred_fixed_effect"] = mdf.predict(X)

    X["pred"] = X["pred_fixed_effect"] + X["random_effect"]
    X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True)
    return X


def metrics_df_panel(model_dict, is_panel):
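    """Assemble R2, adjusted R2, and train/test MAPE for every tuned model into one table."""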
    metrics_df = pd.DataFrame(
        columns=[
            "Model",
            "R2",
            "ADJR2",
            "Train Mape",
            "Test Mape",
            "Summary",
            "Model_object",
        ]
    )
    i = 0
    for key in model_dict.keys():
        target = key.split("__")[1]
        metrics_df.at[i, "Model"] = target
        y = model_dict[key]["X_train_tuned"][target]

        feature_set = model_dict[key]["feature_set"]

        if is_panel:
            random_df = get_random_effects(
                media_data, panel_col, model_dict[key]["Model_object"]
            )
            pred = mdf_predict(
                model_dict[key]["X_train_tuned"],
                model_dict[key]["Model_object"],
                random_df,
            )["pred"]
        else:
            pred = model_dict[key]["Model_object"].predict(
                model_dict[key]["X_train_tuned"][feature_set]
            )

        ytest = model_dict[key]["X_test_tuned"][target]
        if is_panel:
            predtest = mdf_predict(
                model_dict[key]["X_test_tuned"],
                model_dict[key]["Model_object"],
                random_df,
            )["pred"]
        else:
            predtest = model_dict[key]["Model_object"].predict(
                model_dict[key]["X_test_tuned"][feature_set]
            )

        metrics_df.at[i, "R2"] = r2_score(y, pred)
        # Adjusted R2 penalizes R2 for the number of features in the model
        metrics_df.at[i, "ADJR2"] = 1 - (1 - metrics_df.loc[i, "R2"]) * (len(y) - 1) / (
            len(y) - len(feature_set) - 1
        )
        metrics_df.at[i, "Train Mape"] = mean_absolute_percentage_error(y, pred)
        metrics_df.at[i, "Test Mape"] = mean_absolute_percentage_error(ytest, predtest)
        metrics_df.at[i, "Summary"] = model_dict[key]["Model_object"].summary()
        metrics_df.at[i, "Model_object"] = model_dict[key]["Model_object"]
        i += 1
    metrics_df = np.round(metrics_df, 2)

    return metrics_df


with open(
    os.path.join(st.session_state["project_path"], "final_df_transformed.pkl"),
    "rb",
) as f:
    data = pickle.load(f)
    transformed_data = data["final_df_transformed"]

with open(os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb") as f:
    data = pickle.load(f)
    st.session_state["bin_dict"] = data["bin_dict"]

with open(os.path.join(st.session_state["project_path"], "tuned_model.pkl"), "rb") as file:
    tuned_model_dict = pickle.load(file)
    feature_set_dct = {
        key.split("__")[1]: key_dict["feature_set"]
        for key, key_dict in tuned_model_dict.items()
    }

if "contribution_df" not in st.session_state:
    st.session_state["contribution_df"] = None


def map_channel(transformed_var, channel_dict):
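    """Return the channel group a transformed variable belongs to, or the variable itself if unmapped."""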
    for key, value_list in channel_dict.items():
        if any(raw_var in transformed_var for raw_var in value_list):
            return key
    return transformed_var


def contributions_nonpanel(model_dict):
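    """Percentage contribution of each channel to every response metric for OLS (non-panel) models."""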
    with open(os.path.join(st.session_state["project_path"], "channel_groups.pkl"), "rb") as f:
        channels = pickle.load(f)
    media_data = st.session_state["media_data"]
    contribution_df = pd.DataFrame(columns=["Channel"])

    for key in model_dict.keys():
        best_feature_set = model_dict[key]["feature_set"]
        model = model_dict[key]["Model_object"]
        target = key.split("__")[1]
        X_train = model_dict[key]["X_train_tuned"]
        contri_df = pd.DataFrame()

        coef_df = pd.DataFrame(model.params)
        coef_df.reset_index(inplace=True)
        coef_df.columns = ["feature", "coef"]
        x_train_contribution = X_train.copy()
        x_train_contribution["pred"] = model.predict(X_train[best_feature_set])

        # Contribution of each feature = coefficient * feature value
        for i in range(len(coef_df)):
            coef = coef_df.loc[i, "coef"]
            col = coef_df.loc[i, "feature"]
            if col != "const":
                x_train_contribution[str(col) + "_contr"] = coef * x_train_contribution[col]
            else:
                x_train_contribution["const"] = coef

        # Trend/seasonality, flag, and exogenous contributions get folded into "base"
        tuning_cols = [
            c
            for c in x_train_contribution.filter(regex="contr").columns
            if c in ["Week_number_contr", "Trend_contr", "sine_wave_contr", "cosine_wave_contr"]
        ]
        flag_cols = [c for c in x_train_contribution.filter(regex="contr").columns if "_flag" in c]

        all_exog_vars = st.session_state["bin_dict"]["Exogenous"]
        all_exog_vars = [
            var.lower()
            .replace(".", "_")
            .replace("@", "_")
            .replace(" ", "_")
            .replace("-", "")
            .replace(":", "")
            .replace("__", "_")
            for var in all_exog_vars
        ]
        exog_cols = []
        if len(all_exog_vars) > 0:
            for col in x_train_contribution.filter(regex="contr").columns:
                if len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0:
                    exog_cols.append(col)

        base_cols = ["const"] + flag_cols + tuning_cols + exog_cols

        x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(axis=1)
        x_train_contribution.drop(columns=base_cols, inplace=True)

        # Debug export (requires an existing Test/ directory)
        x_train_contribution.to_csv("Test/smr_x_train_contribution.csv", index=False)

        contri_df = pd.DataFrame(x_train_contribution.filter(regex="contr").sum(axis=0))
        contri_df.reset_index(inplace=True)
        contri_df.columns = ["Channel", target]
        contri_df["Channel"] = contri_df["Channel"].apply(lambda x: map_channel(x, channels))
        contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
        contri_df["Channel"].replace("base_contr", "base", inplace=True)
        contribution_df = pd.merge(contribution_df, contri_df, on="Channel", how="outer")

    return contribution_df


def contributions_panel(model_dict):
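    """Percentage contribution of each channel for panel (mixed-effects) models; intercepts and trend terms are folded into 'base'."""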
    media_data = st.session_state["media_data"]
    contribution_df = pd.DataFrame(columns=["Channel"])
    for key in model_dict.keys():
        best_feature_set = model_dict[key]["feature_set"]
        model = model_dict[key]["Model_object"]
        target = key.split("__")[1]
        X_train = model_dict[key]["X_train_tuned"]
        contri_df = pd.DataFrame()

        random_eff_df = get_random_effects(media_data, panel_col, model)
        random_eff_df["fixed_effect"] = model.fe_params["Intercept"]
        random_eff_df["panel_effect"] = (
            random_eff_df["random_effect"] + random_eff_df["fixed_effect"]
        )

        coef_df = pd.DataFrame(model.fe_params)
        coef_df.reset_index(inplace=True)
        coef_df.columns = ["feature", "coef"]

        x_train_contribution = X_train.copy()
        x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df)

        x_train_contribution = pd.merge(
            x_train_contribution,
            random_eff_df[[panel_col, "panel_effect"]],
            on=panel_col,
            how="left",
        )

        # Skip index 0 (the intercept); it is already captured in panel_effect
        for i in range(1, len(coef_df)):
            coef = coef_df.loc[i, "coef"]
            col = coef_df.loc[i, "feature"]
            x_train_contribution[str(col) + "_contr"] = coef * x_train_contribution[col]

        # Panel intercepts and trend/seasonality terms are folded into "base"
        base_cols = ["panel_effect"] + [
            c
            for c in x_train_contribution.filter(regex="contr").columns
            if c
            in [
                "Week_number_contr",
                "Trend_contr",
                "sine_wave_contr",
                "cosine_wave_contr",
            ]
        ]
        x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(axis=1)
        x_train_contribution.drop(columns=base_cols, inplace=True)

        contri_df = pd.DataFrame(x_train_contribution.filter(regex="contr").sum(axis=0))
        contri_df.reset_index(inplace=True)
        contri_df.columns = ["Channel", target]
        contri_df["Channel"] = (
            contri_df["Channel"].str.split("(_impres|_clicks)").apply(lambda c: c[0])
        )
        contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
        contri_df["Channel"].replace("base_contr", "base", inplace=True)
        contribution_df = pd.merge(contribution_df, contri_df, on="Channel", how="outer")

    return contribution_df


metrics_table = metrics_df_panel(tuned_model_dict, is_panel)

cols1 = st.columns([2, 1])
with cols1[0]:
    st.markdown(f"**Welcome {st.session_state['username']}**")
with cols1[1]:
    st.markdown(f"**Current Project: {st.session_state['project_name']}**")

st.title("AI Model Results")

st.header("Contribution Overview")

options = st.session_state["used_response_metrics"]

# Standardize response-metric names the same way model keys were built
options = [
    opt.lower()
    .replace(" ", "_")
    .replace("-", "")
    .replace(":", "")
    .replace("__", "_")
    for opt in options
]

default_options = (
    st.session_state["project_dct"]["saved_model_results"].get("selected_options")
    if st.session_state["project_dct"]["saved_model_results"].get("selected_options")
    is not None
    else [options[-1]]
)
# Drop saved defaults that are no longer valid options (without mutating the list while iterating it)
default_options = [i for i in default_options if i in options]


def format_display(inp):
    return inp.title().replace("_", " ").strip()


# Default to the previously saved selections (falling back to the last metric)
contribution_selections = st.multiselect(
    "Select the Response Metrics to compare contributions",
    options,
    default=default_options,
    format_func=format_display,
)
trace_data = []

if is_panel:
    st.session_state["contribution_df"] = contributions_panel(tuned_model_dict)
else:
    st.session_state["contribution_df"] = contributions_nonpanel(tuned_model_dict)


def create_grouped_bar_plot(contribution_df, contribution_selections):
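    """Grouped bar chart comparing channel contributions across the selected response metrics."""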
    channel_names = contribution_df["Channel"].tolist()

    all_contributions = {
        name: [] for name in channel_names if name not in ["const", "base"]
    }

    base_sales_dict = {}

    # Collect each metric's contribution per channel; "base" is tracked separately
    for selection in contribution_selections:
        contributions = contribution_df[selection].values.astype(float)
        base_sales = 0

        for channel_name, contribution in zip(channel_names, contributions):
            if channel_name in all_contributions:
                all_contributions[channel_name].append(contribution)
            elif channel_name == "base":
                base_sales = contribution

        base_sales_dict[selection] = base_sales

    # Order channels by mean contribution (descending), with Base Sales first
    sorted_channels = sorted(all_contributions.items(), key=lambda x: -np.mean(x[1]))
    sorted_channel_names = [name for name, _ in sorted_channels]
    sorted_channel_names = ["Base Sales"] + sorted_channel_names

    trace_data = []
    max_value = 0

    for i, selection in enumerate(contribution_selections):
        display_name = sorted_channel_names
        display_contribution = [base_sales_dict[selection]] + [
            all_contributions[name][i] for name in sorted_channel_names[1:]
        ]

        text_values = [f"{val}%" for val in np.round(display_contribution, 0).astype(int)]

        max_contribution = max(display_contribution)
        if max_contribution > max_value:
            max_value = max_contribution

        trace = go.Bar(
            x=display_name,
            y=display_contribution,
            name=selection,
            text=text_values,
            textposition="outside",
        )
        trace_data.append(trace)

    layout = go.Layout(
        title="Metrics Contribution by Channel",
        xaxis=dict(title="Channel Name"),
        yaxis=dict(title="Metrics Contribution", range=[0, max_value * 1.2]),
        barmode="group",
        plot_bgcolor="white",
    )

    fig = go.Figure(data=trace_data, layout=layout)

    return fig


st.plotly_chart(
    create_grouped_bar_plot(st.session_state["contribution_df"], contribution_selections),
    use_container_width=True,
)


def preprocess_and_plot(contribution_df, contribution_selections):
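    """Waterfall chart of channel contributions for each selected response metric."""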
    channel_names = contribution_df["Channel"].tolist()

    all_contributions = {
        name: [] for name in channel_names if name not in ["const", "base"]
    }

    base_sales_dict = {}

    for selection in contribution_selections:
        contributions = contribution_df[selection].values.astype(float)
        base_sales = 0

        for channel_name, contribution in zip(channel_names, contributions):
            if channel_name in all_contributions:
                all_contributions[channel_name].append(contribution)
            elif channel_name == "base":
                base_sales = contribution

        base_sales_dict[selection] = base_sales

    sorted_channels = sorted(all_contributions.items(), key=lambda x: -np.mean(x[1]))
    sorted_channel_names = [name for name, _ in sorted_channels]
    sorted_channel_names = ["Base Sales"] + sorted_channel_names

    fig = go.Figure()

    for i, selection in enumerate(contribution_selections):
        display_name = ["Base Sales"] + sorted_channel_names[1:]
        display_contribution = [base_sales_dict[selection]]

        for name in sorted_channel_names[1:]:
            display_contribution.append(all_contributions[name][i])

        text_values = [f"{val}%" for val in np.round(display_contribution, 0).astype(int)]

        fig.add_trace(
            go.Waterfall(
                orientation="v",
                measure=["relative"] * len(display_contribution),
                x=display_name,
                text=text_values,
                textposition="outside",
                y=display_contribution,
                increasing={"marker": {"color": "green"}},
                decreasing={"marker": {"color": "red"}},
                totals={"marker": {"color": "blue"}},
                name=selection,
            )
        )

    fig.update_layout(
        title="Metrics Contribution by Channel",
        xaxis={"title": "Channel Name"},
        yaxis=dict(title="Metrics Contribution", range=[0, 100 * 1.2]),
    )

    return fig


st.plotly_chart(
    preprocess_and_plot(st.session_state["contribution_df"], contribution_selections),
    use_container_width=True,
)

st.header("Analysis of Model Results")

# Drop the Summary and Model_object columns for display
gd_table = metrics_table.iloc[:, :-2]
gd_table["selected"] = [False] * len(gd_table)
target_column = gd_table.at[0, "Model"]
if "selected_row_index_gd_table" not in st.session_state:
    st.session_state["selected_row_index_gd_table"] = None

st.session_state["gd_table"] = gd_table


def selection_change():
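    """Callback for the data editor: record the edited row index and keep only that row's 'selected' flag set."""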
    edited_rows: dict = st.session_state.project_selection["edited_rows"]
    st.session_state["selected_row_index_gd_table"] = next(iter(edited_rows))
    st.session_state["gd_table"] = st.session_state["gd_table"].assign(selected=False)

    update_dict = {idx: values for idx, values in edited_rows.items()}

    st.session_state["gd_table"].update(pd.DataFrame.from_dict(update_dict, orient="index"))


with st.container():
    table = st.data_editor(
        st.session_state["gd_table"],
        hide_index=True,
        on_change=selection_change,
        key="project_selection",
        use_container_width=True,
    )


if st.session_state["selected_row_index_gd_table"] is not None:
    # Override the default (first) model with the user's selection
    target_column = st.session_state["gd_table"].at[
        st.session_state["selected_row_index_gd_table"], "Model"
    ]

feature_set = feature_set_dct[target_column]

model = metrics_table[metrics_table["Model"] == target_column]["Model_object"].iloc[0]
target = metrics_table[metrics_table["Model"] == target_column]["Model"].iloc[0]
st.header("Model Summary")
st.write(model.summary())

sel_dict = tuned_model_dict[
    [k for k in tuned_model_dict.keys() if k.split("__")[1] == target][0]
]

feature_set = sel_dict["feature_set"]
X_train = sel_dict["X_train_tuned"]
y_train = X_train[target]

if is_panel:
    random_effects = get_random_effects(media_data, panel_col, model)
    pred = mdf_predict(X_train, model, random_effects)["pred"]
else:
    pred = model.predict(X_train[feature_set])

X_test = sel_dict["X_test_tuned"]
y_test = X_test[target]
if is_panel:
    predtest = mdf_predict(X_test, model, random_effects)["pred"]
else:
    predtest = model.predict(X_test[feature_set])

metrics_table_train, _, fig_train = plot_actual_vs_predicted(
    X_train[date_col],
    y_train,
    pred,
    model,
    target_column=target_column,
    flag=None,
    repeat_all_years=False,
    is_panel=is_panel,
)

metrics_table_test, _, fig_test = plot_actual_vs_predicted(
    X_test[date_col],
    y_test,
    predtest,
    model,
    target_column=target_column,
    flag=None,
    repeat_all_years=False,
    is_panel=is_panel,
)

metrics_table_train = metrics_table_train.set_index("Metric").transpose()
metrics_table_train.index = ["Train"]
metrics_table_test = metrics_table_test.set_index("Metric").transpose()
metrics_table_test.index = ["Test"]
metrics_table = np.round(pd.concat([metrics_table_train, metrics_table_test]), 2)

st.markdown("Result Overview")
st.dataframe(metrics_table, use_container_width=True)

st.subheader("Actual vs Predicted Plot Train")
st.plotly_chart(fig_train, use_container_width=True)
st.subheader("Actual vs Predicted Plot Test")
st.plotly_chart(fig_test, use_container_width=True)

st.markdown("## Residual Analysis")
columns = st.columns(2)

Xtrain1 = X_train.copy()
with columns[0]:
    fig = plot_residual_predicted(y_train, pred, Xtrain1)
    st.plotly_chart(fig)

with columns[1]:
    st.empty()
    fig = qqplot(y_train, pred)
    st.plotly_chart(fig)

with columns[0]:
    fig = residual_distribution(y_train, pred)
    st.pyplot(fig)

update_db("6_AI_Model_Result.py")