# Source: RFI / pages / 5_Model_Tuning.py (author: Manoj; commit 9938325, "first commit")
"""
MMO Build Sprint 3
date :
changes : capability to tune MixedLM as well as simple LR in the same page
"""
import os
import streamlit as st
import pandas as pd
from Eda_functions import format_numbers
import pickle
from utilities import set_header, load_local_css
import statsmodels.api as sm
import re
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from statsmodels.stats.outliers_influence import variance_inflation_factor
# import yaml
# from yaml import SafeLoader
# import streamlit_authenticator as stauth
st.set_option("deprecation.showPyplotGlobalUse", False)
import statsmodels.formula.api as smf
from Data_prep_functions import *
import sqlite3
from utilities import set_header, load_local_css, update_db, project_selection
# for i in ["model_tuned", "X_train_tuned", "X_test_tuned", "tuned_model_features", "tuned_model", "tuned_model_dict"] :
# Page setup: configure the Streamlit page, load the shared stylesheet and
# header, and make sure a project is available before anything else runs.
st.set_page_config(
page_title="Model Tuning",
page_icon=":shark:",
layout="wide",
initial_sidebar_state="collapsed",
)
load_local_css("styles.css")
set_header()
# Seed the keys that are read further down so the lookups do not fail on a
# fresh session.
if "username" not in st.session_state:
st.session_state["username"] = None
if "project_name" not in st.session_state:
st.session_state["project_name"] = None
# No project dictionary in the session yet: run the project picker and halt
# this script run.
if "project_dct" not in st.session_state:
project_selection()
st.stop()
# NOTE(review): indentation was lost in this copy of the file; presumably the
# remainder of the page is nested under this login check - confirm against
# the original.
if "username" in st.session_state and st.session_state["username"] is not None:
# Tuning reads best_models.pkl from the project folder, so it must exist.
if not os.path.exists(
os.path.join(st.session_state["project_path"], "best_models.pkl")
):
st.error("Please save a model before tuning")
st.stop()
# Open the user database.
# NOTE(review): neither `conn` nor `c` is referenced again in the visible
# code and the connection is never closed - confirm whether it is needed.
conn = sqlite3.connect(
r"DB/User.db", check_same_thread=False
) # connection with sql db
c = conn.cursor()
# if not is_state_initiaized:
# if "session_name" not in st.session_state:
# st.session_state["session_name"] = None
# Restore the session state stored under
# project_dct["model_build"]["session_state_saved"]; without it there is no
# built model to tune, so the page reports an error and stops.
if "session_state_saved" in st.session_state["project_dct"]["model_build"].keys():
for key in [
"Model",
"date",
"saved_model_names",
"media_data",
"X_test_spends",
"spends_data"
]:
# Values already present in the live session are left untouched.
if key not in st.session_state:
st.session_state[key] = st.session_state["project_dct"]["model_build"][
"session_state_saved"
][key]
st.session_state["bin_dict"] = st.session_state["project_dct"][
"model_build"
]["session_state_saved"]["bin_dict"]
# Use the saved response metrics when the session has none (missing or an
# empty list).
if (
"used_response_metrics" not in st.session_state
or st.session_state["used_response_metrics"] == []
):
st.session_state["used_response_metrics"] = st.session_state[
"project_dct"
]["model_build"]["session_state_saved"]["used_response_metrics"]
else:
st.error("Please load a session with a built model")
st.stop()
# if 'sel_model' not in st.session_state["project_dct"]["model_tuning"].keys():
# st.session_state["project_dct"]["model_tuning"]['sel_model']= {}
# These model_tuning entries are dicts (later indexed by the selected response
# metric); create any that are missing.
for key in ["select_all_flags_check", "selected_flags", "sel_model"]:
if key not in st.session_state["project_dct"]["model_tuning"].keys():
st.session_state["project_dct"]["model_tuning"][key] = {}
# Sprint3 - name of the date column used when plotting and grouping.
date_col = "date"

# Panel column(s). Left empty (manoj), so the page runs in non-panel mode.
# Previously this was the first "Panel Level 1" entry of
# st.session_state["bin_dict"], lower-cased, with ".", "@" and " " mapped to
# "_", "-" and ":" removed, and "__" collapsed to "_".
panel_col = []
is_panel = len(panel_col) > 0

# Sprint4 - tuned models are kept in a dict in session state; start empty when
# nothing has been tuned yet.
if "Model_Tuned" not in st.session_state:
    st.session_state["Model_Tuned"] = {}

# Page header: greeting on the wide left column, project name on the right.
welcome_slot, project_slot = st.columns([2, 1])
with welcome_slot:
    st.markdown(f"**Welcome {st.session_state['username']}**")
with project_slot:
    st.markdown(f"**Current Project: {st.session_state['project_name']}**")

st.title("1. Model Tuning")

# Per-target flags recording whether a tuned model has been saved.
if "is_tuned_model" not in st.session_state:
    st.session_state["is_tuned_model"] = {}
def format_display(inp):
    """Return *inp* title-cased with underscores shown as spaces."""
    return inp.title().replace("_", " ").strip()


# Sprint4 - when response metrics are available let the user pick one,
# otherwise fall back to the default revenue target.
if (
    "used_response_metrics" in st.session_state
    and st.session_state["used_response_metrics"]
):
    response_metrics = st.session_state["used_response_metrics"]
    tuning_state = st.session_state["project_dct"]["model_tuning"]

    # Pre-select the metric remembered from the last visit. It may be missing
    # or may no longer be one of the available metrics; in both cases use the
    # first metric instead of letting .index() raise ValueError.
    default_target = tuning_state.get("sel_target_col", None)
    if default_target not in response_metrics:
        default_target = response_metrics[0]

    sel_target_col = st.selectbox(
        "Select the response metric",
        response_metrics,
        index=response_metrics.index(default_target),
        format_func=format_display,
    )
    # Column-name form of the metric, as used in the saved-model keys.
    target_col = (
        sel_target_col.lower()
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
    )
    tuning_state["sel_target_col"] = sel_target_col
else:
    sel_target_col = "Total Approved Accounts - Revenue"
    target_col = "total_approved_accounts_revenue"
# Sprint4 - look through all saved models and only offer the ones built for
# the selected response metric (target_col).
# NOTE(review): pickle.load executes code embedded in the file; this is only
# safe while best_models.pkl is written exclusively by this application.
with open(
    os.path.join(st.session_state["project_path"], "best_models.pkl"), "rb"
) as file:
    model_dict = pickle.load(file)
saved_models = model_dict.keys()

# Keys have the form "<model name>__<target column>". Splitting on the last
# separator keeps "name + '__' + target" equal to the stored key and skips
# keys that contain no separator instead of raising IndexError.
required_saved_models = []
for saved_key in saved_models:
    model_name, separator, model_target = saved_key.rpartition("__")
    if separator and model_target == target_col:
        required_saved_models.append(model_name)

# Without a saved model for this metric there is nothing to tune; an empty
# selectbox would return None and break the key lookup below.
if not required_saved_models:
    st.error(f"No saved model found for {sel_target_col}. Please save a model before tuning")
    st.stop()

remembered_models = st.session_state["project_dct"]["model_tuning"]["sel_model"]
default_model = remembered_models.get(sel_target_col)
# The remembered model may be absent or may have been removed since.
if default_model not in required_saved_models:
    default_model = required_saved_models[0]

sel_model = st.selectbox(
    "Select the model to tune",
    required_saved_models,
    index=required_saved_models.index(default_model),
)
# Remember the model the user actually picked, so the choice survives a
# reload (storing the default here would never record a change).
remembered_models[sel_target_col] = sel_model

# Sprint4 - get the saved objects of the selected model
sel_model_dict = model_dict[sel_model + "__" + target_col]
X_train = sel_model_dict["X_train"]
X_test = sel_model_dict["X_test"]
y_train = sel_model_dict["y_train"]
y_test = sel_model_dict["y_test"]
df = st.session_state["media_data"]
if "selected_model" not in st.session_state:
    st.session_state["selected_model"] = 0
# Section 1.1: let the user pick a date window (optionally repeated every
# year) and plot actual vs. predicted with that window passed as `flag`.
st.markdown("### 1.1 Event Flags")
st.markdown("Helps in quantifying the impact of specific occurrences of events")
with st.expander("Apply Event Flags"):
st.session_state["project_dct"]["model_tuning"]["flag_expander"] = True
model = sel_model_dict["Model_object"]
date = st.session_state["date"]
date = pd.to_datetime(date)
X_train = sel_model_dict["X_train"]
# features_set= model_dict[st.session_state["selected_model"]]['feature_set']
features_set = sel_model_dict["feature_set"]
col = st.columns(3)
# The date pickers are bounded by the first and last date held in the session.
min_date = min(date)
max_date = max(date)
# Defaults come from the values remembered in the project dictionary, falling
# back to the full date range.
start_date_default = (
st.session_state["project_dct"]["model_tuning"].get("start_date_default")
if st.session_state["project_dct"]["model_tuning"].get("start_date_default")
is not None
else min_date
)
end_date_default = (
st.session_state["project_dct"]["model_tuning"].get("end_date_default")
if st.session_state["project_dct"]["model_tuning"].get("end_date_default")
is not None
else max_date
)
with col[0]:
start_date = st.date_input(
"Select Start Date",
start_date_default,
min_value=min_date,
max_value=max_date,
)
with col[1]:
# Keep the default end date from falling before the chosen start date.
end_date_default = (
end_date_default
if pd.Timestamp(end_date_default) >= pd.Timestamp(start_date)
else start_date
)
end_date = st.date_input(
"Select End Date",
end_date_default,
min_value=max(pd.to_datetime(min_date), pd.to_datetime(start_date)),
max_value=pd.to_datetime(max_date),
)
with col[2]:
repeat_default = (
st.session_state["project_dct"]["model_tuning"].get("repeat_default")
if st.session_state["project_dct"]["model_tuning"].get("repeat_default")
is not None
else "No"
)
# Position of the default inside the ["Yes", "No"] options below.
repeat_default_idx = 0 if repeat_default.lower() == "yes" else 1
repeat = st.selectbox(
"Repeat Annually", ["Yes", "No"], index=repeat_default_idx
)
# Remember the current choices for the next visit.
st.session_state["project_dct"]["model_tuning"][
"start_date_default"
] = start_date
st.session_state["project_dct"]["model_tuning"]["end_date_default"] = end_date
st.session_state["project_dct"]["model_tuning"]["repeat_default"] = repeat
# From here on `repeat` is a boolean rather than the "Yes"/"No" label.
if repeat == "Yes":
repeat = True
else:
repeat = False
# Flags created so far; the copy stored in the project dictionary, when
# present, replaces whatever is in the session.
if "Flags" not in st.session_state:
st.session_state["Flags"] = {}
if "flags" in st.session_state["project_dct"]["model_tuning"].keys():
st.session_state["Flags"] = st.session_state["project_dct"]["model_tuning"][
"flags"
]
# print("**"*50)
# print(y_train)
# print("**"*50)
# print(model.fittedvalues)
# plot_actual_vs_predicted is called once on train and once on test; its
# second return value is what gets stored as the flag values for that split.
# NOTE(review): the helper is defined outside this file - its exact return
# contents are not visible here.
if is_panel: # Sprint3
met, line_values, fig_flag = plot_actual_vs_predicted(
X_train[date_col],
y_train,
model.fittedvalues,
model,
target_column=sel_target_col,
flag=(start_date, end_date),
repeat_all_years=repeat,
is_panel=True,
)
st.plotly_chart(fig_flag, use_container_width=True)
# create flag on test
met, test_line_values, fig_flag = plot_actual_vs_predicted(
X_test[date_col],
y_test,
sel_model_dict["pred_test"],
model,
target_column=sel_target_col,
flag=(start_date, end_date),
repeat_all_years=repeat,
is_panel=True,
)
else:
# Non-panel path: predictions are computed from the saved feature set.
pred_train = model.predict(X_train[features_set])
met, line_values, fig_flag = plot_actual_vs_predicted(
X_train[date_col],
y_train,
pred_train,
model,
flag=(start_date, end_date),
repeat_all_years=repeat,
is_panel=False,
)
st.plotly_chart(fig_flag, use_container_width=True)
# Only the train figure is displayed; the test call supplies test_line_values.
pred_test = model.predict(X_test[features_set])
met, test_line_values, fig_flag = plot_actual_vs_predicted(
X_test[date_col],
y_test,
pred_test,
model,
flag=(start_date, end_date),
repeat_all_years=repeat,
is_panel=False,
)
# Name for the flag built from the date window selected above.
flag_name = st.text_input("Enter Flag Name")
# Sprint4 - add selected target col to flag name
if st.button("Update flag"):
    flag_label = flag_name.strip()
    if not flag_label:
        # An empty name would create a "_flag__<target>" entry.
        st.error("Please enter a flag name")
    elif "__" in flag_label:
        # "__" separates the flag name from the target column in the stored
        # key, so it cannot appear inside the name itself.
        st.error('Flag name cannot contain "__"')
    else:
        flag_key = flag_label + "_flag__" + target_col
        st.session_state["Flags"][flag_key] = {
            "train": line_values,
            "test": test_line_values,
        }
        st.success(f"{flag_key} stored")
# Keep the project dictionary pointing at the current set of flags.
st.session_state["project_dct"]["model_tuning"]["flags"] = st.session_state[
    "Flags"
]
# Sprint4 - only show flag created for the particular target col
# Sprint4 - only offer the flags created for the selected target column.
if st.session_state["Flags"] is None:
    st.session_state["Flags"] = {}

# Flag keys have the form "<flag name>__<target column>"; keys without the
# separator are skipped rather than raising IndexError.
target_model_flags = []
for flag_key in st.session_state["Flags"]:
    key_parts = flag_key.split("__")
    if len(key_parts) > 1 and key_parts[1] == target_col:
        target_model_flags.append(key_parts[0])

options = list(target_model_flags)
num_columns = 4
flag_state = st.session_state["project_dct"]["model_tuning"]

# "Select all" forces every flag checkbox on; the choice is remembered per
# response metric.
tick = st.checkbox(
    "Select all",
    value=flag_state["select_all_flags_check"].get(sel_target_col, False),
)
flag_state["select_all_flags_check"][sel_target_col] = tick

selection_defaults = flag_state["selected_flags"].get(sel_target_col, [])

# Build the selection from the checkboxes only. Starting from a fresh list
# (instead of mutating the remembered one) drops remembered flags that no
# longer exist, which would otherwise fail the flag lookup at build time, and
# keeps the selection in a stable order.
selected_options = []
for row_start in range(0, len(options), num_columns):
    cols = st.columns(num_columns)
    row_options = options[row_start : row_start + num_columns]
    for col, option in zip(cols, row_options):
        if col.checkbox(option, value=(tick or option in selection_defaults)):
            selected_options.append(option)

flag_state["selected_flags"][sel_target_col] = selected_options
# Section 1.2: optional time-based regressors, one checkbox per column. Each
# checkbox starts from the value remembered in the project dictionary.
st.markdown("### 1.2 Select Parameters to Apply")
parameters = st.columns(3)
trend_slot, week_slot, wave_slot = parameters
remembered_checks = st.session_state["project_dct"]["model_tuning"]

with trend_slot:
    trend_default = remembered_checks.get("trend_check", False)
    Trend = st.checkbox("**Trend**", value=trend_default)
    st.markdown(
        "Helps account for long-term trends or seasonality that could influence advertising effectiveness"
    )

with week_slot:
    week_default = remembered_checks.get("week_num_check", False)
    week_number = st.checkbox("**Week_number**", value=week_default)
    st.markdown(
        "Assists in detecting and incorporating weekly patterns or seasonality"
    )

with wave_slot:
    wave_default = remembered_checks.get("sine_cosine_check", False)
    sine_cosine = st.checkbox("**Sine and Cosine Waves**", value=wave_default)
    st.markdown("Helps in capturing cyclical patterns or seasonality in the data")
#
# def get_tuned_model():
# st.session_state['build_tuned_model']=True
# Refit the selected model with the chosen event flags and time-based
# regressors, compare it with the original fit, and show train and test
# diagnostics. The tuned model is kept in st.session_state["Model_Tuned"].
if st.button(
    "Build model with Selected Parameters and Flags",
    key="build_tuned_model",
    use_container_width=True,
):
    tuning_state = st.session_state["project_dct"]["model_tuning"]
    new_features = features_set
    st.header("2.1 Results Summary")

    # Copy the feature columns so the flag / trend / seasonality columns added
    # below land on independent frames instead of on slices of the saved
    # X_train / X_test (avoids pandas' SettingWithCopyWarning). The features
    # are used unscaled.
    X_train_tuned = X_train[features_set].copy()
    X_test_tuned = X_test[features_set].copy()
    if is_panel:
        # The mixed model additionally needs target, date and panel columns.
        X_train_tuned[target_col] = X_train[target_col]
        X_train_tuned[date_col] = X_train[date_col]
        X_train_tuned[panel_col] = X_train[panel_col]
        X_test_tuned[target_col] = X_test[target_col]
        X_test_tuned[date_col] = X_test[date_col]
        X_test_tuned[panel_col] = X_test[panel_col]

    # Sprint4 - flags are stored under "<flag name>__<target column>".
    for flag in selected_options:
        flag_values = st.session_state["Flags"][flag + "__" + target_col]
        X_train_tuned[flag] = flag_values["train"]
        X_test_tuned[flag] = flag_values["test"]

    # ---- Trend -------------------------------------------------------------
    tuning_state["trend_check"] = bool(Trend)
    if Trend:
        if is_panel:
            # Sprint3 - trend is counted separately inside each panel, in date
            # order; the test trend continues where that panel's train ended.
            train_groups = []
            panel_wise_end_point_train = {}
            for panel, groupdf in X_train_tuned.groupby(panel_col):
                groupdf = groupdf.sort_values(date_col)
                groupdf["Trend"] = np.arange(1, len(groupdf) + 1, 1)
                train_groups.append(groupdf)
                panel_wise_end_point_train[panel] = len(groupdf)
            X_train_tuned = pd.concat(train_groups)

            test_groups = []
            for panel, test_groupdf in X_test_tuned.groupby(panel_col):
                test_groupdf = test_groupdf.sort_values(date_col)
                start = panel_wise_end_point_train[panel] + 1
                # arange(start, end) yields exactly len(test_groupdf) values.
                end = start + len(test_groupdf)
                test_groupdf["Trend"] = np.arange(start, end, 1)
                test_groups.append(test_groupdf)
            X_test_tuned = pd.concat(test_groups)
        else:
            n_train_rows = len(X_train_tuned)
            X_train_tuned["Trend"] = np.arange(1, n_train_rows + 1, 1)
            X_test_tuned["Trend"] = np.arange(
                n_train_rows + 1,
                n_train_rows + len(X_test_tuned) + 1,
                1,
            )
        new_features = new_features + ["Trend"]

    # ---- Week number -------------------------------------------------------
    # Despite its name, the "Week_number" feature holds the day of the week
    # (pandas .dt.day_of_week) of each date.
    tuning_state["week_num_check"] = bool(week_number)
    if week_number:
        if is_panel:
            # Sprint3 - derive the feature from the date column of the tuned frame.
            X_train_tuned[date_col] = pd.to_datetime(X_train_tuned[date_col])
            X_train_tuned["Week_number"] = X_train_tuned[date_col].dt.day_of_week
            if X_train_tuned["Week_number"].nunique() == 1:
                st.write(
                    "All dates in the data are of the same week day. Hence Week number can't be used."
                )
            else:
                X_test_tuned[date_col] = pd.to_datetime(X_test_tuned[date_col])
                X_test_tuned["Week_number"] = X_test_tuned[date_col].dt.day_of_week
                new_features = new_features + ["Week_number"]
        else:
            X_train_tuned["Week_number"] = pd.to_datetime(
                X_train[date_col]
            ).dt.day_of_week
            X_test_tuned["Week_number"] = pd.to_datetime(
                X_test[date_col]
            ).dt.day_of_week
            new_features = new_features + ["Week_number"]

    # ---- Sine / cosine waves -----------------------------------------------
    tuning_state["sine_cosine_check"] = bool(sine_cosine)
    if sine_cosine:
        frequency = 2 * np.pi / 365  # Adjust the frequency as needed
        new_features = new_features + ["sine_wave", "cosine_wave"]
        if is_panel:
            # Sprint3 - waves restart in every panel; the test rows of a panel
            # continue counting from that panel's number of train rows.
            train_groups = []
            train_panel_wise_end_point = {}
            for panel, groupdf in X_train_tuned.groupby(panel_col):
                groupdf = groupdf.copy()
                num_samples = len(groupdf)
                train_panel_wise_end_point[panel] = num_samples
                days_since_start = np.arange(num_samples)
                groupdf["sine_wave"] = np.sin(frequency * days_since_start)
                groupdf["cosine_wave"] = np.cos(frequency * days_since_start)
                train_groups.append(groupdf)
            X_train_tuned = pd.concat(train_groups)

            test_groups = []
            for panel, test_groupdf in X_test_tuned.groupby(panel_col):
                test_groupdf = test_groupdf.copy()
                start = train_panel_wise_end_point[panel]
                days_since_start = np.arange(start, start + len(test_groupdf), 1)
                test_groupdf["sine_wave"] = np.sin(frequency * days_since_start)
                test_groupdf["cosine_wave"] = np.cos(frequency * days_since_start)
                test_groups.append(test_groupdf)
            X_test_tuned = pd.concat(test_groups)
        else:
            num_samples = len(X_train_tuned)
            train_steps = np.arange(num_samples)
            test_steps = np.arange(num_samples, num_samples + len(X_test_tuned), 1)
            # Assign the waves as plain arrays (by position). Joining a
            # separate 0..n-1 indexed frame with pd.concat(axis=1) aligns on
            # index labels, which adds NaN-filled rows whenever the frame's
            # index is not 0..n-1 (typically the test split).
            X_train_tuned["sine_wave"] = np.sin(frequency * train_steps)
            X_train_tuned["cosine_wave"] = np.cos(frequency * train_steps)
            X_test_tuned["sine_wave"] = np.sin(frequency * test_steps)
            X_test_tuned["cosine_wave"] = np.cos(frequency * test_steps)

    # ---- Fit the tuned model -----------------------------------------------
    if selected_options:
        new_features = new_features + selected_options
    # De-duplicate while keeping first-seen order, so the fitted model and the
    # saved feature list come out in the same column order on every run.
    new_features = list(dict.fromkeys(new_features))

    if is_panel:
        inp_vars_str = " + ".join(new_features)
        md_str = target_col + " ~ " + inp_vars_str
        md_tuned = smf.mixedlm(
            md_str,
            data=X_train_tuned[[target_col] + new_features],
            groups=X_train_tuned[panel_col],
        )
        model_tuned = md_tuned.fit()
        train_pred = model_tuned.fittedvalues
        # Metrics of the original model and of the tuned model on train.
        metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(
            X_train[date_col],
            y_train,
            model.fittedvalues,
            model,
            target_column=sel_target_col,
            is_panel=True,
        )
        metrics_table_tuned, line, actual_vs_predicted_plot_tuned = (
            plot_actual_vs_predicted(
                X_train_tuned[date_col],
                X_train_tuned[target_col],
                train_pred,
                model_tuned,
                target_column=sel_target_col,
                is_panel=True,
            )
        )
    else:
        model_tuned = sm.OLS(y_train, X_train_tuned[new_features]).fit()
        train_pred = model_tuned.predict(X_train_tuned[new_features])
        metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(
            X_train[date_col],
            y_train,
            model.predict(X_train[features_set]),
            model,
            target_column=sel_target_col,
        )
        metrics_table_tuned, line, actual_vs_predicted_plot_tuned = (
            plot_actual_vs_predicted(
                X_train[date_col],
                y_train,
                train_pred,
                model_tuned,
                target_column=sel_target_col,
            )
        )

    # Rows 0-2 of the metrics table are read as MAPE, R2 and adjusted R2.
    # NOTE(review): the table layout comes from plot_actual_vs_predicted,
    # which is defined outside this file - confirm the row order there.
    mape = np.round(metrics_table.iloc[0, 1], 2)
    r2 = np.round(metrics_table.iloc[1, 1], 2)
    adjr2 = np.round(metrics_table.iloc[2, 1], 2)
    mape_tuned = np.round(metrics_table_tuned.iloc[0, 1], 2)
    r2_tuned = np.round(metrics_table_tuned.iloc[1, 1], 2)
    adjr2_tuned = np.round(metrics_table_tuned.iloc[2, 1], 2)

    # Each metric is shown with its change relative to the original model.
    parameters_ = st.columns(3)
    with parameters_[0]:
        st.metric("R2", r2_tuned, np.round(r2_tuned - r2, 2))
    with parameters_[1]:
        st.metric("Adjusted R2", adjr2_tuned, np.round(adjr2_tuned - adjr2, 2))
    with parameters_[2]:
        # "inverse": a lower MAPE is the improvement.
        st.metric("MAPE", mape_tuned, np.round(mape_tuned - mape, 2), "inverse")
    st.write(model_tuned.summary())

    # Attach date and target to the tuned frames before plotting and saving.
    X_train_tuned[date_col] = X_train[date_col]
    X_test_tuned[date_col] = X_test[date_col]
    X_train_tuned[target_col] = y_train
    X_test_tuned[target_col] = y_test

    st.header("2.2 Actual vs. Predicted Plot")
    metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(
        X_train_tuned[date_col],
        X_train_tuned[target_col],
        train_pred,
        model_tuned,
        target_column=sel_target_col,
        is_panel=is_panel,
    )
    st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)

    st.markdown("## 2.3 Residual Analysis")
    # The panel path plots residuals against the tuned frame, the OLS path
    # against the original training frame.
    residual_frame = X_train_tuned if is_panel else X_train
    columns = st.columns(2)
    with columns[0]:
        fig = plot_residual_predicted(y_train, train_pred, residual_frame)
        st.plotly_chart(fig)
    with columns[1]:
        st.empty()
        fig = qqplot(y_train, train_pred)
        st.plotly_chart(fig)
    with columns[0]:
        fig = residual_distribution(y_train, train_pred)
        st.pyplot(fig)

    # Sprint4 - keep the tuned model and its data in the session dict.
    st.session_state["Model_Tuned"][sel_model + "__" + target_col] = {
        "Model_object": model_tuned,
        "feature_set": new_features,
        "X_train_tuned": X_train_tuned,
        "X_test_tuned": X_test_tuned,
    }

    with st.expander("Results Summary Test data"):
        test_pred = model_tuned.predict(X_test_tuned[new_features])
        st.header("2.2 Actual vs. Predicted Plot")
        # The predictions come from the tuned model, so the tuned model is
        # the one passed along (as in every train-side call above).
        metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(
            X_test_tuned[date_col],
            y_test,
            test_pred,
            model_tuned,
            target_column=sel_target_col,
            is_panel=is_panel,
        )
        st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
        st.markdown("## 2.3 Residual Analysis")
        columns = st.columns(2)
        with columns[0]:
            fig = plot_residual_predicted(y_test, test_pred, X_test_tuned)
            st.plotly_chart(fig)
        with columns[1]:
            st.empty()
            fig = qqplot(y_test, test_pred)
            st.plotly_chart(fig)
        with columns[0]:
            fig = residual_distribution(y_test, test_pred)
            st.pyplot(fig)
# if st.session_state['build_tuned_model']==True:
# Save the tuned model for the pages that follow. The button is offered only
# once at least one tuned model exists in the session.
if st.session_state["Model_Tuned"]:
    if st.button("Use This model for Media Planning", use_container_width=True):
        tuned_key = sel_model + "__" + target_col
        if tuned_key not in st.session_state["Model_Tuned"]:
            # Saving now would drop the other tuned models of this target and
            # report success although the selected model was never tuned.
            st.error("Please build the tuned model for the selected model before saving")
            st.stop()

        # sprint8 - remove any other tuned model saved for this target col
        _remove = []
        for tuned_name in st.session_state["Model_Tuned"]:
            name_parts = tuned_name.split("__")
            # The slice comparison tolerates keys without a "__" separator.
            if name_parts[1:2] == [target_col] and name_parts[0] != sel_model:
                _remove.append(tuned_name)
        for tuned_name in _remove:
            del st.session_state["Model_Tuned"][tuned_name]

        st.session_state["is_tuned_model"][target_col] = True

        with open(
            os.path.join(st.session_state["project_path"], "tuned_model.pkl"),
            "wb",
        ) as f:
            pickle.dump(st.session_state["Model_Tuned"], f)  # Sprint4

        # Snapshot the session values stored with the project for this step.
        saved_state = {}
        for key in [
            "bin_dict",
            "used_response_metrics",
            "is_tuned_model",
            "media_data",
            "X_test_spends",
            "spends_data",
        ]:
            saved_state[key] = st.session_state[key]
        st.session_state["project_dct"]["model_tuning"][
            "session_state_saved"
        ] = saved_state

        project_dct_path = os.path.join(
            st.session_state["project_path"], "project_dct.pkl"
        )
        with open(project_dct_path, "wb") as f:
            pickle.dump(st.session_state["project_dct"], f)
        update_db("5_Model_Tuning.py")

        # Delete rcs_data_original.json and scenario_data_original.pkl from
        # the project folder when they exist.
        # NOTE(review): presumably these were derived from the previous model
        # and must be rebuilt - confirm with the pages that create them.
        for stale_name in ("rcs_data_original.json", "scenario_data_original.pkl"):
            stale_path = os.path.join(st.session_state["project_path"], stale_name)
            if os.path.exists(stale_path):
                os.remove(stale_path)

        st.success(sel_model + " for " + target_col + " Tuned saved!")