|
""" |
|
MMO Build Sprint 3 |
|
additions : contributions calculated using tuned Mixed LM model |
|
pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model |
|
|
|
MMO Build Sprint 4 |
|
additions : response metrics selection |
|
pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model |
|
""" |
|
|
|
import streamlit as st |
|
import pandas as pd |
|
from sklearn.preprocessing import MinMaxScaler |
|
import pickle |
|
import os |
|
from utilities_with_panel import load_local_css, set_header |
|
import yaml |
|
from yaml import SafeLoader |
|
import sqlite3 |
|
from utilities import set_header, load_local_css, update_db, project_selection |
|
|
|
st.set_page_config(layout="wide") |
|
load_local_css("styles.css") |
|
set_header() |
|
|
|
|
|
if "username" not in st.session_state: |
|
st.session_state["username"] = None |
|
|
|
if "project_name" not in st.session_state: |
|
st.session_state["project_name"] = None |
|
|
|
if "project_dct" not in st.session_state: |
|
project_selection() |
|
st.stop() |
|
|
|
if "bin_dict" not in st.session_state: |
|
st.warning("Build and tune a model to proceed") |
|
st.stop() |
|
|
|
if "username" in st.session_state and st.session_state["username"] is not None: |
|
|
|
st.session_state["bin_dict"]["Panel Level 1"] = st.session_state["bin_dict"].get( |
|
"Panel Level 1", [] |
|
) |
|
|
|
conn = sqlite3.connect( |
|
r"DB/User.db", check_same_thread=False |
|
) |
|
c = conn.cursor() |
|
|
|
if not os.path.exists( |
|
os.path.join(st.session_state["project_path"], "tuned_model.pkl") |
|
): |
|
st.error("Please save a tuned model") |
|
st.stop() |
|
|
|
if ( |
|
"session_state_saved" in st.session_state["project_dct"]["model_tuning"].keys() |
|
and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] != [] |
|
): |
|
for key in [ |
|
"used_response_metrics", |
|
"is_tuned_model", |
|
"media_data", |
|
"X_test_spends", |
|
"spends_data" |
|
]: |
|
st.session_state[key] = st.session_state["project_dct"]["model_tuning"][ |
|
"session_state_saved" |
|
][key] |
|
elif ( |
|
"session_state_saved" in st.session_state["project_dct"]["model_build"].keys() |
|
and st.session_state["project_dct"]["model_build"]["session_state_saved"] != [] |
|
): |
|
for key in [ |
|
"used_response_metrics", |
|
"date", |
|
"saved_model_names", |
|
"media_data", |
|
"X_test_spends", |
|
]: |
|
st.session_state[key] = st.session_state["project_dct"]["model_build"][ |
|
"session_state_saved" |
|
][key] |
|
else: |
|
st.error("Please tune a model first") |
|
st.session_state["bin_dict"] = st.session_state["project_dct"]["model_build"][ |
|
"session_state_saved" |
|
]["bin_dict"] |
|
st.session_state["media_data"].columns = [ |
|
c.lower() for c in st.session_state["media_data"].columns |
|
] |
|
|
|
from utilities_with_panel import ( |
|
overview_test_data_prep_panel, |
|
overview_test_data_prep_nonpanel, |
|
initialize_data, |
|
create_channel_summary, |
|
create_contribution_pie, |
|
create_contribuion_stacked_plot, |
|
create_channel_spends_sales_plot, |
|
format_numbers, |
|
channel_name_formating, |
|
) |
|
|
|
import plotly.graph_objects as go |
|
import yaml |
|
from yaml import SafeLoader |
|
import time |
|
|
|
def get_random_effects(media_data, panel_col, mdf): |
|
random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"]) |
|
for i, market in enumerate(media_data[panel_col].unique()): |
|
print(i, end="\r") |
|
intercept = mdf.random_effects[market].values[0] |
|
random_eff_df.loc[i, "random_effect"] = intercept |
|
random_eff_df.loc[i, panel_col] = market |
|
|
|
return random_eff_df |
|
|
|
def process_train_and_test(train, test, features, panel_col, target_col): |
|
X1 = train[features] |
|
|
|
ss = MinMaxScaler() |
|
X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns) |
|
|
|
X1[panel_col] = train[panel_col] |
|
X1[target_col] = train[target_col] |
|
|
|
if test is not None: |
|
X2 = test[features] |
|
X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns) |
|
X2[panel_col] = test[panel_col] |
|
X2[target_col] = test[target_col] |
|
return X1, X2 |
|
return X1 |
|
|
|
def mdf_predict(X_df, mdf, random_eff_df): |
|
X = X_df.copy() |
|
X = pd.merge( |
|
X, |
|
random_eff_df[[panel_col, "random_effect"]], |
|
on=panel_col, |
|
how="left", |
|
) |
|
X["pred_fixed_effect"] = mdf.predict(X) |
|
|
|
X["pred"] = X["pred_fixed_effect"] + X["random_effect"] |
|
|
|
X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True) |
|
|
|
return X |
|
|
|
with open( |
|
os.path.join(st.session_state["project_path"], "data_import.pkl"), |
|
"rb", |
|
) as f: |
|
data = pickle.load(f) |
|
|
|
|
|
st.session_state["orig_media_data"] = data["final_df"] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
is_panel = False |
|
|
|
if is_panel: |
|
panel_col = [ |
|
col.lower() |
|
.replace(".", "_") |
|
.replace("@", "_") |
|
.replace(" ", "_") |
|
.replace("-", "") |
|
.replace(":", "") |
|
.replace("__", "_") |
|
for col in st.session_state["bin_dict"]["Panel Level 1"] |
|
][0] |
|
|
|
date_col = "date" |
|
|
|
cols1 = st.columns([2, 1]) |
|
with cols1[0]: |
|
st.markdown(f"**Welcome {st.session_state['username']}**") |
|
with cols1[1]: |
|
st.markdown(f"**Current Project: {st.session_state['project_name']}**") |
|
|
|
st.title("Current Media Performance") |
|
|
|
|
|
|
|
|
|
sel_target_col = st.selectbox( |
|
"Select the response metric", |
|
st.session_state["used_response_metrics"], |
|
) |
|
sel_target_col_frmttd = sel_target_col.replace('_', ' ').replace('-', ' ') |
|
sel_target_col_frmttd = sel_target_col_frmttd.title() |
|
target_col = ( |
|
sel_target_col.lower() |
|
.replace(" ", "_") |
|
.replace("-", "") |
|
.replace(":", "") |
|
.replace("__", "_") |
|
) |
|
target = sel_target_col |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if is_panel: |
|
|
|
|
|
if st.session_state["is_tuned_model"][target_col] == True: |
|
with open( |
|
os.path.join(st.session_state["project_path"], "tuned_model.pkl"), |
|
"rb", |
|
) as file: |
|
model_dict = pickle.load(file) |
|
saved_models = list(model_dict.keys()) |
|
|
|
required_saved_models = [ |
|
m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col |
|
] |
|
sel_model = st.selectbox( |
|
"Select the model to review", required_saved_models |
|
) |
|
sel_model_dict = model_dict[sel_model + "__" + target_col] |
|
|
|
model = sel_model_dict["Model_object"] |
|
X_train = sel_model_dict["X_train_tuned"] |
|
X_test = sel_model_dict["X_test_tuned"] |
|
best_feature_set = sel_model_dict["feature_set"] |
|
|
|
else: |
|
with open( |
|
os.path.join(st.session_state["project_path"], "best_models.pkl"), |
|
"rb", |
|
) as file: |
|
model_dict = pickle.load(file) |
|
|
|
saved_models = list(model_dict.keys()) |
|
required_saved_models = [ |
|
m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col |
|
] |
|
sel_model = st.selectbox( |
|
"Select the model to review", required_saved_models |
|
) |
|
sel_model_dict = model_dict[sel_model + "__" + target_col] |
|
|
|
model = sel_model_dict["Model_object"] |
|
X_train = sel_model_dict["X_train"] |
|
X_test = sel_model_dict["X_test"] |
|
best_feature_set = sel_model_dict["feature_set"] |
|
|
|
|
|
|
|
st.session_state["orig_media_data"].columns = [ |
|
col.lower() |
|
.replace(".", "_") |
|
.replace("@", "_") |
|
.replace(" ", "_") |
|
.replace("-", "") |
|
.replace(":", "") |
|
.replace("__", "_") |
|
for col in st.session_state["orig_media_data"].columns |
|
] |
|
|
|
media_data = st.session_state["media_data"] |
|
|
|
|
|
|
|
|
|
|
|
contri_df = pd.DataFrame() |
|
|
|
y = [] |
|
y_pred = [] |
|
|
|
random_eff_df = get_random_effects(media_data, panel_col, model) |
|
random_eff_df["fixed_effect"] = model.fe_params["Intercept"] |
|
random_eff_df["panel_effect"] = ( |
|
random_eff_df["random_effect"] + random_eff_df["fixed_effect"] |
|
) |
|
|
|
|
|
coef_df = pd.DataFrame(model.fe_params) |
|
coef_df.reset_index(inplace=True) |
|
coef_df.columns = ["feature", "coef"] |
|
|
|
|
|
|
|
|
|
x_train_contribution = X_train.copy() |
|
x_test_contribution = X_test.copy() |
|
|
|
|
|
|
|
|
|
|
|
|
|
x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df) |
|
x_test_contribution = mdf_predict(x_test_contribution, model, random_eff_df) |
|
|
|
x_train_contribution = pd.merge( |
|
x_train_contribution, |
|
random_eff_df[[panel_col, "panel_effect"]], |
|
on=panel_col, |
|
how="left", |
|
) |
|
x_test_contribution = pd.merge( |
|
x_test_contribution, |
|
random_eff_df[[panel_col, "panel_effect"]], |
|
on=panel_col, |
|
how="left", |
|
) |
|
|
|
for i in range(len(coef_df))[1:]: |
|
coef = coef_df.loc[i, "coef"] |
|
col = coef_df.loc[i, "feature"] |
|
x_train_contribution[str(col) + "_contr"] = coef * x_train_contribution[col] |
|
x_test_contribution[str(col) + "_contr"] = coef * x_train_contribution[col] |
|
|
|
x_train_contribution["sum_contributions"] = x_train_contribution.filter( |
|
regex="contr" |
|
).sum(axis=1) |
|
x_train_contribution["sum_contributions"] = ( |
|
x_train_contribution["sum_contributions"] |
|
+ x_train_contribution["panel_effect"] |
|
) |
|
|
|
x_test_contribution["sum_contributions"] = x_test_contribution.filter( |
|
regex="contr" |
|
).sum(axis=1) |
|
x_test_contribution["sum_contributions"] = ( |
|
x_test_contribution["sum_contributions"] |
|
+ x_test_contribution["panel_effect"] |
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
overview_test_data_prep_panel( |
|
x_test_contribution, |
|
st.session_state["orig_media_data"], |
|
st.session_state["spends_data"], |
|
date_col, |
|
panel_col, |
|
target_col, |
|
) |
|
|
|
else: |
|
if st.session_state["is_tuned_model"][target_col] == True: |
|
with open( |
|
os.path.join(st.session_state["project_path"], "tuned_model.pkl"), |
|
"rb", |
|
) as file: |
|
model_dict = pickle.load(file) |
|
saved_models = list(model_dict.keys()) |
|
required_saved_models = [ |
|
m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col |
|
] |
|
sel_model = st.selectbox( |
|
"Select the model to review", required_saved_models |
|
) |
|
sel_model_dict = model_dict[sel_model + "__" + target_col] |
|
|
|
model = sel_model_dict["Model_object"] |
|
X_train = sel_model_dict["X_train_tuned"] |
|
X_test = sel_model_dict["X_test_tuned"] |
|
best_feature_set = sel_model_dict["feature_set"] |
|
|
|
x_train_contribution = X_train.copy() |
|
x_test_contribution = X_test.copy() |
|
|
|
x_train_contribution["pred"] = model.predict( |
|
x_train_contribution[best_feature_set] |
|
) |
|
x_test_contribution["pred"] = model.predict( |
|
x_test_contribution[best_feature_set] |
|
) |
|
|
|
coef_df = pd.DataFrame(model.params) |
|
coef_df.reset_index(inplace=True) |
|
coef_df.columns = ["feature", "coef"] |
|
|
|
|
|
for i in range(len(coef_df)): |
|
coef = coef_df.loc[i, "coef"] |
|
col = coef_df.loc[i, "feature"] |
|
if col != "const": |
|
x_train_contribution[str(col) + "_contr"] = ( |
|
coef * x_train_contribution[col] |
|
) |
|
x_test_contribution[str(col) + "_contr"] = ( |
|
coef * x_test_contribution[col] |
|
) |
|
else: |
|
x_train_contribution["const"] = coef |
|
x_test_contribution["const"] = coef |
|
|
|
tuning_cols = [c for c in x_train_contribution.filter(regex="contr").columns if |
|
c in ["Week_number_contr", "Trend_contr", "sine_wave_contr", "cosine_wave_contr"]] |
|
flag_cols = [c for c in x_train_contribution.filter(regex="contr").columns if "_flag" in c] |
|
|
|
|
|
all_exog_vars = st.session_state['bin_dict']['Exogenous'] |
|
all_exog_vars = [ |
|
var.lower().replace(".", "_").replace("@", "_").replace(" ", "_").replace("-", "").replace(":", "").replace( |
|
"__", "_") for var in all_exog_vars] |
|
exog_cols = [] |
|
if len(all_exog_vars) > 0: |
|
for col in x_train_contribution.filter(regex="contr").columns: |
|
if len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0: |
|
exog_cols.append(col) |
|
|
|
base_cols = ["const"] + flag_cols + tuning_cols + exog_cols |
|
|
|
x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(axis=1) |
|
x_train_contribution.drop(columns=base_cols, inplace=True) |
|
|
|
x_test_contribution["base_contr"] = x_test_contribution[base_cols].sum(axis=1) |
|
x_test_contribution.drop(columns=base_cols, inplace=True) |
|
x_test_contribution.to_csv("Test/test_contr.csv", index=False) |
|
|
|
overall_contributions = pd.concat([x_train_contribution, x_test_contribution]).reset_index(drop=True) |
|
overall_contributions.to_csv("Test/overall_contributions.csv", index=False) |
|
|
|
overview_test_data_prep_nonpanel( |
|
overall_contributions, |
|
st.session_state["orig_media_data"].copy(), |
|
st.session_state["spends_data"].copy(), |
|
date_col, |
|
target_col, |
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if "Panel Level 1" in st.session_state["bin_dict"].keys(): |
|
panel_col_1=st.session_state["bin_dict"]["Panel Level 1"] |
|
else: |
|
panel_col_1=None |
|
|
|
initialize_data( |
|
target_col, is_panel, panel_col_1 |
|
) |
|
scenario = st.session_state["scenario"] |
|
raw_df = st.session_state["raw_df"] |
|
st.header("Overview of previous spends") |
|
|
|
|
|
|
|
columns = st.columns((1, 1, 3)) |
|
|
|
with columns[0]: |
|
st.metric( |
|
label="Spends", |
|
value=format_numbers(float(scenario.actual_total_spends)), |
|
) |
|
|
|
with columns[1]: |
|
st.metric( |
|
label=sel_target_col_frmttd, |
|
value=format_numbers( |
|
float(scenario.actual_total_sales), include_indicator=False |
|
), |
|
) |
|
|
|
actual_summary_df = create_channel_summary(scenario) |
|
actual_summary_df["Channel"] = actual_summary_df["Channel"].apply( |
|
channel_name_formating |
|
) |
|
|
|
columns = st.columns((2, 1)) |
|
with columns[0]: |
|
with st.expander("Channel wise overview"): |
|
st.markdown( |
|
actual_summary_df.style.set_table_styles( |
|
[ |
|
{ |
|
"selector": "th", |
|
"props": [("background-color", "#11B6BD")], |
|
}, |
|
{ |
|
"selector": "tr:nth-child(even)", |
|
"props": [("background-color", "#11B6BD")], |
|
}, |
|
] |
|
).to_html(), |
|
unsafe_allow_html=True, |
|
) |
|
|
|
st.markdown("<hr>", unsafe_allow_html=True) |
|
|
|
|
|
st.plotly_chart(create_contribution_pie(scenario, sel_target_col_frmttd), use_container_width=True) |
|
st.markdown("<hr>", unsafe_allow_html=True) |
|
|
|
|
|
st.plotly_chart(create_contribuion_stacked_plot(scenario, sel_target_col_frmttd), use_container_width=True) |
|
st.markdown("<hr>", unsafe_allow_html=True) |
|
|
|
|
|
selected_channel_name = st.selectbox( |
|
"Channel", |
|
st.session_state["channels_list"] + ["non media"], |
|
format_func=channel_name_formating, |
|
) |
|
selected_channel = scenario.channels.get(selected_channel_name, None) |
|
|
|
st.plotly_chart( |
|
create_channel_spends_sales_plot(selected_channel, sel_target_col_frmttd), |
|
use_container_width=True, |
|
) |
|
|
|
st.markdown("<hr>", unsafe_allow_html=True) |
|
|
|
if st.checkbox("Save this session", key="save"): |
|
project_dct_path = os.path.join( |
|
st.session_state["project_path"], "project_dct.pkl" |
|
) |
|
with open(project_dct_path, "wb") as f: |
|
pickle.dump(st.session_state["project_dct"], f) |
|
update_db("7_Current_Media_Performance.py") |
|
|