Spaces:
Sleeping
Sleeping
""" | |
MMO Build Sprint 3 | |
additions : contributions calculated using tuned Mixed LM model | |
pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model | |
MMO Build Sprint 4 | |
additions : response metrics selection | |
pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model | |
""" | |
import streamlit as st | |
import pandas as pd | |
from sklearn.preprocessing import MinMaxScaler | |
import pickle | |
import os | |
from utilities_with_panel import load_local_css, set_header | |
import yaml | |
from yaml import SafeLoader | |
import streamlit_authenticator as stauth | |
import sqlite3 | |
from utilities import update_db | |
st.set_page_config(layout="wide") | |
load_local_css("styles.css") | |
set_header() | |
for k, v in st.session_state.items(): | |
# print(k, v) | |
if k not in [ | |
"logout", | |
"login", | |
"config", | |
"build_tuned_model", | |
] and not k.startswith("FormSubmitter"): | |
st.session_state[k] = v | |
with open("config.yaml") as file: | |
config = yaml.load(file, Loader=SafeLoader) | |
st.session_state["config"] = config | |
authenticator = stauth.Authenticate( | |
config["credentials"], | |
config["cookie"]["name"], | |
config["cookie"]["key"], | |
config["cookie"]["expiry_days"], | |
config["preauthorized"], | |
) | |
st.session_state["authenticator"] = authenticator | |
name, authentication_status, username = authenticator.login("Login", "main") | |
auth_status = st.session_state.get("authentication_status") | |
if auth_status == True: | |
authenticator.logout("Logout", "main") | |
is_state_initiaized = st.session_state.get("initialized", False) | |
if "project_dct" not in st.session_state: | |
st.error("Please load a project from Home page") | |
st.stop() | |
conn = sqlite3.connect( | |
r"DB/User.db", check_same_thread=False | |
) # connection with sql db | |
c = conn.cursor() | |
if not os.path.exists( | |
os.path.join(st.session_state["project_path"], "tuned_model.pkl") | |
): | |
st.error("Please save a tuned model") | |
st.stop() | |
if ( | |
"session_state_saved" | |
in st.session_state["project_dct"]["model_tuning"].keys() | |
and st.session_state["project_dct"]["model_tuning"][ | |
"session_state_saved" | |
] | |
!= [] | |
): | |
for key in [ | |
"used_response_metrics", | |
"is_tuned_model", | |
"media_data", | |
"X_test_spends", | |
]: | |
st.session_state[key] = st.session_state["project_dct"][ | |
"model_tuning" | |
]["session_state_saved"][key] | |
elif ( | |
"session_state_saved" | |
in st.session_state["project_dct"]["model_build"].keys() | |
and st.session_state["project_dct"]["model_build"][ | |
"session_state_saved" | |
] | |
!= [] | |
): | |
for key in [ | |
"used_response_metrics", | |
"date", | |
"saved_model_names", | |
"media_data", | |
"X_test_spends", | |
]: | |
st.session_state[key] = st.session_state["project_dct"][ | |
"model_build" | |
]["session_state_saved"][key] | |
else: | |
st.error("Please tune a model first") | |
st.session_state["bin_dict"] = st.session_state["project_dct"][ | |
"model_build" | |
]["session_state_saved"]["bin_dict"] | |
st.session_state["media_data"].columns = [ | |
c.lower() for c in st.session_state["media_data"].columns | |
] | |
from utilities_with_panel import ( | |
overview_test_data_prep_panel, | |
overview_test_data_prep_nonpanel, | |
initialize_data, | |
create_channel_summary, | |
create_contribution_pie, | |
create_contribuion_stacked_plot, | |
create_channel_spends_sales_plot, | |
format_numbers, | |
channel_name_formating, | |
) | |
import plotly.graph_objects as go | |
import streamlit_authenticator as stauth | |
import yaml | |
from yaml import SafeLoader | |
import time | |
def get_random_effects(media_data, panel_col, mdf): | |
random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"]) | |
for i, market in enumerate(media_data[panel_col].unique()): | |
print(i, end="\r") | |
intercept = mdf.random_effects[market].values[0] | |
random_eff_df.loc[i, "random_effect"] = intercept | |
random_eff_df.loc[i, panel_col] = market | |
return random_eff_df | |
def process_train_and_test(train, test, features, panel_col, target_col): | |
X1 = train[features] | |
ss = MinMaxScaler() | |
X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns) | |
X1[panel_col] = train[panel_col] | |
X1[target_col] = train[target_col] | |
if test is not None: | |
X2 = test[features] | |
X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns) | |
X2[panel_col] = test[panel_col] | |
X2[target_col] = test[target_col] | |
return X1, X2 | |
return X1 | |
def mdf_predict(X_df, mdf, random_eff_df): | |
X = X_df.copy() | |
X = pd.merge( | |
X, | |
random_eff_df[[panel_col, "random_effect"]], | |
on=panel_col, | |
how="left", | |
) | |
X["pred_fixed_effect"] = mdf.predict(X) | |
X["pred"] = X["pred_fixed_effect"] + X["random_effect"] | |
X.to_csv("Test/merged_df_contri.csv", index=False) | |
X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True) | |
return X | |
# target='Revenue' | |
# is_panel=False | |
# is_panel = st.session_state['is_panel'] | |
panel_col = [ | |
col.lower() | |
.replace(".", "_") | |
.replace("@", "_") | |
.replace(" ", "_") | |
.replace("-", "") | |
.replace(":", "") | |
.replace("__", "_") | |
for col in st.session_state["bin_dict"]["Panel Level 1"] | |
][ | |
0 | |
] # set the panel column | |
is_panel = True if len(panel_col) > 0 else False | |
date_col = "date" | |
# Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default | |
if ( | |
"used_response_metrics" in st.session_state | |
and st.session_state["used_response_metrics"] != [] | |
): | |
sel_target_col = st.selectbox( | |
"Select the response metric", | |
st.session_state["used_response_metrics"], | |
) | |
target_col = ( | |
sel_target_col.lower() | |
.replace(" ", "_") | |
.replace("-", "") | |
.replace(":", "") | |
.replace("__", "_") | |
) | |
else: | |
sel_target_col = "Total Approved Accounts - Revenue" | |
target_col = "total_approved_accounts_revenue" | |
target = sel_target_col | |
# Sprint4 - Look through all saved tuned models, only show saved models of the sel resp metric (target_col) | |
# saved_models = st.session_state['saved_model_names'] | |
# Sprint4 - get the model obj of the selected model | |
# st.write(sel_model_dict) | |
# Sprint3 - Contribution | |
if is_panel: | |
# read tuned mixedLM model | |
# if st.session_state["tuned_model"] is not None : | |
if st.session_state["is_tuned_model"][target_col] == True: # Sprint4 | |
with open( | |
os.path.join( | |
st.session_state["project_path"], "tuned_model.pkl" | |
), | |
"rb", | |
) as file: | |
model_dict = pickle.load(file) | |
saved_models = list(model_dict.keys()) | |
# st.write(saved_models) | |
required_saved_models = [ | |
m.split("__")[0] | |
for m in saved_models | |
if m.split("__")[1] == target_col | |
] | |
sel_model = st.selectbox( | |
"Select the model to review", required_saved_models | |
) | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
model = sel_model_dict["Model_object"] | |
X_train = sel_model_dict["X_train_tuned"] | |
X_test = sel_model_dict["X_test_tuned"] | |
best_feature_set = sel_model_dict["feature_set"] | |
else: # if non tuned model to be used # Pending | |
with open( | |
os.path.join( | |
st.session_state["project_path"], "best_models.pkl" | |
), | |
"rb", | |
) as file: | |
model_dict = pickle.load(file) | |
# st.write(model_dict) | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [ | |
m.split("__")[0] | |
for m in saved_models | |
if m.split("__")[1] == target_col | |
] | |
sel_model = st.selectbox( | |
"Select the model to review", required_saved_models | |
) | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
# st.write(sel_model, sel_model_dict) | |
model = sel_model_dict["Model_object"] | |
X_train = sel_model_dict["X_train"] | |
X_test = sel_model_dict["X_test"] | |
best_feature_set = sel_model_dict["feature_set"] | |
# Calculate contributions | |
with open( | |
os.path.join(st.session_state["project_path"], "data_import.pkl"), | |
"rb", | |
) as f: | |
data = pickle.load(f) | |
# Accessing the loaded objects | |
st.session_state["orig_media_data"] = data["final_df"] | |
st.session_state["orig_media_data"].columns = [ | |
col.lower() | |
.replace(".", "_") | |
.replace("@", "_") | |
.replace(" ", "_") | |
.replace("-", "") | |
.replace(":", "") | |
.replace("__", "_") | |
for col in st.session_state["orig_media_data"].columns | |
] | |
media_data = st.session_state["media_data"] | |
# st.session_state['orig_media_data']=st.session_state["media_data"] | |
# st.write(media_data) | |
contri_df = pd.DataFrame() | |
y = [] | |
y_pred = [] | |
random_eff_df = get_random_effects(media_data, panel_col, model) | |
random_eff_df["fixed_effect"] = model.fe_params["Intercept"] | |
random_eff_df["panel_effect"] = ( | |
random_eff_df["random_effect"] + random_eff_df["fixed_effect"] | |
) | |
# random_eff_df.to_csv("Test/random_eff_df_contri.csv", index=False) | |
coef_df = pd.DataFrame(model.fe_params) | |
coef_df.reset_index(inplace=True) | |
coef_df.columns = ["feature", "coef"] | |
# coef_df.reset_index().to_csv("Test/coef_df_contri1.csv",index=False) | |
# print(model.fe_params) | |
x_train_contribution = X_train.copy() | |
x_test_contribution = X_test.copy() | |
# preprocessing not needed since X_train is already preprocessed | |
# X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col) | |
# x_train_contribution[best_feature_set] = X1[best_feature_set] | |
# x_test_contribution[best_feature_set] = X2[best_feature_set] | |
x_train_contribution = mdf_predict( | |
x_train_contribution, model, random_eff_df | |
) | |
x_test_contribution = mdf_predict( | |
x_test_contribution, model, random_eff_df | |
) | |
x_train_contribution = pd.merge( | |
x_train_contribution, | |
random_eff_df[[panel_col, "panel_effect"]], | |
on=panel_col, | |
how="left", | |
) | |
x_test_contribution = pd.merge( | |
x_test_contribution, | |
random_eff_df[[panel_col, "panel_effect"]], | |
on=panel_col, | |
how="left", | |
) | |
for i in range(len(coef_df))[1:]: | |
coef = coef_df.loc[i, "coef"] | |
col = coef_df.loc[i, "feature"] | |
x_train_contribution[str(col) + "_contr"] = ( | |
coef * x_train_contribution[col] | |
) | |
x_test_contribution[str(col) + "_contr"] = ( | |
coef * x_train_contribution[col] | |
) | |
x_train_contribution["sum_contributions"] = ( | |
x_train_contribution.filter(regex="contr").sum(axis=1) | |
) | |
x_train_contribution["sum_contributions"] = ( | |
x_train_contribution["sum_contributions"] | |
+ x_train_contribution["panel_effect"] | |
) | |
x_test_contribution["sum_contributions"] = x_test_contribution.filter( | |
regex="contr" | |
).sum(axis=1) | |
x_test_contribution["sum_contributions"] = ( | |
x_test_contribution["sum_contributions"] | |
+ x_test_contribution["panel_effect"] | |
) | |
# # test | |
x_train_contribution.to_csv( | |
"Test/x_train_contribution.csv", index=False | |
) | |
x_test_contribution.to_csv("Test/x_test_contribution.csv", index=False) | |
# | |
# st.session_state['orig_media_data'].to_csv("Test/transformed_data.csv",index=False) | |
# st.session_state['X_test_spends'].to_csv("Test/test_spends.csv",index=False) | |
# # st.write(st.session_state['orig_media_data'].columns) | |
# st.write(date_col,panel_col) | |
# st.write(x_test_contribution) | |
overview_test_data_prep_panel( | |
x_test_contribution, | |
st.session_state["orig_media_data"], | |
st.session_state["X_test_spends"], | |
date_col, | |
panel_col, | |
target_col, | |
) | |
else: # NON PANEL | |
if st.session_state["is_tuned_model"][target_col] == True: # Sprint4 | |
with open( | |
os.path.join( | |
st.session_state["project_path"], "tuned_model.pkl" | |
), | |
"rb", | |
) as file: | |
model_dict = pickle.load(file) | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [ | |
m.split("__")[0] | |
for m in saved_models | |
if m.split("__")[1] == target_col | |
] | |
sel_model = st.selectbox( | |
"Select the model to review", required_saved_models | |
) | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
model = sel_model_dict["Model_object"] | |
X_train = sel_model_dict["X_train_tuned"] | |
X_test = sel_model_dict["X_test_tuned"] | |
best_feature_set = sel_model_dict["feature_set"] | |
else: # Sprint4 | |
with open( | |
os.path.join( | |
st.session_state["project_path"], "best_models.pkl" | |
), | |
"rb", | |
) as file: | |
model_dict = pickle.load(file) | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [ | |
m.split("__")[0] | |
for m in saved_models | |
if m.split("__")[1] == target_col | |
] | |
sel_model = st.selectbox( | |
"Select the model to review", required_saved_models | |
) | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
model = sel_model_dict["Model_object"] | |
X_train = sel_model_dict["X_train"] | |
X_test = sel_model_dict["X_test"] | |
best_feature_set = sel_model_dict["feature_set"] | |
x_train_contribution = X_train.copy() | |
x_test_contribution = X_test.copy() | |
x_train_contribution["pred"] = model.predict( | |
x_train_contribution[best_feature_set] | |
) | |
x_test_contribution["pred"] = model.predict( | |
x_test_contribution[best_feature_set] | |
) | |
for num, i in enumerate(model.params.values): | |
col = best_feature_set[num] | |
x_train_contribution[col + "_contr"] = X_train[col] * i | |
x_test_contribution[col + "_contr"] = X_test[col] * i | |
x_test_contribution.to_csv( | |
"Test/x_test_contribution_non_panel.csv", index=False | |
) | |
overview_test_data_prep_nonpanel( | |
x_test_contribution, | |
st.session_state["orig_media_data"].copy(), | |
st.session_state["X_test_spends"].copy(), | |
date_col, | |
target_col, | |
) | |
# for k, v in st.session_sta | |
# te.items(): | |
# if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'): | |
# st.session_state[k] = v | |
# authenticator = st.session_state.get('authenticator') | |
# if authenticator is None: | |
# authenticator = load_authenticator() | |
# name, authentication_status, username = authenticator.login('Login', 'main') | |
# auth_status = st.session_state['authentication_status'] | |
# if auth_status: | |
# authenticator.logout('Logout', 'main') | |
# is_state_initiaized = st.session_state.get('initialized',False) | |
# if not is_state_initiaized: | |
initialize_data(target_col) | |
scenario = st.session_state["scenario"] | |
raw_df = st.session_state["raw_df"] | |
st.header("Overview of previous spends") | |
# st.write(scenario.actual_total_spends) | |
# st.write(scenario.actual_total_sales) | |
columns = st.columns((1, 1, 3)) | |
with columns[0]: | |
st.metric( | |
label="Spends", | |
value=format_numbers(float(scenario.actual_total_spends)), | |
) | |
###print(f"##################### {scenario.actual_total_sales} ##################") | |
with columns[1]: | |
st.metric( | |
label=target, | |
value=format_numbers( | |
float(scenario.actual_total_sales), include_indicator=False | |
), | |
) | |
actual_summary_df = create_channel_summary(scenario) | |
actual_summary_df["Channel"] = actual_summary_df["Channel"].apply( | |
channel_name_formating | |
) | |
columns = st.columns((2, 1)) | |
with columns[0]: | |
with st.expander("Channel wise overview"): | |
st.markdown( | |
actual_summary_df.style.set_table_styles( | |
[ | |
{ | |
"selector": "th", | |
"props": [("background-color", "#11B6BD")], | |
}, | |
{ | |
"selector": "tr:nth-child(even)", | |
"props": [("background-color", "#11B6BD")], | |
}, | |
] | |
).to_html(), | |
unsafe_allow_html=True, | |
) | |
st.markdown("<hr>", unsafe_allow_html=True) | |
############################## | |
st.plotly_chart( | |
create_contribution_pie(scenario), use_container_width=True | |
) | |
st.markdown("<hr>", unsafe_allow_html=True) | |
################################3 | |
st.plotly_chart( | |
create_contribuion_stacked_plot(scenario), use_container_width=True | |
) | |
st.markdown("<hr>", unsafe_allow_html=True) | |
####################################### | |
selected_channel_name = st.selectbox( | |
"Channel", | |
st.session_state["channels_list"] + ["non media"], | |
format_func=channel_name_formating, | |
) | |
selected_channel = scenario.channels.get(selected_channel_name, None) | |
st.plotly_chart( | |
create_channel_spends_sales_plot(selected_channel), | |
use_container_width=True, | |
) | |
st.markdown("<hr>", unsafe_allow_html=True) | |
if st.checkbox("Save this session", key="save"): | |
project_dct_path = os.path.join( | |
st.session_state["project_path"], "project_dct.pkl" | |
) | |
with open(project_dct_path, "wb") as f: | |
pickle.dump(st.session_state["project_dct"], f) | |
update_db("7_Current_Media_Performance.py") | |