Spaces:
Sleeping
Sleeping
''' | |
MMO Build Sprint 3 | |
additions : contributions calculated using tuned Mixed LM model | |
pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model | |
MMO Build Sprint 4 | |
additions : response metrics selection | |
pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model | |
''' | |
import streamlit as st | |
import pandas as pd | |
from sklearn.preprocessing import MinMaxScaler | |
import pickle | |
from utilities_with_panel import (set_header, | |
overview_test_data_prep_panel, | |
overview_test_data_prep_nonpanel, | |
initialize_data, | |
load_local_css, | |
create_channel_summary, | |
create_contribution_pie, | |
create_contribuion_stacked_plot, | |
create_channel_spends_sales_plot, | |
format_numbers, | |
channel_name_formating) | |
import plotly.graph_objects as go | |
import streamlit_authenticator as stauth | |
import yaml | |
from yaml import SafeLoader | |
import time | |
st.set_page_config(layout='wide') | |
load_local_css('styles.css') | |
set_header() | |
def get_random_effects(media_data, panel_col, mdf): | |
random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"]) | |
for i, market in enumerate(media_data[panel_col].unique()): | |
print(i, end='\r') | |
intercept = mdf.random_effects[market].values[0] | |
random_eff_df.loc[i, 'random_effect'] = intercept | |
random_eff_df.loc[i, panel_col] = market | |
return random_eff_df | |
def process_train_and_test(train, test, features, panel_col, target_col): | |
X1 = train[features] | |
ss = MinMaxScaler() | |
X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns) | |
X1[panel_col] = train[panel_col] | |
X1[target_col] = train[target_col] | |
if test is not None: | |
X2 = test[features] | |
X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns) | |
X2[panel_col] = test[panel_col] | |
X2[target_col] = test[target_col] | |
return X1, X2 | |
return X1 | |
def mdf_predict(X_df, mdf, random_eff_df) : | |
X=X_df.copy() | |
X=pd.merge(X, random_eff_df[[panel_col,'random_effect']], on=panel_col, how='left') | |
X['pred_fixed_effect'] = mdf.predict(X) | |
X['pred'] = X['pred_fixed_effect'] + X['random_effect'] | |
X.to_csv('Test/merged_df_contri.csv',index=False) | |
X.drop(columns=['pred_fixed_effect', 'random_effect'], inplace=True) | |
return X | |
target='Revenue' | |
# is_panel=False | |
# is_panel = st.session_state['is_panel'] | |
panel_col = [col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['bin_dict']['Panel Level 1'] ] [0]# set the panel column | |
date_col = 'date' | |
#st.write(media_data) | |
is_panel = True if len(panel_col)>0 else False | |
# panel_col='markets' | |
date_col = 'date' | |
# Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default | |
if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics']!=[]: | |
sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics']) | |
target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") | |
else : | |
sel_target_col = 'Total Approved Accounts - Revenue' | |
target_col = 'total_approved_accounts_revenue' | |
# Sprint4 - Look through all saved tuned models, only show saved models of the sel resp metric (target_col) | |
# saved_models = st.session_state['saved_model_names'] | |
# Sprint4 - get the model obj of the selected model | |
# st.write(sel_model_dict) | |
# Sprint3 - Contribution | |
if is_panel: | |
# read tuned mixedLM model | |
# if st.session_state["tuned_model"] is not None : | |
if st.session_state["is_tuned_model"][target_col]==True: #Sprint4 | |
with open("tuned_model.pkl", 'rb') as file: | |
model_dict = pickle.load(file) | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col] | |
sel_model = st.selectbox("Select the model to review", required_saved_models) | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
# model=st.session_state["tuned_model"] | |
# X_train=st.session_state["X_train_tuned"] | |
# X_test=st.session_state["X_test_tuned"] | |
# best_feature_set=st.session_state["tuned_model_features"] | |
model=sel_model_dict["Model_object"] | |
X_train=sel_model_dict["X_train_tuned"] | |
X_test=sel_model_dict["X_test_tuned"] | |
best_feature_set=sel_model_dict["feature_set"] | |
# st.write("features", best_feature_set) | |
# st.write(X_test.columns) | |
else : # if non tuned model to be used # Pending | |
with open("best_models.pkl", 'rb') as file: | |
model_dict = pickle.load(file) | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col] | |
sel_model = st.selectbox("Select the model to review", required_saved_models) | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
model=st.session_state["base_model"] | |
X_train = st.session_state['X_train'] | |
X_test = st.session_state['X_test'] | |
# y_train = st.session_state['y_train'] | |
# y_test = st.session_state['y_test'] | |
best_feature_set = st.session_state['base_model_feature_set'] | |
# st.write(best_feature_set) | |
# st.write(X_test.columns) | |
# Calculate contributions | |
with open("data_import.pkl", "rb") as f: | |
data = pickle.load(f) | |
# Accessing the loaded objects | |
st.session_state['orig_media_data'] = data["final_df"] | |
st.session_state['orig_media_data'].columns=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['orig_media_data'].columns] | |
media_data = st.session_state["media_data"] | |
# st.session_state['orig_media_data']=st.session_state["media_data"] | |
#st.write(media_data) | |
contri_df = pd.DataFrame() | |
y = [] | |
y_pred = [] | |
random_eff_df = get_random_effects(media_data, panel_col, model) | |
random_eff_df['fixed_effect'] = model.fe_params['Intercept'] | |
random_eff_df['panel_effect'] = random_eff_df['random_effect'] + random_eff_df['fixed_effect'] | |
# random_eff_df.to_csv("Test/random_eff_df_contri.csv", index=False) | |
coef_df = pd.DataFrame(model.fe_params) | |
coef_df.columns = ['coef'] | |
# coef_df.reset_index().to_csv("Test/coef_df_contri1.csv",index=False) | |
# print(model.fe_params) | |
x_train_contribution = X_train.copy() | |
x_test_contribution = X_test.copy() | |
# preprocessing not needed since X_train is already preprocessed | |
# X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col) | |
# x_train_contribution[best_feature_set] = X1[best_feature_set] | |
# x_test_contribution[best_feature_set] = X2[best_feature_set] | |
x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df) | |
x_test_contribution = mdf_predict(x_test_contribution, model, random_eff_df) | |
x_train_contribution = pd.merge(x_train_contribution, random_eff_df[[panel_col, 'panel_effect']], on=panel_col, | |
how='left') | |
x_test_contribution = pd.merge(x_test_contribution, random_eff_df[[panel_col, 'panel_effect']], on=panel_col, | |
how='left') | |
inp_coef = coef_df['coef'][1:].tolist() # 0th index is intercept | |
for i in range(len(inp_coef)): | |
x_train_contribution[str(best_feature_set[i]) + "_contr"] = inp_coef[i] * x_train_contribution[best_feature_set[i]] | |
x_test_contribution[str(best_feature_set[i]) + "_contr"] = inp_coef[i] * x_test_contribution[best_feature_set[i]] | |
x_train_contribution['sum_contributions'] = x_train_contribution.filter(regex="contr").sum(axis=1) | |
x_train_contribution['sum_contributions'] = x_train_contribution['sum_contributions'] + x_train_contribution['panel_effect'] | |
x_test_contribution['sum_contributions'] = x_test_contribution.filter(regex="contr").sum(axis=1) | |
x_test_contribution['sum_contributions'] = x_test_contribution['sum_contributions'] + x_test_contribution['panel_effect'] | |
# # test | |
x_train_contribution.to_csv("Test/x_train_contribution.csv",index=False) | |
x_test_contribution.to_csv("Test/x_test_contribution.csv",index=False) | |
# | |
# st.session_state['orig_media_data'].to_csv("Test/transformed_data.csv",index=False) | |
# st.session_state['X_test_spends'].to_csv("Test/test_spends.csv",index=False) | |
# # st.write(st.session_state['orig_media_data'].columns) | |
st.write(date_col,panel_col) | |
# st.write(x_test_contribution) | |
overview_test_data_prep_panel(x_test_contribution, st.session_state['orig_media_data'], st.session_state['X_test_spends'], | |
date_col, panel_col, target_col) | |
else : # NON PANEL | |
if st.session_state["is_tuned_model"][target_col]==True: #Sprint4 | |
with open("tuned_model.pkl", 'rb') as file: | |
model_dict = pickle.load(file) | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col] | |
sel_model = st.selectbox("Select the model to review", required_saved_models) | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
model=sel_model_dict["Model_object"] | |
X_train=sel_model_dict["X_train_tuned"] | |
X_test=sel_model_dict["X_test_tuned"] | |
best_feature_set=sel_model_dict["feature_set"] | |
else : #Sprint4 | |
with open("best_models.pkl", 'rb') as file: | |
model_dict = pickle.load(file) | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col] | |
sel_model = st.selectbox("Select the model to review", required_saved_models) | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
model=sel_model_dict["Model_object"] | |
X_train=sel_model_dict["X_train"] | |
X_test=sel_model_dict["X_test"] | |
best_feature_set=sel_model_dict["feature_set"] | |
x_train_contribution = X_train.copy() | |
x_test_contribution = X_test.copy() | |
x_train_contribution['pred'] = model.predict(x_train_contribution[best_feature_set]) | |
x_test_contribution['pred'] = model.predict(x_test_contribution[best_feature_set]) | |
for num,i in enumerate(model.params.values): | |
col=best_feature_set[num] | |
x_train_contribution[col + "_contr"] = X_train[col] * i | |
x_test_contribution[col + "_contr"] = X_test[col] * i | |
x_test_contribution.to_csv("Test/x_test_contribution_non_panel.csv",index=False) | |
overview_test_data_prep_nonpanel(x_test_contribution, st.session_state['orig_media_data'].copy(), st.session_state['X_test_spends'].copy(), date_col, target_col) | |
# for k, v in st.session_sta | |
# te.items(): | |
# if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'): | |
# st.session_state[k] = v | |
# authenticator = st.session_state.get('authenticator') | |
# if authenticator is None: | |
# authenticator = load_authenticator() | |
# name, authentication_status, username = authenticator.login('Login', 'main') | |
# auth_status = st.session_state['authentication_status'] | |
# if auth_status: | |
# authenticator.logout('Logout', 'main') | |
# is_state_initiaized = st.session_state.get('initialized',False) | |
# if not is_state_initiaized: | |
initialize_data(target_col) | |
scenario = st.session_state['scenario'] | |
raw_df = st.session_state['raw_df'] | |
st.header('Overview of previous spends') | |
# st.write(scenario.actual_total_spends) | |
# st.write(scenario.actual_total_sales) | |
columns = st.columns((1,1,3)) | |
with columns[0]: | |
st.metric(label='Spends', value=format_numbers(float(scenario.actual_total_spends))) | |
###print(f"##################### {scenario.actual_total_sales} ##################") | |
with columns[1]: | |
st.metric(label=target, value=format_numbers(float(scenario.actual_total_sales),include_indicator=False)) | |
actual_summary_df = create_channel_summary(scenario) | |
actual_summary_df['Channel'] = actual_summary_df['Channel'].apply(channel_name_formating) | |
columns = st.columns((2,1)) | |
with columns[0]: | |
with st.expander('Channel wise overview'): | |
st.markdown(actual_summary_df.style.set_table_styles( | |
[{ | |
'selector': 'th', | |
'props': [('background-color', '#11B6BD')] | |
}, | |
{ | |
'selector' : 'tr:nth-child(even)', | |
'props' : [('background-color', '#11B6BD')] | |
}]).to_html(), unsafe_allow_html=True) | |
st.markdown("<hr>",unsafe_allow_html=True) | |
############################## | |
st.plotly_chart(create_contribution_pie(scenario),use_container_width=True) | |
st.markdown("<hr>",unsafe_allow_html=True) | |
################################3 | |
st.plotly_chart(create_contribuion_stacked_plot(scenario),use_container_width=True) | |
st.markdown("<hr>",unsafe_allow_html=True) | |
####################################### | |
selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['non media'], format_func=channel_name_formating) | |
selected_channel = scenario.channels.get(selected_channel_name,None) | |
st.plotly_chart(create_channel_spends_sales_plot(selected_channel), use_container_width=True) | |
st.markdown("<hr>",unsafe_allow_html=True) | |
# elif auth_status == False: | |
# st.error('Username/Password is incorrect') | |
# if auth_status != True: | |
# try: | |
# username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password') | |
# if username_forgot_pw: | |
# st.success('New password sent securely') | |
# # Random password to be transferred to user securely | |
# elif username_forgot_pw == False: | |
# st.error('Username not found') | |
# except Exception as e: | |
# st.error(e) | |