Spaces:
Sleeping
Sleeping
| """ | |
| MMO Build Sprint 3 | |
| additions : contributions calculated using tuned Mixed LM model | |
| pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model | |
| MMO Build Sprint 4 | |
| additions : response metrics selection | |
| pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.preprocessing import MinMaxScaler | |
| import pickle | |
| import os | |
| from utilities_with_panel import load_local_css, set_header | |
| import yaml | |
| from yaml import SafeLoader | |
| import streamlit_authenticator as stauth | |
| import sqlite3 | |
| from utilities import update_db | |
| st.set_page_config(layout="wide") | |
| load_local_css("styles.css") | |
| set_header() | |
| for k, v in st.session_state.items(): | |
| # print(k, v) | |
| if k not in [ | |
| "logout", | |
| "login", | |
| "config", | |
| "build_tuned_model", | |
| ] and not k.startswith("FormSubmitter"): | |
| st.session_state[k] = v | |
| with open("config.yaml") as file: | |
| config = yaml.load(file, Loader=SafeLoader) | |
| st.session_state["config"] = config | |
| authenticator = stauth.Authenticate( | |
| config["credentials"], | |
| config["cookie"]["name"], | |
| config["cookie"]["key"], | |
| config["cookie"]["expiry_days"], | |
| config["preauthorized"], | |
| ) | |
| st.session_state["authenticator"] = authenticator | |
| name, authentication_status, username = authenticator.login("Login", "main") | |
| auth_status = st.session_state.get("authentication_status") | |
| if auth_status == True: | |
| authenticator.logout("Logout", "main") | |
| is_state_initiaized = st.session_state.get("initialized", False) | |
| if "project_dct" not in st.session_state: | |
| st.error("Please load a project from Home page") | |
| st.stop() | |
| conn = sqlite3.connect( | |
| r"DB/User.db", check_same_thread=False | |
| ) # connection with sql db | |
| c = conn.cursor() | |
| if not os.path.exists( | |
| os.path.join(st.session_state["project_path"], "tuned_model.pkl") | |
| ): | |
| st.error("Please save a tuned model") | |
| st.stop() | |
| if ( | |
| "session_state_saved" | |
| in st.session_state["project_dct"]["model_tuning"].keys() | |
| and st.session_state["project_dct"]["model_tuning"][ | |
| "session_state_saved" | |
| ] | |
| != [] | |
| ): | |
| for key in [ | |
| "used_response_metrics", | |
| "is_tuned_model", | |
| "media_data", | |
| "X_test_spends", | |
| ]: | |
| st.session_state[key] = st.session_state["project_dct"][ | |
| "model_tuning" | |
| ]["session_state_saved"][key] | |
| elif ( | |
| "session_state_saved" | |
| in st.session_state["project_dct"]["model_build"].keys() | |
| and st.session_state["project_dct"]["model_build"][ | |
| "session_state_saved" | |
| ] | |
| != [] | |
| ): | |
| for key in [ | |
| "used_response_metrics", | |
| "date", | |
| "saved_model_names", | |
| "media_data", | |
| "X_test_spends", | |
| ]: | |
| st.session_state[key] = st.session_state["project_dct"][ | |
| "model_build" | |
| ]["session_state_saved"][key] | |
| else: | |
| st.error("Please tune a model first") | |
| st.session_state["bin_dict"] = st.session_state["project_dct"][ | |
| "model_build" | |
| ]["session_state_saved"]["bin_dict"] | |
| st.session_state["media_data"].columns = [ | |
| c.lower() for c in st.session_state["media_data"].columns | |
| ] | |
| from utilities_with_panel import ( | |
| overview_test_data_prep_panel, | |
| overview_test_data_prep_nonpanel, | |
| initialize_data, | |
| create_channel_summary, | |
| create_contribution_pie, | |
| create_contribuion_stacked_plot, | |
| create_channel_spends_sales_plot, | |
| format_numbers, | |
| channel_name_formating, | |
| ) | |
| import plotly.graph_objects as go | |
| import streamlit_authenticator as stauth | |
| import yaml | |
| from yaml import SafeLoader | |
| import time | |
| def get_random_effects(media_data, panel_col, mdf): | |
| random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"]) | |
| for i, market in enumerate(media_data[panel_col].unique()): | |
| print(i, end="\r") | |
| intercept = mdf.random_effects[market].values[0] | |
| random_eff_df.loc[i, "random_effect"] = intercept | |
| random_eff_df.loc[i, panel_col] = market | |
| return random_eff_df | |
| def process_train_and_test(train, test, features, panel_col, target_col): | |
| X1 = train[features] | |
| ss = MinMaxScaler() | |
| X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns) | |
| X1[panel_col] = train[panel_col] | |
| X1[target_col] = train[target_col] | |
| if test is not None: | |
| X2 = test[features] | |
| X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns) | |
| X2[panel_col] = test[panel_col] | |
| X2[target_col] = test[target_col] | |
| return X1, X2 | |
| return X1 | |
| def mdf_predict(X_df, mdf, random_eff_df): | |
| X = X_df.copy() | |
| X = pd.merge( | |
| X, | |
| random_eff_df[[panel_col, "random_effect"]], | |
| on=panel_col, | |
| how="left", | |
| ) | |
| X["pred_fixed_effect"] = mdf.predict(X) | |
| X["pred"] = X["pred_fixed_effect"] + X["random_effect"] | |
| X.to_csv("Test/merged_df_contri.csv", index=False) | |
| X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True) | |
| return X | |
| # target='Revenue' | |
| # is_panel=False | |
| # is_panel = st.session_state['is_panel'] | |
| panel_col = [ | |
| col.lower() | |
| .replace(".", "_") | |
| .replace("@", "_") | |
| .replace(" ", "_") | |
| .replace("-", "") | |
| .replace(":", "") | |
| .replace("__", "_") | |
| for col in st.session_state["bin_dict"]["Panel Level 1"] | |
| ][ | |
| 0 | |
| ] # set the panel column | |
| is_panel = True if len(panel_col) > 0 else False | |
| date_col = "date" | |
| # Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default | |
| if ( | |
| "used_response_metrics" in st.session_state | |
| and st.session_state["used_response_metrics"] != [] | |
| ): | |
| sel_target_col = st.selectbox( | |
| "Select the response metric", | |
| st.session_state["used_response_metrics"], | |
| ) | |
| target_col = ( | |
| sel_target_col.lower() | |
| .replace(" ", "_") | |
| .replace("-", "") | |
| .replace(":", "") | |
| .replace("__", "_") | |
| ) | |
| else: | |
| sel_target_col = "Total Approved Accounts - Revenue" | |
| target_col = "total_approved_accounts_revenue" | |
| target = sel_target_col | |
| # Sprint4 - Look through all saved tuned models, only show saved models of the sel resp metric (target_col) | |
| # saved_models = st.session_state['saved_model_names'] | |
| # Sprint4 - get the model obj of the selected model | |
| # st.write(sel_model_dict) | |
| # Sprint3 - Contribution | |
| if is_panel: | |
| # read tuned mixedLM model | |
| # if st.session_state["tuned_model"] is not None : | |
| if st.session_state["is_tuned_model"][target_col] == True: # Sprint4 | |
| with open( | |
| os.path.join( | |
| st.session_state["project_path"], "tuned_model.pkl" | |
| ), | |
| "rb", | |
| ) as file: | |
| model_dict = pickle.load(file) | |
| saved_models = list(model_dict.keys()) | |
| # st.write(saved_models) | |
| required_saved_models = [ | |
| m.split("__")[0] | |
| for m in saved_models | |
| if m.split("__")[1] == target_col | |
| ] | |
| sel_model = st.selectbox( | |
| "Select the model to review", required_saved_models | |
| ) | |
| sel_model_dict = model_dict[sel_model + "__" + target_col] | |
| model = sel_model_dict["Model_object"] | |
| X_train = sel_model_dict["X_train_tuned"] | |
| X_test = sel_model_dict["X_test_tuned"] | |
| best_feature_set = sel_model_dict["feature_set"] | |
| else: # if non tuned model to be used # Pending | |
| with open( | |
| os.path.join( | |
| st.session_state["project_path"], "best_models.pkl" | |
| ), | |
| "rb", | |
| ) as file: | |
| model_dict = pickle.load(file) | |
| # st.write(model_dict) | |
| saved_models = list(model_dict.keys()) | |
| required_saved_models = [ | |
| m.split("__")[0] | |
| for m in saved_models | |
| if m.split("__")[1] == target_col | |
| ] | |
| sel_model = st.selectbox( | |
| "Select the model to review", required_saved_models | |
| ) | |
| sel_model_dict = model_dict[sel_model + "__" + target_col] | |
| # st.write(sel_model, sel_model_dict) | |
| model = sel_model_dict["Model_object"] | |
| X_train = sel_model_dict["X_train"] | |
| X_test = sel_model_dict["X_test"] | |
| best_feature_set = sel_model_dict["feature_set"] | |
| # Calculate contributions | |
| with open( | |
| os.path.join(st.session_state["project_path"], "data_import.pkl"), | |
| "rb", | |
| ) as f: | |
| data = pickle.load(f) | |
| # Accessing the loaded objects | |
| st.session_state["orig_media_data"] = data["final_df"] | |
| st.session_state["orig_media_data"].columns = [ | |
| col.lower() | |
| .replace(".", "_") | |
| .replace("@", "_") | |
| .replace(" ", "_") | |
| .replace("-", "") | |
| .replace(":", "") | |
| .replace("__", "_") | |
| for col in st.session_state["orig_media_data"].columns | |
| ] | |
| media_data = st.session_state["media_data"] | |
| # st.session_state['orig_media_data']=st.session_state["media_data"] | |
| # st.write(media_data) | |
| contri_df = pd.DataFrame() | |
| y = [] | |
| y_pred = [] | |
| random_eff_df = get_random_effects(media_data, panel_col, model) | |
| random_eff_df["fixed_effect"] = model.fe_params["Intercept"] | |
| random_eff_df["panel_effect"] = ( | |
| random_eff_df["random_effect"] + random_eff_df["fixed_effect"] | |
| ) | |
| # random_eff_df.to_csv("Test/random_eff_df_contri.csv", index=False) | |
| coef_df = pd.DataFrame(model.fe_params) | |
| coef_df.reset_index(inplace=True) | |
| coef_df.columns = ["feature", "coef"] | |
| # coef_df.reset_index().to_csv("Test/coef_df_contri1.csv",index=False) | |
| # print(model.fe_params) | |
| x_train_contribution = X_train.copy() | |
| x_test_contribution = X_test.copy() | |
| # preprocessing not needed since X_train is already preprocessed | |
| # X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col) | |
| # x_train_contribution[best_feature_set] = X1[best_feature_set] | |
| # x_test_contribution[best_feature_set] = X2[best_feature_set] | |
| x_train_contribution = mdf_predict( | |
| x_train_contribution, model, random_eff_df | |
| ) | |
| x_test_contribution = mdf_predict( | |
| x_test_contribution, model, random_eff_df | |
| ) | |
| x_train_contribution = pd.merge( | |
| x_train_contribution, | |
| random_eff_df[[panel_col, "panel_effect"]], | |
| on=panel_col, | |
| how="left", | |
| ) | |
| x_test_contribution = pd.merge( | |
| x_test_contribution, | |
| random_eff_df[[panel_col, "panel_effect"]], | |
| on=panel_col, | |
| how="left", | |
| ) | |
| for i in range(len(coef_df))[1:]: | |
| coef = coef_df.loc[i, "coef"] | |
| col = coef_df.loc[i, "feature"] | |
| x_train_contribution[str(col) + "_contr"] = ( | |
| coef * x_train_contribution[col] | |
| ) | |
| x_test_contribution[str(col) + "_contr"] = ( | |
| coef * x_train_contribution[col] | |
| ) | |
| x_train_contribution["sum_contributions"] = ( | |
| x_train_contribution.filter(regex="contr").sum(axis=1) | |
| ) | |
| x_train_contribution["sum_contributions"] = ( | |
| x_train_contribution["sum_contributions"] | |
| + x_train_contribution["panel_effect"] | |
| ) | |
| x_test_contribution["sum_contributions"] = x_test_contribution.filter( | |
| regex="contr" | |
| ).sum(axis=1) | |
| x_test_contribution["sum_contributions"] = ( | |
| x_test_contribution["sum_contributions"] | |
| + x_test_contribution["panel_effect"] | |
| ) | |
| # # test | |
| x_train_contribution.to_csv( | |
| "Test/x_train_contribution.csv", index=False | |
| ) | |
| x_test_contribution.to_csv("Test/x_test_contribution.csv", index=False) | |
| # | |
| # st.session_state['orig_media_data'].to_csv("Test/transformed_data.csv",index=False) | |
| # st.session_state['X_test_spends'].to_csv("Test/test_spends.csv",index=False) | |
| # # st.write(st.session_state['orig_media_data'].columns) | |
| # st.write(date_col,panel_col) | |
| # st.write(x_test_contribution) | |
| overview_test_data_prep_panel( | |
| x_test_contribution, | |
| st.session_state["orig_media_data"], | |
| st.session_state["X_test_spends"], | |
| date_col, | |
| panel_col, | |
| target_col, | |
| ) | |
| else: # NON PANEL | |
| if st.session_state["is_tuned_model"][target_col] == True: # Sprint4 | |
| with open( | |
| os.path.join( | |
| st.session_state["project_path"], "tuned_model.pkl" | |
| ), | |
| "rb", | |
| ) as file: | |
| model_dict = pickle.load(file) | |
| saved_models = list(model_dict.keys()) | |
| required_saved_models = [ | |
| m.split("__")[0] | |
| for m in saved_models | |
| if m.split("__")[1] == target_col | |
| ] | |
| sel_model = st.selectbox( | |
| "Select the model to review", required_saved_models | |
| ) | |
| sel_model_dict = model_dict[sel_model + "__" + target_col] | |
| model = sel_model_dict["Model_object"] | |
| X_train = sel_model_dict["X_train_tuned"] | |
| X_test = sel_model_dict["X_test_tuned"] | |
| best_feature_set = sel_model_dict["feature_set"] | |
| else: # Sprint4 | |
| with open( | |
| os.path.join( | |
| st.session_state["project_path"], "best_models.pkl" | |
| ), | |
| "rb", | |
| ) as file: | |
| model_dict = pickle.load(file) | |
| saved_models = list(model_dict.keys()) | |
| required_saved_models = [ | |
| m.split("__")[0] | |
| for m in saved_models | |
| if m.split("__")[1] == target_col | |
| ] | |
| sel_model = st.selectbox( | |
| "Select the model to review", required_saved_models | |
| ) | |
| sel_model_dict = model_dict[sel_model + "__" + target_col] | |
| model = sel_model_dict["Model_object"] | |
| X_train = sel_model_dict["X_train"] | |
| X_test = sel_model_dict["X_test"] | |
| best_feature_set = sel_model_dict["feature_set"] | |
| x_train_contribution = X_train.copy() | |
| x_test_contribution = X_test.copy() | |
| x_train_contribution["pred"] = model.predict( | |
| x_train_contribution[best_feature_set] | |
| ) | |
| x_test_contribution["pred"] = model.predict( | |
| x_test_contribution[best_feature_set] | |
| ) | |
| for num, i in enumerate(model.params.values): | |
| col = best_feature_set[num] | |
| x_train_contribution[col + "_contr"] = X_train[col] * i | |
| x_test_contribution[col + "_contr"] = X_test[col] * i | |
| x_test_contribution.to_csv( | |
| "Test/x_test_contribution_non_panel.csv", index=False | |
| ) | |
| overview_test_data_prep_nonpanel( | |
| x_test_contribution, | |
| st.session_state["orig_media_data"].copy(), | |
| st.session_state["X_test_spends"].copy(), | |
| date_col, | |
| target_col, | |
| ) | |
| # for k, v in st.session_sta | |
| # te.items(): | |
| # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'): | |
| # st.session_state[k] = v | |
| # authenticator = st.session_state.get('authenticator') | |
| # if authenticator is None: | |
| # authenticator = load_authenticator() | |
| # name, authentication_status, username = authenticator.login('Login', 'main') | |
| # auth_status = st.session_state['authentication_status'] | |
| # if auth_status: | |
| # authenticator.logout('Logout', 'main') | |
| # is_state_initiaized = st.session_state.get('initialized',False) | |
| # if not is_state_initiaized: | |
| initialize_data(target_col) | |
| scenario = st.session_state["scenario"] | |
| raw_df = st.session_state["raw_df"] | |
| st.header("Overview of previous spends") | |
| # st.write(scenario.actual_total_spends) | |
| # st.write(scenario.actual_total_sales) | |
| columns = st.columns((1, 1, 3)) | |
| with columns[0]: | |
| st.metric( | |
| label="Spends", | |
| value=format_numbers(float(scenario.actual_total_spends)), | |
| ) | |
| ###print(f"##################### {scenario.actual_total_sales} ##################") | |
| with columns[1]: | |
| st.metric( | |
| label=target, | |
| value=format_numbers( | |
| float(scenario.actual_total_sales), include_indicator=False | |
| ), | |
| ) | |
| actual_summary_df = create_channel_summary(scenario) | |
| actual_summary_df["Channel"] = actual_summary_df["Channel"].apply( | |
| channel_name_formating | |
| ) | |
| columns = st.columns((2, 1)) | |
| with columns[0]: | |
| with st.expander("Channel wise overview"): | |
| st.markdown( | |
| actual_summary_df.style.set_table_styles( | |
| [ | |
| { | |
| "selector": "th", | |
| "props": [("background-color", "#11B6BD")], | |
| }, | |
| { | |
| "selector": "tr:nth-child(even)", | |
| "props": [("background-color", "#11B6BD")], | |
| }, | |
| ] | |
| ).to_html(), | |
| unsafe_allow_html=True, | |
| ) | |
| st.markdown("<hr>", unsafe_allow_html=True) | |
| ############################## | |
| st.plotly_chart( | |
| create_contribution_pie(scenario), use_container_width=True | |
| ) | |
| st.markdown("<hr>", unsafe_allow_html=True) | |
| ################################3 | |
| st.plotly_chart( | |
| create_contribuion_stacked_plot(scenario), use_container_width=True | |
| ) | |
| st.markdown("<hr>", unsafe_allow_html=True) | |
| ####################################### | |
| selected_channel_name = st.selectbox( | |
| "Channel", | |
| st.session_state["channels_list"] + ["non media"], | |
| format_func=channel_name_formating, | |
| ) | |
| selected_channel = scenario.channels.get(selected_channel_name, None) | |
| st.plotly_chart( | |
| create_channel_spends_sales_plot(selected_channel), | |
| use_container_width=True, | |
| ) | |
| st.markdown("<hr>", unsafe_allow_html=True) | |
| if st.checkbox("Save this session", key="save"): | |
| project_dct_path = os.path.join( | |
| st.session_state["session_path"], "project_dct.pkl" | |
| ) | |
| with open(project_dct_path, "wb") as f: | |
| pickle.dump(st.session_state["project_dct"], f) | |
| update_db("7_Current_Media_Performance.py") | |