| """ | |
| MMO Build Sprint 3 | |
| date : | |
| changes : capability to tune MixedLM as well as simple LR in the same page | |
| """ | |
| import os | |
| import streamlit as st | |
| import pandas as pd | |
| from Eda_functions import format_numbers | |
| import pickle | |
| from utilities import set_header, load_local_css | |
| import statsmodels.api as sm | |
| import re | |
| from sklearn.preprocessing import MinMaxScaler | |
| import matplotlib.pyplot as plt | |
| from statsmodels.stats.outliers_influence import variance_inflation_factor | |
| import yaml | |
| from yaml import SafeLoader | |
| import streamlit_authenticator as stauth | |
| st.set_option("deprecation.showPyplotGlobalUse", False) | |
| import statsmodels.formula.api as smf | |
| from Data_prep_functions import * | |
| import sqlite3 | |
| from utilities import update_db | |
| # for i in ["model_tuned", "X_train_tuned", "X_test_tuned", "tuned_model_features", "tuned_model", "tuned_model_dict"] : | |
st.set_page_config(
    page_title="Model Tuning",
    page_icon=":shark:",
    layout="wide",
    initial_sidebar_state="collapsed",
)

load_local_css("styles.css")
set_header()
# Re-assign session-state entries so Streamlit keeps them alive across pages
# (widget/auth keys are skipped so their widgets can recreate them)
for k, v in st.session_state.items():
    if k not in [
        "logout",
        "login",
        "config",
        "build_tuned_model",
    ] and not k.startswith("FormSubmitter"):
        st.session_state[k] = v
| with open("config.yaml") as file: | |
| config = yaml.load(file, Loader=SafeLoader) | |
| st.session_state["config"] = config | |
| authenticator = stauth.Authenticate( | |
| config["credentials"], | |
| config["cookie"]["name"], | |
| config["cookie"]["key"], | |
| config["cookie"]["expiry_days"], | |
| config["preauthorized"], | |
| ) | |
| st.session_state["authenticator"] = authenticator | |
| name, authentication_status, username = authenticator.login("Login", "main") | |
| auth_status = st.session_state.get("authentication_status") | |
if auth_status == True:
    authenticator.logout("Logout", "main")
    is_state_initialized = st.session_state.get("initialized", False)

    if "project_dct" not in st.session_state:
        st.error("Please load a project from Home page")
        st.stop()

    if not os.path.exists(
        os.path.join(st.session_state["project_path"], "best_models.pkl")
    ):
        st.error("Please save a model before tuning")
        st.stop()

    conn = sqlite3.connect(r"DB/User.db", check_same_thread=False)  # connection with sql db
    c = conn.cursor()
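    # check_same_thread=False lets this sqlite3 connection survive Streamlit's
    # script reruns on different threads; sqlite3 itself is assumed to serialize
    # the writes performed later via update_db.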
    if not is_state_initialized:
        if "session_name" not in st.session_state:
            st.session_state["session_name"] = None

    if "session_state_saved" in st.session_state["project_dct"]["model_build"].keys():
        for key in [
            "Model",
            "date",
            "saved_model_names",
            "media_data",
            "X_test_spends",
        ]:
            if key not in st.session_state:
                st.session_state[key] = st.session_state["project_dct"][
                    "model_build"
                ]["session_state_saved"][key]

        st.session_state["bin_dict"] = st.session_state["project_dct"][
            "model_build"
        ]["session_state_saved"]["bin_dict"]

        if (
            "used_response_metrics" not in st.session_state
            or st.session_state["used_response_metrics"] == []
        ):
            st.session_state["used_response_metrics"] = st.session_state[
                "project_dct"
            ]["model_build"]["session_state_saved"]["used_response_metrics"]
    else:
        st.error("Please load a session with a built model")
        st.stop()
    # if 'sel_model' not in st.session_state["project_dct"]["model_tuning"].keys():
    #     st.session_state["project_dct"]["model_tuning"]['sel_model'] = {}
    for key in ["select_all_flags_check", "selected_flags", "sel_model"]:
        if key not in st.session_state["project_dct"]["model_tuning"].keys():
            st.session_state["project_dct"]["model_tuning"][key] = {}
    # Sprint3
    # is_panel = st.session_state['is_panel']
    # panel_col = 'markets'  # set the panel column
    date_col = "date"

    # Sanitize the Panel Level 1 column names, then use the first one (if any)
    # as the panel column. The original indexed [0] unconditionally, which would
    # raise on an empty list, and tested len() of the resulting string, which is
    # always truthy; checking the list itself fixes both.
    panel_cols = [
        col.lower()
        .replace(".", "_")
        .replace("@", "_")
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
        for col in st.session_state["bin_dict"]["Panel Level 1"]
    ]
    panel_col = panel_cols[0] if len(panel_cols) > 0 else None  # set the panel column
    is_panel = panel_col is not None
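    # e.g. (hypothetical) a Panel Level 1 entry "Panel.Level 1: DMA" would be
    # sanitized by the chain of replaces above to "panel_level_1_dma"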
    # flag indicating there is no tuned model yet
    # Sprint4 - model tuned dict
    if "Model_Tuned" not in st.session_state:
        st.session_state["Model_Tuned"] = {}
| st.title("1. Model Tuning") | |
| if "is_tuned_model" not in st.session_state: | |
| st.session_state["is_tuned_model"] = {} | |
    # Sprint4 - if used_response_metrics is not blank, select one of them,
    # else the target defaults to revenue
    if (
        "used_response_metrics" in st.session_state
        and st.session_state["used_response_metrics"] != []
    ):
        default_target = st.session_state["project_dct"]["model_tuning"].get(
            "sel_target_col"
        )
        if default_target is None:
            default_target = st.session_state["used_response_metrics"][0]

        sel_target_col = st.selectbox(
            "Select the response metric",
            st.session_state["used_response_metrics"],
            index=st.session_state["used_response_metrics"].index(default_target),
        )
        target_col = (
            sel_target_col.lower()
            .replace(" ", "_")
            .replace("-", "")
            .replace(":", "")
            .replace("__", "_")
        )
        st.session_state["project_dct"]["model_tuning"]["sel_target_col"] = sel_target_col
    else:
        sel_target_col = "Total Approved Accounts - Revenue"
        target_col = "total_approved_accounts_revenue"
    # Sprint4 - look through all saved models; only show saved models of the
    # selected response metric (target_col)
    # saved_models = st.session_state['saved_model_names']
    with open(
        os.path.join(st.session_state["project_path"], "best_models.pkl"), "rb"
    ) as file:
        model_dict = pickle.load(file)

    saved_models = model_dict.keys()
    required_saved_models = [
        m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col
    ]
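    # Saved models are keyed "<model_name>__<target_col>", e.g. (hypothetical)
    # "Model_1__total_approved_accounts_revenue": the prefix is what the user
    # picks below, the suffix filters models to the selected response metric.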
    if len(required_saved_models) > 0:
        default_model_idx = st.session_state["project_dct"]["model_tuning"][
            "sel_model"
        ].get(sel_target_col, required_saved_models[0])
        sel_model = st.selectbox(
            "Select the model to tune",
            required_saved_models,
            index=required_saved_models.index(default_model_idx),
        )
    else:
        sel_model = st.selectbox("Select the model to tune", required_saved_models)

    # persist the user's current selection (not the stale default) for this target
    st.session_state["project_dct"]["model_tuning"]["sel_model"][
        sel_target_col
    ] = sel_model

    sel_model_dict = model_dict[
        sel_model + "__" + target_col
    ]  # Sprint4 - get the model obj of the selected model
| X_train = sel_model_dict["X_train"] | |
| X_test = sel_model_dict["X_test"] | |
| y_train = sel_model_dict["y_train"] | |
| y_test = sel_model_dict["y_test"] | |
| df = st.session_state["media_data"] | |
| if "selected_model" not in st.session_state: | |
| st.session_state["selected_model"] = 0 | |
| st.markdown("### 1.1 Event Flags") | |
| st.markdown( | |
| "Helps in quantifying the impact of specific occurrences of events" | |
| ) | |
    flag_expander_default = st.session_state["project_dct"]["model_tuning"].get(
        "flag_expander", False
    )
| with st.expander("Apply Event Flags", flag_expander_default): | |
| st.session_state["project_dct"]["model_tuning"]["flag_expander"] = True | |
| model = sel_model_dict["Model_object"] | |
| date = st.session_state["date"] | |
| date = pd.to_datetime(date) | |
| X_train = sel_model_dict["X_train"] | |
| # features_set= model_dict[st.session_state["selected_model"]]['feature_set'] | |
| features_set = sel_model_dict["feature_set"] | |
| col = st.columns(3) | |
| min_date = min(date) | |
| max_date = max(date) | |
        start_date_default = st.session_state["project_dct"]["model_tuning"].get(
            "start_date_default"
        )
        if start_date_default is None:
            start_date_default = min_date

        end_date_default = st.session_state["project_dct"]["model_tuning"].get(
            "end_date_default"
        )
        if end_date_default is None:
            end_date_default = max_date
        with col[0]:
            start_date = st.date_input(
                "Select Start Date",
                start_date_default,
                min_value=min_date,
                max_value=max_date,
            )

        with col[1]:
            # keep the end-date default at or after the chosen start date
            end_date_default = (
                end_date_default if end_date_default >= start_date else start_date
            )
            end_date = st.date_input(
                "Select End Date",
                end_date_default,
                min_value=max(min_date, start_date),
                max_value=max_date,
            )

        with col[2]:
            repeat_default = st.session_state["project_dct"]["model_tuning"].get(
                "repeat_default"
            )
            if repeat_default is None:
                repeat_default = "No"
            repeat_default_idx = 0 if repeat_default.lower() == "yes" else 1
            repeat = st.selectbox(
                "Repeat Annually", ["Yes", "No"], index=repeat_default_idx
            )

        st.session_state["project_dct"]["model_tuning"]["start_date_default"] = start_date
        st.session_state["project_dct"]["model_tuning"]["end_date_default"] = end_date
        st.session_state["project_dct"]["model_tuning"]["repeat_default"] = repeat

        repeat = repeat == "Yes"
| if "Flags" not in st.session_state: | |
| st.session_state["Flags"] = {} | |
| if "flags" in st.session_state["project_dct"]["model_tuning"].keys(): | |
| st.session_state["Flags"] = st.session_state["project_dct"][ | |
| "model_tuning" | |
| ]["flags"] | |
| # print("**"*50) | |
| # print(y_train) | |
| # print("**"*50) | |
| # print(model.fittedvalues) | |
        if is_panel:  # Sprint3
            met, line_values, fig_flag = plot_actual_vs_predicted(
                X_train[date_col],
                y_train,
                model.fittedvalues,
                model,
                target_column=sel_target_col,
                flag=(start_date, end_date),
                repeat_all_years=repeat,
                is_panel=True,
            )
            st.plotly_chart(fig_flag, use_container_width=True)

            # create flag on test
            met, test_line_values, fig_flag = plot_actual_vs_predicted(
                X_test[date_col],
                y_test,
                sel_model_dict["pred_test"],
                model,
                target_column=sel_target_col,
                flag=(start_date, end_date),
                repeat_all_years=repeat,
                is_panel=True,
            )
        else:
            pred_train = model.predict(X_train[features_set])
            met, line_values, fig_flag = plot_actual_vs_predicted(
                X_train[date_col],
                y_train,
                pred_train,
                model,
                flag=(start_date, end_date),
                repeat_all_years=repeat,
                is_panel=False,
            )
            st.plotly_chart(fig_flag, use_container_width=True)

            pred_test = model.predict(X_test[features_set])
            met, test_line_values, fig_flag = plot_actual_vs_predicted(
                X_test[date_col],
                y_test,
                pred_test,
                model,
                flag=(start_date, end_date),
                repeat_all_years=repeat,
                is_panel=False,
            )
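        # line_values / test_line_values are assumed (from how they are consumed
        # below) to be the event-flag indicator series that plot_actual_vs_predicted
        # returns for the chosen date window, aligned to the train / test rows.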
| flag_name = "f1_flag" | |
| flag_name = st.text_input("Enter Flag Name") | |
| # Sprint4 - add selected target col to flag name | |
| if st.button("Update flag"): | |
| st.session_state["Flags"][flag_name + "__" + target_col] = {} | |
| st.session_state["Flags"][flag_name + "__" + target_col][ | |
| "train" | |
| ] = line_values | |
| st.session_state["Flags"][flag_name + "__" + target_col][ | |
| "test" | |
| ] = test_line_values | |
| st.success(f'{flag_name + "__" + target_col} stored') | |
| st.session_state["project_dct"]["model_tuning"]["flags"] = ( | |
| st.session_state["Flags"] | |
| ) | |
        # Sprint4 - only show flags created for the selected target col
        if st.session_state["Flags"] is None:
            st.session_state["Flags"] = {}
        target_model_flags = [
            f.split("__")[0]
            for f in st.session_state["Flags"].keys()
            if f.split("__")[1] == target_col
        ]
        options = list(target_model_flags)
        selected_options = []
        num_columns = 4
        num_rows = -(-len(options) // num_columns)  # ceiling division
        tick = False
        if st.checkbox(
            "Select all",
            value=st.session_state["project_dct"]["model_tuning"][
                "select_all_flags_check"
            ].get(sel_target_col, False),
        ):
            tick = True
            st.session_state["project_dct"]["model_tuning"][
                "select_all_flags_check"
            ][sel_target_col] = True
        else:
            st.session_state["project_dct"]["model_tuning"][
                "select_all_flags_check"
            ][sel_target_col] = False

        selection_defaults = st.session_state["project_dct"]["model_tuning"][
            "selected_flags"
        ].get(sel_target_col, [])
        # selected_options is rebuilt from the checkboxes below; pre-seeding it
        # with the saved defaults (as before) duplicated entries on every rerun
        for row in range(num_rows):
            cols = st.columns(num_columns)
            for col in cols:
                if options:
                    option = options.pop(0)
                    option_default = option in selection_defaults
                    selected = col.checkbox(option, value=(tick or option_default))
                    if selected:
                        selected_options.append(option)

        st.session_state["project_dct"]["model_tuning"]["selected_flags"][
            sel_target_col
        ] = selected_options
| st.markdown("### 1.2 Select Parameters to Apply") | |
| parameters = st.columns(3) | |
| with parameters[0]: | |
| Trend = st.checkbox( | |
| "**Trend**", | |
| value=st.session_state["project_dct"]["model_tuning"].get( | |
| "trend_check", False | |
| ), | |
| ) | |
| st.markdown( | |
| "Helps account for long-term trends or seasonality that could influence advertising effectiveness" | |
| ) | |
| with parameters[1]: | |
| week_number = st.checkbox( | |
| "**Week_number**", | |
| value=st.session_state["project_dct"]["model_tuning"].get( | |
| "week_num_check", False | |
| ), | |
| ) | |
| st.markdown( | |
| "Assists in detecting and incorporating weekly patterns or seasonality" | |
| ) | |
| with parameters[2]: | |
| sine_cosine = st.checkbox( | |
| "**Sine and Cosine Waves**", | |
| value=st.session_state["project_dct"]["model_tuning"].get( | |
| "sine_cosine_check", False | |
| ), | |
| ) | |
| st.markdown( | |
| "Helps in capturing cyclical patterns or seasonality in the data" | |
| ) | |
    # def get_tuned_model():
    #     st.session_state['build_tuned_model'] = True
    if st.button(
        "Build model with Selected Parameters and Flags",
        key="build_tuned_model",
    ):
        new_features = features_set
        st.header("2.1 Results Summary")

        # date = list(df.index)
        # df = df.reset_index(drop=True)
        # X_train = df[features_set]
        ss = MinMaxScaler()
        if is_panel:
            # .copy() avoids pandas SettingWithCopy issues when columns are added below
            X_train_tuned = X_train[features_set].copy()
            # X_train_tuned = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
            X_train_tuned[target_col] = X_train[target_col]
            X_train_tuned[date_col] = X_train[date_col]
            X_train_tuned[panel_col] = X_train[panel_col]

            X_test_tuned = X_test[features_set].copy()
            # X_test_tuned = pd.DataFrame(ss.transform(X), columns=X.columns)
            X_test_tuned[target_col] = X_test[target_col]
            X_test_tuned[date_col] = X_test[date_col]
            X_test_tuned[panel_col] = X_test[panel_col]
        else:
            X_train_tuned = X_train[features_set].copy()
            # X_train_tuned = pd.DataFrame(ss.fit_transform(X_train_tuned), columns=X_train_tuned.columns)
            X_test_tuned = X_test[features_set].copy()
            # X_test_tuned = pd.DataFrame(ss.transform(X_test_tuned), columns=X_test_tuned.columns)

        for flag in selected_options:
            # Sprint4 - added target_col in flag name
            X_train_tuned[flag] = st.session_state["Flags"][flag + "__" + target_col]["train"]
            X_test_tuned[flag] = st.session_state["Flags"][flag + "__" + target_col]["test"]
        if Trend:
            st.session_state["project_dct"]["model_tuning"]["trend_check"] = True

            # Sprint3 - group by panel and compute each panel's trend separately,
            # then add Trend to the new feature set
            if is_panel:
                newdata = pd.DataFrame()
                panel_wise_end_point_train = {}
                for panel, groupdf in X_train_tuned.groupby(panel_col):
                    groupdf.sort_values(date_col, inplace=True)
                    groupdf["Trend"] = np.arange(1, len(groupdf) + 1, 1)
                    newdata = pd.concat([newdata, groupdf])
                    panel_wise_end_point_train[panel] = len(groupdf)
                X_train_tuned = newdata.copy()

                test_newdata = pd.DataFrame()
                for panel, test_groupdf in X_test_tuned.groupby(panel_col):
                    test_groupdf.sort_values(date_col, inplace=True)
                    start = panel_wise_end_point_train[panel] + 1
                    # np.arange excludes the endpoint, so start + len(test_groupdf)
                    # yields exactly len(test_groupdf) values; no extra +1 is
                    # needed (resolves the Sprint4 question)
                    end = start + len(test_groupdf)
                    test_groupdf["Trend"] = np.arange(start, end, 1)
                    test_newdata = pd.concat([test_newdata, test_groupdf])
                X_test_tuned = test_newdata.copy()

                new_features = new_features + ["Trend"]
            else:
                X_train_tuned["Trend"] = np.arange(1, len(X_train_tuned) + 1, 1)
                X_test_tuned["Trend"] = np.arange(
                    len(X_train_tuned) + 1,
                    len(X_train_tuned) + len(X_test_tuned) + 1,
                    1,
                )
                new_features = new_features + ["Trend"]
        else:
            st.session_state["project_dct"]["model_tuning"]["trend_check"] = False
        if week_number:
            st.session_state["project_dct"]["model_tuning"]["week_num_check"] = True

            # Sprint3 - create a week-number feature from the date column and add
            # it to the new feature set. Note: dt.day_of_week actually yields the
            # weekday (0 = Monday .. 6 = Sunday); for weekly data every date falls
            # on the same weekday, so the feature carries no signal and is skipped.
            if is_panel:
                X_train_tuned[date_col] = pd.to_datetime(X_train_tuned[date_col])
                X_train_tuned["Week_number"] = X_train_tuned[date_col].dt.day_of_week
                if X_train_tuned["Week_number"].nunique() == 1:
                    st.write(
                        "All dates in the data fall on the same weekday, so Week number can't be used."
                    )
                else:
                    X_test_tuned[date_col] = pd.to_datetime(X_test_tuned[date_col])
                    X_test_tuned["Week_number"] = X_test_tuned[date_col].dt.day_of_week
                    new_features = new_features + ["Week_number"]
            else:
                date = pd.to_datetime(date.values)
                X_train_tuned["Week_number"] = pd.to_datetime(
                    X_train[date_col]
                ).dt.day_of_week
                X_test_tuned["Week_number"] = pd.to_datetime(
                    X_test[date_col]
                ).dt.day_of_week
                new_features = new_features + ["Week_number"]
        else:
            st.session_state["project_dct"]["model_tuning"]["week_num_check"] = False
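        # The sine/cosine pair encodes annual seasonality as one Fourier term:
        # with frequency w = 2*pi/365, step t maps to (sin(w*t), cos(w*t)), so a
        # linear model can fit a smooth yearly cycle of any phase. The test set
        # continues t from where its (panel's) train set ends, keeping the phase
        # unbroken. Minimal sketch (hypothetical 10-step horizon):
        #   t = np.arange(10)
        #   sine, cosine = np.sin(2 * np.pi / 365 * t), np.cos(2 * np.pi / 365 * t)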
        if sine_cosine:
            st.session_state["project_dct"]["model_tuning"]["sine_cosine_check"] = True

            # Sprint3 - create panel-wise sine/cosine waves in X_train_tuned and
            # add them to the new feature set
            if is_panel:
                new_features = new_features + ["sine_wave", "cosine_wave"]
                newdata = pd.DataFrame()
                newdata_test = pd.DataFrame()
                groups = X_train_tuned.groupby(panel_col)
                frequency = 2 * np.pi / 365  # adjust the frequency as needed

                train_panel_wise_end_point = {}
                for panel, groupdf in groups:
                    num_samples = len(groupdf)
                    train_panel_wise_end_point[panel] = num_samples
                    days_since_start = np.arange(num_samples)
                    groupdf["sine_wave"] = np.sin(frequency * days_since_start)
                    groupdf["cosine_wave"] = np.cos(frequency * days_since_start)
                    newdata = pd.concat([newdata, groupdf])
                X_train_tuned = newdata.copy()

                test_groups = X_test_tuned.groupby(panel_col)
                for panel, test_groupdf in test_groups:
                    num_samples = len(test_groupdf)
                    start = train_panel_wise_end_point[panel]
                    days_since_start = np.arange(start, start + num_samples, 1)
                    test_groupdf["sine_wave"] = np.sin(frequency * days_since_start)
                    test_groupdf["cosine_wave"] = np.cos(frequency * days_since_start)
                    newdata_test = pd.concat([newdata_test, test_groupdf])
                X_test_tuned = newdata_test.copy()
            else:
                new_features = new_features + ["sine_wave", "cosine_wave"]
                num_samples = len(X_train_tuned)
                frequency = 2 * np.pi / 365  # adjust the frequency as needed
                days_since_start = np.arange(num_samples)
                # assign directly (rather than pd.concat on axis=1, as before) so
                # values align positionally even when the frame's index is not a
                # clean RangeIndex
                X_train_tuned["sine_wave"] = np.sin(frequency * days_since_start)
                X_train_tuned["cosine_wave"] = np.cos(frequency * days_since_start)

                test_num_samples = len(X_test_tuned)
                start = num_samples
                days_since_start = np.arange(start, start + test_num_samples, 1)
                X_test_tuned["sine_wave"] = np.sin(frequency * days_since_start)
                X_test_tuned["cosine_wave"] = np.cos(frequency * days_since_start)
        else:
            st.session_state["project_dct"]["model_tuning"]["sine_cosine_check"] = False
        # model
        if selected_options:
            new_features = new_features + selected_options
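        # For panel data, smf.mixedlm fits a random-intercept model: the formula
        # "target ~ f1 + f2 + ..." defines the fixed effects and groups= adds a
        # random intercept per panel, i.e. y_ij = x_ij'b + u_j + e_ij. A minimal
        # standalone sketch with hypothetical column names:
        #   smf.mixedlm("sales ~ tv + radio", data=df, groups=df["dma"]).fit()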
        if is_panel:
            # de-duplicate before building the formula so no regressor enters twice
            new_features = list(set(new_features))
            inp_vars_str = " + ".join(new_features)
            md_str = target_col + " ~ " + inp_vars_str
            md_tuned = smf.mixedlm(
                md_str,
                data=X_train_tuned[[target_col] + new_features],
                groups=X_train_tuned[panel_col],
            )
            model_tuned = md_tuned.fit()

            # plot actual vs. predicted for the original model and the tuned model
            metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(
                X_train[date_col],
                y_train,
                model.fittedvalues,
                model,
                target_column=sel_target_col,
                is_panel=True,
            )
            metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(
                X_train_tuned[date_col],
                X_train_tuned[target_col],
                model_tuned.fittedvalues,
                model_tuned,
                target_column=sel_target_col,
                is_panel=True,
            )
        else:
            new_features = list(set(new_features))
            # note: sm.OLS adds no intercept unless a constant column is included
            model_tuned = sm.OLS(y_train, X_train_tuned[new_features]).fit()

            # slice dates to the training length (a hard-coded 130 here in earlier
            # sprints assumed a fixed train size)
            metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(
                date[: len(y_train)],
                y_train,
                model.predict(X_train[features_set]),
                model,
                target_column=sel_target_col,
            )
            # predict on the named feature columns: statsmodels aligns by position,
            # so passing the whole frame could silently scramble regressors
            metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(
                date[: len(y_train)],
                y_train,
                model_tuned.predict(X_train_tuned[new_features]),
                model_tuned,
                target_column=sel_target_col,
            )
        mape = np.round(metrics_table.iloc[0, 1], 2)
        r2 = np.round(metrics_table.iloc[1, 1], 2)
        adjr2 = np.round(metrics_table.iloc[2, 1], 2)

        mape_tuned = np.round(metrics_table_tuned.iloc[0, 1], 2)
        r2_tuned = np.round(metrics_table_tuned.iloc[1, 1], 2)
        adjr2_tuned = np.round(metrics_table_tuned.iloc[2, 1], 2)

        parameters_ = st.columns(3)
        with parameters_[0]:
            st.metric("R2", r2_tuned, np.round(r2_tuned - r2, 2))
        with parameters_[1]:
            st.metric("Adjusted R2", adjr2_tuned, np.round(adjr2_tuned - adjr2, 2))
        with parameters_[2]:
            # delta_color="inverse": a lower MAPE is shown as an improvement
            st.metric("MAPE", mape_tuned, np.round(mape_tuned - mape, 2), "inverse")

        st.write(model_tuned.summary())

        X_train_tuned[date_col] = X_train[date_col]
        X_test_tuned[date_col] = X_test[date_col]
        X_train_tuned[target_col] = y_train
        X_test_tuned[target_col] = y_test
| st.header("2.2 Actual vs. Predicted Plot") | |
| # if is_panel: | |
| # metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date, y_train, model.predict(X_train), | |
| # model, target_column='Revenue',is_panel=True) | |
| # else: | |
| # metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(date, y_train, model.predict(X_train), model,target_column='Revenue') | |
| if is_panel: | |
| metrics_table, line, actual_vs_predicted_plot = ( | |
| plot_actual_vs_predicted( | |
| X_train_tuned[date_col], | |
| X_train_tuned[target_col], | |
| model_tuned.fittedvalues, | |
| model_tuned, | |
| target_column=sel_target_col, | |
| is_panel=True, | |
| ) | |
| ) | |
| else: | |
| metrics_table, line, actual_vs_predicted_plot = ( | |
| plot_actual_vs_predicted( | |
| X_train_tuned[date_col], | |
| X_train_tuned[target_col], | |
| model_tuned.predict(X_train_tuned[new_features]), | |
| model_tuned, | |
| target_column=sel_target_col, | |
| is_panel=False, | |
| ) | |
| ) | |
| # plot_actual_vs_predicted(X_train[date_col], y_train, | |
| # model.fittedvalues, model, | |
| # target_column='Revenue', | |
| # is_panel=is_panel) | |
| st.plotly_chart(actual_vs_predicted_plot, use_container_width=True) | |
| st.markdown("## 2.3 Residual Analysis") | |
| if is_panel: | |
| columns = st.columns(2) | |
| with columns[0]: | |
| fig = plot_residual_predicted( | |
| y_train, model_tuned.fittedvalues, X_train_tuned | |
| ) | |
| st.plotly_chart(fig) | |
| with columns[1]: | |
| st.empty() | |
| fig = qqplot(y_train, model_tuned.fittedvalues) | |
| st.plotly_chart(fig) | |
| with columns[0]: | |
| fig = residual_distribution(y_train, model_tuned.fittedvalues) | |
| st.pyplot(fig) | |
| else: | |
| columns = st.columns(2) | |
| with columns[0]: | |
| fig = plot_residual_predicted( | |
| y_train, | |
| model_tuned.predict(X_train_tuned[new_features]), | |
| X_train, | |
| ) | |
| st.plotly_chart(fig) | |
| with columns[1]: | |
| st.empty() | |
| fig = qqplot( | |
| y_train, model_tuned.predict(X_train_tuned[new_features]) | |
| ) | |
| st.plotly_chart(fig) | |
| with columns[0]: | |
| fig = residual_distribution( | |
| y_train, model_tuned.predict(X_train_tuned[new_features]) | |
| ) | |
| st.pyplot(fig) | |
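        # The three plots above are assumed to cover the usual regression
        # diagnostics: residual-vs-predicted for heteroscedasticity, the QQ plot
        # for normality of residuals, and the histogram for residual skew.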
        # st.session_state['is_tuned_model'][target_col] = True

        # Sprint4 - save tuned model in a dict
        st.session_state["Model_Tuned"][sel_model + "__" + target_col] = {
            "Model_object": model_tuned,
            "feature_set": new_features,
            "X_train_tuned": X_train_tuned,
            "X_test_tuned": X_test_tuned,
        }
    # Pending
    # if st.session_state['build_tuned_model'] == True:
    if st.session_state["Model_Tuned"] is not None:
        if st.checkbox("Use this model to build response curves", key="save_model"):
            # save_model = st.button('Use this model to build response curves', key='saved_tuned_model')
            # if save_model:
            st.session_state["is_tuned_model"][target_col] = True

            with open(
                os.path.join(st.session_state["project_path"], "tuned_model.pkl"),
                "wb",
            ) as f:
                # pickle.dump(st.session_state['tuned_model'], f)
                pickle.dump(st.session_state["Model_Tuned"], f)  # Sprint4

            st.session_state["project_dct"]["model_tuning"]["session_state_saved"] = {}
            for key in [
                "bin_dict",
                "used_response_metrics",
                "is_tuned_model",
                "media_data",
                "X_test_spends",
            ]:
                st.session_state["project_dct"]["model_tuning"][
                    "session_state_saved"
                ][key] = st.session_state[key]

            project_dct_path = os.path.join(
                st.session_state["project_path"], "project_dct.pkl"
            )
            with open(project_dct_path, "wb") as f:
                pickle.dump(st.session_state["project_dct"], f)

            update_db("5_Model_Tuning.py")
            st.success(sel_model + "__" + target_col + " tuned model saved!")