Mastercard / dump /3_Model_Tuning.py
BlendMMM's picture
Upload 4 files
9d7bf1d verified
import streamlit as st
import pandas as pd
from Eda_functions import format_numbers
import pickle
from utilities import set_header,load_local_css
import statsmodels.api as sm
import re
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from statsmodels.stats.outliers_influence import variance_inflation_factor
# Set Streamlit's 'deprecation.showPyplotGlobalUse' config option to False.
# NOTE(review): presumably this silences the warning tied to st.pyplot() usage —
# confirm the option still exists in the deployed Streamlit version.
st.set_option('deprecation.showPyplotGlobalUse', False)
from Data_prep_functions import *
# Page-level Streamlit settings for the "Model Tuning" page.
page_settings = {
    "page_title": "Model Tuning",
    "page_icon": ":shark:",
    "layout": "wide",
    "initial_sidebar_state": 'collapsed',
}
st.set_page_config(**page_settings)

# Project helpers from `utilities`: load the local 'styles.css' file and
# render the common header.
load_local_css('styles.css')
set_header()
st.title('1. Model Tuning')

# Session entries this page reads unconditionally. Checking all of them up
# front turns a partially populated session into the friendly message below
# instead of a KeyError further down the script.
_REQUIRED_STATE_KEYS = ("X_train", "X_test", "y_train", "y_test", "media_data", "date")
_NO_MODELS_MESSAGE = (
    "Oops! It seems there are no saved models available. Please build and save a model from the previous page to proceed."
)

if any(key not in st.session_state for key in _REQUIRED_STATE_KEYS):
    st.error(_NO_MODELS_MESSAGE)
    st.stop()

X_train = st.session_state['X_train']
X_test = st.session_state['X_test']
y_train = st.session_state['y_train']
y_test = st.session_state['y_test']
df = st.session_state['media_data']

# Load the saved models. A missing file is reported the same way as missing
# session data rather than surfacing a raw FileNotFoundError.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# a 'best_models.pkl' that this application wrote itself.
try:
    with open("best_models.pkl", 'rb') as file:
        model_dict = pickle.load(file)
except FileNotFoundError:
    model_dict = None

# An absent or empty model collection leaves nothing to tune.
if not model_dict:
    st.error(_NO_MODELS_MESSAGE)
    st.stop()

# Default selection; it is replaced as soon as the model selectbox is rendered.
if 'selected_model' not in st.session_state:
    st.session_state['selected_model'] = 0
st.markdown('### 1.1 Event Flags')
st.markdown('Helps in quantifying the impact of specific occurrences of events')
with st.expander('Apply Event Flags'):
    # Pick one of the saved models and pull out its fitted object, training
    # design matrix and feature list.
    st.session_state["selected_model"] = st.selectbox('Select Model to apply flags', model_dict.keys())
    selected_entry = model_dict[st.session_state["selected_model"]]
    model = selected_entry['Model_object']
    X_train = selected_entry['X_train']
    features_set = selected_entry['feature_set']

    date = pd.to_datetime(st.session_state['date'])
    min_date = min(date)
    max_date = max(date)

    # Number of training rows; replaces the previously hard-coded 150 so the
    # date axis always matches the training data that is plotted against it.
    # Assumes the first len(X_train) dates belong to the training rows, as the
    # fixed slice did.
    n_train = len(X_train)

    # Flag window controls: start date, end date and annual repetition.
    date_cols = st.columns(3)
    with date_cols[0]:
        start_date = st.date_input('Select Start Date', min_date, min_value=min_date, max_value=max_date)
    with date_cols[1]:
        end_date = st.date_input('Select End Date', max_date, min_value=min_date, max_value=max_date)
    with date_cols[2]:
        repeat = st.selectbox('Repeat Annually', ['Yes', 'No'], index=1) == 'Yes'
    # X_train=sm.add_constant(X_train)

    if 'Flags' not in st.session_state:
        st.session_state['Flags'] = {}

    # Preview the selected window on the actual-vs-predicted chart. The second
    # return value is what gets stored as the flag.
    # NOTE(review): presumably a per-date indicator for the chosen window —
    # confirm against plot_actual_vs_predicted.
    met, line_values, fig_flag = plot_actual_vs_predicted(
        date[:n_train], y_train, model.predict(X_train), model,
        flag=(start_date, end_date), repeat_all_years=repeat,
    )
    st.plotly_chart(fig_flag, use_container_width=True)

    flag_name = st.text_input('Enter Flag Name')
    if st.button('Update flag'):
        # Reject blank names: they would be stored under an empty key and show
        # up as an unlabeled checkbox / unnamed regressor later on.
        flag_name = flag_name.strip()
        if flag_name:
            st.session_state['Flags'][flag_name] = line_values
            st.success(f'{flag_name} stored')
        else:
            st.warning('Please enter a flag name before updating.')

    # Lay the stored flags out as a grid of checkboxes, num_columns per row.
    options = list(st.session_state['Flags'].keys())
    num_columns = 4
    tick = st.checkbox('Select all')

    selected_options = []
    for row_start in range(0, len(options), num_columns):
        grid_cols = st.columns(num_columns)
        row_options = options[row_start:row_start + num_columns]
        for grid_col, option in zip(grid_cols, row_options):
            if grid_col.checkbox(option, value=tick):
                selected_options.append(option)
st.markdown('### 1.2 Select Parameters to Apply')

# Three side-by-side columns, each holding one optional parameter checkbox
# followed by a short description.
parameters = st.columns(3)
trend_col, week_col, wave_col = parameters

Trend = trend_col.checkbox("**Trend**")
trend_col.markdown('Helps account for long-term trends or seasonality that could influence advertising effectiveness')

week_number = week_col.checkbox('**Week_number**')
week_col.markdown('Assists in detecting and incorporating weekly patterns or seasonality')

sine_cosine = wave_col.checkbox('**Sine and Cosine Waves**')
wave_col.markdown('Helps in capturing cyclical patterns or seasonality in the data')
if st.button('Build model with Selected Parameters and Flags'):
    st.header('2.1 Results Summary')

    # Number of training rows; replaces the previously hard-coded 150.
    n_train = len(X_train)

    # Rescale the selected model's features with MinMaxScaler and add a
    # constant column via statsmodels.
    scaler = MinMaxScaler()
    X_train_tuned = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
    X_train_tuned = sm.add_constant(X_train_tuned)

    # Append every ticked event flag as an extra regressor.
    for flag in selected_options:
        X_train_tuned[flag] = st.session_state['Flags'][flag]

    if Trend:
        X_train_tuned['Trend'] = np.arange(1, n_train + 1, 1)

    date = pd.to_datetime(date.values)
    train_dates = date[:n_train]

    # Honour the Week_number checkbox; previously this column was always added
    # because its guarding `if` had been commented out.
    # NOTE(review): the column is named Week_number but holds the day of the
    # week of each date — confirm which one is intended.
    if week_number:
        X_train_tuned['Week_number'] = train_dates.day_of_week

    # NOTE(review): the 'Sine and Cosine Waves' checkbox (sine_cosine) is not
    # applied anywhere yet.

    model_tuned = sm.OLS(y_train, X_train_tuned).fit()

    # Predict once per model and reuse the result for every table and chart.
    base_pred = model.predict(X_train)
    tuned_pred = model_tuned.predict(X_train_tuned)

    metrics_table, _, actual_vs_predicted_plot = plot_actual_vs_predicted(
        train_dates, y_train, base_pred, model, target_column='Revenue')
    metrics_table_tuned, _, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(
        train_dates, y_train, tuned_pred, model_tuned, target_column='Revenue')

    # Rows 0-2 of column 1 are read as MAPE, R2 and adjusted R2, in that order.
    mape, r2, adjr2 = (np.round(metrics_table.iloc[i, 1], 2) for i in range(3))
    mape_tuned, r2_tuned, adjr2_tuned = (
        np.round(metrics_table_tuned.iloc[i, 1], 2) for i in range(3)
    )

    # Tuned metrics, each with its change relative to the untuned model.
    parameters_ = st.columns(3)
    with parameters_[0]:
        st.metric('R2', r2_tuned, np.round(r2_tuned - r2, 2))
    with parameters_[1]:
        st.metric('Adjusted R2', adjr2_tuned, np.round(adjr2_tuned - adjr2, 2))
    with parameters_[2]:
        # 'inverse' colouring: a lower MAPE is the improvement.
        st.metric('MAPE', mape_tuned, np.round(mape_tuned - mape, 2), delta_color='inverse')

    # Show the tuned model's fit. Previously the untuned model was re-plotted
    # here (against the full, unsliced date range) and the tuned chart was
    # computed but never displayed.
    st.header('2.2 Actual vs. Predicted Plot')
    st.plotly_chart(actual_vs_predicted_plot_tuned, use_container_width=True)

    # Residual diagnostics for the tuned model, for consistency with 2.1/2.2.
    st.markdown('## 2.3 Residual Analysis')
    columns = st.columns(2)
    with columns[0]:
        fig = plot_residual_predicted(y_train, tuned_pred, X_train_tuned)
        st.plotly_chart(fig)
    with columns[1]:
        st.empty()
        fig = qqplot(y_train, tuned_pred)
        st.plotly_chart(fig)
    with columns[0]:
        fig = residual_distribution(y_train, tuned_pred)
        st.pyplot(fig)
# if st.checkbox('Use this model to build response curves',key='123'):
# raw_data=df[features_set]
# columns_raw=[re.split(r"(_lag|_adst)",col)[0] for col in raw_data.columns]
# raw_data.columns=columns_raw
# columns_media=[col for col in columns_raw if Categorised_data[col]['BB']=='Media']
# raw_data=raw_data[columns_media]
# raw_data['Date']=list(df.index)
# spends_var=[col for col in df.columns if "spends" in col.lower() and 'adst' not in col.lower() and 'lag' not in col.lower()]
# spends_df=df[spends_var]
# spends_df['Week']=list(df.index)
# j=0
# X1=X.copy()
# col=X1.columns
# for i in model.params.values:
# X1[col[j]]=X1.iloc[:,j]*i
# j+=1
# contribution_df=X1
# contribution_df['Date']=list(df.index)
# excel_file='Overview_data.xlsx'
# with pd.ExcelWriter(excel_file,engine='xlsxwriter') as writer:
# raw_data.to_excel(writer,sheet_name='RAW DATA MMM',index=False)
# spends_df.to_excel(writer,sheet_name='SPEND INPUT',index=False)
# contribution_df.to_excel(writer,sheet_name='CONTRIBUTION MMM')