import streamlit as st
import pandas as pd
import numpy as np
import pickle
import re
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor
from Eda_functions import format_numbers
from utilities import set_header, load_local_css
from Data_prep_functions import *

st.set_option('deprecation.showPyplotGlobalUse', False)
st.set_page_config(
    page_title="Model Tuning",
    page_icon=":shark:",
    layout="wide",
    initial_sidebar_state='collapsed'
)

load_local_css('styles.css')
set_header()
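# Page flow: pick a saved model, optionally add event flags, a trend term and
# weekly/cyclical seasonality, then refit with statsmodels OLS and compare the
# tuned fit against the original model.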
st.title('1. Model Tuning')

if "X_train" not in st.session_state:
    st.error("Oops! It seems there are no saved models available. Please build and save a model from the previous page to proceed.")
    st.stop()

X_train = st.session_state['X_train']
X_test = st.session_state['X_test']
y_train = st.session_state['y_train']
y_test = st.session_state['y_test']
df = st.session_state['media_data']

with open("best_models.pkl", 'rb') as file:
    model_dict = pickle.load(file)
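# best_models.pkl is expected to map each saved model's name to a dict holding
# its fitted 'Model_object', the 'X_train' frame it was fit on and its
# 'feature_set' (see the lookups below).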
if 'selected_model' not in st.session_state:
    st.session_state['selected_model'] = 0
st.markdown('### 1.1 Event Flags')
st.markdown('Helps in quantifying the impact of specific occurrences of events')

with st.expander('Apply Event Flags'):
    st.session_state["selected_model"] = st.selectbox('Select Model to apply flags', model_dict.keys())
    model = model_dict[st.session_state["selected_model"]]['Model_object']
    date = st.session_state['date']
    date = pd.to_datetime(date)
    X_train = model_dict[st.session_state["selected_model"]]['X_train']
    features_set = model_dict[st.session_state["selected_model"]]['feature_set']

    col = st.columns(3)
    min_date = min(date)
    max_date = max(date)
    with col[0]:
        start_date = st.date_input('Select Start Date', min_date, min_value=min_date, max_value=max_date)
    with col[1]:
        end_date = st.date_input('Select End Date', max_date, min_value=min_date, max_value=max_date)
    with col[2]:
        repeat = st.selectbox('Repeat Annually', ['Yes', 'No'], index=1)
    repeat = (repeat == 'Yes')

    # X_train=sm.add_constant(X_train)
    if 'Flags' not in st.session_state:
        st.session_state['Flags'] = {}

    met, line_values, fig_flag = plot_actual_vs_predicted(
        date[:150], y_train, model.predict(X_train), model,
        flag=(start_date, end_date), repeat_all_years=repeat)
    st.plotly_chart(fig_flag, use_container_width=True)
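    # line_values is assumed to be a 0/1 indicator series (one entry per training
    # date) marking the selected event window, returned by plot_actual_vs_predicted
    # (presumably from Data_prep_functions); it is what gets stored below and later
    # added as a flag regressor when the tuned model is built.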
    flag_name = st.text_input('Enter Flag Name', value='f1')
    if st.button('Update flag'):
        st.session_state['Flags'][flag_name] = line_values
        st.success(f'{flag_name} stored')
    options = list(st.session_state['Flags'].keys())
    selected_options = []
    num_columns = 4
    num_rows = -(-len(options) // num_columns)  # ceiling division

    tick = st.checkbox('Select all')
    for row in range(num_rows):
        cols = st.columns(num_columns)
        for col in cols:
            if options:
                option = options.pop(0)
                selected = col.checkbox(option, value=tick)
                if selected:
                    selected_options.append(option)
st.markdown('### 1.2 Select Parameters to Apply')
parameters = st.columns(3)
with parameters[0]:
    Trend = st.checkbox("**Trend**")
    st.markdown('Helps account for long-term trends or seasonality that could influence advertising effectiveness')
with parameters[1]:
    week_number = st.checkbox('**Week_number**')
    st.markdown('Assists in detecting and incorporating weekly patterns or seasonality')
with parameters[2]:
    sine_cosine = st.checkbox('**Sine and Cosine Waves**')
    st.markdown('Helps in capturing cyclical patterns or seasonality in the data')
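# Each toggle maps to extra regressors in the tuned design matrix built below:
# Trend adds a 1..n counter, Week_number adds the day-of-week of each
# observation, and Sine and Cosine Waves add a pair of seasonal terms (see the
# hedged sketch in the build step).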
if st.button('Build model with Selected Parameters and Flags'):
    st.header('2.1 Results Summary')
    # date=list(df.index)
    # df = df.reset_index(drop=True)
    # st.write(df.head(2))
    # X_train=df[features_set]
    ss = MinMaxScaler()
    X_train_tuned = pd.DataFrame(ss.fit_transform(X_train), columns=X_train.columns)
    X_train_tuned = sm.add_constant(X_train_tuned)
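    # MinMaxScaler rescales every regressor to the 0-1 range so coefficients are
    # on a comparable scale, and add_constant appends the intercept column that
    # sm.OLS does not include by default.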
    for flag in selected_options:
        X_train_tuned[flag] = st.session_state['Flags'][flag]

    if Trend:
        # simple linear time index: 1, 2, ..., n
        X_train_tuned['Trend'] = np.arange(1, len(X_train_tuned) + 1, 1)

    date = pd.to_datetime(date.values)
    if week_number:
        # the [:150] slice mirrors the training-window slicing used elsewhere on this page
        X_train_tuned['Week_number'] = date.day_of_week[:150]
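    # Hedged sketch: the 'Sine and Cosine Waves' checkbox is not wired up in the
    # original code. One minimal way to honour it, assuming roughly weekly data
    # and a 52-period annual cycle (cycle_period, Sine_wave and Cosine_wave are
    # illustrative names; adjust the period to match the data):
    if sine_cosine:
        cycle_period = 52  # assumed annual seasonality for weekly observations
        t = np.arange(len(X_train_tuned))
        X_train_tuned['Sine_wave'] = np.sin(2 * np.pi * t / cycle_period)
        X_train_tuned['Cosine_wave'] = np.cos(2 * np.pi * t / cycle_period)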
    model_tuned = sm.OLS(y_train, X_train_tuned).fit()

    metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(
        date[:150], y_train, model.predict(X_train), model, target_column='Revenue')
    metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(
        date[:150], y_train, model_tuned.predict(X_train_tuned), model_tuned, target_column='Revenue')
    # st.write(metrics_table)
    mape = np.round(metrics_table.iloc[0, 1], 2)
    r2 = np.round(metrics_table.iloc[1, 1], 2)
    adjr2 = np.round(metrics_table.iloc[2, 1], 2)

    mape_tuned = np.round(metrics_table_tuned.iloc[0, 1], 2)
    r2_tuned = np.round(metrics_table_tuned.iloc[1, 1], 2)
    adjr2_tuned = np.round(metrics_table_tuned.iloc[2, 1], 2)

    parameters_ = st.columns(3)
    with parameters_[0]:
        st.metric('R2', r2_tuned, np.round(r2_tuned - r2, 2))
    with parameters_[1]:
        st.metric('Adjusted R2', adjr2_tuned, np.round(adjr2_tuned - adjr2, 2))
    with parameters_[2]:
        # lower MAPE is better, so the delta is coloured inversely
        st.metric('MAPE', mape_tuned, np.round(mape_tuned - mape, 2), delta_color='inverse')
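    # variance_inflation_factor is imported at the top of this page but never
    # used. A minimal sketch of how it could be surfaced to check the tuned
    # design matrix for multicollinearity (left disabled so the page's output
    # is unchanged):
    # vif = pd.DataFrame({
    #     'Feature': X_train_tuned.columns,
    #     'VIF': [variance_inflation_factor(X_train_tuned.values, i)
    #             for i in range(X_train_tuned.shape[1])]
    # })
    # st.dataframe(vif)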
    st.header('2.2 Actual vs. Predicted Plot')
    # plot the tuned model's fit over the training window
    metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(
        date[:150], y_train, model_tuned.predict(X_train_tuned), model_tuned, target_column='Revenue')
    st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
    st.markdown('## 2.3 Residual Analysis')
    columns = st.columns(2)
    with columns[0]:
        fig = plot_residual_predicted(y_train, model_tuned.predict(X_train_tuned), X_train_tuned)
        st.plotly_chart(fig)
    with columns[1]:
        st.empty()
        fig = qqplot(y_train, model_tuned.predict(X_train_tuned))
        st.plotly_chart(fig)
    with columns[0]:
        fig = residual_distribution(y_train, model_tuned.predict(X_train_tuned))
        st.pyplot(fig)
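# The commented-out block below is a disabled sketch for exporting raw media
# data, spend inputs and per-channel contributions to 'Overview_data.xlsx';
# it references Categorised_data and X, which are not defined on this page,
# so it cannot be enabled as-is.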
# if st.checkbox('Use this model to build response curves',key='123'):
#     raw_data=df[features_set]
#     columns_raw=[re.split(r"(_lag|_adst)",col)[0] for col in raw_data.columns]
#     raw_data.columns=columns_raw
#     columns_media=[col for col in columns_raw if Categorised_data[col]['BB']=='Media']
#     raw_data=raw_data[columns_media]
#     raw_data['Date']=list(df.index)
#     spends_var=[col for col in df.columns if "spends" in col.lower() and 'adst' not in col.lower() and 'lag' not in col.lower()]
#     spends_df=df[spends_var]
#     spends_df['Week']=list(df.index)
#     j=0
#     X1=X.copy()
#     col=X1.columns
#     for i in model.params.values:
#         X1[col[j]]=X1.iloc[:,j]*i
#         j+=1
#     contribution_df=X1
#     contribution_df['Date']=list(df.index)
#     excel_file='Overview_data.xlsx'
#     with pd.ExcelWriter(excel_file,engine='xlsxwriter') as writer:
#         raw_data.to_excel(writer,sheet_name='RAW DATA MMM',index=False)
#         spends_df.to_excel(writer,sheet_name='SPEND INPUT',index=False)
#         contribution_df.to_excel(writer,sheet_name='CONTRIBUTION MMM')