Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- pages/10_Optimized_Result_Analysis.py +399 -0
- pages/1_Data_Validation.py +251 -0
- pages/2_Transformations.py +522 -0
- pages/4_Model_Build.py +826 -0
- pages/4_Saved_Model_Results.py +607 -0
- pages/5_Model_Tuning_with_panel.py +527 -0
- pages/6_Model_Result_Overview.py +348 -0
- pages/7_Build_Response_Curves.py +185 -0
- pages/8_Scenario_Planner.py +1424 -0
- pages/9_Saved_Scenarios.py +276 -0
pages/10_Optimized_Result_Analysis.py
ADDED
@@ -0,0 +1,399 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from numerize.numerize import numerize
|
3 |
+
import pandas as pd
|
4 |
+
from utilities import (format_numbers,decimal_formater,
|
5 |
+
load_local_css,set_header,
|
6 |
+
initialize_data,
|
7 |
+
load_authenticator)
|
8 |
+
import pickle
|
9 |
+
import streamlit_authenticator as stauth
|
10 |
+
import yaml
|
11 |
+
from yaml import SafeLoader
|
12 |
+
from classes import class_from_dict
|
13 |
+
import plotly.express as px
|
14 |
+
import numpy as np
|
15 |
+
import plotly.graph_objects as go
|
16 |
+
import pandas as pd
|
17 |
+
|
18 |
+
|
19 |
+
def summary_plot(data, x, y, title, text_column, color, format_as_percent=False, format_as_decimal=False):
    """Render a horizontal bar chart summarising per-channel values.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame (one row per channel). Not mutated.
    x, y : str
        Column names for bar length and category axis respectively.
    title : str
        Chart title.
    text_column : str
        Column rendered as bar labels; coerced to numeric so the Plotly
        texttemplates below can format it.
    color : str
        Column used for bar colouring.
    format_as_percent, format_as_decimal : bool
        Label/hover format; percent wins when both are set.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Fix: coerce the label column to numeric BEFORE building the figure so
    # the texttemplate actually receives numbers (the original converted
    # after fig creation, which had no effect on the rendered text), and do
    # it on a copy so the caller's frame is not mutated as a side effect.
    data = data.copy()
    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')

    fig = px.bar(data, x=x, y=y, orientation='h',
                 title=title, text=text_column, color=color)

    # Pick the label/hover number format.
    if format_as_percent:
        fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
    elif format_as_decimal:
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
    else:
        # SI-style abbreviation (e.g. 12k, 3.4M).
        fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')

    fig.update_layout(xaxis_title=x, yaxis_title='Channel Name', showlegend=False)
    return fig
|
35 |
+
|
36 |
+
|
37 |
+
def stacked_summary_plot(data, x, y, title, text_column, color_column, stack_column=None, format_as_percent=False, format_as_decimal=False):
    """Horizontal bar chart with optional faceting by *stack_column*.

    Same contract as ``summary_plot`` plus:

    stack_column : str or None
        When given, the chart is split into one facet column per value.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Fix: numeric coercion must happen BEFORE px.bar consumes the frame
    # (the original converted after the figure was built, so the conversion
    # never reached the rendered labels and mutated the caller's frame).
    data = data.copy()
    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')

    fig = px.bar(data, x=x, y=y, orientation='h',
                 title=title, text=text_column, color=color_column, facet_col=stack_column)

    # Pick the label/hover number format.
    if format_as_percent:
        fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
    elif format_as_decimal:
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
    else:
        fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')

    fig.update_layout(xaxis_title=x, yaxis_title='', showlegend=False)
    return fig
|
53 |
+
|
54 |
+
|
55 |
+
|
56 |
+
def funnel_plot(data, x, y, title, text_column, color_column, format_as_percent=False, format_as_decimal=False):
    """Render a Plotly funnel chart of *x* values per *y* stage.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame. Not mutated (the original rounded/coerced in place).
    x, y : str
        Column names for funnel width and stage labels.
    title : str
        Chart title.
    text_column : str
        Column coerced to numeric and rounded to 2 dp for display.
    color_column : str
        Categorical column mapped to a generated RGB palette.
    format_as_percent, format_as_decimal : bool
        Select the funnelmode layout option (see note below).

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Fix: operate on a copy so the numeric coercion and rounding do not
    # mutate the caller's DataFrame.
    data = data.copy()
    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')
    data[text_column] = data[text_column].round(2)

    # Deterministic pseudo-palette: one RGB triple per category (index 0
    # intentionally maps to black).
    color_map = {category: f'rgb({i * 30 % 255},{i * 50 % 255},{i * 70 % 255})'
                 for i, category in enumerate(data[color_column].unique())}

    fig = go.Figure(go.Funnel(
        y=data[y],
        x=data[x],
        text=data[text_column],
        marker=dict(color=data[color_column].map(color_map)),
        textinfo="value",
        hoverinfo='y+x+text'
    ))

    # NOTE(review): funnelmode controls how MULTIPLE funnel traces are
    # combined; with the single trace above these settings are largely
    # cosmetic — confirm the percent/overlay/group mapping is intentional.
    if format_as_percent:
        fig.update_layout(title=title, funnelmode="percent")
    elif format_as_decimal:
        fig.update_layout(title=title, funnelmode="overlay")
    else:
        fig.update_layout(title=title, funnelmode="group")

    return fig
|
83 |
+
|
84 |
+
|
85 |
+
# ---------------------------------------------------------------------------
# Page shell: wide layout plus the CSS/header shared by every page.
# ---------------------------------------------------------------------------
st.set_page_config(layout='wide')
load_local_css('styles.css')
set_header()

# for k, v in st.session_state.items():
#     if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
#         st.session_state[k] = v

st.empty()
st.header('Model Result Analysis')
# Spend overview workbook; its 'Date' column is used below to align raw data.
spends_data=pd.read_excel('Overview_data_test.xlsx')

# Pre-computed optimizer output: one row per channel with actual/optimized
# spend and delta columns (pickled upstream by the scenario pages).
with open('summary_df.pkl', 'rb') as file:
    summary_df_sorted = pickle.load(file)

# NOTE(review): the selected scenario is not used to switch data anywhere on
# this page — the same summary_df.pkl is rendered for both options. Confirm
# whether scenario-specific loading is still TODO.
selected_scenario= st.selectbox('Select Saved Scenarios',['S1','S2'])

st.header('Optimized Spends Overview')
# Three side-by-side bars: actual spend | planned spend | delta %.
___columns=st.columns(3)
with ___columns[2]:
    fig=summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent',color='Channel_name')
    st.plotly_chart(fig,use_container_width=True)
with ___columns[0]:
    fig=summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend',color='Channel_name')
    st.plotly_chart(fig,use_container_width=True)
with ___columns[1]:
    fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend',color='Channel_name')
    # NOTE(review): use_container_width=False here differs from the other
    # two charts — possibly unintentional.
    st.plotly_chart(fig,use_container_width=False)

st.header(' Budget Allocation')
# Share of the total optimized budget allocated to each channel (0-1 scale,
# rounded to 2 dp; rendered as a percent below).
summary_df_sorted['Perc_alloted']=np.round(summary_df_sorted['Optimized_spend']/summary_df_sorted['Optimized_spend'].sum(),2)
columns2=st.columns(2)
with columns2[0]:
    fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend',color='Channel_name')
    st.plotly_chart(fig,use_container_width=True)
with columns2[1]:
    fig=summary_plot(summary_df_sorted, x='Perc_alloted', y='Channel_name', title='% Split', text_column='Perc_alloted',color='Channel_name',format_as_percent=True)
    st.plotly_chart(fig,use_container_width=True)
|
123 |
+
|
124 |
+
|
125 |
+
# Cache the raw model data in session state; on first load, restrict it to
# channels present in the optimizer summary and dates present in the spends
# workbook.
if 'raw_data' not in st.session_state:
    st.session_state['raw_data']=pd.read_excel('raw_data_nov7_combined1.xlsx')
    # NOTE(review): the two filters below are assumed to belong inside the
    # first-load branch (original indentation was ambiguous) — confirm.
    st.session_state['raw_data']=st.session_state['raw_data'][st.session_state['raw_data']['MediaChannelName'].isin(summary_df_sorted['Channel_name'].unique())]
    st.session_state['raw_data']=st.session_state['raw_data'][st.session_state['raw_data']['Date'].isin(spends_data["Date"].unique())]

#st.write(st.session_state['raw_data']['ResponseMetricName'])
# st.write(st.session_state['raw_data'])

st.header('Response Forecast Overview')
raw_data=st.session_state['raw_data']
# Total response per metric; efficiency = metric total / total media spend.
effectiveness_overall=raw_data.groupby('ResponseMetricName').agg({'ResponseMetricValue': 'sum'}).reset_index()
effectiveness_overall['Efficiency']=effectiveness_overall['ResponseMetricValue'].map(lambda x: x/raw_data['Media Spend'].sum() )
# st.write(effectiveness_overall)

columns6=st.columns(3)

effectiveness_overall.sort_values(by=['ResponseMetricValue'],ascending=False,inplace=True)
effectiveness_overall=np.round(effectiveness_overall,2)
# Bucket each metric as 'BAU' or 'Gamified' from its name; otherwise keep
# the metric name itself as the bucket.
effectiveness_overall['ResponseMetric'] = effectiveness_overall['ResponseMetricName'].apply(lambda x: 'BAU' if 'BAU' in x else ('Gamified' if 'Gamified' in x else x))
# effectiveness_overall=np.where(effectiveness_overall[effectiveness_overall['ResponseMetricName']=="Adjusted Account Approval BAU"],"Adjusted Account Approval BAU",effectiveness_overall['ResponseMetricName'])

# Collapse the BAU and Gamified Appsflyer-approval metrics into one display
# name (they are re-labelled jointly a few lines below).
effectiveness_overall.replace({'ResponseMetricName':{'BAU approved clients - Appsflyer':'Approved clients - Appsflyer',
                                                     'Gamified approved clients - Appsflyer':'Approved clients - Appsflyer'}},inplace=True)

# st.write(effectiveness_overall.sort_values(by=['ResponseMetricValue'],ascending=False))

condition = effectiveness_overall['ResponseMetricName'] == "Adjusted Account Approval BAU"
condition1= effectiveness_overall['ResponseMetricName'] == "Approved clients - Appsflyer"
# Keep the BAU-approval row in its own bucket rather than generic 'BAU'.
effectiveness_overall['ResponseMetric'] = np.where(condition, "Adjusted Account Approval BAU", effectiveness_overall['ResponseMetric'])

effectiveness_overall['ResponseMetricName'] = np.where(condition1, "Approved clients - Appsflyer (BAU, Gamified)", effectiveness_overall['ResponseMetricName'])
# effectiveness_overall=pd.DataFrame({'ResponseMetricName':["App Installs - Appsflyer",'Account Requests - Appsflyer',
#                                                           'Total Adjusted Account Approval','Adjusted Account Approval BAU',
#                                                           'Approved clients - Appsflyer','Approved clients - Appsflyer'],
#                                     'ResponseMetricValue':[683067,367020,112315,79768,36661,16834],
#                                     'Efficiency':[1.24,0.67,0.2,0.14,0.07,0.03],
# Fixed colour per metric/bucket so the funnel and bar charts below agree.
custom_colors = {
    'App Installs - Appsflyer': 'rgb(255, 135, 0)',
    'Account Requests - Appsflyer': 'rgb(125, 239, 161)',
    'Adjusted Account Approval': 'rgb(129, 200, 255)',
    'Adjusted Account Approval BAU': 'rgb(255, 207, 98)',
    'Approved clients - Appsflyer': 'rgb(0, 97, 198)',
    "BAU": 'rgb(41, 176, 157)',
    "Gamified": 'rgb(213, 218, 229)'
    # Add more categories and their respective colours as needed
}

with columns6[0]:
    # Headline revenue in $M; the st.metric display is currently disabled.
    revenue=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='Total Approved Accounts - Revenue']['ResponseMetricValue']).iloc[0]
    revenue=round(revenue / 1_000_000, 2)

    # st.metric('Total Revenue', f"${revenue} M")
# with columns6[1]:
#     BAU=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='BAU approved clients - Revenue']['ResponseMetricValue']).iloc[0]
#     BAU=round(BAU / 1_000_000, 2)
#     st.metric('BAU approved clients - Revenue', f"${BAU} M")
# with columns6[2]:
#     Gam=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='Gamified approved clients - Revenue']['ResponseMetricValue']).iloc[0]
#     Gam=round(Gam / 1_000_000, 2)
#     st.metric('Gamified approved clients - Revenue', f"${Gam} M")

# st.write(effectiveness_overall)
# NOTE(review): hard-coded revenue/efficiency figures — presumably these
# should be derived from effectiveness_overall; confirm before reuse.
data = {'Revenue': ['BAU approved clients - Revenue', 'Gamified approved clients- Revenue'],
        'ResponseMetricValue': [70200000, 1770000],
        'Efficiency':[127.54,3.21]}
df = pd.DataFrame(data)

# Two donut charts: revenue split (effectiveness) and efficiency split.
columns9=st.columns([0.60,0.40])
with columns9[0]:
    figd = px.pie(df,
                  names='Revenue',
                  values='ResponseMetricValue',
                  hole=0.3,  # size of the hole in the donut
                  title='Effectiveness')
    figd.update_layout(
        margin=dict(l=0, r=0, b=0, t=0),width=100, height=180,legend=dict(
            orientation='v',  # vertical legend
            x=0,              # pinned to the left edge
            y=0.8             # adjust y as needed
        )
    )

    st.plotly_chart(figd, use_container_width=True)

with columns9[1]:
    figd1 = px.pie(df,
                   names='Revenue',
                   values='Efficiency',
                   hole=0.3,  # size of the hole in the donut
                   title='Efficiency')
    figd1.update_layout(
        margin=dict(l=0, r=0, b=0, t=0),width=100,height=180,showlegend=False
    )
    st.plotly_chart(figd1, use_container_width=True)
|
229 |
+
|
230 |
+
# Duplicate the metric name under a display-friendly column header so the
# chart axes read naturally.
effectiveness_overall['Response Metric Name']=effectiveness_overall['ResponseMetricName']

# Funnel (effectiveness) next to a bar chart (efficiency). Revenue-based and
# duplicate-total metrics are excluded so both charts stay in account units.
columns4= st.columns([0.55,0.45])
with columns4[0]:
    fig=px.funnel(effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
                                                                                            'BAU approved clients - Revenue',
                                                                                            'Gamified approved clients - Revenue',
                                                                                            "Total Approved Accounts - Appsflyer"]))],
                  x='ResponseMetricValue', y='Response Metric Name',color='ResponseMetric',
                  color_discrete_map=custom_colors,title='Effectiveness',
                  labels=None)
    # Desired stage order / labels for the funnel's y axis.
    custom_y_labels=['App Installs - Appsflyer','Account Requests - Appsflyer','Adjusted Account Approval','Adjusted Account Approval BAU',
                     "Approved clients - Appsflyer (BAU, Gamified)"
                     ]
    fig.update_layout(showlegend=False,
                      yaxis=dict(
                          tickmode='array',
                          ticktext=custom_y_labels,
                      )
                      )
    fig.update_traces(textinfo='value', textposition='inside', texttemplate='%{x:.2s} ', hoverinfo='y+x+percent initial')

    # NOTE(review): last_trace_index is computed but never used; the selector
    # below matches marker color 'blue', which no trace appears to use.
    last_trace_index = len(fig.data) - 1
    fig.update_traces(marker=dict(line=dict(color='black', width=2)), selector=dict(marker=dict(color='blue')))

    st.plotly_chart(fig,use_container_width=True)

with columns4[1]:
    # Efficiency bar chart over the same filtered metric set.
    fig1 = px.bar((effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
                                                                                             'BAU approved clients - Revenue',
                                                                                             'Gamified approved clients - Revenue',
                                                                                             "Total Approved Accounts - Appsflyer"]))]).sort_values(by='ResponseMetricValue'),
                  x='Efficiency', y='Response Metric Name',
                  color_discrete_map=custom_colors, color='ResponseMetric',
                  labels=None,text_auto=True,title='Efficiency'
                  )

    # Axis labels/ticks are hidden so the bars visually pair with the funnel.
    fig1.update_traces(customdata=effectiveness_overall['Efficiency'],
                       textposition='auto')
    fig1.update_layout(showlegend=False)
    fig1.update_yaxes(title='',showticklabels=False)
    fig1.update_xaxes(title='',showticklabels=False)
    fig1.update_xaxes(tickfont=dict(size=20))
    fig1.update_yaxes(tickfont=dict(size=20))
    st.plotly_chart(fig1, use_container_width=True)


# Hard-coded BAU vs Gamified revenue comparison (currently unused below).
effectiveness_overall_revenue=pd.DataFrame({'ResponseMetricName':['Approved Clients','Approved Clients'],
                                            'ResponseMetricValue':[70201070,1768900],
                                            'Efficiency':[127.54,3.21],
                                            'ResponseMetric':['BAU','Gamified']
                                            })
# from plotly.subplots import make_subplots
# fig = make_subplots(rows=1, cols=2,
#                     subplot_titles=["Effectiveness", "Efficiency"])

# # Add first plot as subplot
# fig.add_trace(go.Funnel(
#     x = fig.data[0].x,
#     y = fig.data[0].y,
#     textinfo = 'value+percent initial',
#     hoverinfo = 'x+y+percent initial'
# ), row=1, col=1)

# # Update layout for first subplot
# fig.update_xaxes(title_text="Response Metric Value", row=1, col=1)
# fig.update_yaxes(ticktext = custom_y_labels, row=1, col=1)

# # Add second plot as subplot
# fig.add_trace(go.Bar(
#     x = fig1.data[0].x,
#     y = fig1.data[0].y,
#     customdata = fig1.data[0].customdata,
#     textposition = 'auto'
# ), row=1, col=2)

# # Update layout for second subplot
# fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
# fig.update_yaxes(title='', showticklabels=False, row=1, col=2)

# fig.update_layout(height=600, width=800, title_text="Key Metrics")
# st.plotly_chart(fig)


st.header('Return Forecast by Media Channel')
with st.expander("Return Forecast by Media Channel"):
    # NOTE(review): `val!=np.NaN` is always True (NaN != NaN), so this list
    # comprehension does NOT remove NaN metric names as presumably intended.
    metric_data=[val for val in list(st.session_state['raw_data']['ResponseMetricName'].unique()) if val!=np.NaN]
    # st.write(metric_data)
    metric=st.selectbox('Select Metric',metric_data,index=1)

    selected_metric=st.session_state['raw_data'][st.session_state['raw_data']['ResponseMetricName']==metric]
    # st.dataframe(selected_metric.head(2))
    # NOTE(review): duplicate of the line above.
    selected_metric=st.session_state['raw_data'][st.session_state['raw_data']['ResponseMetricName']==metric]
    # Per-channel total response for the chosen metric.
    effectiveness=selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()
    effectiveness_df=pd.DataFrame({'Channel':effectiveness.index,"ResponseMetricValue":effectiveness.values})

    # Attach per-channel effectiveness to the optimizer summary, then derive
    # efficiency = response / optimized spend.
    summary_df_sorted=summary_df_sorted.merge(effectiveness_df,left_on="Channel_name",right_on='Channel')

    # st.dataframe(summary_df_sorted.head(2))
    summary_df_sorted['Efficiency']=summary_df_sorted['ResponseMetricValue']/summary_df_sorted['Optimized_spend']
    # # # st.dataframe(summary_df_sorted.head(2))
    # st.dataframe(summary_df_sorted.head(2))

    # Spend | effectiveness | efficiency, one bar chart per column.
    columns= st.columns(3)
    with columns[0]:
        fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='', text_column='Optimized_spend',color='Channel_name')
        st.plotly_chart(fig,use_container_width=True)
    with columns[1]:
        # effectiveness=(selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()).values
        # effectiveness_df=pd.DataFrame({'Channel':st.session_state['raw_data']['MediaChannelName'].unique(),"ResponseMetricValue":effectiveness})
        # # effectiveness.reset_index(inplace=True)
        # # st.dataframe(effectiveness.head())
        fig=summary_plot(summary_df_sorted, x='ResponseMetricValue', y='Channel_name', title='Effectiveness', text_column='ResponseMetricValue',color='Channel_name')
        st.plotly_chart(fig,use_container_width=True)

    with columns[2]:
        fig=summary_plot(summary_df_sorted, x='Efficiency', y='Channel_name', title='Efficiency', text_column='Efficiency',color='Channel_name',format_as_decimal=True)
        st.plotly_chart(fig,use_container_width=True)

# Re-imports (already imported at the top of the file); harmless duplicates.
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with subplots
# fig = make_subplots(rows=1, cols=2)

# # Add funnel plot to subplot 1
# fig.add_trace(
#     go.Funnel(
#         x=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricValue'],
#         y=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricName'],
#         textposition="inside",
#         texttemplate="%{x:.2s}",
#         customdata=effectiveness_overall['Efficiency'],
#         hovertemplate="%{customdata:.2f}<extra></extra>"
#     ),
#     row=1, col=1
# )

# # Add bar plot to subplot 2
# fig.add_trace(
#     go.Bar(
#         x=effectiveness_overall.sort_values(by='ResponseMetricValue')['Efficiency'],
#         y=effectiveness_overall.sort_values(by='ResponseMetricValue')['ResponseMetricName'],
#         marker_color=effectiveness_overall['ResponseMetric'],
#         customdata=effectiveness_overall['Efficiency'],
#         hovertemplate="%{customdata:.2f}<extra></extra>",
#         textposition="outside"
#     ),
#     row=1, col=2
# )

# # Update layout
# fig.update_layout(title_text="Effectiveness")
# fig.update_yaxes(title_text="", row=1, col=1)
# fig.update_yaxes(title_text="", showticklabels=False, row=1, col=2)
# fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)

# # Show figure
# st.plotly_chart(fig)
|
pages/1_Data_Validation.py
ADDED
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import plotly.express as px
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
from Eda_functions import *
|
6 |
+
import numpy as np
|
7 |
+
import pickle
|
8 |
+
from streamlit_pandas_profiling import st_profile_report
|
9 |
+
import streamlit as st
|
10 |
+
import streamlit.components.v1 as components
|
11 |
+
import sweetviz as sv
|
12 |
+
from utilities import set_header,load_local_css
|
13 |
+
from st_aggrid import GridOptionsBuilder,GridUpdateMode
|
14 |
+
from st_aggrid import GridOptionsBuilder
|
15 |
+
from st_aggrid import AgGrid
|
16 |
+
import base64
|
17 |
+
import os
|
18 |
+
import tempfile
|
19 |
+
from ydata_profiling import ProfileReport
|
20 |
+
import re
|
21 |
+
|
22 |
+
# ---------------------------------------------------------------------------
# Data Validation page: configure the page shell, load the imported dataset
# and its column-category mapping, then let the user pick the target metric
# and the panels to analyse.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="Data Validation",
    page_icon=":shark:",
    layout="wide",
    initial_sidebar_state='collapsed'
)
load_local_css('styles.css')
set_header()

# Output of the data-import step: the cleaned frame plus a dict mapping
# category names ('Media', 'Response Metrics', ...) to lists of columns.
with open('data_import.pkl', 'rb') as f:
    data = pickle.load(f)

st.session_state['cleaned_data']= data['final_df']
st.session_state['category_dict'] = data['bin_dict']

st.title('Data Validation and Insights')

# Columns categorised as response metrics — the candidate target variables.
target_variables=[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Response Metrics']

target_column = st.selectbox('Select the Target Feature/Dependent Variable (will be used in all charts as reference)',list(*target_variables))
st.session_state['target_column']=target_column
# Name of the panel (grouping) column — first entry under 'Panel Level 1'.
panels=st.session_state['category_dict']['Panel Level 1'][0]
selected_panels=st.multiselect('Please choose the panels you wish to analyze.If no panels are selected, insights will be derived from the overall data.',st.session_state['cleaned_data'][panels].unique())
# When aggregating across panels: media columns are summed, everything else
# is averaged; 'date' and 'Panel_1' are excluded from aggregation.
aggregation_dict = {item: 'sum' if key == 'Media' else 'mean' for key, value in st.session_state['category_dict'].items() for item in value if item not in ['date','Panel_1']}

with st.expander('**Reponse Metric Analysis**'):
    # Aggregate to one row per date, filtered to the chosen panels (or the
    # full dataset when no panel was selected).
    if len(selected_panels)>0:
        st.session_state['Cleaned_data_panel']=st.session_state['cleaned_data'][st.session_state['cleaned_data']['Panel_1'].isin(selected_panels)]

        st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].groupby(by='date').agg(aggregation_dict)
        st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].reset_index()
    else:
        st.session_state['Cleaned_data_panel']=st.session_state['cleaned_data'].groupby(by='date').agg(aggregation_dict)
        st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].reset_index()

    fig=line_plot_target(st.session_state['Cleaned_data_panel'], target=target_column, title=f'{target_column} Over Time')
    st.plotly_chart(fig, use_container_width=True)

    # Media and non-media column lists reused by the expanders further down
    # (module-level names: `with` does not create a new scope).
    media_channel=list(*[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Media'])
    # st.write(media_channel)

    Non_media_variables=list(*[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Exogenous' or key=='Internal'])

    st.markdown('### Annual Data Summary')
    st.dataframe(summary(st.session_state['Cleaned_data_panel'], media_channel+[target_column], spends=None,Target=True), use_container_width=True)

    if st.checkbox('Show raw data'):
        # Render the date column as mm/dd/yyyy and format all numerics.
        st.write(pd.concat([pd.to_datetime(st.session_state['Cleaned_data_panel']['date']).dt.strftime('%m/%d/%Y'),st.session_state['Cleaned_data_panel'].select_dtypes(np.number).applymap(format_numbers)],axis=1))
    # NOTE(review): col1 is created but never used.
    col1 = st.columns(1)

if "selected_feature" not in st.session_state:
    st.session_state['selected_feature']=None
|
83 |
+
|
84 |
+
|
85 |
+
def generate_report_with_target(channel_data, target_feature):
    """Build a Sweetviz bivariate report for *channel_data* against
    *target_feature* and return the path of the generated HTML file
    (written into a fresh temporary directory)."""
    out_dir = tempfile.mkdtemp()
    out_path = os.path.join(out_dir, "report.html")
    analysis = sv.analyze([channel_data, "Dataset"], target_feat=target_feature)
    # Render straight to disk; the file is served through a Streamlit
    # download button, so do not pop a browser window.
    analysis.show_html(filepath=out_path, open_browser=False)
    return out_path
|
91 |
+
|
92 |
+
def generate_profile_report(df):
    """Generate a ydata-profiling (univariate EDA) report for *df* and
    return the filesystem path of the rendered HTML file."""
    report_dir = tempfile.mkdtemp()
    report_file = os.path.join(report_dir, "report.html")
    # df.profile_report() is the pandas accessor added by ydata_profiling
    # (imported at the top of the file).
    df.profile_report().to_file(report_file)
    return report_file
|
98 |
+
|
99 |
+
|
100 |
+
#st.header()
# EDA report downloads: univariate (ydata-profiling) and bivariate (Sweetviz).
with st.expander('Univariate and Bivariate Report'):
    eda_columns=st.columns(2)
    with eda_columns[0]:
        if st.button('Generate Profile Report',help='Univariate report which inlcudes all statistical analysis'):
            with st.spinner('Generating Report'):
                report_file = generate_profile_report(st.session_state['Cleaned_data_panel'])

            if os.path.exists(report_file):
                with open(report_file, 'rb') as f:
                    st.success('Report Generated')
                    st.download_button(
                        label="Download EDA Report",
                        data=f.read(),
                        file_name="pandas_profiling_report.html",
                        mime="text/html"
                    )
            else:
                st.warning("Report generation failed. Unable to find the report file.")

    with eda_columns[1]:
        if st.button('Generate Sweetviz Report',help='Bivariate report for selected response metric'):
            with st.spinner('Generating Report'):
                report_file = generate_report_with_target(st.session_state['Cleaned_data_panel'], target_column)

            if os.path.exists(report_file):
                with open(report_file, 'rb') as f:
                    st.success('Report Generated')
                    st.download_button(
                        label="Download EDA Report",
                        data=f.read(),
                        file_name="report.html",
                        mime="text/html"
                    )
            else:
                st.warning("Report generation failed. Unable to find the report file.")


#st.warning('Work in Progress')
with st.expander('Media Variables Analysis'):
    # Media metric picker — cost/spend columns are excluded; they are paired
    # with the chosen metric below instead.
    st.session_state["selected_feature"]= st.selectbox('Select media', [col for col in media_channel if 'cost' not in col.lower() and 'spend' not in col.lower()])

    # Find the spend column whose prefix matches the selected media metric.
    spends_features = [col for col in st.session_state['Cleaned_data_panel'].columns if any(keyword in col.lower() for keyword in ['cost', 'spend'])]
    spends_feature = [col for col in spends_features if re.split(r'_cost|_spend', col.lower())[0] in st.session_state["selected_feature"]]

    # Running list of media variables the user has marked as validated.
    if 'validation' not in st.session_state:
        st.session_state['validation']=[]

    val_variables=[col for col in media_channel if col!='date']
    if len(spends_feature)==0:
        st.warning('No spends varaible available for the selected metric in data')

    else:
        fig_row1 = line_plot(st.session_state['Cleaned_data_panel'], x_col='date', y1_cols=[st.session_state["selected_feature"]], y2_cols=[target_column], title=f'Analysis of {st.session_state["selected_feature"]} and {[target_column][0]} Over Time')
        st.plotly_chart(fig_row1, use_container_width=True)
        st.markdown('### Summary')
        st.dataframe(summary(st.session_state['cleaned_data'],[st.session_state["selected_feature"]],spends=spends_feature[0]),use_container_width=True)

        cols2=st.columns(2)
        with cols2[0]:
            if st.button('Validate'):
                st.session_state['validation'].append(st.session_state["selected_feature"])
        with cols2[1]:
            if st.checkbox('Validate all'):
                st.session_state['validation'].extend(val_variables)
                st.success('All media variables are validated ✅')

        # While any variable remains unvalidated, show an editable checklist.
        if len(set(st.session_state['validation']).intersection(val_variables))!=len(val_variables):
            validation_data=pd.DataFrame({'Validate':[True if col in st.session_state['validation'] else False for col in val_variables],
                                          'Variables':val_variables
                                          })
            cols3=st.columns([1,30])
            with cols3[1]:
                validation_df=st.data_editor(validation_data,
                                             # column_config={
                                             #     'Validate':st.column_config.CheckboxColumn(wi)

                                             # },
                                             column_config={
                                                 "Validate": st.column_config.CheckboxColumn(
                                                     default=False,
                                                     width=100,
                                                 ),
                                                 'Variables':st.column_config.TextColumn(
                                                     width=1000

                                                 )
                                             },hide_index=True)

                # Fold the user's checkbox edits back into the session list.
                selected_rows = validation_df[validation_df['Validate']==True]['Variables']

                #st.write(selected_rows)

                st.session_state['validation'].extend(selected_rows)

        not_validated_variables = [col for col in val_variables if col not in st.session_state["validation"]]
        if not_validated_variables:
            not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
            st.warning(not_validated_message)


with st.expander('Non Media Variables Analysis'):
    selected_columns_row4 = st.selectbox('Select Channel',Non_media_variables,index=1)
    # # Create the dual-axis line plot
    fig_row4 = line_plot(st.session_state['Cleaned_data_panel'], x_col='date', y1_cols=[selected_columns_row4], y2_cols=[target_column], title=f'Analysis of {selected_columns_row4} and {target_column} Over Time')
    st.plotly_chart(fig_row4, use_container_width=True)
    selected_non_media=selected_columns_row4
    # Yearly totals of the selected variable vs the target, plus grand total.
    sum_df = st.session_state['Cleaned_data_panel'][['date', selected_non_media,target_column]]
    sum_df['Year']=pd.to_datetime(st.session_state['Cleaned_data_panel']['date']).dt.year
    #st.dataframe(df)
    #st.dataframe(sum_df.head(2))
    sum_df=sum_df.groupby('Year').agg('sum')
    sum_df.loc['Grand Total']=sum_df.sum()
    sum_df=sum_df.applymap(format_numbers)
    sum_df.fillna('-',inplace=True)
    sum_df=sum_df.replace({"0.0":'-','nan':'-'})
    st.markdown('### Summary')
    st.dataframe(sum_df,use_container_width=True)
|
223 |
+
|
224 |
+
|
225 |
+
with st.expander('Correlation Analysis'):
|
226 |
+
options = list(st.session_state['Cleaned_data_panel'].select_dtypes(np.number).columns)
|
227 |
+
|
228 |
+
# selected_options = []
|
229 |
+
# num_columns = 4
|
230 |
+
# num_rows = -(-len(options) // num_columns) # Ceiling division to calculate rows
|
231 |
+
|
232 |
+
# # Create a grid of checkboxes
|
233 |
+
# st.header('Select Features for Correlation Plot')
|
234 |
+
# tick=False
|
235 |
+
# if st.checkbox('Select all'):
|
236 |
+
# tick=True
|
237 |
+
# selected_options = []
|
238 |
+
# for row in range(num_rows):
|
239 |
+
# cols = st.columns(num_columns)
|
240 |
+
# for col in cols:
|
241 |
+
# if options:
|
242 |
+
# option = options.pop(0)
|
243 |
+
# selected = col.checkbox(option,value=tick)
|
244 |
+
# if selected:
|
245 |
+
# selected_options.append(option)
|
246 |
+
# # Display selected options
|
247 |
+
|
248 |
+
selected_options=st.multiselect('Select Variables For correlation plot',[var for var in options if var!= target_column],default=options[3])
|
249 |
+
|
250 |
+
st.pyplot(correlation_plot(st.session_state['Cleaned_data_panel'],selected_options,target_column))
|
251 |
+
|
pages/2_Transformations.py
ADDED
@@ -0,0 +1,522 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Importing necessary libraries
|
2 |
+
import streamlit as st
|
3 |
+
|
4 |
+
st.set_page_config(
|
5 |
+
page_title="Transformations",
|
6 |
+
page_icon=":shark:",
|
7 |
+
layout="wide",
|
8 |
+
initial_sidebar_state="collapsed",
|
9 |
+
)
|
10 |
+
|
11 |
+
import pickle
|
12 |
+
import numpy as np
|
13 |
+
import pandas as pd
|
14 |
+
from utilities import set_header, load_local_css
|
15 |
+
import streamlit_authenticator as stauth
|
16 |
+
import yaml
|
17 |
+
from yaml import SafeLoader
|
18 |
+
|
19 |
+
load_local_css("styles.css")
|
20 |
+
set_header()
|
21 |
+
|
22 |
+
# Check for authentication status
|
23 |
+
for k, v in st.session_state.items():
|
24 |
+
if k not in ["logout", "login", "config"] and not k.startswith(
|
25 |
+
"FormSubmitter"
|
26 |
+
):
|
27 |
+
st.session_state[k] = v
|
28 |
+
with open("config.yaml") as file:
|
29 |
+
config = yaml.load(file, Loader=SafeLoader)
|
30 |
+
st.session_state["config"] = config
|
31 |
+
authenticator = stauth.Authenticate(
|
32 |
+
config["credentials"],
|
33 |
+
config["cookie"]["name"],
|
34 |
+
config["cookie"]["key"],
|
35 |
+
config["cookie"]["expiry_days"],
|
36 |
+
config["preauthorized"],
|
37 |
+
)
|
38 |
+
st.session_state["authenticator"] = authenticator
|
39 |
+
name, authentication_status, username = authenticator.login("Login", "main")
|
40 |
+
auth_status = st.session_state.get("authentication_status")
|
41 |
+
|
42 |
+
if auth_status == True:
|
43 |
+
authenticator.logout("Logout", "main")
|
44 |
+
is_state_initiaized = st.session_state.get("initialized", False)
|
45 |
+
|
46 |
+
if not is_state_initiaized:
|
47 |
+
|
48 |
+
if 'session_name' not in st.session_state:
|
49 |
+
st.session_state['session_name']=None
|
50 |
+
|
51 |
+
|
52 |
+
# Deserialize and load the objects from the pickle file
|
53 |
+
with open("data_import.pkl", "rb") as f:
|
54 |
+
data = pickle.load(f)
|
55 |
+
|
56 |
+
# Accessing the loaded objects
|
57 |
+
final_df_loaded = data["final_df"]
|
58 |
+
bin_dict_loaded = data["bin_dict"]
|
59 |
+
|
60 |
+
# Initialize session state
|
61 |
+
if "transformed_columns_dict" not in st.session_state:
|
62 |
+
st.session_state["transformed_columns_dict"] = {} # Default empty dictionary
|
63 |
+
|
64 |
+
if "final_df" not in st.session_state:
|
65 |
+
st.session_state["final_df"] = final_df_loaded # Default as original dataframe
|
66 |
+
|
67 |
+
if "summary_string" not in st.session_state:
|
68 |
+
st.session_state["summary_string"] = None # Default as None
|
69 |
+
|
70 |
+
# Extract original columns for specified categories
|
71 |
+
original_columns = {
|
72 |
+
category: bin_dict_loaded[category]
|
73 |
+
for category in ["Media", "Internal", "Exogenous"]
|
74 |
+
if category in bin_dict_loaded
|
75 |
+
}
|
76 |
+
|
77 |
+
# Retrive Panel columns
|
78 |
+
panel_1 = bin_dict_loaded.get("Panel Level 1")
|
79 |
+
panel_2 = bin_dict_loaded.get("Panel Level 2")
|
80 |
+
|
81 |
+
# # For testing on non panel level
|
82 |
+
# final_df_loaded = final_df_loaded.drop("Panel_1", axis=1)
|
83 |
+
# final_df_loaded = final_df_loaded.groupby("date").mean().reset_index()
|
84 |
+
# panel_1 = None
|
85 |
+
|
86 |
+
# Apply transformations on panel level
|
87 |
+
st.write("")
|
88 |
+
if panel_1:
|
89 |
+
panel = panel_1 + panel_2 if panel_2 else panel_1
|
90 |
+
else:
|
91 |
+
panel = []
|
92 |
+
|
93 |
+
|
94 |
+
# Function to build transformation widgets
|
95 |
+
# Function to build transformation widgets
def transformation_widgets(category, transform_params, date_granularity):
    """Render the transformation-selection UI for one variable category.

    Shows an expander containing a multiselect of the transformations allowed
    for `category`, and a range slider for each selected transformation. The
    chosen (start, end) range is expanded into a grid of parameter values and
    written into `transform_params[category][<transformation>]` — i.e. this
    function's result is the in-place mutation of `transform_params`.

    Parameters:
        category (str): One of "Media", "Internal", "Exogenous".
        transform_params (dict): Nested dict {category: {transformation: values}};
            filled in as a side effect of rendering.
        date_granularity (str): Inferred granularity ("daily"/"weekly"/...);
            used only to label the period-based sliders.
    """
    # Transformation Options per category. Media additionally gets the
    # shape transformations (Saturation, Power, Adstock).
    transformation_options = {
        "Media": ["Lag", "Moving Average", "Saturation", "Power", "Adstock"],
        "Internal": ["Lead", "Lag", "Moving Average"],
        "Exogenous": ["Lead", "Lag", "Moving Average"],
    }

    with st.expander(f"{category} Transformations"):

        # Let users select which transformations to apply
        transformations_to_apply = st.multiselect(
            "Select transformations to apply",
            options=transformation_options[category],
            default=[],
            key=f"transformation_{category}",
        )

        # Determine the number of transformations to put in each column
        # (ceiling of half, so col1 gets the extra one when the count is odd)
        transformations_per_column = (
            len(transformations_to_apply) // 2 + len(transformations_to_apply) % 2
        )

        # Create two columns
        col1, col2 = st.columns(2)

        # Assign transformations to each column
        transformations_col1 = transformations_to_apply[:transformations_per_column]
        transformations_col2 = transformations_to_apply[transformations_per_column:]

        # Define a helper function to create widgets for each transformation.
        # Widget keys embed only the category (e.g. "lag_Media"), which is
        # unique because each transformation appears at most once per category.
        def create_transformation_widgets(column, transformations):
            with column:
                for transformation in transformations:
                    # Conditionally create widgets for selected transformations
                    if transformation == "Lead":
                        st.markdown(f"**Lead ({date_granularity})**")
                        lead = st.slider(
                            "Lead periods",
                            1,
                            10,
                            (1, 2),
                            1,
                            key=f"lead_{category}",
                            label_visibility="collapsed",
                        )
                        start = lead[0]
                        end = lead[1]
                        step = 1
                        # Expand the (start, end) range into every value in between
                        transform_params[category]["Lead"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Lag":
                        st.markdown(f"**Lag ({date_granularity})**")
                        lag = st.slider(
                            "Lag periods",
                            1,
                            10,
                            (1, 2),
                            1,
                            key=f"lag_{category}",
                            label_visibility="collapsed",
                        )
                        start = lag[0]
                        end = lag[1]
                        step = 1
                        transform_params[category]["Lag"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Moving Average":
                        st.markdown(f"**Moving Average ({date_granularity})**")
                        window = st.slider(
                            "Window size for Moving Average",
                            1,
                            10,
                            (1, 2),
                            1,
                            key=f"ma_{category}",
                            label_visibility="collapsed",
                        )
                        start = window[0]
                        end = window[1]
                        step = 1
                        transform_params[category]["Moving Average"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Saturation":
                        st.markdown("**Saturation (%)**")
                        # NOTE(review): slider lower bound is 0 — confirm the
                        # downstream saturation transform handles a 0% setting.
                        saturation_point = st.slider(
                            f"Saturation Percentage",
                            0,
                            100,
                            (10, 20),
                            10,
                            key=f"sat_{category}",
                            label_visibility="collapsed",
                        )
                        start = saturation_point[0]
                        end = saturation_point[1]
                        step = 10
                        transform_params[category]["Saturation"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Power":
                        st.markdown("**Power**")
                        power = st.slider(
                            f"Power",
                            0,
                            10,
                            (2, 4),
                            1,
                            key=f"power_{category}",
                            label_visibility="collapsed",
                        )
                        start = power[0]
                        end = power[1]
                        step = 1
                        transform_params[category]["Power"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Adstock":
                        st.markdown("**Adstock**")
                        rate = st.slider(
                            f"Factor ({category})",
                            0.0,
                            1.0,
                            (0.5, 0.7),
                            0.05,
                            key=f"adstock_{category}",
                            label_visibility="collapsed",
                        )
                        start = rate[0]
                        end = rate[1]
                        step = 0.05
                        # Round to avoid float drift in the generated grid
                        adstock_range = [
                            round(a, 3) for a in np.arange(start, end + step, step)
                        ]
                        transform_params[category]["Adstock"] = adstock_range

        # Create widgets in each column
        create_transformation_widgets(col1, transformations_col1)
        create_transformation_widgets(col2, transformations_col2)
|
242 |
+
|
243 |
+
|
244 |
+
# Function to apply Lag transformation
|
245 |
+
# Function to apply Lag transformation
def apply_lag(df, lag):
    """Shift the series forward by `lag` periods (models a delayed effect)."""
    shifted = df.shift(periods=lag)
    return shifted
|
247 |
+
|
248 |
+
|
249 |
+
# Function to apply Lead transformation
|
250 |
+
# Function to apply Lead transformation
def apply_lead(df, lead):
    """Shift the series backward by `lead` periods (models an anticipated effect)."""
    shifted = df.shift(periods=-1 * lead)
    return shifted
|
252 |
+
|
253 |
+
|
254 |
+
# Function to apply Moving Average transformation
|
255 |
+
# Function to apply Moving Average transformation
def apply_moving_average(df, window_size):
    """Trailing rolling mean over `window_size` observations (NaN until full)."""
    rolling_window = df.rolling(window=window_size)
    return rolling_window.mean()
|
257 |
+
|
258 |
+
|
259 |
+
# Function to apply Saturation transformation
|
260 |
+
# Function to apply Saturation transformation
def apply_saturation(df, saturation_percent_100):
    """Apply a logistic (S-curve) saturation transformation to a series.

    The curve is calibrated from the column's own range: at the midpoint of
    the observed range the retained fraction is 50%, and at the column
    maximum the retained fraction equals `saturation_percent_100` percent.

    Parameters:
        df (pd.Series): Raw (positive) values of a single column.
        saturation_percent_100 (int | float): Saturation percentage, 0-100 scale.

    Returns:
        pd.Series: The saturated series.
    """
    # Convert saturation percentage from 100-based to fraction, and clamp
    # into the open interval (0, 1): the UI slider permits 0 and 100, which
    # would otherwise divide by zero / take log(0) below.
    saturation_percent = saturation_percent_100 / 100.0
    saturation_percent = min(max(saturation_percent, 1e-9), 1 - 1e-9)

    # Saturation point: midpoint of the observed range
    column_max = df.max()
    column_min = df.min()
    saturation_point = (column_min + column_max) / 2

    numerator = np.log((1 / saturation_percent) - 1)
    denominator = np.log(saturation_point / max(column_max, 1e-9))

    # BUG FIX: the original guard `max(denominator, 1e-9)` always *replaced*
    # the denominator, because saturation_point <= column_max makes the log
    # non-positive; the resulting enormous steepness degenerated the curve
    # into a step function. Clamp only the magnitude, preserving the sign.
    if abs(denominator) < 1e-9:
        denominator = -1e-9
    steepness = numerator / denominator

    # Apply the logistic saturation element-wise
    transformed_series = df.apply(
        lambda x: (1 / (1 + (saturation_point / x) ** steepness)) * x
    )

    return transformed_series
|
284 |
+
|
285 |
+
|
286 |
+
# Function to apply Power transformation
|
287 |
+
# Function to apply Power transformation
def apply_power(df, power):
    """Raise every value of the series to the exponent `power`."""
    return df.pow(power)
|
289 |
+
|
290 |
+
|
291 |
+
# Function to apply Adstock transformation
|
292 |
+
# Function to apply Adstock transformation
def apply_adstock(df, factor):
    """Geometric adstock: each period keeps `factor` of the accumulated carryover.

    out[t] = out[t-1] * factor + df[t], with out[-1] = 0.
    """
    carryover = 0
    decayed_values = []
    for value in df:
        carryover = carryover * factor + value
        decayed_values.append(carryover)
    return pd.Series(decayed_values, index=df.index)
|
298 |
+
|
299 |
+
|
300 |
+
# Function to generate transformed columns names
|
301 |
+
# Function to generate transformed column names
@st.cache_resource(show_spinner=False)
def generate_transformed_columns(original_columns, transform_params):
    """Build the mapping of original columns to transformed column names.

    Returns:
        tuple:
            - dict mapping each original column to the list of
              '<col>@<transformation>_<value>' names to be generated;
            - a numbered, HTML-decorated summary string describing the
              transformations applied per column ("" when none apply).
    """
    transformed_columns = {}
    summary = {}

    for category, columns in original_columns.items():
        params_for_category = transform_params.get(category, {})
        for column in columns:
            names = []
            details = []  # human-readable description per transformation

            for transformation, values in params_for_category.items():
                # One transformed name per parameter value
                names.extend(f"{column}@{transformation}_{v}" for v in values)

                # "1, 2 and 3" style formatting of the parameter list
                if len(values) > 1:
                    head = ", ".join(str(v) for v in values[:-1])
                    formatted_values = head + " and " + str(values[-1])
                else:
                    formatted_values = str(values[0])
                details.append(f"{transformation} ({formatted_values})")

            transformed_columns[column] = names

            # Only columns that actually receive transformations get a summary entry
            if details:
                joined = "⮕ ".join(details)
                summary[column] = f"<strong>{column}</strong>: {joined}"

    # Comprehensive numbered summary across all columns
    summary_items = [
        f"{idx + 1}. {text}" for idx, text in enumerate(summary.values())
    ]
    summary_string = "\n".join(summary_items)

    return transformed_columns, summary_string
|
344 |
+
|
345 |
+
|
346 |
+
# Function to apply transformations to DataFrame slices based on specified categories and parameters
|
347 |
+
# Function to apply transformations to DataFrame slices based on specified categories and parameters
@st.cache_resource(show_spinner=False)
def apply_category_transformations(df, bin_dict, transform_params, panel):
    """Apply the configured transformations to each category's column slice.

    Parameters:
        df (pd.DataFrame): Transformed/loaded modeling data.
        bin_dict (dict): Category name -> list of column names in that category.
        transform_params (dict): Category -> {transformation name: parameter values}.
        panel (list[str]): Panel columns to group by ([] for non-panel data).

    Returns:
        pd.DataFrame: `df` with the generated transformed columns appended
        (or `df` unchanged when no transformations were configured).
    """
    # Dictionary for function mapping
    transformation_functions = {
        "Lead": apply_lead,
        "Lag": apply_lag,
        "Moving Average": apply_moving_average,
        "Saturation": apply_saturation,
        "Power": apply_power,
        "Adstock": apply_adstock,
    }

    # Initialize category_df as an empty DataFrame
    category_df = pd.DataFrame()

    # Iterate through each category specified in transform_params
    for category in ["Media", "Internal", "Exogenous"]:
        if (
            category not in transform_params
            or category not in bin_dict
            or not transform_params[category]
        ):
            continue  # Skip categories without transformations

        # Slice the DataFrame based on the columns specified in bin_dict for the current category
        df_slice = df[bin_dict[category] + panel]

        # Iterate through each transformation and its parameters for the current category
        for transformation, parameters in transform_params[category].items():
            transformation_function = transformation_functions[transformation]

            # Check if there is panel data to group by
            if len(panel) > 0:
                # Apply the transformation to each group.
                # NOTE(review): `category_df` is *reassigned* here on every
                # transformation (and every category), not accumulated, and
                # `df_slice` is then rebuilt from the transformed columns —
                # so successive transformations chain on the previous
                # transformation's output and only the final chain survives.
                # The non-panel branch below accumulates instead. Confirm
                # this asymmetry is intended.
                category_df = pd.concat(
                    [
                        df_slice.groupby(panel)
                        .transform(transformation_function, p)
                        .add_suffix(f"@{transformation}_{p}")
                        for p in parameters
                    ],
                    axis=1,
                )

                # Replace all NaN or null values in category_df with 0
                # (Lead/Lag/Moving Average introduce NaNs at the edges)
                category_df.fillna(0, inplace=True)

                # Update df_slice so the next transformation operates on the
                # columns produced by this one
                df_slice = pd.concat(
                    [df[panel], category_df],
                    axis=1,
                )

            else:
                for p in parameters:
                    # Apply the transformation function to each column
                    temp_df = df_slice.apply(
                        lambda x: transformation_function(x, p), axis=0
                    ).rename(lambda x: f"{x}@{transformation}_{p}", axis="columns")
                    # Concatenate the transformed DataFrame slice to the category DataFrame
                    category_df = pd.concat([category_df, temp_df], axis=1)

                # Replace all NaN or null values in category_df with 0
                category_df.fillna(0, inplace=True)

                # Update df_slice.
                # NOTE(review): here df_slice becomes the *accumulated*
                # category_df (panel is empty, so df[panel] contributes
                # nothing), meaning later transformations also re-transform
                # the columns produced by earlier categories — verify.
                df_slice = pd.concat(
                    [df[panel], category_df],
                    axis=1,
                )

    # If category_df has been modified, concatenate it with the panel and response metrics from the original DataFrame
    if not category_df.empty:
        final_df = pd.concat([df, category_df], axis=1)
    else:
        # If no transformations were applied, use the original DataFrame
        final_df = df

    return final_df
|
426 |
+
|
427 |
+
|
428 |
+
# Function to infers the granularity of the date column in a DataFrame
|
429 |
+
# Function to infers the granularity of the date column in a DataFrame
@st.cache_resource(show_spinner=False)
def infer_date_granularity(df):
    """Classify df['date'] spacing as 'daily', 'weekly', 'monthly' or 'irregular'.

    Uses the modal day-gap between consecutive unique dates.
    """
    unique_dates = pd.Series(df["date"].unique())
    # Most common difference (in days) between consecutive dates
    common_freq = unique_dates.diff().dt.days.dropna().mode()[0]

    # Map the modal gap to a named granularity
    if common_freq == 1:
        granularity = "daily"
    elif common_freq == 7:
        granularity = "weekly"
    elif 28 <= common_freq <= 31:
        granularity = "monthly"
    else:
        granularity = "irregular"
    return granularity
|
443 |
+
|
444 |
+
|
445 |
+
#########################################################################################################################################################
|
446 |
+
# User input for transformations
|
447 |
+
#########################################################################################################################################################
|
448 |
+
|
449 |
+
|
450 |
+
# Infer date granularity
|
451 |
+
date_granularity = infer_date_granularity(final_df_loaded)
|
452 |
+
|
453 |
+
# Initialize the main dictionary to store the transformation parameters for each category
|
454 |
+
transform_params = {"Media": {}, "Internal": {}, "Exogenous": {}}
|
455 |
+
|
456 |
+
# User input for transformations
|
457 |
+
st.markdown("### Select Transformations to Apply")
|
458 |
+
for category in ["Media", "Internal", "Exogenous"]:
|
459 |
+
# Skip Internal
|
460 |
+
if category == "Internal":
|
461 |
+
continue
|
462 |
+
|
463 |
+
transformation_widgets(category, transform_params, date_granularity)
|
464 |
+
|
465 |
+
|
466 |
+
#########################################################################################################################################################
|
467 |
+
# Apply transformations
|
468 |
+
#########################################################################################################################################################
|
469 |
+
|
470 |
+
|
471 |
+
# Apply category-based transformations to the DataFrame
|
472 |
+
if st.button("Accept and Proceed", use_container_width=True):
|
473 |
+
with st.spinner("Applying transformations..."):
|
474 |
+
final_df = apply_category_transformations(
|
475 |
+
final_df_loaded, bin_dict_loaded, transform_params, panel
|
476 |
+
)
|
477 |
+
|
478 |
+
# Generate a dictionary mapping original column names to lists of transformed column names
|
479 |
+
transformed_columns_dict, summary_string = generate_transformed_columns(
|
480 |
+
original_columns, transform_params
|
481 |
+
)
|
482 |
+
|
483 |
+
# Store into transformed dataframe and summary session state
|
484 |
+
st.session_state["final_df"] = final_df
|
485 |
+
st.session_state["summary_string"] = summary_string
|
486 |
+
|
487 |
+
|
488 |
+
#########################################################################################################################################################
|
489 |
+
# Display the transformed DataFrame and summary
|
490 |
+
#########################################################################################################################################################
|
491 |
+
|
492 |
+
|
493 |
+
# Display the transformed DataFrame in the Streamlit app
|
494 |
+
st.markdown("### Transformed DataFrame")
|
495 |
+
st.dataframe(st.session_state["final_df"], hide_index=True)
|
496 |
+
|
497 |
+
# Total rows and columns
|
498 |
+
total_rows, total_columns = st.session_state["final_df"].shape
|
499 |
+
st.markdown(
|
500 |
+
f"<p style='text-align: justify;'>The transformed DataFrame contains <strong>{total_rows}</strong> rows and <strong>{total_columns}</strong> columns.</p>",
|
501 |
+
unsafe_allow_html=True,
|
502 |
+
)
|
503 |
+
|
504 |
+
# Display the summary of transformations as markdown
|
505 |
+
if st.session_state["summary_string"]:
|
506 |
+
with st.expander("Summary of Transformations"):
|
507 |
+
st.markdown("### Summary of Transformations")
|
508 |
+
st.markdown(st.session_state["summary_string"], unsafe_allow_html=True)
|
509 |
+
|
510 |
+
@st.cache_resource(show_spinner=False)
def save_to_pickle(file_path, final_df):
    """Persist the transformed DataFrame to `file_path` as a pickle payload."""
    payload = {"final_df_transformed": final_df}
    # Open the file in write-binary mode and dump the objects
    with open(file_path, "wb") as f:
        pickle.dump(payload, f)
    # Data is now saved to file
|
516 |
+
|
517 |
+
if st.button("Accept and Save", use_container_width=True):
|
518 |
+
|
519 |
+
save_to_pickle(
|
520 |
+
"final_df_transformed.pkl", st.session_state["final_df"]
|
521 |
+
)
|
522 |
+
st.toast("💾 Saved Successfully!")
|
pages/4_Model_Build.py
ADDED
@@ -0,0 +1,826 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
MMO Build Sprint 3
|
3 |
+
additions : adding more variables to session state for saved model : random effect, predicted train & test
|
4 |
+
|
5 |
+
MMO Build Sprint 4
|
6 |
+
additions : ability to run models for different response metrics
|
7 |
+
'''
|
8 |
+
|
9 |
+
import streamlit as st
|
10 |
+
import pandas as pd
|
11 |
+
import plotly.express as px
|
12 |
+
import plotly.graph_objects as go
|
13 |
+
from Eda_functions import format_numbers
|
14 |
+
import numpy as np
|
15 |
+
import pickle
|
16 |
+
from st_aggrid import AgGrid
|
17 |
+
from st_aggrid import GridOptionsBuilder, GridUpdateMode
|
18 |
+
from utilities import set_header, load_local_css
|
19 |
+
from st_aggrid import GridOptionsBuilder
|
20 |
+
import time
|
21 |
+
import itertools
|
22 |
+
import statsmodels.api as sm
|
23 |
+
import numpy as npc
|
24 |
+
import re
|
25 |
+
import itertools
|
26 |
+
from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error
|
27 |
+
from sklearn.preprocessing import MinMaxScaler
|
28 |
+
import os
|
29 |
+
import matplotlib.pyplot as plt
|
30 |
+
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
31 |
+
|
32 |
+
st.set_option('deprecation.showPyplotGlobalUse', False)
|
33 |
+
import statsmodels.api as sm
|
34 |
+
import statsmodels.formula.api as smf
|
35 |
+
|
36 |
+
from datetime import datetime
|
37 |
+
import seaborn as sns
|
38 |
+
from Data_prep_functions import *
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
def get_random_effects(media_data, panel_col, mdf):
    """Extract the fitted random intercept for every panel entity.

    Parameters:
        media_data (pd.DataFrame): Modeling data containing `panel_col`.
        panel_col (str): Name of the panel (grouping) column.
        mdf: Fitted mixed-effects result exposing `.random_effects`, a dict
            keyed by group whose values hold the intercept as first element.

    Returns:
        pd.DataFrame: One row per unique panel value, with columns
        [panel_col, "random_effect"].
    """
    # Build rows in a list instead of growing the DataFrame with .loc per
    # row (quadratic), and drop the original debug print(i, end='\r').
    rows = []
    for market in media_data[panel_col].unique():
        # First random-effects term is the group intercept
        intercept = mdf.random_effects[market].values[0]
        rows.append({panel_col: market, "random_effect": intercept})

    return pd.DataFrame(rows, columns=[panel_col, "random_effect"])
|
52 |
+
|
53 |
+
|
54 |
+
def mdf_predict(X_df, mdf, random_eff_df):
    """Predict with a mixed-effects model: fixed effect plus panel random intercept.

    NOTE(review): relies on the module-level global `panel_col` as the merge
    key — confirm it is assigned before this function is called.

    Parameters:
        X_df (pd.DataFrame): Feature rows, must contain the panel column.
        mdf: Fitted mixed-effects result with a `.predict` method.
        random_eff_df (pd.DataFrame): Per-panel random intercepts, as
            produced by get_random_effects.

    Returns:
        pd.Series: Predicted values ('pred'), one per row of X_df.
    """
    frame = X_df.copy()
    # Fixed-effects component from the fitted model
    frame["fixed_effect"] = mdf.predict(frame)
    # Attach each row's panel-level random intercept
    frame = pd.merge(frame, random_eff_df, on=panel_col, how="left")
    frame["pred"] = frame["fixed_effect"] + frame["random_effect"]
    # Drop the intermediate components, keeping only the combined prediction
    frame.drop(columns=["fixed_effect", "random_effect"], inplace=True)
    return frame["pred"]
|
62 |
+
|
63 |
+
|
64 |
+
st.set_page_config(
|
65 |
+
page_title="Model Build",
|
66 |
+
page_icon=":shark:",
|
67 |
+
layout="wide",
|
68 |
+
initial_sidebar_state='collapsed'
|
69 |
+
)
|
70 |
+
|
71 |
+
load_local_css('styles.css')
|
72 |
+
set_header()
|
73 |
+
|
74 |
+
st.title('1. Build Your Model')
|
75 |
+
|
76 |
+
with open("data_import.pkl", "rb") as f:
|
77 |
+
data = pickle.load(f)
|
78 |
+
|
79 |
+
st.session_state['bin_dict'] = data["bin_dict"]
|
80 |
+
|
81 |
+
#st.write(data["bin_dict"])
|
82 |
+
|
83 |
+
with open("final_df_transformed.pkl", "rb") as f:
|
84 |
+
data = pickle.load(f)
|
85 |
+
|
86 |
+
# Accessing the loaded objects
|
87 |
+
media_data = data["final_df_transformed"]
|
88 |
+
|
89 |
+
# Sprint4 - available response metrics is a list of all response metrics in the data
|
90 |
+
## these will be put in a drop down
|
91 |
+
|
92 |
+
st.session_state['media_data']=media_data
|
93 |
+
|
94 |
+
if 'available_response_metrics' not in st.session_state:
|
95 |
+
# st.session_state['available_response_metrics'] = ['Total Approved Accounts - Revenue',
|
96 |
+
# 'Total Approved Accounts - Appsflyer',
|
97 |
+
# 'Account Requests - Appsflyer',
|
98 |
+
# 'App Installs - Appsflyer']
|
99 |
+
|
100 |
+
st.session_state['available_response_metrics']= st.session_state['bin_dict']["Response Metrics"]
|
101 |
+
# Sprint4
|
102 |
+
if "is_tuned_model" not in st.session_state:
|
103 |
+
st.session_state["is_tuned_model"] = {}
|
104 |
+
for resp_metric in st.session_state['available_response_metrics'] :
|
105 |
+
resp_metric=resp_metric.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
|
106 |
+
st.session_state["is_tuned_model"][resp_metric] = False
|
107 |
+
|
108 |
+
# Sprint4 - used_response_metrics is a list of resp metrics for which user has created & saved a model
|
109 |
+
if 'used_response_metrics' not in st.session_state:
|
110 |
+
st.session_state['used_response_metrics'] = []
|
111 |
+
|
112 |
+
# Sprint4 - saved_model_names
|
113 |
+
if 'saved_model_names' not in st.session_state:
|
114 |
+
st.session_state['saved_model_names'] = []
|
115 |
+
|
116 |
+
# if "model_save_flag" not in st.session_state:
|
117 |
+
# st.session_state["model_save_flag"]=False
|
118 |
+
# def reset_save():
|
119 |
+
# st.session_state["model_save_flag"]=False
|
120 |
+
# def set_save():
|
121 |
+
# st.session_state["model_save_flag"]=True
|
122 |
+
# Sprint4 - select a response metric
|
123 |
+
|
124 |
+
|
125 |
+
sel_target_col = st.selectbox("Select the response metric",
|
126 |
+
st.session_state['available_response_metrics'])
|
127 |
+
# , on_change=reset_save())
|
128 |
+
target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
|
129 |
+
|
130 |
+
new_name_dct={col:col.lower().replace('.','_').lower().replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in media_data.columns}
|
131 |
+
|
132 |
+
media_data.columns=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in media_data.columns]
|
133 |
+
|
134 |
+
#st.write(st.session_state['bin_dict'])
|
135 |
+
panel_col = [col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['bin_dict']['Panel Level 1'] ] [0]# set the panel column
|
136 |
+
date_col = 'date'
|
137 |
+
|
138 |
+
#st.write(media_data)
|
139 |
+
|
140 |
+
is_panel = True if len(panel_col)>0 else False
|
141 |
+
|
142 |
+
if 'is_panel' not in st.session_state:
|
143 |
+
st.session_state['is_panel']=False
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
# if st.toggle('Apply Transformations on DMA/Panel Level'):
|
148 |
+
# media_data = pd.read_csv(r'C:\Users\SrishtiVerma\Mastercard\Sprint2\upf_data_converted_randomized_resp_metrics.csv')
|
149 |
+
# media_data.columns = [i.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for i in
|
150 |
+
# media_data.columns]
|
151 |
+
# dma = st.selectbox('Select the Level of data ',
|
152 |
+
# [col for col in media_data.columns if col.lower() in ['dma', 'panel', 'markets']])
|
153 |
+
# # is_panel = True
|
154 |
+
# # st.session_state['is_panel']=True
|
155 |
+
#
|
156 |
+
# else:
|
157 |
+
# # """ code to aggregate data on date """
|
158 |
+
# media_data = pd.read_excel(r'C:\Users\SrishtiVerma\Mastercard\Sprint1\Tactic Level Models\Tactic_level_data_imp_clicks_spends.xlsx')
|
159 |
+
# media_data.columns = [i.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for i in
|
160 |
+
# media_data.columns]
|
161 |
+
# dma = None
|
162 |
+
# # is_panel = False
|
163 |
+
# # st.session_state['is_panel']=False
|
164 |
+
|
165 |
+
#media_data = st.session_state["final_df"]
|
166 |
+
|
167 |
+
|
168 |
+
|
169 |
+
# st.write(media_data.columns)
|
170 |
+
|
171 |
+
media_data.sort_values(date_col, inplace=True)
|
172 |
+
media_data.reset_index(drop=True, inplace=True)
|
173 |
+
|
174 |
+
date = media_data[date_col]
|
175 |
+
st.session_state['date'] = date
|
176 |
+
# revenue=media_data[target_col]
|
177 |
+
y = media_data[target_col]
|
178 |
+
|
179 |
+
if is_panel:
|
180 |
+
spends_data = media_data[
|
181 |
+
[c for c in media_data.columns if "_cost" in c.lower() or "_spend" in c.lower()] + [date_col, panel_col]]
|
182 |
+
# Sprint3 - spends for resp curves
|
183 |
+
else:
|
184 |
+
spends_data = media_data[
|
185 |
+
[c for c in media_data.columns if "_cost" in c.lower() or "_spend" in c.lower()] + [date_col]]
|
186 |
+
|
187 |
+
y = media_data[target_col]
|
188 |
+
# media_data.drop([target_col],axis=1,inplace=True)
|
189 |
+
media_data.drop([date_col], axis=1, inplace=True)
|
190 |
+
media_data.reset_index(drop=True, inplace=True)
|
191 |
+
|
192 |
+
# dma_dict={ dm:media_data[media_data[dma]==dm] for dm in media_data[dma].unique()}
|
193 |
+
|
194 |
+
# st.markdown('## Select the Range of Transformations')
|
195 |
+
columns = st.columns(2)
|
196 |
+
|
197 |
+
old_shape = media_data.shape
|
198 |
+
|
199 |
+
if "old_shape" not in st.session_state:
|
200 |
+
st.session_state['old_shape'] = old_shape
|
201 |
+
|
202 |
+
# with columns[0]:
|
203 |
+
# slider_value_adstock = st.slider('Select Adstock Range (only applied to media)', 0.0, 1.0, (0.2, 0.4), step=0.1,
|
204 |
+
# format="%.2f")
|
205 |
+
# with columns[1]:
|
206 |
+
# slider_value_lag = st.slider('Select Lag Range (applied to media, seasonal, macroeconomic variables)', 1, 7, (1, 3),
|
207 |
+
# step=1)
|
208 |
+
|
209 |
+
|
210 |
+
# with columns[2]:
|
211 |
+
# slider_value_power=st.slider('Select Power range (only applied to media )',0,4,(1,2),step=1)
|
212 |
+
|
213 |
+
# with columns[1]:
|
214 |
+
# st.number_input('Select the range of half saturation point ',min_value=1,max_value=5)
|
215 |
+
# st.number_input('Select the range of ')
|
216 |
+
|
217 |
+
# Section 1 - Transformations Functions
|
218 |
+
# def lag(data, features, lags, dma=None):
|
219 |
+
# if dma:
|
220 |
+
#
|
221 |
+
# transformed_data = pd.concat(
|
222 |
+
# [data.groupby([dma])[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags], axis=1)
|
223 |
+
# # transformed_data = transformed_data.fillna(method='bfill')
|
224 |
+
# transformed_data = transformed_data.bfill() # Sprint4 - fillna getting deprecated
|
225 |
+
# return pd.concat([transformed_data, data], axis=1)
|
226 |
+
#
|
227 |
+
# else:
|
228 |
+
#
|
229 |
+
# # ''' data should be aggregated on date'''
|
230 |
+
#
|
231 |
+
# transformed_data = pd.concat([data[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags], axis=1)
|
232 |
+
# # transformed_data = transformed_data.fillna(method='bfill')
|
233 |
+
# transformed_data = transformed_data.bfill()
|
234 |
+
#
|
235 |
+
# return pd.concat([transformed_data, data], axis=1)
|
236 |
+
#
|
237 |
+
#
|
238 |
+
# # adstock
|
239 |
+
# def adstock(df, alphas, cutoff, features, dma=None):
|
240 |
+
# if dma:
|
241 |
+
# transformed_data = pd.DataFrame()
|
242 |
+
# for d in df[dma].unique():
|
243 |
+
# dma_sub_df = df[df[dma] == d]
|
244 |
+
# n = len(dma_sub_df)
|
245 |
+
#
|
246 |
+
# weights = np.array(
|
247 |
+
# [[[alpha ** (i - j) if i >= j and j >= i - cutoff else 0. for j in range(n)] for i in range(n)] for
|
248 |
+
# alpha in alphas])
|
249 |
+
# X = dma_sub_df[features].to_numpy()
|
250 |
+
#
|
251 |
+
# res = pd.DataFrame(np.hstack(weights @ X),
|
252 |
+
# columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
|
253 |
+
#
|
254 |
+
# transformed_data = pd.concat([transformed_data, res], axis=0)
|
255 |
+
# transformed_data.reset_index(drop=True, inplace=True)
|
256 |
+
# return pd.concat([transformed_data, df], axis=1)
|
257 |
+
#
|
258 |
+
# else:
|
259 |
+
#
|
260 |
+
# n = len(df)
|
261 |
+
#
|
262 |
+
# weights = np.array(
|
263 |
+
# [[[alpha ** (i - j) if i >= j and j >= i - cutoff else 0. for j in range(n)] for i in range(n)] for alpha in
|
264 |
+
# alphas])
|
265 |
+
#
|
266 |
+
# X = df[features].to_numpy()
|
267 |
+
# res = pd.DataFrame(np.hstack(weights @ X),
|
268 |
+
# columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
|
269 |
+
# return pd.concat([res, df], axis=1)
|
270 |
+
|
271 |
+
|
272 |
+
# Section 2 - Begin Transformations
|
273 |
+
|
274 |
+
if 'media_data' not in st.session_state:
|
275 |
+
st.session_state['media_data'] = pd.DataFrame()
|
276 |
+
|
277 |
+
# Sprint3
|
278 |
+
if "orig_media_data" not in st.session_state:
|
279 |
+
st.session_state['orig_media_data'] = pd.DataFrame()
|
280 |
+
|
281 |
+
# Sprint3 additions
|
282 |
+
if 'random_effects' not in st.session_state:
|
283 |
+
st.session_state['random_effects'] = pd.DataFrame()
|
284 |
+
if 'pred_train' not in st.session_state:
|
285 |
+
st.session_state['pred_train'] = []
|
286 |
+
if 'pred_test' not in st.session_state:
|
287 |
+
st.session_state['pred_test'] = []
|
288 |
+
# end of Sprint3 additions
|
289 |
+
|
290 |
+
# variables_to_be_transformed=[col for col in media_data.columns if col.lower() not in ['dma','panel'] ] # change for buckets
|
291 |
+
# variables_to_be_transformed = [col for col in media_data.columns if
|
292 |
+
# '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
|
293 |
+
#
|
294 |
+
# with columns[0]:
|
295 |
+
# if st.button('Apply Transformations'):
|
296 |
+
# with st.spinner('Applying Transformations'):
|
297 |
+
# transformed_data_lag = lag(media_data, features=variables_to_be_transformed,
|
298 |
+
# lags=np.arange(slider_value_lag[0], slider_value_lag[1] + 1, 1), dma=dma)
|
299 |
+
#
|
300 |
+
# # variables_to_be_transformed=[col for col in list(transformed_data_lag.columns) if col not in ['Date','DMA','Panel']] #change for buckets
|
301 |
+
# variables_to_be_transformed = [col for col in media_data.columns if
|
302 |
+
# '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
|
303 |
+
#
|
304 |
+
# transformed_data_adstock = adstock(df=transformed_data_lag,
|
305 |
+
# alphas=np.arange(slider_value_adstock[0], slider_value_adstock[1], 0.1),
|
306 |
+
# cutoff=8, features=variables_to_be_transformed, dma=dma)
|
307 |
+
#
|
308 |
+
# # st.success('Done')
|
309 |
+
# st.success("Transformations complete!")
|
310 |
+
#
|
311 |
+
# st.write(f'old shape {old_shape}, new shape {transformed_data_adstock.shape}')
|
312 |
+
#
|
313 |
+
# transformed_data_adstock.columns = [c.replace(".", "_") for c in
|
314 |
+
# transformed_data_adstock.columns] # srishti
|
315 |
+
# st.session_state['media_data'] = transformed_data_adstock # srishti
|
316 |
+
# # Sprint3
|
317 |
+
# orig_media_data = media_data.copy()
|
318 |
+
# orig_media_data[date_col] = date
|
319 |
+
# orig_media_data[target_col] = y
|
320 |
+
# st.session_state['orig_media_data'] = orig_media_data # srishti
|
321 |
+
#
|
322 |
+
# # with st.spinner('Applying Transformations'):
|
323 |
+
# # time.sleep(2)
|
324 |
+
# # st.success("Transformations complete!")
|
325 |
+
#
|
326 |
+
# # if st.session_state['media_data'].shape[1]>old_shape[1]:
|
327 |
+
# # with columns[0]:
|
328 |
+
# # st.write(f'Total no.of variables before transformation: {old_shape[1]}, Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
|
329 |
+
# # st.write(f'Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
|
330 |
+
|
331 |
+
# Section 3 - Create combinations
|
332 |
+
|
333 |
+
# bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','FB: Level Achieved - Tier 1 Impressions',
|
334 |
+
# ' FB: Level Achieved - Tier 2 Impressions','paid_social_others',
|
335 |
+
# ' GA App: Will And Cid Pequena Baixo Risco Clicks',
|
336 |
+
# 'digital_tactic_others',"programmatic"
|
337 |
+
# ]
|
338 |
+
|
339 |
+
# srishti - bucket names changed
|
340 |
+
bucket = ['paid_search', 'kwai', 'indicacao', 'infleux', 'influencer', 'fb_level_achieved_tier_2',
|
341 |
+
'fb_level_achieved_tier_1', 'paid_social_others',
|
342 |
+
'ga_app',
|
343 |
+
'digital_tactic_others', "programmatic"
|
344 |
+
]
|
345 |
+
|
346 |
+
with columns[0]:
|
347 |
+
if st.button('Create Combinations of Variables'):
|
348 |
+
|
349 |
+
top_3_correlated_features = []
|
350 |
+
# # for col in st.session_state['media_data'].columns[:19]:
|
351 |
+
# original_cols = [c for c in st.session_state['media_data'].columns if
|
352 |
+
# "_clicks" in c.lower() or "_impressions" in c.lower()]
|
353 |
+
#original_cols = [c for c in original_cols if "_lag" not in c.lower() and "_adstock" not in c.lower()]
|
354 |
+
|
355 |
+
original_cols=st.session_state['bin_dict']['Media'] + st.session_state['bin_dict']['Internal']
|
356 |
+
|
357 |
+
original_cols=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in original_cols]
|
358 |
+
|
359 |
+
#st.write(original_cols)
|
360 |
+
# for col in st.session_state['media_data'].columns[:19]:
|
361 |
+
for col in original_cols: # srishti - new
|
362 |
+
corr_df = pd.concat([st.session_state['media_data'].filter(regex=col),
|
363 |
+
y], axis=1).corr()[target_col].iloc[:-1]
|
364 |
+
top_3_correlated_features.append(list(corr_df.sort_values(ascending=False).head(2).index))
|
365 |
+
flattened_list = [item for sublist in top_3_correlated_features for item in sublist]
|
366 |
+
# all_features_set={var:[col for col in flattened_list if var in col] for var in bucket}
|
367 |
+
all_features_set = {var: [col for col in flattened_list if var in col] for var in bucket if
|
368 |
+
len([col for col in flattened_list if var in col]) > 0} # srishti
|
369 |
+
|
370 |
+
channels_all = [values for values in all_features_set.values()]
|
371 |
+
st.session_state['combinations'] = list(itertools.product(*channels_all))
|
372 |
+
# if 'combinations' not in st.session_state:
|
373 |
+
# st.session_state['combinations']=combinations_all
|
374 |
+
|
375 |
+
st.session_state['final_selection'] = st.session_state['combinations']
|
376 |
+
st.success('Done')
|
377 |
+
|
378 |
+
# revenue.reset_index(drop=True,inplace=True)
|
379 |
+
y.reset_index(drop=True, inplace=True)
|
380 |
+
if 'Model_results' not in st.session_state:
|
381 |
+
st.session_state['Model_results'] = {'Model_object': [],
|
382 |
+
'Model_iteration': [],
|
383 |
+
'Feature_set': [],
|
384 |
+
'MAPE': [],
|
385 |
+
'R2': [],
|
386 |
+
'ADJR2': [],
|
387 |
+
'pos_count': []
|
388 |
+
}
|
389 |
+
|
390 |
+
|
391 |
+
def reset_model_result_dct():
    """Clear all accumulated model-build results in the Streamlit session state."""
    result_keys = (
        'Model_object',
        'Model_iteration',
        'Feature_set',
        'MAPE',
        'R2',
        'ADJR2',
        'pos_count',
    )
    # Fresh empty list per metric so previous iterations are discarded.
    st.session_state['Model_results'] = {key: [] for key in result_keys}
|
400 |
+
|
401 |
+
# if st.button('Build Model'):
|
402 |
+
|
403 |
+
|
404 |
+
if 'iterations' not in st.session_state:
|
405 |
+
st.session_state['iterations'] = 0
|
406 |
+
|
407 |
+
if 'final_selection' not in st.session_state:
|
408 |
+
st.session_state['final_selection'] = False
|
409 |
+
|
410 |
+
save_path = r"Model/"
|
411 |
+
with columns[1]:
|
412 |
+
if st.session_state['final_selection']:
|
413 |
+
st.write(f'Total combinations created {format_numbers(len(st.session_state["final_selection"]))}')
|
414 |
+
|
415 |
+
if st.checkbox('Build all iterations'):
|
416 |
+
iterations = len(st.session_state['final_selection'])
|
417 |
+
else:
|
418 |
+
iterations = st.number_input('Select the number of iterations to perform', min_value=0, step=100,
|
419 |
+
value=st.session_state['iterations'], on_change=reset_model_result_dct)
|
420 |
+
# st.write("iterations=", iterations)
|
421 |
+
|
422 |
+
|
423 |
+
if st.button('Build Model', on_click=reset_model_result_dct):
|
424 |
+
st.session_state['iterations'] = iterations
|
425 |
+
|
426 |
+
# Section 4 - Model
|
427 |
+
# st.session_state['media_data'] = st.session_state['media_data'].fillna(method='ffill')
|
428 |
+
st.session_state['media_data'] = st.session_state['media_data'].ffill()
|
429 |
+
st.markdown(
|
430 |
+
'Data Split -- Training Period: May 9th, 2023 - October 5th,2023 , Testing Period: October 6th, 2023 - November 7th, 2023 ')
|
431 |
+
progress_bar = st.progress(0) # Initialize the progress bar
|
432 |
+
# time_remaining_text = st.empty() # Create an empty space for time remaining text
|
433 |
+
start_time = time.time() # Record the start time
|
434 |
+
progress_text = st.empty()
|
435 |
+
|
436 |
+
# time_elapsed_text = st.empty()
|
437 |
+
# for i, selected_features in enumerate(st.session_state["final_selection"][40000:40000 + int(iterations)]):
|
438 |
+
# st.write(st.session_state["final_selection"])
|
439 |
+
# for i, selected_features in enumerate(st.session_state["final_selection"]):
|
440 |
+
|
441 |
+
if is_panel == True:
|
442 |
+
for i, selected_features in enumerate(st.session_state["final_selection"][0:int(iterations)]): # srishti
|
443 |
+
df = st.session_state['media_data']
|
444 |
+
|
445 |
+
fet = [var for var in selected_features if len(var) > 0]
|
446 |
+
inp_vars_str = " + ".join(fet) # new
|
447 |
+
|
448 |
+
X = df[fet]
|
449 |
+
y = df[target_col]
|
450 |
+
ss = MinMaxScaler()
|
451 |
+
X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
|
452 |
+
|
453 |
+
X[target_col] = y # Sprint2
|
454 |
+
X[panel_col] = df[panel_col] # Sprint2
|
455 |
+
|
456 |
+
X_train = X.iloc[:8000]
|
457 |
+
X_test = X.iloc[8000:]
|
458 |
+
y_train = y.iloc[:8000]
|
459 |
+
y_test = y.iloc[8000:]
|
460 |
+
|
461 |
+
print(X_train.shape)
|
462 |
+
# model = sm.OLS(y_train, X_train).fit()
|
463 |
+
md_str = target_col + " ~ " + inp_vars_str
|
464 |
+
# md = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
|
465 |
+
# data=X_train[[target_col] + fet],
|
466 |
+
# groups=X_train[panel_col])
|
467 |
+
md = smf.mixedlm(md_str,
|
468 |
+
data=X_train[[target_col] + fet],
|
469 |
+
groups=X_train[panel_col])
|
470 |
+
mdf = md.fit()
|
471 |
+
predicted_values = mdf.fittedvalues
|
472 |
+
|
473 |
+
coefficients = mdf.fe_params.to_dict()
|
474 |
+
model_positive = [col for col in coefficients.keys() if coefficients[col] > 0]
|
475 |
+
|
476 |
+
pvalues = [var for var in list(mdf.pvalues) if var <= 0.06]
|
477 |
+
|
478 |
+
if (len(model_positive) / len(selected_features)) > 0 and (
|
479 |
+
len(pvalues) / len(selected_features)) >= 0: # srishti - changed just for testing, revert later
|
480 |
+
# predicted_values = model.predict(X_train)
|
481 |
+
mape = mean_absolute_percentage_error(y_train, predicted_values)
|
482 |
+
r2 = r2_score(y_train, predicted_values)
|
483 |
+
adjr2 = 1 - (1 - r2) * (len(y_train) - 1) / (len(y_train) - len(selected_features) - 1)
|
484 |
+
|
485 |
+
filename = os.path.join(save_path, f"model_{i}.pkl")
|
486 |
+
with open(filename, "wb") as f:
|
487 |
+
pickle.dump(mdf, f)
|
488 |
+
# with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
|
489 |
+
# model = pickle.load(file)
|
490 |
+
|
491 |
+
st.session_state['Model_results']['Model_object'].append(filename)
|
492 |
+
st.session_state['Model_results']['Model_iteration'].append(i)
|
493 |
+
st.session_state['Model_results']['Feature_set'].append(fet)
|
494 |
+
st.session_state['Model_results']['MAPE'].append(mape)
|
495 |
+
st.session_state['Model_results']['R2'].append(r2)
|
496 |
+
st.session_state['Model_results']['pos_count'].append(len(model_positive))
|
497 |
+
st.session_state['Model_results']['ADJR2'].append(adjr2)
|
498 |
+
|
499 |
+
current_time = time.time()
|
500 |
+
time_taken = current_time - start_time
|
501 |
+
time_elapsed_minutes = time_taken / 60
|
502 |
+
completed_iterations_text = f"{i + 1}/{iterations}"
|
503 |
+
progress_bar.progress((i + 1) / int(iterations))
|
504 |
+
progress_text.text(
|
505 |
+
f'Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}')
|
506 |
+
st.write(
|
507 |
+
f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models')
|
508 |
+
|
509 |
+
else:
|
510 |
+
|
511 |
+
for i, selected_features in enumerate(st.session_state["final_selection"][0:int(iterations)]): # srishti
|
512 |
+
df = st.session_state['media_data']
|
513 |
+
|
514 |
+
fet = [var for var in selected_features if len(var) > 0]
|
515 |
+
inp_vars_str = " + ".join(fet)
|
516 |
+
|
517 |
+
X = df[fet]
|
518 |
+
y = df[target_col]
|
519 |
+
ss = MinMaxScaler()
|
520 |
+
X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
|
521 |
+
X = sm.add_constant(X)
|
522 |
+
X_train = X.iloc[:130]
|
523 |
+
X_test = X.iloc[130:]
|
524 |
+
y_train = y.iloc[:130]
|
525 |
+
y_test = y.iloc[130:]
|
526 |
+
|
527 |
+
model = sm.OLS(y_train, X_train).fit()
|
528 |
+
|
529 |
+
|
530 |
+
coefficients = model.params.to_list()
|
531 |
+
model_positive = [coef for coef in coefficients if coef > 0]
|
532 |
+
predicted_values = model.predict(X_train)
|
533 |
+
pvalues = [var for var in list(model.pvalues) if var <= 0.06]
|
534 |
+
|
535 |
+
# if (len(model_positive) / len(selected_features)) > 0.9 and (len(pvalues) / len(selected_features)) >= 0.8:
|
536 |
+
if (len(model_positive) / len(selected_features)) > 0 and (len(pvalues) / len(
|
537 |
+
selected_features)) >= 0.5: # srishti - changed just for testing, revert later VALID MODEL CRITERIA
|
538 |
+
# predicted_values = model.predict(X_train)
|
539 |
+
mape = mean_absolute_percentage_error(y_train, predicted_values)
|
540 |
+
adjr2 = model.rsquared_adj
|
541 |
+
r2 = model.rsquared
|
542 |
+
|
543 |
+
filename = os.path.join(save_path, f"model_{i}.pkl")
|
544 |
+
with open(filename, "wb") as f:
|
545 |
+
pickle.dump(model, f)
|
546 |
+
# with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
|
547 |
+
# model = pickle.load(file)
|
548 |
+
|
549 |
+
st.session_state['Model_results']['Model_object'].append(filename)
|
550 |
+
st.session_state['Model_results']['Model_iteration'].append(i)
|
551 |
+
st.session_state['Model_results']['Feature_set'].append(fet)
|
552 |
+
st.session_state['Model_results']['MAPE'].append(mape)
|
553 |
+
st.session_state['Model_results']['R2'].append(r2)
|
554 |
+
st.session_state['Model_results']['ADJR2'].append(adjr2)
|
555 |
+
st.session_state['Model_results']['pos_count'].append(len(model_positive))
|
556 |
+
|
557 |
+
current_time = time.time()
|
558 |
+
time_taken = current_time - start_time
|
559 |
+
time_elapsed_minutes = time_taken / 60
|
560 |
+
completed_iterations_text = f"{i + 1}/{iterations}"
|
561 |
+
progress_bar.progress((i + 1) / int(iterations))
|
562 |
+
progress_text.text(
|
563 |
+
f'Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}')
|
564 |
+
st.write(
|
565 |
+
f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models')
|
566 |
+
|
567 |
+
pd.DataFrame(st.session_state['Model_results']).to_csv('model_output.csv')
|
568 |
+
|
569 |
+
|
570 |
+
def to_percentage(value):
    """Format a fraction (e.g. ``0.123``) as a one-decimal percent string (``'12.3%'``)."""
    return '{:.1f}%'.format(value * 100)
|
572 |
+
|
573 |
+
## Section 5 - Select Model
|
574 |
+
st.title('2. Select Models')
|
575 |
+
if 'tick' not in st.session_state:
|
576 |
+
st.session_state['tick'] = False
|
577 |
+
if st.checkbox('Show results of top 10 models (based on MAPE and Adj. R2)', value=st.session_state['tick']):
|
578 |
+
st.session_state['tick'] = True
|
579 |
+
st.write('Select one model iteration to generate performance metrics for it:')
|
580 |
+
data = pd.DataFrame(st.session_state['Model_results'])
|
581 |
+
data = data[data['pos_count']==data['pos_count'].max()].reset_index(drop=True) # Sprint4 -- Srishti -- only show models with the lowest num of neg coeffs
|
582 |
+
data.sort_values(by=['ADJR2'], ascending=False, inplace=True)
|
583 |
+
data.drop_duplicates(subset='Model_iteration', inplace=True)
|
584 |
+
top_10 = data.head(10)
|
585 |
+
top_10['Rank'] = np.arange(1, len(top_10) + 1, 1)
|
586 |
+
top_10[['MAPE', 'R2', 'ADJR2']] = np.round(top_10[['MAPE', 'R2', 'ADJR2']], 4).applymap(to_percentage)
|
587 |
+
top_10_table = top_10[['Rank', 'Model_iteration', 'MAPE', 'ADJR2', 'R2']]
|
588 |
+
# top_10_table.columns=[['Rank','Model Iteration Index','MAPE','Adjusted R2','R2']]
|
589 |
+
gd = GridOptionsBuilder.from_dataframe(top_10_table)
|
590 |
+
gd.configure_pagination(enabled=True)
|
591 |
+
|
592 |
+
gd.configure_selection(
|
593 |
+
use_checkbox=True,
|
594 |
+
selection_mode="single",
|
595 |
+
pre_select_all_rows=False,
|
596 |
+
pre_selected_rows=[1],
|
597 |
+
)
|
598 |
+
|
599 |
+
gridoptions = gd.build()
|
600 |
+
|
601 |
+
table = AgGrid(top_10, gridOptions=gridoptions, update_mode=GridUpdateMode.SELECTION_CHANGED)
|
602 |
+
|
603 |
+
selected_rows = table.selected_rows
|
604 |
+
# if st.session_state["selected_rows"] != selected_rows:
|
605 |
+
# st.session_state["build_rc_cb"] = False
|
606 |
+
st.session_state["selected_rows"] = selected_rows
|
607 |
+
if 'Model' not in st.session_state:
|
608 |
+
st.session_state['Model'] = {}
|
609 |
+
|
610 |
+
# Section 6 - Display Results
|
611 |
+
|
612 |
+
if len(selected_rows) > 0:
|
613 |
+
st.header('2.1 Results Summary')
|
614 |
+
|
615 |
+
model_object = data[data['Model_iteration'] == selected_rows[0]['Model_iteration']]['Model_object']
|
616 |
+
features_set = data[data['Model_iteration'] == selected_rows[0]['Model_iteration']]['Feature_set']
|
617 |
+
|
618 |
+
with open(str(model_object.values[0]), 'rb') as file:
|
619 |
+
# print(file)
|
620 |
+
model = pickle.load(file)
|
621 |
+
st.write(model.summary())
|
622 |
+
st.header('2.2 Actual vs. Predicted Plot')
|
623 |
+
|
624 |
+
if is_panel :
|
625 |
+
df = st.session_state['media_data']
|
626 |
+
X = df[features_set.values[0]]
|
627 |
+
y = df[target_col]
|
628 |
+
|
629 |
+
ss = MinMaxScaler()
|
630 |
+
X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
|
631 |
+
|
632 |
+
# Sprint2 changes
|
633 |
+
X[target_col] = y # new
|
634 |
+
X[panel_col] = df[panel_col]
|
635 |
+
X[date_col] = date
|
636 |
+
|
637 |
+
X_train = X.iloc[:8000]
|
638 |
+
X_test = X.iloc[8000:].reset_index(drop=True)
|
639 |
+
y_train = y.iloc[:8000]
|
640 |
+
y_test = y.iloc[8000:].reset_index(drop=True)
|
641 |
+
|
642 |
+
test_spends = spends_data[8000:] # Sprint3 - test spends for resp curves
|
643 |
+
random_eff_df = get_random_effects(media_data, panel_col, model)
|
644 |
+
train_pred = model.fittedvalues
|
645 |
+
test_pred = mdf_predict(X_test, model, random_eff_df)
|
646 |
+
print("__" * 20, test_pred.isna().sum())
|
647 |
+
|
648 |
+
else :
|
649 |
+
df = st.session_state['media_data']
|
650 |
+
X = df[features_set.values[0]]
|
651 |
+
y = df[target_col]
|
652 |
+
|
653 |
+
ss = MinMaxScaler()
|
654 |
+
X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
|
655 |
+
X = sm.add_constant(X)
|
656 |
+
|
657 |
+
X[date_col] = date
|
658 |
+
|
659 |
+
X_train = X.iloc[:130]
|
660 |
+
X_test = X.iloc[130:].reset_index(drop=True)
|
661 |
+
y_train = y.iloc[:130]
|
662 |
+
y_test = y.iloc[130:].reset_index(drop=True)
|
663 |
+
|
664 |
+
test_spends = spends_data[130:] # Sprint3 - test spends for resp curves
|
665 |
+
train_pred = model.predict(X_train[features_set.values[0]+['const']])
|
666 |
+
test_pred = model.predict(X_test[features_set.values[0]+['const']])
|
667 |
+
|
668 |
+
|
669 |
+
# save x test to test - srishti
|
670 |
+
x_test_to_save = X_test.copy()
|
671 |
+
x_test_to_save['Actuals'] = y_test
|
672 |
+
x_test_to_save['Predictions'] = test_pred
|
673 |
+
|
674 |
+
x_train_to_save = X_train.copy()
|
675 |
+
x_train_to_save['Actuals'] = y_train
|
676 |
+
x_train_to_save['Predictions'] = train_pred
|
677 |
+
|
678 |
+
x_train_to_save.to_csv('Test/x_train_to_save.csv', index=False)
|
679 |
+
x_test_to_save.to_csv('Test/x_test_to_save.csv', index=False)
|
680 |
+
|
681 |
+
st.session_state['X'] = X_train
|
682 |
+
st.session_state['features_set'] = features_set.values[0]
|
683 |
+
print("**" * 20, "selected model features : ", features_set.values[0])
|
684 |
+
metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train, train_pred,
|
685 |
+
model, target_column=sel_target_col,
|
686 |
+
is_panel=is_panel) # Sprint2
|
687 |
+
|
688 |
+
st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
|
689 |
+
|
690 |
+
st.markdown('## 2.3 Residual Analysis')
|
691 |
+
columns = st.columns(2)
|
692 |
+
with columns[0]:
|
693 |
+
fig = plot_residual_predicted(y_train, train_pred, X_train) # Sprint2
|
694 |
+
st.plotly_chart(fig)
|
695 |
+
|
696 |
+
with columns[1]:
|
697 |
+
st.empty()
|
698 |
+
fig = qqplot(y_train, train_pred) # Sprint2
|
699 |
+
st.plotly_chart(fig)
|
700 |
+
|
701 |
+
with columns[0]:
|
702 |
+
fig = residual_distribution(y_train, train_pred) # Sprint2
|
703 |
+
st.pyplot(fig)
|
704 |
+
|
705 |
+
vif_data = pd.DataFrame()
|
706 |
+
# X=X.drop('const',axis=1)
|
707 |
+
X_train_orig = X_train.copy() # Sprint2 -- creating a copy of xtrain. Later deleting panel, target & date from xtrain
|
708 |
+
del_col_list = list(set([target_col, panel_col, date_col]).intersection(list(X_train.columns)))
|
709 |
+
X_train.drop(columns=del_col_list, inplace=True) # Sprint2
|
710 |
+
|
711 |
+
vif_data["Variable"] = X_train.columns
|
712 |
+
vif_data["VIF"] = [variance_inflation_factor(X_train.values, i) for i in range(X_train.shape[1])]
|
713 |
+
vif_data.sort_values(by=['VIF'], ascending=False, inplace=True)
|
714 |
+
vif_data = np.round(vif_data)
|
715 |
+
vif_data['VIF'] = vif_data['VIF'].astype(float)
|
716 |
+
st.header('2.4 Variance Inflation Factor (VIF)')
|
717 |
+
# st.dataframe(vif_data)
|
718 |
+
color_mapping = {
|
719 |
+
'darkgreen': (vif_data['VIF'] < 3),
|
720 |
+
'orange': (vif_data['VIF'] >= 3) & (vif_data['VIF'] <= 10),
|
721 |
+
'darkred': (vif_data['VIF'] > 10)
|
722 |
+
}
|
723 |
+
|
724 |
+
# Create a horizontal bar plot
|
725 |
+
fig, ax = plt.subplots()
|
726 |
+
fig.set_figwidth(10) # Adjust the width of the figure as needed
|
727 |
+
|
728 |
+
# Sort the bars by descending VIF values
|
729 |
+
vif_data = vif_data.sort_values(by='VIF', ascending=False)
|
730 |
+
|
731 |
+
# Iterate through the color mapping and plot bars with corresponding colors
|
732 |
+
for color, condition in color_mapping.items():
|
733 |
+
subset = vif_data[condition]
|
734 |
+
bars = ax.barh(subset["Variable"], subset["VIF"], color=color, label=color)
|
735 |
+
|
736 |
+
# Add text annotations on top of the bars
|
737 |
+
for bar in bars:
|
738 |
+
width = bar.get_width()
|
739 |
+
ax.annotate(f'{width:}', xy=(width, bar.get_y() + bar.get_height() / 2), xytext=(5, 0),
|
740 |
+
textcoords='offset points', va='center')
|
741 |
+
|
742 |
+
# Customize the plot
|
743 |
+
ax.set_xlabel('VIF Values')
|
744 |
+
# ax.set_title('2.4 Variance Inflation Factor (VIF)')
|
745 |
+
# ax.legend(loc='upper right')
|
746 |
+
|
747 |
+
# Display the plot in Streamlit
|
748 |
+
st.pyplot(fig)
|
749 |
+
|
750 |
+
with st.expander('Results Summary Test data'):
|
751 |
+
# ss = MinMaxScaler()
|
752 |
+
# X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.columns)
|
753 |
+
st.header('2.2 Actual vs. Predicted Plot')
|
754 |
+
|
755 |
+
metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_test[date_col], y_test,
|
756 |
+
test_pred, model,
|
757 |
+
target_column=sel_target_col,
|
758 |
+
is_panel=is_panel) # Sprint2
|
759 |
+
|
760 |
+
st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
|
761 |
+
|
762 |
+
st.markdown('## 2.3 Residual Analysis')
|
763 |
+
columns = st.columns(2)
|
764 |
+
with columns[0]:
|
765 |
+
fig = plot_residual_predicted(y, test_pred, X_test) # Sprint2
|
766 |
+
st.plotly_chart(fig)
|
767 |
+
|
768 |
+
with columns[1]:
|
769 |
+
st.empty()
|
770 |
+
fig = qqplot(y, test_pred) # Sprint2
|
771 |
+
st.plotly_chart(fig)
|
772 |
+
|
773 |
+
with columns[0]:
|
774 |
+
fig = residual_distribution(y, test_pred) # Sprint2
|
775 |
+
st.pyplot(fig)
|
776 |
+
|
777 |
+
value = False
|
778 |
+
save_button_model = st.checkbox('Save this model to tune', key='build_rc_cb') # , on_click=set_save())
|
779 |
+
|
780 |
+
if save_button_model:
|
781 |
+
mod_name = st.text_input('Enter model name')
|
782 |
+
if len(mod_name) > 0:
|
783 |
+
mod_name = mod_name + "__" + target_col # Sprint4 - adding target col to model name
|
784 |
+
if is_panel :
|
785 |
+
pred_train= model.fittedvalues
|
786 |
+
pred_test= mdf_predict(X_test, model, random_eff_df)
|
787 |
+
else :
|
788 |
+
st.session_state['features_set'] = st.session_state['features_set'] + ['const']
|
789 |
+
pred_train= model.predict(X_train_orig[st.session_state['features_set']])
|
790 |
+
pred_test= model.predict(X_test[st.session_state['features_set']])
|
791 |
+
|
792 |
+
st.session_state['Model'][mod_name] = {"Model_object": model,
|
793 |
+
'feature_set': st.session_state['features_set'],
|
794 |
+
'X_train': X_train_orig,
|
795 |
+
'X_test': X_test,
|
796 |
+
'y_train': y_train,
|
797 |
+
'y_test': y_test,
|
798 |
+
'pred_train':pred_train,
|
799 |
+
'pred_test': pred_test
|
800 |
+
}
|
801 |
+
st.session_state['X_train'] = X_train_orig
|
802 |
+
# st.session_state['X_test'] = X_test
|
803 |
+
# st.session_state['y_train'] = y_train
|
804 |
+
# st.session_state['y_test'] = y_test
|
805 |
+
st.session_state['X_test_spends'] = test_spends
|
806 |
+
# st.session_state['base_model'] = model
|
807 |
+
# st.session_state['base_model_feature_set'] = st.session_state['features_set']
|
808 |
+
st.session_state['saved_model_names'].append(mod_name)
|
809 |
+
# Sprint3 additions
|
810 |
+
if is_panel :
|
811 |
+
random_eff_df = get_random_effects(media_data, panel_col, model)
|
812 |
+
st.session_state['random_effects'] = random_eff_df
|
813 |
+
|
814 |
+
# st.session_state['pred_train'] = model.fittedvalues
|
815 |
+
# st.session_state['pred_test'] = mdf_predict(X_test, model, random_eff_df)
|
816 |
+
# # End of Sprint3 additions
|
817 |
+
|
818 |
+
with open("best_models.pkl", "wb") as f:
|
819 |
+
pickle.dump(st.session_state['Model'], f)
|
820 |
+
st.success(mod_name + ' model saved! Proceed to the next page to tune the model')
|
821 |
+
urm = st.session_state['used_response_metrics']
|
822 |
+
urm.append(sel_target_col)
|
823 |
+
st.session_state['used_response_metrics'] = list(set(urm))
|
824 |
+
mod_name = ""
|
825 |
+
# Sprint4 - add the formatted name of the target col to used resp metrics
|
826 |
+
value = False
|
pages/4_Saved_Model_Results.py
ADDED
@@ -0,0 +1,607 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import plotly.express as px
|
2 |
+
import numpy as np
|
3 |
+
import plotly.graph_objects as go
|
4 |
+
import streamlit as st
|
5 |
+
import pandas as pd
|
6 |
+
import statsmodels.api as sm
|
7 |
+
from sklearn.metrics import mean_absolute_percentage_error
|
8 |
+
import sys
|
9 |
+
import os
|
10 |
+
from utilities import set_header, load_local_css, load_authenticator
|
11 |
+
import seaborn as sns
|
12 |
+
import matplotlib.pyplot as plt
|
13 |
+
import sweetviz as sv
|
14 |
+
import tempfile
|
15 |
+
from sklearn.preprocessing import MinMaxScaler
|
16 |
+
from st_aggrid import AgGrid
|
17 |
+
from st_aggrid import GridOptionsBuilder, GridUpdateMode
|
18 |
+
from st_aggrid import GridOptionsBuilder
|
19 |
+
import sys
|
20 |
+
import re
|
21 |
+
|
22 |
+
# Raise the recursion ceiling well above the default; presumably needed for
# deeply recursive serialization in imported libs (sweetviz/plotly) — TODO confirm.
sys.setrecursionlimit(10**6)

# HACK: swap stdout for a scratch file and immediately restore it, apparently
# to swallow console noise emitted at import/startup time. NOTE(review):
# "temp_stdout.txt" is created in the working directory and never removed.
original_stdout = sys.stdout
sys.stdout = open("temp_stdout.txt", "w")
sys.stdout.close()
sys.stdout = original_stdout
|
28 |
+
|
29 |
+
# Page chrome: wide layout plus the project's shared CSS and header.
st.set_page_config(layout="wide")
load_local_css("styles.css")
set_header()

# Re-assign each session key to itself, skipping auth/internal widget keys.
# This is a known Streamlit trick to keep session state alive across
# multipage navigation — TODO confirm it is still required.
for k, v in st.session_state.items():
    if k not in ["logout", "login", "config"] and not k.startswith("FormSubmitter"):
        st.session_state[k] = v

# Reuse the cached authenticator when present; otherwise build a fresh one.
authenticator = st.session_state.get("authenticator")
if authenticator is None:
    authenticator = load_authenticator()

# Render the login form; the library also records the result in session state.
name, authentication_status, username = authenticator.login("Login", "main")
auth_status = st.session_state.get("authentication_status")

if auth_status == True:
    # Placeholder one-time-initialization hook; `a = 1` is a deliberate no-op.
    is_state_initiaized = st.session_state.get("initialized", False)
    if not is_state_initiaized:
        a = 1
|
48 |
+
|
49 |
+
def plot_residual_predicted(actual, predicted, df_):
    """Scatter standardized residuals against the predicted values.

    NOTE: mutates ``df_`` in place, adding a ``Residuals`` and a
    ``StdResidual`` column before plotting.

    Returns a 600x400 Plotly figure with a dashed zero line and solid
    reference lines at +/-2 standardized residuals.
    """
    df_["Residuals"] = actual - pd.Series(predicted)
    resid = df_["Residuals"]
    df_["StdResidual"] = (resid - resid.mean()) / resid.std()

    scatter_fig = px.scatter(
        df_,
        x=predicted,
        y="StdResidual",
        opacity=0.5,
        color_discrete_sequence=["#11B6BD"],
    )

    # Reference lines: zero line plus the usual +/-2 bounds.
    scatter_fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
    for bound in (2, -2):
        scatter_fig.add_hline(y=bound, line_color="red")

    scatter_fig.update_xaxes(title="Predicted")
    scatter_fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")

    # Fixed size so this panel lines up with its sibling charts.
    scatter_fig.update_layout(
        title="Residuals over Predicted Values",
        autosize=False,
        width=600,
        height=400,
    )

    return scatter_fig
|
81 |
+
|
82 |
+
def residual_distribution(actual, predicted):
    """Histogram (with KDE overlay) of the raw residuals.

    Returns the ``matplotlib.pyplot`` module so the caller can pass the
    current figure to ``st.pyplot``.
    """
    resid = actual - pd.Series(predicted)

    # Seaborn styling + a fresh, fixed-size figure for this chart only.
    sns.set(style="whitegrid")
    plt.figure(figsize=(6, 4))
    sns.histplot(resid, kde=True, color="#11B6BD")

    plt.title(" Distribution of Residuals")
    plt.xlabel("Residuals")
    plt.ylabel("Probability Density")

    return plt
|
95 |
+
|
96 |
+
def qqplot(actual, predicted):
    """Quantile-quantile plot of standardized residuals.

    Returns a 600x400 Plotly figure comparing the residual quantiles with
    theoretical normal quantiles, plus a red 45-degree reference line
    spanning [-2, 2].
    """
    resid = pd.Series(actual - pd.Series(predicted))
    std_resid = (resid - resid.mean()) / resid.std()

    # Build the ProbPlot once and reuse both quantile arrays.
    prob_plot = sm.ProbPlot(std_resid)

    qq_fig = go.Figure()
    qq_fig.add_trace(
        go.Scatter(
            x=prob_plot.theoretical_quantiles,
            y=prob_plot.sample_quantiles,
            mode="markers",
            marker=dict(size=5, color="#11B6BD"),
            name="QQ Plot",
        )
    )

    # 45-degree reference line; adjust the [-2, 2] span if the data range differs.
    qq_fig.add_trace(
        go.Scatter(
            x=[-2, 2],
            y=[-2, 2],
            mode="lines",
            line=dict(color="red"),
            name=" ",
        )
    )

    qq_fig.update_layout(
        title="QQ Plot of Residuals",
        title_x=0.5,
        autosize=False,
        width=600,
        height=400,
        xaxis_title="Theoretical Quantiles",
        yaxis_title="Sample Quantiles",
    )

    return qq_fig
|
135 |
+
|
136 |
+
def plot_actual_vs_predicted(date, y, predicted_values, model):
    """Line chart of actual vs. predicted values plus summary fit metrics.

    Returns ``(metrics_table, fig)``: a small DataFrame holding MAPE,
    R-squared and adjusted R-squared, and the Plotly figure (whose title
    also reports MAPE and adjusted R2).
    """
    # ---- fit metrics -------------------------------------------------
    mape = mean_absolute_percentage_error(y, predicted_values) * 100

    # R-squared from residual / total sums of squares.
    residual_ss = np.sum((y - predicted_values) ** 2)
    total_ss = np.sum((y - np.mean(y)) ** 2)
    r_squared = 1 - (residual_ss / total_ss)

    # Adjusted R-squared penalizes for the number of regressors.
    n_obs = len(y)
    n_regressors = model.df_model
    adj_r_squared = 1 - (
        (1 - r_squared) * ((n_obs - 1) / (n_obs - n_regressors - 1))
    )

    metrics_table = pd.DataFrame(
        {
            "Metric": ["MAPE", "R-squared", "AdjR-squared"],
            "Value": [mape, r_squared, adj_r_squared],
        }
    )

    # ---- figure ------------------------------------------------------
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=date, y=y, mode="lines", name="Actual", line=dict(color="blue")
        )
    )
    fig.add_trace(
        go.Scatter(
            x=date,
            y=predicted_values,
            mode="lines",
            name="Predicted",
            line=dict(color="orange"),
        )
    )
    fig.update_layout(
        xaxis=dict(title="Date"),
        yaxis=dict(title="Value"),
        title=f"MAPE : {mape:.2f}%, AdjR2: {adj_r_squared:.2f}",
        xaxis_tickangle=-30,
    )

    return metrics_table, fig
|
187 |
+
|
188 |
+
def contributions(X, model):
    """Percentage contribution of each feature, weighted by model coefficients.

    Multiplies every column of ``X`` by the matching coefficient in
    ``model.params`` (positional pairing — column order must match the
    params order) and returns the column sums as percentages of the grand
    total, sorted descending and rounded to 2 decimals.

    NOTE: shadowed later in this module by a 3-argument
    ``contributions(X, model, target)`` redefinition.
    """
    weighted = X.copy()
    for idx, column in enumerate(weighted.columns):
        weighted[column] = weighted[column] * model.params.values[idx]

    column_totals = weighted.sum()
    shares = column_totals / sum(column_totals) * 100
    return np.round(shares.sort_values(ascending=False), 2)
|
196 |
+
|
197 |
+
transformed_data = pd.read_csv("transformed_data.csv")
|
198 |
+
|
199 |
+
# hard coded for now, need to get features set from model
|
200 |
+
|
201 |
+
feature_set_dct = {
|
202 |
+
"app_installs_-_appsflyer": [
|
203 |
+
"paid_search_clicks",
|
204 |
+
"fb:_level_achieved_-_tier_1_impressions_lag2",
|
205 |
+
"fb:_level_achieved_-_tier_2_clicks_lag2",
|
206 |
+
"paid_social_others_impressions_adst.1",
|
207 |
+
"ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag2",
|
208 |
+
"digital_tactic_others_clicks",
|
209 |
+
"kwai_clicks_adst.3",
|
210 |
+
"programmaticclicks",
|
211 |
+
"indicacao_clicks_adst.1",
|
212 |
+
"infleux_clicks_adst.4",
|
213 |
+
"influencer_clicks",
|
214 |
+
],
|
215 |
+
"account_requests_-_appsflyer": [
|
216 |
+
"paid_search_impressions",
|
217 |
+
"fb:_level_achieved_-_tier_1_clicks_adst.1",
|
218 |
+
"fb:_level_achieved_-_tier_2_clicks_adst.1",
|
219 |
+
"paid_social_others_clicks_lag2",
|
220 |
+
"ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag5_adst.1",
|
221 |
+
"digital_tactic_others_clicks_adst.1",
|
222 |
+
"kwai_clicks_adst.2",
|
223 |
+
"programmaticimpressions_lag4_adst.1",
|
224 |
+
"indicacao_clicks",
|
225 |
+
"infleux_clicks_adst.2",
|
226 |
+
"influencer_clicks",
|
227 |
+
],
|
228 |
+
"total_approved_accounts_-_appsflyer": [
|
229 |
+
"paid_search_clicks",
|
230 |
+
"fb:_level_achieved_-_tier_1_impressions_lag2_adst.1",
|
231 |
+
"fb:_level_achieved_-_tier_2_impressions_lag2",
|
232 |
+
"paid_social_others_clicks_lag2_adst.2",
|
233 |
+
"ga_app:_will_and_cid_pequena_baixo_risco_impressions_lag4",
|
234 |
+
"digital_tactic_others_clicks",
|
235 |
+
"kwai_impressions_adst.2",
|
236 |
+
"programmaticclicks_adst.5",
|
237 |
+
"indicacao_clicks_adst.1",
|
238 |
+
"infleux_clicks_adst.3",
|
239 |
+
"influencer_clicks",
|
240 |
+
],
|
241 |
+
"total_approved_accounts_-_revenue": [
|
242 |
+
"paid_search_impressions_adst.5",
|
243 |
+
"kwai_impressions_lag2_adst.3",
|
244 |
+
"indicacao_clicks_adst.3",
|
245 |
+
"infleux_clicks_adst.3",
|
246 |
+
"programmaticclicks_adst.4",
|
247 |
+
"influencer_clicks_adst.3",
|
248 |
+
"fb:_level_achieved_-_tier_1_impressions_adst.2",
|
249 |
+
"fb:_level_achieved_-_tier_2_impressions_lag3_adst.5",
|
250 |
+
"paid_social_others_impressions_adst.3",
|
251 |
+
"ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag3_adst.5",
|
252 |
+
"digital_tactic_others_clicks_adst.2",
|
253 |
+
],
|
254 |
+
}
|
255 |
+
|
256 |
+
# """ the above part should be modified so that we are fetching features set from the saved model"""
|
257 |
+
|
258 |
+
def contributions(X, model, target):
    """Per-channel percentage contributions as a tidy DataFrame.

    Multiplies each column of ``X`` by the matching coefficient in
    ``model.params`` (positional pairing), converts the column sums into
    percentages of the grand total, and returns a DataFrame with a
    ``Channel`` column (feature name truncated at ``_imp``/``_cli``) and
    one value column per name in ``target``.
    """
    weighted = X.copy()
    for pos, column in enumerate(weighted.columns):
        weighted[column] = weighted[column] * model.params.values[pos]

    shares = np.round(
        (weighted.sum() / sum(weighted.sum()) * 100).sort_values(ascending=False), 2
    )

    result = pd.DataFrame(shares, columns=target).reset_index()
    result = result.rename(columns={"index": "Channel"})

    # Strip the "_impressions"/"_clicks" (and any transform) suffix so the
    # index reads as a plain channel name.
    result["Channel"] = [
        re.split(r"_imp|_cli", name)[0] for name in result["Channel"]
    ]

    return result
|
276 |
+
|
277 |
+
def model_fit(features_set, target, train_size=150):
    """Fit an OLS model of ``target`` on the scaled ``features_set`` columns.

    Reads the module-level ``transformed_data`` frame. The first
    ``train_size`` rows form the train split and the remainder the test
    split (rows are assumed chronologically ordered — TODO confirm).

    Parameters
    ----------
    features_set : list of str
        Regressor column names in ``transformed_data``.
    target : str
        Response column name.
    train_size : int, optional
        Number of leading rows used for training. Defaults to 150, the
        previously hard-coded split point, so existing callers are
        unaffected.

    Returns
    -------
    tuple
        ``(metrics_row, train_contributions)`` — a one-row DataFrame with
        R2, adjusted R2, train/test MAPE, the statsmodels summary and the
        fitted model object, plus the train-set contribution table.
    """
    X = transformed_data[features_set]
    y = transformed_data[target]

    # Min-max scale so coefficient-weighted contributions are comparable
    # across channels with different magnitudes.
    ss = MinMaxScaler()
    X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
    X = sm.add_constant(X)

    # Chronological (unshuffled) train/test split.
    X_train = X.iloc[:train_size]
    X_test = X.iloc[train_size:]
    y_train = y.iloc[:train_size]
    y_test = y.iloc[train_size:]

    model = sm.OLS(y_train, X_train).fit()
    predicted_values_train = model.predict(X_train)

    r2 = model.rsquared
    adjr2 = model.rsquared_adj
    train_mape = mean_absolute_percentage_error(y_train, predicted_values_train)
    test_mape = mean_absolute_percentage_error(y_test, model.predict(X_test))
    summary = model.summary()
    train_contributions = contributions(X_train, model, [target])

    return (
        pd.DataFrame(
            {
                "Model": target,
                "R2": np.round(r2, 2),
                "ADJr2": np.round(adjr2, 2),
                "Train Mape": np.round(train_mape, 2),
                "Test Mape": np.round(test_mape, 2),
                "Summary": summary,
                "Model_object": model,
            },
            index=[0],
        ),
        train_contributions,
    )
|
310 |
+
|
311 |
+
metrics_table = pd.DataFrame()
|
312 |
+
|
313 |
+
if "contribution_df" not in st.session_state:
|
314 |
+
st.session_state["contribution_df"] = pd.DataFrame()
|
315 |
+
|
316 |
+
for target, feature_set in feature_set_dct.items():
|
317 |
+
metrics_table = pd.concat(
|
318 |
+
[metrics_table, model_fit(features_set=feature_set, target=target)[0]]
|
319 |
+
)
|
320 |
+
if st.session_state["contribution_df"].empty:
|
321 |
+
st.session_state["contribution_df"] = model_fit(
|
322 |
+
features_set=feature_set, target=target
|
323 |
+
)[1]
|
324 |
+
else:
|
325 |
+
st.session_state["contribution_df"] = pd.merge(
|
326 |
+
st.session_state["contribution_df"],
|
327 |
+
model_fit(features_set=feature_set, target=target)[1],
|
328 |
+
)
|
329 |
+
|
330 |
+
# st.write(st.session_state["contribution_df"])
|
331 |
+
|
332 |
+
metrics_table.reset_index(drop=True, inplace=True)
|
333 |
+
|
334 |
+
eda_columns = st.columns(2)
|
335 |
+
with eda_columns[1]:
|
336 |
+
eda = st.button(
|
337 |
+
"Generate EDA Report",
|
338 |
+
help="Click to generate a bivariate report for the selected response metric from the table below.",
|
339 |
+
)
|
340 |
+
|
341 |
+
# st.markdown('Model Metrics')
|
342 |
+
|
343 |
+
st.title("Contribution Overview")
|
344 |
+
|
345 |
+
contribution_selections = st.multiselect(
|
346 |
+
"Select the models to compare contributions",
|
347 |
+
[
|
348 |
+
col
|
349 |
+
for col in st.session_state["contribution_df"].columns
|
350 |
+
if col.lower() != "channel"
|
351 |
+
],
|
352 |
+
default=[
|
353 |
+
col
|
354 |
+
for col in st.session_state["contribution_df"].columns
|
355 |
+
if col.lower() != "channel"
|
356 |
+
][-1],
|
357 |
+
)
|
358 |
+
trace_data = []
|
359 |
+
|
360 |
+
for selection in contribution_selections:
|
361 |
+
|
362 |
+
trace = go.Bar(
|
363 |
+
x=st.session_state["contribution_df"]["Channel"],
|
364 |
+
y=st.session_state["contribution_df"][selection],
|
365 |
+
name=selection,
|
366 |
+
text=np.round(st.session_state["contribution_df"][selection], 0)
|
367 |
+
.astype(int)
|
368 |
+
.astype(str)
|
369 |
+
+ "%",
|
370 |
+
textposition="outside",
|
371 |
+
)
|
372 |
+
trace_data.append(trace)
|
373 |
+
|
374 |
+
layout = go.Layout(
|
375 |
+
title="Metrics Contribution by Channel",
|
376 |
+
xaxis=dict(title="Channel Name"),
|
377 |
+
yaxis=dict(title="Metrics Contribution"),
|
378 |
+
barmode="group",
|
379 |
+
)
|
380 |
+
fig = go.Figure(data=trace_data, layout=layout)
|
381 |
+
st.plotly_chart(fig, use_container_width=True)
|
382 |
+
|
383 |
+
############################################ Waterfall Chart ############################################
|
384 |
+
# import plotly.graph_objects as go
|
385 |
+
|
386 |
+
# # Initialize a Plotly figure
|
387 |
+
# fig = go.Figure()
|
388 |
+
|
389 |
+
# for selection in contribution_selections:
|
390 |
+
# # Ensure y_values are numeric
|
391 |
+
# y_values = st.session_state["contribution_df"][selection].values.astype(float)
|
392 |
+
|
393 |
+
# # Generating text labels for each bar, ensuring operations are compatible with string formats
|
394 |
+
# text_values = [f"{val}%" for val in np.round(y_values, 0).astype(int)]
|
395 |
+
|
396 |
+
# fig.add_trace(
|
397 |
+
# go.Waterfall(
|
398 |
+
# name=selection,
|
399 |
+
# orientation="v",
|
400 |
+
# measure=["relative"]
|
401 |
+
# * len(y_values), # Adjust if you have absolute values at certain points
|
402 |
+
# x=st.session_state["contribution_df"]["Channel"].tolist(),
|
403 |
+
# text=text_values,
|
404 |
+
# textposition="outside",
|
405 |
+
# y=y_values,
|
406 |
+
# increasing={"marker": {"color": "green"}},
|
407 |
+
# decreasing={"marker": {"color": "red"}},
|
408 |
+
# totals={"marker": {"color": "blue"}},
|
409 |
+
# )
|
410 |
+
# )
|
411 |
+
|
412 |
+
# fig.update_layout(
|
413 |
+
# title="Metrics Contribution by Channel",
|
414 |
+
# xaxis={"title": "Channel Name"},
|
415 |
+
# yaxis={"title": "Metrics Contribution"},
|
416 |
+
# height=600,
|
417 |
+
# )
|
418 |
+
|
419 |
+
# # Displaying the waterfall chart in Streamlit
|
420 |
+
# st.plotly_chart(fig, use_container_width=True)
|
421 |
+
|
422 |
+
import plotly.graph_objects as go
|
423 |
+
|
424 |
+
# Initialize a Plotly figure
|
425 |
+
fig = go.Figure()
|
426 |
+
|
427 |
+
for selection in contribution_selections:
|
428 |
+
# Ensure contributions are numeric
|
429 |
+
contributions = (
|
430 |
+
st.session_state["contribution_df"][selection].values.astype(float).tolist()
|
431 |
+
)
|
432 |
+
channel_names = st.session_state["contribution_df"]["Channel"].tolist()
|
433 |
+
|
434 |
+
display_name, display_contribution, base_contribution = [], [], 0
|
435 |
+
for channel_name, contribution in zip(channel_names, contributions):
|
436 |
+
if channel_name != "const":
|
437 |
+
display_name.append(channel_name)
|
438 |
+
display_contribution.append(contribution)
|
439 |
+
else:
|
440 |
+
base_contribution = contribution
|
441 |
+
|
442 |
+
display_name = ["Base Sales"] + display_name
|
443 |
+
display_contribution = [base_contribution] + display_contribution
|
444 |
+
|
445 |
+
# Generating text labels for each bar, ensuring operations are compatible with string formats
|
446 |
+
text_values = [
|
447 |
+
f"{val}%" for val in np.round(display_contribution, 0).astype(int)
|
448 |
+
]
|
449 |
+
|
450 |
+
fig.add_trace(
|
451 |
+
go.Waterfall(
|
452 |
+
orientation="v",
|
453 |
+
measure=["relative"]
|
454 |
+
* len(
|
455 |
+
display_contribution
|
456 |
+
), # Adjust if you have absolute values at certain points
|
457 |
+
x=display_name,
|
458 |
+
text=text_values,
|
459 |
+
textposition="outside",
|
460 |
+
y=display_contribution,
|
461 |
+
increasing={"marker": {"color": "green"}},
|
462 |
+
decreasing={"marker": {"color": "red"}},
|
463 |
+
totals={"marker": {"color": "blue"}},
|
464 |
+
)
|
465 |
+
)
|
466 |
+
|
467 |
+
fig.update_layout(
|
468 |
+
title="Metrics Contribution by Channel",
|
469 |
+
xaxis={"title": "Channel Name"},
|
470 |
+
yaxis={"title": "Metrics Contribution"},
|
471 |
+
height=600,
|
472 |
+
)
|
473 |
+
|
474 |
+
# Displaying the waterfall chart in Streamlit
|
475 |
+
st.plotly_chart(fig, use_container_width=True)
|
476 |
+
|
477 |
+
############################################ Waterfall Chart ############################################
|
478 |
+
|
479 |
+
st.title("Analysis of Models Result")
|
480 |
+
# st.markdown()
|
481 |
+
gd_table = metrics_table.iloc[:, :-2]
|
482 |
+
|
483 |
+
gd = GridOptionsBuilder.from_dataframe(gd_table)
|
484 |
+
# gd.configure_pagination(enabled=True)
|
485 |
+
gd.configure_selection(
|
486 |
+
use_checkbox=True,
|
487 |
+
selection_mode="single",
|
488 |
+
pre_select_all_rows=False,
|
489 |
+
pre_selected_rows=[1],
|
490 |
+
)
|
491 |
+
|
492 |
+
gridoptions = gd.build()
|
493 |
+
table = AgGrid(
|
494 |
+
gd_table, gridOptions=gridoptions, fit_columns_on_grid_load=True, height=200
|
495 |
+
)
|
496 |
+
# table=metrics_table.iloc[:,:-2]
|
497 |
+
# table.insert(0, "Select", False)
|
498 |
+
# selection_table=st.data_editor(table,column_config={"Select": st.column_config.CheckboxColumn(required=True)})
|
499 |
+
|
500 |
+
if len(table.selected_rows) == 0:
|
501 |
+
st.warning(
|
502 |
+
"Click on the checkbox to view comprehensive results of the selected model."
|
503 |
+
)
|
504 |
+
st.stop()
|
505 |
+
else:
|
506 |
+
target_column = table.selected_rows[0]["Model"]
|
507 |
+
feature_set = feature_set_dct[target_column]
|
508 |
+
|
509 |
+
with eda_columns[1]:
|
510 |
+
if eda:
|
511 |
+
|
512 |
+
def generate_report_with_target(channel_data, target_feature):
|
513 |
+
report = sv.analyze(
|
514 |
+
[channel_data, "Dataset"], target_feat=target_feature, verbose=False
|
515 |
+
)
|
516 |
+
temp_dir = tempfile.mkdtemp()
|
517 |
+
report_path = os.path.join(temp_dir, "report.html")
|
518 |
+
report.show_html(
|
519 |
+
filepath=report_path, open_browser=False
|
520 |
+
) # Generate the report as an HTML file
|
521 |
+
return report_path
|
522 |
+
|
523 |
+
report_data = transformed_data[feature_set]
|
524 |
+
report_data[target_column] = transformed_data[target_column]
|
525 |
+
report_file = generate_report_with_target(report_data, target_column)
|
526 |
+
|
527 |
+
if os.path.exists(report_file):
|
528 |
+
with open(report_file, "rb") as f:
|
529 |
+
st.download_button(
|
530 |
+
label="Download EDA Report",
|
531 |
+
data=f.read(),
|
532 |
+
file_name="report.html",
|
533 |
+
mime="text/html",
|
534 |
+
)
|
535 |
+
else:
|
536 |
+
st.warning("Report generation failed. Unable to find the report file.")
|
537 |
+
|
538 |
+
model = metrics_table[metrics_table["Model"] == target_column]["Model_object"].iloc[
|
539 |
+
0
|
540 |
+
]
|
541 |
+
st.header("Model Summary")
|
542 |
+
st.write(model.summary())
|
543 |
+
X = transformed_data[feature_set]
|
544 |
+
ss = MinMaxScaler()
|
545 |
+
X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
|
546 |
+
X = sm.add_constant(X)
|
547 |
+
y = transformed_data[target_column]
|
548 |
+
X_train = X.iloc[:150]
|
549 |
+
X_test = X.iloc[150:]
|
550 |
+
y_train = y.iloc[:150]
|
551 |
+
y_test = y.iloc[150:]
|
552 |
+
X.index = transformed_data["date"]
|
553 |
+
y.index = transformed_data["date"]
|
554 |
+
|
555 |
+
metrics_table_train, fig_train = plot_actual_vs_predicted(
|
556 |
+
X_train.index, y_train, model.predict(X_train), model
|
557 |
+
)
|
558 |
+
metrics_table_test, fig_test = plot_actual_vs_predicted(
|
559 |
+
X_test.index, y_test, model.predict(X_test), model
|
560 |
+
)
|
561 |
+
|
562 |
+
metrics_table_train = metrics_table_train.set_index("Metric").transpose()
|
563 |
+
metrics_table_train.index = ["Train"]
|
564 |
+
metrics_table_test = metrics_table_test.set_index("Metric").transpose()
|
565 |
+
metrics_table_test.index = ["test"]
|
566 |
+
metrics_table = np.round(pd.concat([metrics_table_train, metrics_table_test]), 2)
|
567 |
+
|
568 |
+
st.markdown("Result Overview")
|
569 |
+
st.dataframe(np.round(metrics_table, 2), use_container_width=True)
|
570 |
+
|
571 |
+
st.subheader("Actual vs Predicted Plot Train")
|
572 |
+
|
573 |
+
st.plotly_chart(fig_train, use_container_width=True)
|
574 |
+
st.subheader("Actual vs Predicted Plot Test")
|
575 |
+
st.plotly_chart(fig_test, use_container_width=True)
|
576 |
+
|
577 |
+
st.markdown("## Residual Analysis")
|
578 |
+
columns = st.columns(2)
|
579 |
+
|
580 |
+
Xtrain1 = X_train.copy()
|
581 |
+
with columns[0]:
|
582 |
+
fig = plot_residual_predicted(y_train, model.predict(Xtrain1), Xtrain1)
|
583 |
+
st.plotly_chart(fig)
|
584 |
+
|
585 |
+
with columns[1]:
|
586 |
+
st.empty()
|
587 |
+
fig = qqplot(y_train, model.predict(X_train))
|
588 |
+
st.plotly_chart(fig)
|
589 |
+
|
590 |
+
with columns[0]:
|
591 |
+
fig = residual_distribution(y_train, model.predict(X_train))
|
592 |
+
st.pyplot(fig)
|
593 |
+
|
594 |
+
|
595 |
+
# Authentication failed: surface the error and offer a password-reset flow.
elif auth_status == False:
    st.error("Username/Password is incorrect")
    try:
        # The forgot-password form returns a (username, email, new_password)
        # triple on submit; username is falsy until the form is used —
        # TODO confirm against the streamlit-authenticator version in use.
        username_forgot_pw, email_forgot_password, random_password = (
            authenticator.forgot_password("Forgot password")
        )
        if username_forgot_pw:
            st.success("New password sent securely")
            # Random password to be transferred to the user securely
        elif username_forgot_pw == False:
            st.error("Username not found")
    except Exception as e:
        # Show any reset-flow failure directly in the UI.
        st.error(e)
|
pages/5_Model_Tuning_with_panel.py
ADDED
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
MMO Build Sprint 3
|
3 |
+
date :
|
4 |
+
changes : capability to tune MixedLM as well as simple LR in the same page
|
5 |
+
'''
|
6 |
+
|
7 |
+
import streamlit as st
|
8 |
+
import pandas as pd
|
9 |
+
from Eda_functions import format_numbers
|
10 |
+
import pickle
|
11 |
+
from utilities import set_header, load_local_css
|
12 |
+
import statsmodels.api as sm
|
13 |
+
import re
|
14 |
+
from sklearn.preprocessing import MinMaxScaler
|
15 |
+
import matplotlib.pyplot as plt
|
16 |
+
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
17 |
+
|
18 |
+
st.set_option('deprecation.showPyplotGlobalUse', False)
|
19 |
+
import statsmodels.formula.api as smf
|
20 |
+
from Data_prep_functions import *
|
21 |
+
|
22 |
+
# for i in ["model_tuned", "X_train_tuned", "X_test_tuned", "tuned_model_features", "tuned_model", "tuned_model_dict"] :
|
23 |
+
|
24 |
+
st.set_page_config(
|
25 |
+
page_title="Model Tuning",
|
26 |
+
page_icon=":shark:",
|
27 |
+
layout="wide",
|
28 |
+
initial_sidebar_state='collapsed'
|
29 |
+
)
|
30 |
+
load_local_css('styles.css')
|
31 |
+
set_header()
|
32 |
+
|
33 |
+
# Sprint3
|
34 |
+
# is_panel = st.session_state['is_panel']
|
35 |
+
# panel_col = 'markets' # set the panel column
|
36 |
+
date_col = 'date'

# Normalize the configured panel-level columns the same way column names are
# cleaned elsewhere in the app (lowercase, punctuation collapsed to '_').
_panel_cols = [
    col.lower().replace('.', '_').replace('@', '_').replace(' ', '_')
       .replace('-', '').replace(':', '').replace('__', '_')
    for col in st.session_state['bin_dict']['Panel Level 1']
]
# Guard against an empty panel list: the previous code indexed [0]
# unconditionally, which raised IndexError when no panel column was set.
panel_col = _panel_cols[0] if _panel_cols else ''  # set the panel column
is_panel = bool(panel_col)
|
40 |
+
|
41 |
+
|
42 |
+
# flag indicating that no tuned model exists yet
|
43 |
+
|
44 |
+
# Sprint4 - model tuned dict
|
45 |
+
if 'Model_Tuned' not in st.session_state:
|
46 |
+
st.session_state['Model_Tuned'] = {}
|
47 |
+
|
48 |
+
st.title('1. Model Tuning')
|
49 |
+
# st.write(st.session_state['base_model_feature_set'])
|
50 |
+
|
51 |
+
if "X_train" not in st.session_state:
|
52 |
+
st.error(
|
53 |
+
"Oops! It seems there are no saved models available. Please build and save a model from the previous page to proceed.")
|
54 |
+
st.stop()
|
55 |
+
# X_train=st.session_state['X_train']
|
56 |
+
# X_test=st.session_state['X_test']
|
57 |
+
# y_train=st.session_state['y_train']
|
58 |
+
# y_test=st.session_state['y_test']
|
59 |
+
# df=st.session_state['media_data']
|
60 |
+
|
61 |
+
|
62 |
+
# st.write(X_train.columns)
|
63 |
+
# st.write(X_test.columns)
|
64 |
+
if "is_tuned_model" not in st.session_state:
|
65 |
+
st.session_state["is_tuned_model"] = {}
|
66 |
+
# Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
|
67 |
+
if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics'] != []:
|
68 |
+
sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
|
69 |
+
target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
|
70 |
+
|
71 |
+
else:
|
72 |
+
sel_target_col = 'Total Approved Accounts - Revenue'
|
73 |
+
target_col = 'total_approved_accounts_revenue'
|
74 |
+
|
75 |
+
# Sprint4 - Look through all saved models, only show saved models of the sel resp metric (target_col)
|
76 |
+
saved_models = st.session_state['saved_model_names']
|
77 |
+
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
|
78 |
+
sel_model = st.selectbox("Select the model to tune", required_saved_models)
|
79 |
+
|
80 |
+
with open("best_models.pkl", 'rb') as file:
|
81 |
+
model_dict = pickle.load(file)
|
82 |
+
|
83 |
+
sel_model_dict = model_dict[sel_model + "__" + target_col] # Sprint4 - get the model obj of the selected model
|
84 |
+
# st.write(sel_model_dict)
|
85 |
+
|
86 |
+
X_train = sel_model_dict['X_train']
|
87 |
+
X_test = sel_model_dict['X_test']
|
88 |
+
y_train = sel_model_dict['y_train']
|
89 |
+
y_test = sel_model_dict['y_test']
|
90 |
+
df = st.session_state['media_data']
|
91 |
+
|
92 |
+
if 'selected_model' not in st.session_state:
|
93 |
+
st.session_state['selected_model'] = 0
|
94 |
+
|
95 |
+
# st.write(model_dict[st.session_state["selected_model"]]['X_train'].columns)
|
96 |
+
|
97 |
+
st.markdown('### 1.1 Event Flags')
|
98 |
+
st.markdown('Helps in quantifying the impact of specific occurrences of events')
|
99 |
+
with st.expander('Apply Event Flags'):
|
100 |
+
# st.session_state["selected_model"]=st.selectbox('Select Model to apply flags',model_dict.keys())
|
101 |
+
model = sel_model_dict['Model_object']
|
102 |
+
date = st.session_state['date']
|
103 |
+
date = pd.to_datetime(date)
|
104 |
+
X_train = sel_model_dict['X_train']
|
105 |
+
|
106 |
+
# features_set= model_dict[st.session_state["selected_model"]]['feature_set']
|
107 |
+
features_set = sel_model_dict["feature_set"]
|
108 |
+
|
109 |
+
col = st.columns(3)
|
110 |
+
min_date = min(date)
|
111 |
+
max_date = max(date)
|
112 |
+
with col[0]:
|
113 |
+
start_date = st.date_input('Select Start Date', min_date, min_value=min_date, max_value=max_date)
|
114 |
+
with col[1]:
|
115 |
+
end_date = st.date_input('Select End Date', max_date, min_value=min_date, max_value=max_date)
|
116 |
+
with col[2]:
|
117 |
+
repeat = st.selectbox('Repeat Annually', ['Yes', 'No'], index=1)
|
118 |
+
if repeat == 'Yes':
|
119 |
+
repeat = True
|
120 |
+
else:
|
121 |
+
repeat = False
|
122 |
+
|
123 |
+
if 'Flags' not in st.session_state:
|
124 |
+
st.session_state['Flags'] = {}
|
125 |
+
# print("**"*50)
|
126 |
+
# print(y_train)
|
127 |
+
# print("**"*50)
|
128 |
+
# print(model.fittedvalues)
|
129 |
+
if is_panel: # Sprint3
|
130 |
+
met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train,
|
131 |
+
model.fittedvalues, model,
|
132 |
+
target_column=sel_target_col,
|
133 |
+
flag=(start_date, end_date),
|
134 |
+
repeat_all_years=repeat, is_panel=True)
|
135 |
+
st.plotly_chart(fig_flag, use_container_width=True)
|
136 |
+
|
137 |
+
# create flag on test
|
138 |
+
met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test,
|
139 |
+
sel_model_dict['pred_test'], model,
|
140 |
+
target_column=sel_target_col,
|
141 |
+
flag=(start_date, end_date),
|
142 |
+
repeat_all_years=repeat, is_panel=True)
|
143 |
+
|
144 |
+
else:
|
145 |
+
pred_train=model.predict(X_train[features_set])
|
146 |
+
met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train, pred_train, model,
|
147 |
+
flag=(start_date, end_date), repeat_all_years=repeat,is_panel=False)
|
148 |
+
st.plotly_chart(fig_flag, use_container_width=True)
|
149 |
+
|
150 |
+
pred_test=model.predict(X_test[features_set])
|
151 |
+
met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test, pred_test, model,
|
152 |
+
flag=(start_date, end_date), repeat_all_years=repeat,is_panel=False)
|
153 |
+
# Flag naming: default suggestion shown in the input instead of a dead
# assignment that was immediately overwritten by st.text_input.
flag_name = st.text_input('Enter Flag Name', value='f1_flag')
# Sprint4 - add selected target col to flag name
if st.button('Update flag'):
    if not flag_name.strip():
        # Prevent storing an unnamed flag under the key '__<target_col>'.
        st.warning('Please enter a flag name before updating.')
    else:
        flag_key = flag_name + '__' + target_col
        st.session_state['Flags'][flag_key] = {
            'train': line_values,
            'test': test_line_values,
        }
        st.success(f'{flag_key} stored')
|
162 |
+
|
163 |
+
# Sprint4 - only show flag created for the particular target col
|
164 |
+
st.write(st.session_state['Flags'].keys() )
|
165 |
+
target_model_flags = [f.split("__")[0] for f in st.session_state['Flags'].keys() if f.split("__")[1] == target_col]
|
166 |
+
options = list(target_model_flags)
|
167 |
+
selected_options = []
|
168 |
+
num_columns = 4
|
169 |
+
num_rows = -(-len(options) // num_columns)
|
170 |
+
|
171 |
+
tick = False
|
172 |
+
if st.checkbox('Select all'):
|
173 |
+
tick = True
|
174 |
+
selected_options = []
|
175 |
+
for row in range(num_rows):
|
176 |
+
cols = st.columns(num_columns)
|
177 |
+
for col in cols:
|
178 |
+
if options:
|
179 |
+
option = options.pop(0)
|
180 |
+
selected = col.checkbox(option, value=tick)
|
181 |
+
if selected:
|
182 |
+
selected_options.append(option)
|
183 |
+
|
184 |
+
st.markdown('### 1.2 Select Parameters to Apply')
|
185 |
+
parameters = st.columns(3)
|
186 |
+
with parameters[0]:
|
187 |
+
Trend = st.checkbox("**Trend**")
|
188 |
+
st.markdown('Helps account for long-term trends or seasonality that could influence advertising effectiveness')
|
189 |
+
with parameters[1]:
|
190 |
+
week_number = st.checkbox('**Week_number**')
|
191 |
+
st.markdown('Assists in detecting and incorporating weekly patterns or seasonality')
|
192 |
+
with parameters[2]:
|
193 |
+
sine_cosine = st.checkbox('**Sine and Cosine Waves**')
|
194 |
+
st.markdown('Helps in capturing cyclical patterns or seasonality in the data')
|
195 |
+
#
|
196 |
+
# def get_tuned_model():
|
197 |
+
# st.session_state['build_tuned_model']=True
|
198 |
+
|
199 |
+
if st.button('Build model with Selected Parameters and Flags', key='build_tuned_model'):
|
200 |
+
new_features = features_set
|
201 |
+
st.header('2.1 Results Summary')
|
202 |
+
# date=list(df.index)
|
203 |
+
# df = df.reset_index(drop=True)
|
204 |
+
# st.write(df.head(2))
|
205 |
+
# X_train=df[features_set]
|
206 |
+
ss = MinMaxScaler()
|
207 |
+
if is_panel == True:
|
208 |
+
X_train_tuned = X_train[features_set]
|
209 |
+
# X_train_tuned = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
|
210 |
+
X_train_tuned[target_col] = X_train[target_col]
|
211 |
+
X_train_tuned[date_col] = X_train[date_col]
|
212 |
+
X_train_tuned[panel_col] = X_train[panel_col]
|
213 |
+
|
214 |
+
X_test_tuned = X_test[features_set]
|
215 |
+
# X_test_tuned = pd.DataFrame(ss.transform(X), columns=X.columns)
|
216 |
+
X_test_tuned[target_col] = X_test[target_col]
|
217 |
+
X_test_tuned[date_col] = X_test[date_col]
|
218 |
+
X_test_tuned[panel_col] = X_test[panel_col]
|
219 |
+
|
220 |
+
else:
|
221 |
+
X_train_tuned = X_train[features_set]
|
222 |
+
# X_train_tuned = pd.DataFrame(ss.fit_transform(X_train_tuned), columns=X_train_tuned.columns)
|
223 |
+
|
224 |
+
X_test_tuned = X_test[features_set]
|
225 |
+
# X_test_tuned = pd.DataFrame(ss.transform(X_test_tuned), columns=X_test_tuned.columns)
|
226 |
+
|
227 |
+
for flag in selected_options:
|
228 |
+
# Sprint4 - added target_col in flag name
|
229 |
+
X_train_tuned[flag] = st.session_state['Flags'][flag + "__" + target_col]['train']
|
230 |
+
X_test_tuned[flag] = st.session_state['Flags'][flag + "__" + target_col]['test']
|
231 |
+
|
232 |
+
# test
|
233 |
+
# X_train_tuned.to_csv("Test/X_train_tuned_flag.csv",index=False)
|
234 |
+
# X_test_tuned.to_csv("Test/X_test_tuned_flag.csv",index=False)
|
235 |
+
|
236 |
+
# print("()()"*20,flag, len(st.session_state['Flags'][flag]))
|
237 |
+
if Trend:
|
238 |
+
# Sprint3 - group by panel, calculate trend of each panel spearately. Add trend to new feature set
|
239 |
+
if is_panel:
|
240 |
+
newdata = pd.DataFrame()
|
241 |
+
panel_wise_end_point_train = {}
|
242 |
+
for panel, groupdf in X_train_tuned.groupby(panel_col):
|
243 |
+
groupdf.sort_values(date_col, inplace=True)
|
244 |
+
groupdf['Trend'] = np.arange(1, len(groupdf) + 1, 1)
|
245 |
+
newdata = pd.concat([newdata, groupdf])
|
246 |
+
panel_wise_end_point_train[panel] = len(groupdf)
|
247 |
+
X_train_tuned = newdata.copy()
|
248 |
+
|
249 |
+
test_newdata = pd.DataFrame()
|
250 |
+
for panel, test_groupdf in X_test_tuned.groupby(panel_col):
|
251 |
+
test_groupdf.sort_values(date_col, inplace=True)
|
252 |
+
start = panel_wise_end_point_train[panel] + 1
|
253 |
+
end = start + len(test_groupdf) # should be + 1? - Sprint4
|
254 |
+
# print("??"*20, panel, len(test_groupdf), len(np.arange(start, end, 1)), start)
|
255 |
+
test_groupdf['Trend'] = np.arange(start, end, 1)
|
256 |
+
test_newdata = pd.concat([test_newdata, test_groupdf])
|
257 |
+
X_test_tuned = test_newdata.copy()
|
258 |
+
|
259 |
+
new_features = new_features + ['Trend']
|
260 |
+
|
261 |
+
else:
|
262 |
+
X_train_tuned['Trend'] = np.arange(1, len(X_train_tuned) + 1, 1)
|
263 |
+
X_test_tuned['Trend'] = np.arange(len(X_train_tuned) + 1, len(X_train_tuned) + len(X_test_tuned) + 1, 1)
|
264 |
+
new_features = new_features + ['Trend']
|
265 |
+
|
266 |
+
|
267 |
+
if week_number:
|
268 |
+
# Sprint3 - create weeknumber from date column in xtrain tuned. add week num to new feature set
|
269 |
+
if is_panel:
|
270 |
+
X_train_tuned[date_col] = pd.to_datetime(X_train_tuned[date_col])
|
271 |
+
X_train_tuned['Week_number'] = X_train_tuned[date_col].dt.day_of_week
|
272 |
+
if X_train_tuned['Week_number'].nunique() == 1:
|
273 |
+
st.write("All dates in the data are of the same week day. Hence Week number can't be used.")
|
274 |
+
else:
|
275 |
+
X_test_tuned[date_col] = pd.to_datetime(X_test_tuned[date_col])
|
276 |
+
X_test_tuned['Week_number'] = X_test_tuned[date_col].dt.day_of_week
|
277 |
+
new_features = new_features + ['Week_number']
|
278 |
+
|
279 |
+
else:
|
280 |
+
date = pd.to_datetime(date.values)
|
281 |
+
X_train_tuned['Week_number'] = pd.to_datetime(X_train[date_col]).dt.day_of_week
|
282 |
+
X_test_tuned['Week_number'] = pd.to_datetime(X_test[date_col]).dt.day_of_week
|
283 |
+
new_features = new_features + ['Week_number']
|
284 |
+
|
285 |
+
if sine_cosine:
|
286 |
+
# Sprint3 - create panel wise sine cosine waves in xtrain tuned. add to new feature set
|
287 |
+
if is_panel:
|
288 |
+
new_features = new_features + ['sine_wave', 'cosine_wave']
|
289 |
+
newdata = pd.DataFrame()
|
290 |
+
newdata_test = pd.DataFrame()
|
291 |
+
groups = X_train_tuned.groupby(panel_col)
|
292 |
+
frequency = 2 * np.pi / 365 # Adjust the frequency as needed
|
293 |
+
|
294 |
+
train_panel_wise_end_point = {}
|
295 |
+
for panel, groupdf in groups:
|
296 |
+
num_samples = len(groupdf)
|
297 |
+
train_panel_wise_end_point[panel] = num_samples
|
298 |
+
days_since_start = np.arange(num_samples)
|
299 |
+
sine_wave = np.sin(frequency * days_since_start)
|
300 |
+
cosine_wave = np.cos(frequency * days_since_start)
|
301 |
+
sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
|
302 |
+
assert len(sine_cosine_df) == len(groupdf)
|
303 |
+
# groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
|
304 |
+
groupdf['sine_wave'] = sine_wave
|
305 |
+
groupdf['cosine_wave'] = cosine_wave
|
306 |
+
newdata = pd.concat([newdata, groupdf])
|
307 |
+
|
308 |
+
X_train_tuned = newdata.copy()
|
309 |
+
|
310 |
+
test_groups = X_test_tuned.groupby(panel_col)
|
311 |
+
for panel, test_groupdf in test_groups:
|
312 |
+
num_samples = len(test_groupdf)
|
313 |
+
start = train_panel_wise_end_point[panel]
|
314 |
+
days_since_start = np.arange(start, start + num_samples, 1)
|
315 |
+
# print("##", panel, num_samples, start, len(np.arange(start, start+num_samples, 1)))
|
316 |
+
sine_wave = np.sin(frequency * days_since_start)
|
317 |
+
cosine_wave = np.cos(frequency * days_since_start)
|
318 |
+
sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
|
319 |
+
assert len(sine_cosine_df) == len(test_groupdf)
|
320 |
+
# groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
|
321 |
+
test_groupdf['sine_wave'] = sine_wave
|
322 |
+
test_groupdf['cosine_wave'] = cosine_wave
|
323 |
+
newdata_test = pd.concat([newdata_test, test_groupdf])
|
324 |
+
|
325 |
+
X_test_tuned = newdata_test.copy()
|
326 |
+
|
327 |
+
|
328 |
+
else:
|
329 |
+
new_features = new_features + ['sine_wave', 'cosine_wave']
|
330 |
+
|
331 |
+
num_samples = len(X_train_tuned)
|
332 |
+
frequency = 2 * np.pi / 365 # Adjust the frequency as needed
|
333 |
+
days_since_start = np.arange(num_samples)
|
334 |
+
sine_wave = np.sin(frequency * days_since_start)
|
335 |
+
cosine_wave = np.cos(frequency * days_since_start)
|
336 |
+
sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
|
337 |
+
# Concatenate the sine and cosine waves with the scaled X DataFrame
|
338 |
+
X_train_tuned = pd.concat([X_train_tuned, sine_cosine_df], axis=1)
|
339 |
+
|
340 |
+
test_num_samples = len(X_test_tuned)
|
341 |
+
start = num_samples
|
342 |
+
days_since_start = np.arange(start, start + test_num_samples, 1)
|
343 |
+
sine_wave = np.sin(frequency * days_since_start)
|
344 |
+
cosine_wave = np.cos(frequency * days_since_start)
|
345 |
+
sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
|
346 |
+
# Concatenate the sine and cosine waves with the scaled X DataFrame
|
347 |
+
X_test_tuned = pd.concat([X_test_tuned, sine_cosine_df], axis=1)
|
348 |
+
|
349 |
+
# model
|
350 |
+
if selected_options:
|
351 |
+
new_features = new_features + selected_options
|
352 |
+
if is_panel:
|
353 |
+
inp_vars_str = " + ".join(new_features)
|
354 |
+
new_features=list(set(new_features))
|
355 |
+
# X_train_tuned.to_csv("Test/X_train_tuned.csv",index=False)
|
356 |
+
# st.write(X_train_tuned[['total_approved_accounts_revenue'] + new_features].dtypes)
|
357 |
+
# st.write(X_train_tuned[['total_approved_accounts_revenue', panel_col] + new_features].isna().sum())
|
358 |
+
md_str = target_col + " ~ " + inp_vars_str
|
359 |
+
md_tuned = smf.mixedlm(md_str,
|
360 |
+
data=X_train_tuned[[target_col] + new_features],
|
361 |
+
groups=X_train_tuned[panel_col])
|
362 |
+
model_tuned = md_tuned.fit()
|
363 |
+
|
364 |
+
# plot act v pred for original model and tuned model
|
365 |
+
metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train,
|
366 |
+
model.fittedvalues, model,
|
367 |
+
target_column=sel_target_col,
|
368 |
+
is_panel=True)
|
369 |
+
metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(X_train_tuned[date_col],
|
370 |
+
X_train_tuned[target_col],
|
371 |
+
model_tuned.fittedvalues,
|
372 |
+
model_tuned,
|
373 |
+
target_column=sel_target_col,
|
374 |
+
is_panel=True)
|
375 |
+
|
376 |
+
else:
|
377 |
+
new_features=list(set(new_features))
|
378 |
+
# st.write(new_features)
|
379 |
+
model_tuned = sm.OLS(y_train, X_train_tuned[new_features]).fit()
|
380 |
+
# st.write(X_train_tuned.columns)
|
381 |
+
metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date[:130], y_train,
|
382 |
+
model.predict(X_train[features_set]), model,
|
383 |
+
target_column=sel_target_col)
|
384 |
+
metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(date[:130], y_train,
|
385 |
+
model_tuned.predict(
|
386 |
+
X_train_tuned),
|
387 |
+
model_tuned,
|
388 |
+
target_column=sel_target_col)
|
389 |
+
|
390 |
+
# st.write(metrics_table_tuned)
|
391 |
+
mape = np.round(metrics_table.iloc[0, 1], 2)
|
392 |
+
r2 = np.round(metrics_table.iloc[1, 1], 2)
|
393 |
+
adjr2 = np.round(metrics_table.iloc[2, 1], 2)
|
394 |
+
|
395 |
+
mape_tuned = np.round(metrics_table_tuned.iloc[0, 1], 2)
|
396 |
+
r2_tuned = np.round(metrics_table_tuned.iloc[1, 1], 2)
|
397 |
+
adjr2_tuned = np.round(metrics_table_tuned.iloc[2, 1], 2)
|
398 |
+
|
399 |
+
parameters_ = st.columns(3)
|
400 |
+
with parameters_[0]:
|
401 |
+
st.metric('R2', r2_tuned, np.round(r2_tuned - r2, 2))
|
402 |
+
with parameters_[1]:
|
403 |
+
st.metric('Adjusted R2', adjr2_tuned, np.round(adjr2_tuned - adjr2, 2))
|
404 |
+
with parameters_[2]:
|
405 |
+
st.metric('MAPE', mape_tuned, np.round(mape_tuned - mape, 2), 'inverse')
|
406 |
+
st.write(model_tuned.summary())
|
407 |
+
|
408 |
+
X_train_tuned[date_col] = X_train[date_col]
|
409 |
+
X_test_tuned[date_col] = X_test[date_col]
|
410 |
+
X_train_tuned[target_col] = y_train
|
411 |
+
X_test_tuned[target_col] = y_test
|
412 |
+
|
413 |
+
st.header('2.2 Actual vs. Predicted Plot')
|
414 |
+
# if is_panel:
|
415 |
+
# metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date, y_train, model.predict(X_train),
|
416 |
+
# model, target_column='Revenue',is_panel=True)
|
417 |
+
# else:
|
418 |
+
# metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(date, y_train, model.predict(X_train), model,target_column='Revenue')
|
419 |
+
if is_panel :
|
420 |
+
metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train_tuned[date_col],
|
421 |
+
X_train_tuned[target_col],
|
422 |
+
model_tuned.fittedvalues, model_tuned,
|
423 |
+
target_column=sel_target_col,
|
424 |
+
is_panel=True)
|
425 |
+
else :
|
426 |
+
metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train_tuned[date_col],
|
427 |
+
X_train_tuned[target_col],
|
428 |
+
model_tuned.predict(X_train_tuned[new_features]),
|
429 |
+
model_tuned,
|
430 |
+
target_column=sel_target_col,
|
431 |
+
is_panel=False)
|
432 |
+
# plot_actual_vs_predicted(X_train[date_col], y_train,
|
433 |
+
# model.fittedvalues, model,
|
434 |
+
# target_column='Revenue',
|
435 |
+
# is_panel=is_panel)
|
436 |
+
|
437 |
+
st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
|
438 |
+
|
439 |
+
st.markdown('## 2.3 Residual Analysis')
|
440 |
+
if is_panel :
|
441 |
+
columns = st.columns(2)
|
442 |
+
with columns[0]:
|
443 |
+
fig = plot_residual_predicted(y_train, model_tuned.fittedvalues, X_train_tuned)
|
444 |
+
st.plotly_chart(fig)
|
445 |
+
|
446 |
+
with columns[1]:
|
447 |
+
st.empty()
|
448 |
+
fig = qqplot(y_train, model_tuned.fittedvalues)
|
449 |
+
st.plotly_chart(fig)
|
450 |
+
|
451 |
+
with columns[0]:
|
452 |
+
fig = residual_distribution(y_train, model_tuned.fittedvalues)
|
453 |
+
st.pyplot(fig)
|
454 |
+
else:
|
455 |
+
columns = st.columns(2)
|
456 |
+
with columns[0]:
|
457 |
+
fig = plot_residual_predicted(y_train, model_tuned.predict(X_train_tuned[new_features]), X_train)
|
458 |
+
st.plotly_chart(fig)
|
459 |
+
|
460 |
+
with columns[1]:
|
461 |
+
st.empty()
|
462 |
+
fig = qqplot(y_train, model_tuned.predict(X_train_tuned[new_features]))
|
463 |
+
st.plotly_chart(fig)
|
464 |
+
|
465 |
+
with columns[0]:
|
466 |
+
fig = residual_distribution(y_train, model_tuned.predict(X_train_tuned[new_features]))
|
467 |
+
st.pyplot(fig)
|
468 |
+
|
469 |
+
st.session_state['is_tuned_model'][target_col] = True
|
470 |
+
# Sprint4 - saved tuned model in a dict
|
471 |
+
st.session_state['Model_Tuned'][sel_model + "__" + target_col] = {
|
472 |
+
"Model_object": model_tuned,
|
473 |
+
'feature_set': new_features,
|
474 |
+
'X_train_tuned': X_train_tuned,
|
475 |
+
'X_test_tuned': X_test_tuned
|
476 |
+
}
|
477 |
+
|
478 |
+
# Pending
|
479 |
+
# if st.session_state['build_tuned_model']==True:
|
480 |
+
if st.session_state['Model_Tuned'] is not None :
|
481 |
+
if st.checkbox('Use this model to build response curves', key='save_model'):
|
482 |
+
# save_model = st.button('Use this model to build response curves', key='saved_tuned_model')
|
483 |
+
# if save_model:
|
484 |
+
st.session_state["is_tuned_model"][target_col]=True
|
485 |
+
with open("tuned_model.pkl", "wb") as f:
|
486 |
+
# pickle.dump(st.session_state['tuned_model'], f)
|
487 |
+
pickle.dump(st.session_state['Model_Tuned'], f) # Sprint4
|
488 |
+
|
489 |
+
# X_test_tuned.to_csv("Test/X_test_tuned_final.csv", index=False)
|
490 |
+
# X_train_tuned.to_csv("Test/X_train_tuned.csv", index=False)
|
491 |
+
st.success(sel_model + "__" + target_col + ' Tuned saved!')
|
492 |
+
|
493 |
+
|
494 |
+
# if is_panel:
|
495 |
+
# # st.session_state["tuned_model_features"] = new_features
|
496 |
+
# with open("tuned_model.pkl", "wb") as f:
|
497 |
+
# # pickle.dump(st.session_state['tuned_model'], f)
|
498 |
+
# pickle.dump(st.session_state['Model_Tuned'], f) # Sprint4
|
499 |
+
# st.success(sel_model + "__" + target_col + ' Tuned saved!')
|
500 |
+
|
501 |
+
# raw_data=df[features_set]
|
502 |
+
# columns_raw=[re.split(r"(_lag|_adst)",col)[0] for col in raw_data.columns]
|
503 |
+
# raw_data.columns=columns_raw
|
504 |
+
# columns_media=[col for col in columns_raw if Categorised_data[col]['BB']=='Media']
|
505 |
+
# raw_data=raw_data[columns_media]
|
506 |
+
|
507 |
+
# raw_data['Date']=list(df.index)
|
508 |
+
|
509 |
+
# spends_var=[col for col in df.columns if "spends" in col.lower() and 'adst' not in col.lower() and 'lag' not in col.lower()]
|
510 |
+
# spends_df=df[spends_var]
|
511 |
+
# spends_df['Week']=list(df.index)
|
512 |
+
|
513 |
+
|
514 |
+
# j=0
|
515 |
+
# X1=X.copy()
|
516 |
+
# col=X1.columns
|
517 |
+
# for i in model.params.values:
|
518 |
+
# X1[col[j]]=X1.iloc[:,j]*i
|
519 |
+
# j+=1
|
520 |
+
# contribution_df=X1
|
521 |
+
# contribution_df['Date']=list(df.index)
|
522 |
+
# excel_file='Overview_data.xlsx'
|
523 |
+
|
524 |
+
# with pd.ExcelWriter(excel_file,engine='xlsxwriter') as writer:
|
525 |
+
# raw_data.to_excel(writer,sheet_name='RAW DATA MMM',index=False)
|
526 |
+
# spends_df.to_excel(writer,sheet_name='SPEND INPUT',index=False)
|
527 |
+
# contribution_df.to_excel(writer,sheet_name='CONTRIBUTION MMM')
|
pages/6_Model_Result_Overview.py
ADDED
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
MMO Build Sprint 3
|
3 |
+
additions : contributions calculated using tuned Mixed LM model
|
4 |
+
pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model
|
5 |
+
|
6 |
+
MMO Build Sprint 4
|
7 |
+
additions : response metrics selection
|
8 |
+
pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model
|
9 |
+
'''
|
10 |
+
|
11 |
+
import streamlit as st
|
12 |
+
import pandas as pd
|
13 |
+
from sklearn.preprocessing import MinMaxScaler
|
14 |
+
import pickle
|
15 |
+
|
16 |
+
|
17 |
+
|
18 |
+
from utilities_with_panel import (set_header,
|
19 |
+
overview_test_data_prep_panel,
|
20 |
+
overview_test_data_prep_nonpanel,
|
21 |
+
initialize_data,
|
22 |
+
load_local_css,
|
23 |
+
create_channel_summary,
|
24 |
+
create_contribution_pie,
|
25 |
+
create_contribuion_stacked_plot,
|
26 |
+
create_channel_spends_sales_plot,
|
27 |
+
format_numbers,
|
28 |
+
channel_name_formating)
|
29 |
+
|
30 |
+
import plotly.graph_objects as go
|
31 |
+
import streamlit_authenticator as stauth
|
32 |
+
import yaml
|
33 |
+
from yaml import SafeLoader
|
34 |
+
import time
|
35 |
+
|
36 |
+
st.set_page_config(layout='wide')
|
37 |
+
load_local_css('styles.css')
|
38 |
+
set_header()
|
39 |
+
|
40 |
+
|
41 |
+
def get_random_effects(media_data, panel_col, mdf):
    """Extract the fitted random intercept of each panel (market) from a MixedLM result.

    Parameters
    ----------
    media_data : pd.DataFrame
        Data containing the panel column; only its unique values are used.
    panel_col : str
        Name of the panel/group column (e.g. the market identifier).
    mdf : fitted statsmodels MixedLM result
        Exposes ``random_effects``, mapping each group to its estimated
        random intercept (a one-element Series per group).

    Returns
    -------
    pd.DataFrame
        One row per panel with columns ``[panel_col, 'random_effect']``.
    """
    # Build all rows first and construct the frame once: the previous version
    # appended row-by-row with .loc (repeated re-allocation) and left a debug
    # carriage-return print in the loop.
    rows = [
        {panel_col: market,
         'random_effect': mdf.random_effects[market].values[0]}
        for market in media_data[panel_col].unique()
    ]
    return pd.DataFrame(rows, columns=[panel_col, 'random_effect'])
|
51 |
+
|
52 |
+
|
53 |
+
def process_train_and_test(train, test, features, panel_col, target_col):
    """Min-max scale the model features of train (and optionally test) data.

    The scaler is fit on ``train`` only and re-used to transform ``test`` so
    both sets share the same scale. The panel and target columns are copied
    through unscaled.

    Parameters
    ----------
    train : pd.DataFrame
        Training data containing ``features``, ``panel_col`` and ``target_col``.
    test : pd.DataFrame or None
        Optional test data; when None only the scaled train frame is returned.
    features : list[str]
        Columns to scale.
    panel_col, target_col : str
        Columns copied through without scaling.

    Returns
    -------
    (X1, X2) when ``test`` is not None, else ``X1``.
    """
    ss = MinMaxScaler()
    # Preserve the original index: the previous version rebuilt the frame with
    # a default RangeIndex, so the subsequent column assignments from `train`
    # could misalign (produce NaNs) whenever `train` had a non-default index.
    X1 = pd.DataFrame(ss.fit_transform(train[features]),
                      columns=features, index=train.index)
    X1[panel_col] = train[panel_col]
    X1[target_col] = train[target_col]

    if test is not None:
        X2 = pd.DataFrame(ss.transform(test[features]),
                          columns=features, index=test.index)
        X2[panel_col] = test[panel_col]
        X2[target_col] = test[target_col]
        return X1, X2
    return X1
|
69 |
+
|
70 |
+
def mdf_predict(X_df, mdf, random_eff_df, panel_col_name=None):
    """Predict with a MixedLM model: fixed-effect prediction plus each
    panel's random intercept.

    Parameters
    ----------
    X_df : pd.DataFrame
        Feature frame; must contain the panel column.
    mdf : fitted model
        Object exposing ``predict(df)`` returning the fixed-effect prediction.
    random_eff_df : pd.DataFrame
        Output of ``get_random_effects`` — one ``random_effect`` row per panel.
    panel_col_name : str, optional
        Panel column name. Defaults to the module-level ``panel_col`` for
        backward compatibility with existing call sites (the old version read
        the global directly, which broke when the module was imported before
        ``panel_col`` was defined).

    Returns
    -------
    pd.DataFrame
        Copy of ``X_df`` with a ``pred`` column added; internal helper
        columns are dropped before returning.
    """
    group_col = panel_col_name if panel_col_name is not None else panel_col
    X = pd.merge(X_df.copy(), random_eff_df[[group_col, 'random_effect']],
                 on=group_col, how='left')
    X['pred_fixed_effect'] = mdf.predict(X)
    # Final prediction = fixed-effect part + panel-specific random intercept.
    X['pred'] = X['pred_fixed_effect'] + X['random_effect']
    # NOTE(review): a debug dump to 'Test/merged_df_contri.csv' was removed
    # here; it crashed whenever the 'Test/' directory was absent.
    X.drop(columns=['pred_fixed_effect', 'random_effect'], inplace=True)
    return X
|
80 |
+
|
81 |
+
|
82 |
+
target='Revenue'
|
83 |
+
|
84 |
+
# is_panel=False
|
85 |
+
# is_panel = st.session_state['is_panel']
|
86 |
+
panel_col = [col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['bin_dict']['Panel Level 1'] ] [0]# set the panel column
|
87 |
+
date_col = 'date'
|
88 |
+
|
89 |
+
#st.write(media_data)
|
90 |
+
|
91 |
+
is_panel = True if len(panel_col)>0 else False
|
92 |
+
|
93 |
+
# panel_col='markets'
|
94 |
+
date_col = 'date'
|
95 |
+
|
96 |
+
# Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
|
97 |
+
if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics']!=[]:
|
98 |
+
sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
|
99 |
+
target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
|
100 |
+
else :
|
101 |
+
sel_target_col = 'Total Approved Accounts - Revenue'
|
102 |
+
target_col = 'total_approved_accounts_revenue'
|
103 |
+
|
104 |
+
# Sprint4 - Look through all saved tuned models, only show saved models of the sel resp metric (target_col)
|
105 |
+
# saved_models = st.session_state['saved_model_names']
|
106 |
+
# Sprint4 - get the model obj of the selected model
|
107 |
+
# st.write(sel_model_dict)
|
108 |
+
|
109 |
+
# Sprint3 - Contribution
|
110 |
+
if is_panel:
|
111 |
+
# read tuned mixedLM model
|
112 |
+
# if st.session_state["tuned_model"] is not None :
|
113 |
+
|
114 |
+
if st.session_state["is_tuned_model"][target_col]==True: #Sprint4
|
115 |
+
with open("tuned_model.pkl", 'rb') as file:
|
116 |
+
model_dict = pickle.load(file)
|
117 |
+
saved_models = list(model_dict.keys())
|
118 |
+
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
|
119 |
+
sel_model = st.selectbox("Select the model to review", required_saved_models)
|
120 |
+
sel_model_dict = model_dict[sel_model + "__" + target_col]
|
121 |
+
|
122 |
+
# model=st.session_state["tuned_model"]
|
123 |
+
# X_train=st.session_state["X_train_tuned"]
|
124 |
+
# X_test=st.session_state["X_test_tuned"]
|
125 |
+
# best_feature_set=st.session_state["tuned_model_features"]
|
126 |
+
model=sel_model_dict["Model_object"]
|
127 |
+
X_train=sel_model_dict["X_train_tuned"]
|
128 |
+
X_test=sel_model_dict["X_test_tuned"]
|
129 |
+
best_feature_set=sel_model_dict["feature_set"]
|
130 |
+
|
131 |
+
# st.write("features", best_feature_set)
|
132 |
+
# st.write(X_test.columns)
|
133 |
+
|
134 |
+
else : # if non tuned model to be used # Pending
|
135 |
+
with open("best_models.pkl", 'rb') as file:
|
136 |
+
model_dict = pickle.load(file)
|
137 |
+
saved_models = list(model_dict.keys())
|
138 |
+
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
|
139 |
+
sel_model = st.selectbox("Select the model to review", required_saved_models)
|
140 |
+
sel_model_dict = model_dict[sel_model + "__" + target_col]
|
141 |
+
model=st.session_state["base_model"]
|
142 |
+
X_train = st.session_state['X_train']
|
143 |
+
X_test = st.session_state['X_test']
|
144 |
+
# y_train = st.session_state['y_train']
|
145 |
+
# y_test = st.session_state['y_test']
|
146 |
+
best_feature_set = st.session_state['base_model_feature_set']
|
147 |
+
# st.write(best_feature_set)
|
148 |
+
# st.write(X_test.columns)
|
149 |
+
|
150 |
+
# Calculate contributions
|
151 |
+
|
152 |
+
with open("data_import.pkl", "rb") as f:
|
153 |
+
data = pickle.load(f)
|
154 |
+
|
155 |
+
# Accessing the loaded objects
|
156 |
+
st.session_state['orig_media_data'] = data["final_df"]
|
157 |
+
|
158 |
+
st.session_state['orig_media_data'].columns=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['orig_media_data'].columns]
|
159 |
+
|
160 |
+
media_data = st.session_state["media_data"]
|
161 |
+
|
162 |
+
|
163 |
+
# st.session_state['orig_media_data']=st.session_state["media_data"]
|
164 |
+
|
165 |
+
#st.write(media_data)
|
166 |
+
|
167 |
+
contri_df = pd.DataFrame()
|
168 |
+
|
169 |
+
y = []
|
170 |
+
y_pred = []
|
171 |
+
|
172 |
+
random_eff_df = get_random_effects(media_data, panel_col, model)
|
173 |
+
random_eff_df['fixed_effect'] = model.fe_params['Intercept']
|
174 |
+
random_eff_df['panel_effect'] = random_eff_df['random_effect'] + random_eff_df['fixed_effect']
|
175 |
+
# random_eff_df.to_csv("Test/random_eff_df_contri.csv", index=False)
|
176 |
+
|
177 |
+
coef_df = pd.DataFrame(model.fe_params)
|
178 |
+
coef_df.columns = ['coef']
|
179 |
+
|
180 |
+
# coef_df.reset_index().to_csv("Test/coef_df_contri1.csv",index=False)
|
181 |
+
# print(model.fe_params)
|
182 |
+
|
183 |
+
x_train_contribution = X_train.copy()
|
184 |
+
x_test_contribution = X_test.copy()
|
185 |
+
|
186 |
+
# preprocessing not needed since X_train is already preprocessed
|
187 |
+
# X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col)
|
188 |
+
# x_train_contribution[best_feature_set] = X1[best_feature_set]
|
189 |
+
# x_test_contribution[best_feature_set] = X2[best_feature_set]
|
190 |
+
|
191 |
+
x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df)
|
192 |
+
x_test_contribution = mdf_predict(x_test_contribution, model, random_eff_df)
|
193 |
+
|
194 |
+
x_train_contribution = pd.merge(x_train_contribution, random_eff_df[[panel_col, 'panel_effect']], on=panel_col,
|
195 |
+
how='left')
|
196 |
+
x_test_contribution = pd.merge(x_test_contribution, random_eff_df[[panel_col, 'panel_effect']], on=panel_col,
|
197 |
+
how='left')
|
198 |
+
|
199 |
+
inp_coef = coef_df['coef'][1:].tolist() # 0th index is intercept
|
200 |
+
|
201 |
+
for i in range(len(inp_coef)):
|
202 |
+
x_train_contribution[str(best_feature_set[i]) + "_contr"] = inp_coef[i] * x_train_contribution[best_feature_set[i]]
|
203 |
+
x_test_contribution[str(best_feature_set[i]) + "_contr"] = inp_coef[i] * x_test_contribution[best_feature_set[i]]
|
204 |
+
|
205 |
+
x_train_contribution['sum_contributions'] = x_train_contribution.filter(regex="contr").sum(axis=1)
|
206 |
+
x_train_contribution['sum_contributions'] = x_train_contribution['sum_contributions'] + x_train_contribution['panel_effect']
|
207 |
+
|
208 |
+
x_test_contribution['sum_contributions'] = x_test_contribution.filter(regex="contr").sum(axis=1)
|
209 |
+
x_test_contribution['sum_contributions'] = x_test_contribution['sum_contributions'] + x_test_contribution['panel_effect']
|
210 |
+
|
211 |
+
# # test
|
212 |
+
x_train_contribution.to_csv("Test/x_train_contribution.csv",index=False)
|
213 |
+
x_test_contribution.to_csv("Test/x_test_contribution.csv",index=False)
|
214 |
+
#
|
215 |
+
# st.session_state['orig_media_data'].to_csv("Test/transformed_data.csv",index=False)
|
216 |
+
# st.session_state['X_test_spends'].to_csv("Test/test_spends.csv",index=False)
|
217 |
+
# # st.write(st.session_state['orig_media_data'].columns)
|
218 |
+
|
219 |
+
st.write(date_col,panel_col)
|
220 |
+
# st.write(x_test_contribution)
|
221 |
+
|
222 |
+
overview_test_data_prep_panel(x_test_contribution, st.session_state['orig_media_data'], st.session_state['X_test_spends'],
|
223 |
+
date_col, panel_col, target_col)
|
224 |
+
|
225 |
+
else : # NON PANEL
|
226 |
+
if st.session_state["is_tuned_model"][target_col]==True: #Sprint4
|
227 |
+
with open("tuned_model.pkl", 'rb') as file:
|
228 |
+
model_dict = pickle.load(file)
|
229 |
+
saved_models = list(model_dict.keys())
|
230 |
+
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
|
231 |
+
sel_model = st.selectbox("Select the model to review", required_saved_models)
|
232 |
+
sel_model_dict = model_dict[sel_model + "__" + target_col]
|
233 |
+
|
234 |
+
model=sel_model_dict["Model_object"]
|
235 |
+
X_train=sel_model_dict["X_train_tuned"]
|
236 |
+
X_test=sel_model_dict["X_test_tuned"]
|
237 |
+
best_feature_set=sel_model_dict["feature_set"]
|
238 |
+
|
239 |
+
else : #Sprint4
|
240 |
+
with open("best_models.pkl", 'rb') as file:
|
241 |
+
model_dict = pickle.load(file)
|
242 |
+
saved_models = list(model_dict.keys())
|
243 |
+
required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
|
244 |
+
sel_model = st.selectbox("Select the model to review", required_saved_models)
|
245 |
+
sel_model_dict = model_dict[sel_model + "__" + target_col]
|
246 |
+
|
247 |
+
model=sel_model_dict["Model_object"]
|
248 |
+
X_train=sel_model_dict["X_train"]
|
249 |
+
X_test=sel_model_dict["X_test"]
|
250 |
+
best_feature_set=sel_model_dict["feature_set"]
|
251 |
+
|
252 |
+
x_train_contribution = X_train.copy()
|
253 |
+
x_test_contribution = X_test.copy()
|
254 |
+
|
255 |
+
x_train_contribution['pred'] = model.predict(x_train_contribution[best_feature_set])
|
256 |
+
x_test_contribution['pred'] = model.predict(x_test_contribution[best_feature_set])
|
257 |
+
|
258 |
+
for num,i in enumerate(model.params.values):
|
259 |
+
col=best_feature_set[num]
|
260 |
+
x_train_contribution[col + "_contr"] = X_train[col] * i
|
261 |
+
x_test_contribution[col + "_contr"] = X_test[col] * i
|
262 |
+
|
263 |
+
x_test_contribution.to_csv("Test/x_test_contribution_non_panel.csv",index=False)
|
264 |
+
overview_test_data_prep_nonpanel(x_test_contribution, st.session_state['orig_media_data'].copy(), st.session_state['X_test_spends'].copy(), date_col, target_col)
|
265 |
+
# for k, v in st.session_sta
|
266 |
+
# te.items():
|
267 |
+
|
268 |
+
# if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
|
269 |
+
# st.session_state[k] = v
|
270 |
+
|
271 |
+
# authenticator = st.session_state.get('authenticator')
|
272 |
+
|
273 |
+
# if authenticator is None:
|
274 |
+
# authenticator = load_authenticator()
|
275 |
+
|
276 |
+
# name, authentication_status, username = authenticator.login('Login', 'main')
|
277 |
+
# auth_status = st.session_state['authentication_status']
|
278 |
+
|
279 |
+
# if auth_status:
|
280 |
+
# authenticator.logout('Logout', 'main')
|
281 |
+
|
282 |
+
# is_state_initiaized = st.session_state.get('initialized',False)
|
283 |
+
# if not is_state_initiaized:
|
284 |
+
|
285 |
+
initialize_data(target_col)
|
286 |
+
scenario = st.session_state['scenario']
|
287 |
+
raw_df = st.session_state['raw_df']
|
288 |
+
st.header('Overview of previous spends')
|
289 |
+
|
290 |
+
# st.write(scenario.actual_total_spends)
|
291 |
+
# st.write(scenario.actual_total_sales)
|
292 |
+
columns = st.columns((1,1,3))
|
293 |
+
|
294 |
+
with columns[0]:
|
295 |
+
st.metric(label='Spends', value=format_numbers(float(scenario.actual_total_spends)))
|
296 |
+
###print(f"##################### {scenario.actual_total_sales} ##################")
|
297 |
+
with columns[1]:
|
298 |
+
st.metric(label=target, value=format_numbers(float(scenario.actual_total_sales),include_indicator=False))
|
299 |
+
|
300 |
+
|
301 |
+
actual_summary_df = create_channel_summary(scenario)
|
302 |
+
actual_summary_df['Channel'] = actual_summary_df['Channel'].apply(channel_name_formating)
|
303 |
+
|
304 |
+
columns = st.columns((2,1))
|
305 |
+
with columns[0]:
|
306 |
+
with st.expander('Channel wise overview'):
|
307 |
+
st.markdown(actual_summary_df.style.set_table_styles(
|
308 |
+
[{
|
309 |
+
'selector': 'th',
|
310 |
+
'props': [('background-color', '#11B6BD')]
|
311 |
+
},
|
312 |
+
{
|
313 |
+
'selector' : 'tr:nth-child(even)',
|
314 |
+
'props' : [('background-color', '#11B6BD')]
|
315 |
+
}]).to_html(), unsafe_allow_html=True)
|
316 |
+
|
317 |
+
st.markdown("<hr>",unsafe_allow_html=True)
|
318 |
+
##############################
|
319 |
+
|
320 |
+
st.plotly_chart(create_contribution_pie(scenario),use_container_width=True)
|
321 |
+
st.markdown("<hr>",unsafe_allow_html=True)
|
322 |
+
|
323 |
+
|
324 |
+
################################3
|
325 |
+
st.plotly_chart(create_contribuion_stacked_plot(scenario),use_container_width=True)
|
326 |
+
st.markdown("<hr>",unsafe_allow_html=True)
|
327 |
+
#######################################
|
328 |
+
|
329 |
+
selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['non media'], format_func=channel_name_formating)
|
330 |
+
selected_channel = scenario.channels.get(selected_channel_name,None)
|
331 |
+
|
332 |
+
st.plotly_chart(create_channel_spends_sales_plot(selected_channel), use_container_width=True)
|
333 |
+
|
334 |
+
st.markdown("<hr>",unsafe_allow_html=True)
|
335 |
+
|
336 |
+
# elif auth_status == False:
|
337 |
+
# st.error('Username/Password is incorrect')
|
338 |
+
|
339 |
+
# if auth_status != True:
|
340 |
+
# try:
|
341 |
+
# username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
|
342 |
+
# if username_forgot_pw:
|
343 |
+
# st.success('New password sent securely')
|
344 |
+
# # Random password to be transferred to user securely
|
345 |
+
# elif username_forgot_pw == False:
|
346 |
+
# st.error('Username not found')
|
347 |
+
# except Exception as e:
|
348 |
+
# st.error(e)
|
pages/7_Build_Response_Curves.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import plotly.express as px
|
3 |
+
import numpy as np
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
from utilities_with_panel import channel_name_formating, load_authenticator, initialize_data
|
6 |
+
from sklearn.metrics import r2_score
|
7 |
+
from collections import OrderedDict
|
8 |
+
from classes import class_from_dict,class_to_dict
|
9 |
+
import pickle
|
10 |
+
import json
|
11 |
+
from utilities import (
|
12 |
+
load_local_css,
|
13 |
+
set_header,
|
14 |
+
channel_name_formating,
|
15 |
+
)
|
16 |
+
|
17 |
+
for k, v in st.session_state.items():
|
18 |
+
if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
|
19 |
+
st.session_state[k] = v
|
20 |
+
|
21 |
+
def s_curve(x, K, b, a, x0):
    """Generalised logistic (S-) curve: K / (1 + b * exp(-a * (x - x0)))."""
    shifted = x - x0
    denominator = 1 + b * np.exp(-a * shifted)
    return K / denominator
|
23 |
+
|
24 |
+
def save_scenario(scenario_name):
    """
    Save the current scenario with the mentioned name in the session state

    Parameters
    ----------
    scenario_name
        Name of the scenario to be saved
    """
    # Bug fix: the original assigned an empty OrderedDict to st.session_state
    # itself (st.session_state = OrderedDict()), wiping ALL application state.
    # Only the 'saved_scenarios' entry should be initialised when missing.
    if 'saved_scenarios' not in st.session_state:
        st.session_state['saved_scenarios'] = OrderedDict()

    # Serialize the live scenario object into a plain dict for storage.
    st.session_state['saved_scenarios'][scenario_name] = class_to_dict(st.session_state['scenario'])
    # Clear the text input so the same name is not re-submitted accidentally.
    st.session_state['scenario_input'] = ""
    # Persist all saved scenarios to disk so they survive app restarts.
    with open('../saved_scenarios.pkl', 'wb') as f:
        pickle.dump(st.session_state['saved_scenarios'], f)
|
42 |
+
|
43 |
+
|
44 |
+
def reset_curve_parameters():
    """Remove the S-curve parameters from session state so they are re-seeded
    from the selected channel's stored response-curve parameters on rerun.

    Uses pop(..., None) instead of bare ``del`` so a missing key (first run,
    or the callback firing twice) does not raise KeyError.
    """
    for param in ('K', 'b', 'a', 'x0'):
        st.session_state.pop(param, None)
|
49 |
+
|
50 |
+
def update_response_curve():
    # Push the user-edited S-curve parameters (from the K/b/a/x0 number
    # inputs) back onto the channel object of the currently selected channel.
    # NOTE(review): relies on the module-level `selected_channel_name`
    # assigned later in the script body — confirm it is always set before
    # this button callback can fire.
    # st.session_state['rcs'][selected_channel_name]['K'] = st.session_state['K']
    # st.session_state['rcs'][selected_channel_name]['b'] = st.session_state['b']
    # st.session_state['rcs'][selected_channel_name]['a'] = st.session_state['a']
    # st.session_state['rcs'][selected_channel_name]['x0'] = st.session_state['x0']
    # rcs = st.session_state['rcs']
    _channel_class = st.session_state['scenario'].channels[selected_channel_name]
    _channel_class.update_response_curves({
        'K' : st.session_state['K'],
        'b' : st.session_state['b'],
        'a' : st.session_state['a'],
        'x0' : st.session_state['x0']})
|
62 |
+
|
63 |
+
|
64 |
+
# authenticator = st.session_state.get('authenticator')
|
65 |
+
# if authenticator is None:
|
66 |
+
# authenticator = load_authenticator()
|
67 |
+
|
68 |
+
# name, authentication_status, username = authenticator.login('Login', 'main')
|
69 |
+
# auth_status = st.session_state.get('authentication_status')
|
70 |
+
|
71 |
+
# if auth_status == True:
|
72 |
+
# is_state_initiaized = st.session_state.get('initialized',False)
|
73 |
+
# if not is_state_initiaized:
|
74 |
+
# print("Scenario page state reloaded")
|
75 |
+
|
76 |
+
# Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
|
77 |
+
st.set_page_config(layout='wide')
|
78 |
+
load_local_css('styles.css')
|
79 |
+
set_header()
|
80 |
+
|
81 |
+
if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics']!=[]:
|
82 |
+
sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
|
83 |
+
target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
|
84 |
+
else :
|
85 |
+
sel_target_col = 'Total Approved Accounts - Revenue'
|
86 |
+
target_col = 'total_approved_accounts_revenue'
|
87 |
+
|
88 |
+
initialize_data(target_col)
|
89 |
+
|
90 |
+
st.subheader("Build response curves")
|
91 |
+
|
92 |
+
channels_list = st.session_state['channels_list']
|
93 |
+
selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['Others'], format_func=channel_name_formating,on_change=reset_curve_parameters)
|
94 |
+
|
95 |
+
rcs = {}
|
96 |
+
for channel_name in channels_list:
|
97 |
+
rcs[channel_name] = st.session_state['scenario'].channels[channel_name].response_curve_params
|
98 |
+
# rcs = st.session_state['rcs']
|
99 |
+
|
100 |
+
|
101 |
+
if 'K' not in st.session_state:
|
102 |
+
st.session_state['K'] = rcs[selected_channel_name]['K']
|
103 |
+
if 'b' not in st.session_state:
|
104 |
+
st.session_state['b'] = rcs[selected_channel_name]['b']
|
105 |
+
if 'a' not in st.session_state:
|
106 |
+
st.session_state['a'] = rcs[selected_channel_name]['a']
|
107 |
+
if 'x0' not in st.session_state:
|
108 |
+
st.session_state['x0'] = rcs[selected_channel_name]['x0']
|
109 |
+
|
110 |
+
x = st.session_state['actual_input_df'][selected_channel_name].values
|
111 |
+
y = st.session_state['actual_contribution_df'][selected_channel_name].values
|
112 |
+
|
113 |
+
power = (np.ceil(np.log(x.max()) / np.log(10) )- 3)
|
114 |
+
|
115 |
+
# fig = px.scatter(x, s_curve(x/10**power,
|
116 |
+
# st.session_state['K'],
|
117 |
+
# st.session_state['b'],
|
118 |
+
# st.session_state['a'],
|
119 |
+
# st.session_state['x0']))
|
120 |
+
|
121 |
+
fig = px.scatter(x=x, y=y)
|
122 |
+
fig.add_trace(go.Scatter(x=sorted(x), y=s_curve(sorted(x)/10**power,st.session_state['K'],
|
123 |
+
st.session_state['b'],
|
124 |
+
st.session_state['a'],
|
125 |
+
st.session_state['x0']),
|
126 |
+
line=dict(color='red')))
|
127 |
+
|
128 |
+
fig.update_layout(title_text="Response Curve",showlegend=False)
|
129 |
+
fig.update_annotations(font_size=10)
|
130 |
+
fig.update_xaxes(title='Spends')
|
131 |
+
fig.update_yaxes(title=sel_target_col)
|
132 |
+
|
133 |
+
st.plotly_chart(fig,use_container_width=True)
|
134 |
+
|
135 |
+
r2 = r2_score(y, s_curve(x / 10**power,
|
136 |
+
st.session_state['K'],
|
137 |
+
st.session_state['b'],
|
138 |
+
st.session_state['a'],
|
139 |
+
st.session_state['x0']))
|
140 |
+
|
141 |
+
st.metric('R2',round(r2,2))
|
142 |
+
columns = st.columns(4)
|
143 |
+
|
144 |
+
with columns[0]:
|
145 |
+
st.number_input('K',key='K',format="%0.5f")
|
146 |
+
with columns[1]:
|
147 |
+
st.number_input('b',key='b',format="%0.5f")
|
148 |
+
with columns[2]:
|
149 |
+
st.number_input('a',key='a',step=0.0001,format="%0.5f")
|
150 |
+
with columns[3]:
|
151 |
+
st.number_input('x0',key='x0',format="%0.5f")
|
152 |
+
|
153 |
+
|
154 |
+
st.button('Update parameters',on_click=update_response_curve)
|
155 |
+
st.button('Reset parameters',on_click=reset_curve_parameters)
|
156 |
+
scenario_name = st.text_input('Scenario name', key='scenario_input',placeholder='Scenario name',label_visibility='collapsed')
|
157 |
+
st.button('Save', on_click=lambda : save_scenario(scenario_name),disabled=len(st.session_state['scenario_input']) == 0)
|
158 |
+
|
159 |
+
file_name = st.text_input('rcs download file name', key='file_name_input',placeholder='file name',label_visibility='collapsed')
|
160 |
+
st.download_button(
|
161 |
+
label="Download response curves",
|
162 |
+
data=json.dumps(rcs),
|
163 |
+
file_name=f"{file_name}.json",
|
164 |
+
mime="application/json",
|
165 |
+
disabled= len(file_name) == 0,
|
166 |
+
)
|
167 |
+
|
168 |
+
|
169 |
+
def s_curve_derivative(x, K, b, a, x0):
    """Analytical first derivative of the S-curve with respect to x."""
    exp_term = np.exp(-a * (x - x0))
    return (a * b * K * exp_term) / (1 + b * exp_term) ** 2
|
172 |
+
|
173 |
+
# Parameters of the S-curve
|
174 |
+
K = st.session_state['K']
|
175 |
+
b = st.session_state['b']
|
176 |
+
a = st.session_state['a']
|
177 |
+
x0 = st.session_state['x0']
|
178 |
+
|
179 |
+
# Optimized spend value obtained from the tool
|
180 |
+
optimized_spend = st.number_input('value of x') # Replace this with your optimized spend value
|
181 |
+
|
182 |
+
# Calculate the slope at the optimized spend value
|
183 |
+
slope_at_optimized_spend = s_curve_derivative(optimized_spend, K, b, a, x0)
|
184 |
+
|
185 |
+
st.write("Slope ", slope_at_optimized_spend)
|
pages/8_Scenario_Planner.py
ADDED
@@ -0,0 +1,1424 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from numerize.numerize import numerize
|
3 |
+
import numpy as np
|
4 |
+
from functools import partial
|
5 |
+
from collections import OrderedDict
|
6 |
+
from plotly.subplots import make_subplots
|
7 |
+
import plotly.graph_objects as go
|
8 |
+
from utilities import (
|
9 |
+
format_numbers,
|
10 |
+
load_local_css,
|
11 |
+
set_header,
|
12 |
+
initialize_data,
|
13 |
+
load_authenticator,
|
14 |
+
send_email,
|
15 |
+
channel_name_formating,
|
16 |
+
)
|
17 |
+
from classes import class_from_dict, class_to_dict
|
18 |
+
import pickle
|
19 |
+
import streamlit_authenticator as stauth
|
20 |
+
import yaml
|
21 |
+
from yaml import SafeLoader
|
22 |
+
import re
|
23 |
+
import pandas as pd
|
24 |
+
import plotly.express as px
|
25 |
+
|
26 |
+
|
27 |
+
st.set_page_config(layout="wide")
|
28 |
+
load_local_css("styles.css")
|
29 |
+
set_header()
|
30 |
+
|
31 |
+
for k, v in st.session_state.items():
|
32 |
+
if k not in ["logout", "login", "config"] and not k.startswith("FormSubmitter"):
|
33 |
+
st.session_state[k] = v
|
34 |
+
# ======================================================== #
|
35 |
+
# ======================= Functions ====================== #
|
36 |
+
# ======================================================== #
|
37 |
+
|
38 |
+
|
39 |
+
def optimize(key, status_placeholder):
    """
    Optimize the spends for the sales
    """

    # Channels ticked for optimization in the UI (value is a bool flag).
    # NOTE(review): the comprehension variable `key` shadows the function
    # parameter `key`; the parameter is only read again after the
    # comprehension completes, so behavior is unaffected — but renaming one
    # of them would be safer.
    channel_list = [
        key for key, value in st.session_state["optimization_channels"].items() if value
    ]

    if len(channel_list) > 0:
        scenario = st.session_state["scenario"]
        # "media spends" mode: hold spends budget, optimize allocation.
        if key.lower() == "media spends":
            with status_placeholder:
                with st.spinner("Optimizing"):
                    result = st.session_state["scenario"].optimize(
                        st.session_state["total_spends_change"], channel_list
                    )
        # elif key.lower() == "revenue":
        else:
            # Otherwise optimize spends to hit the requested sales change.
            with status_placeholder:
                with st.spinner("Optimizing"):

                    result = st.session_state["scenario"].optimize_spends(
                        st.session_state["total_sales_change"], channel_list
                    )
        # Reflect optimizer output back into the per-channel widgets:
        # absolute spend (numerized string) and percent change vs actuals.
        for channel_name, modified_spends in result:

            st.session_state[channel_name] = numerize(
                modified_spends * scenario.channels[channel_name].conversion_rate,
                1,
            )
            prev_spends = (
                st.session_state["scenario"].channels[channel_name].actual_total_spends
            )
            st.session_state[f"{channel_name}_change"] = round(
                100 * (modified_spends - prev_spends) / prev_spends, 2
            )
|
76 |
+
|
77 |
+
|
78 |
+
def save_scenario(scenario_name):
    """
    Save the current scenario with the mentioned name in the session state

    Parameters
    ----------
    scenario_name
        Name of the scenario to be saved
    """
    # Bug fix: the original assigned an empty OrderedDict to st.session_state
    # itself, destroying every piece of app state. Initialise only the
    # 'saved_scenarios' entry when it is missing.
    if "saved_scenarios" not in st.session_state:
        st.session_state["saved_scenarios"] = OrderedDict()

    # st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
    st.session_state["saved_scenarios"][scenario_name] = class_to_dict(
        st.session_state["scenario"]
    )
    # Clear the text input after saving.
    st.session_state["scenario_input"] = ""
    # Persist saved scenarios to disk so they survive app restarts.
    with open("../saved_scenarios.pkl", "wb") as f:
        pickle.dump(st.session_state["saved_scenarios"], f)
|
98 |
+
|
99 |
+
|
100 |
+
if "allow_spends_update" not in st.session_state:
|
101 |
+
st.session_state["allow_spends_update"] = True
|
102 |
+
|
103 |
+
if "allow_sales_update" not in st.session_state:
|
104 |
+
st.session_state["allow_sales_update"] = True
|
105 |
+
|
106 |
+
|
107 |
+
def update_sales_abs_slider():
    # Callback for the absolute-sales slider: convert the selected
    # abbreviated value (e.g. "1.2M") back into a whole-percent change
    # relative to actual sales, and mirror it into the text-input widget.
    actual_sales = _scenario.actual_total_sales
    if validate_input(st.session_state["total_sales_change_abs_slider"]):
        modified_sales = extract_number_for_string(
            st.session_state["total_sales_change_abs_slider"]
        )
        # Percent change, rounded to the nearest whole percent.
        st.session_state["total_sales_change"] = round(
            ((modified_sales / actual_sales) - 1) * 100
        )
        st.session_state["total_sales_change_abs"] = numerize(modified_sales, 1)
|
117 |
+
|
118 |
+
|
119 |
+
def update_sales_abs():
    # Callback for the absolute-sales text input. The typed value is only
    # applied when it exactly matches one of the precomputed slider options
    # (the `allow_sales_update` gate), keeping text input and slider in sync.
    if (
        st.session_state["total_sales_change_abs"]
        in st.session_state["total_sales_change_abs_slider_options"]
    ):
        st.session_state["allow_sales_update"] = True
    else:
        st.session_state["allow_sales_update"] = False

    actual_sales = _scenario.actual_total_sales
    if (
        validate_input(st.session_state["total_sales_change_abs"])
        and st.session_state["allow_sales_update"]
    ):
        modified_sales = extract_number_for_string(
            st.session_state["total_sales_change_abs"]
        )
        # Whole-percent change relative to actual sales.
        st.session_state["total_sales_change"] = round(
            ((modified_sales / actual_sales) - 1) * 100
        )
        # Mirror the accepted value onto the slider widget.
        st.session_state["total_sales_change_abs_slider"] = numerize(modified_sales, 1)
|
140 |
+
|
141 |
+
|
142 |
+
def update_sales():
    """Sync the absolute-sales text input and slider widgets from the
    percent-change slider value."""
    scaled_sales = (
        1 + st.session_state["total_sales_change"] / 100
    ) * _scenario.actual_total_sales
    formatted = numerize(scaled_sales, 1)
    st.session_state["total_sales_change_abs"] = formatted
    st.session_state["total_sales_change_abs_slider"] = formatted
|
153 |
+
|
154 |
+
|
155 |
+
def update_all_spends_abs_slider():
    # Callback for the absolute total-spends slider: translate the chosen
    # abbreviated value into a whole-percent change, mirror it into the
    # text input, then propagate the change to every channel.
    actual_spends = _scenario.actual_total_spends
    if validate_input(st.session_state["total_spends_change_abs_slider"]):
        modified_spends = extract_number_for_string(
            st.session_state["total_spends_change_abs_slider"]
        )
        st.session_state["total_spends_change"] = round(
            ((modified_spends / actual_spends) - 1) * 100
        )
        st.session_state["total_spends_change_abs"] = numerize(modified_spends, 1)

    # Applied unconditionally so channel widgets always reflect the current
    # total_spends_change, even when the slider input failed validation.
    update_all_spends()
|
167 |
+
|
168 |
+
|
169 |
+
# def update_all_spends_abs_slider():
|
170 |
+
# actual_spends = _scenario.actual_total_spends
|
171 |
+
# if validate_input(st.session_state["total_spends_change_abs_slider"]):
|
172 |
+
# print("#" * 100)
|
173 |
+
# print(st.session_state["total_spends_change_abs_slider"])
|
174 |
+
# print("#" * 100)
|
175 |
+
|
176 |
+
# modified_spends = extract_number_for_string(
|
177 |
+
# st.session_state["total_spends_change_abs_slider"]
|
178 |
+
# )
|
179 |
+
# st.session_state["total_spends_change"] = (
|
180 |
+
# (modified_spends / actual_spends) - 1
|
181 |
+
# ) * 100
|
182 |
+
# st.session_state["total_spends_change_abs"] = st.session_state[
|
183 |
+
# "total_spends_change_abs_slider"
|
184 |
+
# ]
|
185 |
+
|
186 |
+
# update_all_spends()
|
187 |
+
|
188 |
+
|
189 |
+
def update_all_spends_abs():
    # Callback for the absolute total-spends text input. Only values that
    # exactly match a precomputed slider option are applied (the
    # `allow_spends_update` gate), keeping text input and slider in sync.
    if (
        st.session_state["total_spends_change_abs"]
        in st.session_state["total_spends_change_abs_slider_options"]
    ):
        st.session_state["allow_spends_update"] = True
    else:
        st.session_state["allow_spends_update"] = False

    actual_spends = _scenario.actual_total_spends
    if (
        validate_input(st.session_state["total_spends_change_abs"])
        and st.session_state["allow_spends_update"]
    ):
        modified_spends = extract_number_for_string(
            st.session_state["total_spends_change_abs"]
        )
        # NOTE(review): unlike the slider callback, this percent change is
        # NOT rounded — confirm whether that asymmetry is intentional.
        st.session_state["total_spends_change"] = (
            (modified_spends / actual_spends) - 1
        ) * 100
        st.session_state["total_spends_change_abs_slider"] = st.session_state[
            "total_spends_change_abs"
        ]

    # Applied unconditionally so channel widgets reflect the current state.
    update_all_spends()
|
214 |
+
|
215 |
+
|
216 |
+
def update_spends():
    """Sync the absolute-spends text input and slider from the percent-change
    slider value, then propagate the change to every channel."""
    scaled_spends = (
        1 + st.session_state["total_spends_change"] / 100
    ) * _scenario.actual_total_spends
    formatted = numerize(scaled_spends, 1)
    st.session_state["total_spends_change_abs"] = formatted
    st.session_state["total_spends_change_abs_slider"] = formatted

    update_all_spends()
|
229 |
+
|
230 |
+
|
231 |
+
def update_all_spends():
    """
    Updates spends for all the channels with the given overall spends change.

    Applies the percent change stored in ``total_spends_change`` uniformly to
    every channel's actual spends, updates the scenario model, and refreshes
    each channel's text-input widget and per-channel percent widget.
    """
    percent_change = st.session_state["total_spends_change"]

    for channel_name in st.session_state["channels_list"]:
        channel = st.session_state["scenario"].channels[channel_name]
        current_spends = channel.actual_total_spends
        modified_spends = (1 + percent_change / 100) * current_spends
        # Push the new spends into the scenario model first...
        st.session_state["scenario"].update(channel_name, modified_spends)
        # ...then refresh the widget state shown for this channel
        # (displayed in spend currency, hence the conversion_rate factor).
        st.session_state[channel_name] = numerize(
            modified_spends * channel.conversion_rate, 1
        )
        st.session_state[f"{channel_name}_change"] = percent_change
def extract_number_for_string(string_input):
    """
    Parse a human-abbreviated number string into a float.

    Supports the suffixes K (1e3), M (1e6) and B (1e9), case-insensitively,
    e.g. ``"1.5K"`` -> ``1500.0``.

    Generalization over the original: a plain numeric string without a suffix
    (e.g. ``"12"``) now parses to its float value instead of silently falling
    through. Anything unparseable still returns ``None`` (original behavior
    for invalid input), so gated callers are unaffected.

    :param string_input: the text to parse.
    :return: parsed float value, or ``None`` if the string is not numeric.
    """
    multipliers = {"K": 10**3, "M": 10**6, "B": 10**9}
    string_input = string_input.strip().upper()
    if string_input and string_input[-1] in multipliers:
        try:
            return float(string_input[:-1]) * multipliers[string_input[-1]]
        except ValueError:
            return None
    # No recognized suffix: accept a bare numeric string as-is.
    try:
        return float(string_input)
    except ValueError:
        return None
def validate_input(string_input):
    """
    Return True iff *string_input* is a number followed by a K/M/B suffix,
    e.g. ``"1.2M"`` or ``"100K"``.

    Bug fixes versus the original:
    - the character class was written ``[K|M|B]``, which also matched a
      literal ``|`` (so ``"12|"`` incorrectly validated) — now ``[KMB]``;
    - non-string input (e.g. ``None``) now returns False instead of raising
      ``TypeError`` inside ``re.match``.

    :param string_input: candidate text from a spends input widget.
    :return: bool validity flag.
    """
    if not isinstance(string_input, str):
        return False
    pattern = r"\d+\.?\d*[KMB]$"
    return re.match(pattern, string_input) is not None
def update_data_by_percent(channel_name):
    """
    Callback for a single channel's percent-change number input.

    Applies the percent delta in ``{channel_name}_change`` to the channel's
    actual spends (in display currency), refreshes the channel's text box,
    and writes the new spends back into the scenario model.

    :param channel_name: key of the channel being edited.
    """
    # Actual spends in display currency (model units * conversion rate).
    prev_spends = (
        st.session_state["scenario"].channels[channel_name].actual_total_spends
        * st.session_state["scenario"].channels[channel_name].conversion_rate
    )
    modified_spends = prev_spends * (
        1 + st.session_state[f"{channel_name}_change"] / 100
    )
    # Refresh the channel's absolute-spends text widget.
    st.session_state[channel_name] = numerize(modified_spends, 1)
    # Scenario stores spends in model units, so divide the display value back.
    st.session_state["scenario"].update(
        channel_name,
        modified_spends
        / st.session_state["scenario"].channels[channel_name].conversion_rate,
    )
def update_data(channel_name):
    """
    Updates the spends for the given channel.

    Callback for a single channel's absolute-spends text input. Parses the
    typed abbreviated number, derives the percent delta versus the channel's
    actual spends, and writes the new value into the scenario model.
    Invalid input is silently ignored (the UI shows a separate error).

    :param channel_name: key of the channel being edited.
    """

    if validate_input(st.session_state[channel_name]):
        modified_spends = extract_number_for_string(st.session_state[channel_name])
        # Actual spends in display currency (model units * conversion rate).
        prev_spends = (
            st.session_state["scenario"].channels[channel_name].actual_total_spends
            * st.session_state["scenario"].channels[channel_name].conversion_rate
        )
        # Mirror the implied percent change into the percent widget.
        st.session_state[f"{channel_name}_change"] = round(
            100 * (modified_spends - prev_spends) / prev_spends, 2
        )
        # Scenario stores spends in model units, hence the division.
        st.session_state["scenario"].update(
            channel_name,
            modified_spends
            / st.session_state["scenario"].channels[channel_name].conversion_rate,
        )
def select_channel_for_optimization(channel_name):
    """
    Marks the given channel for optimization.

    Callback for the per-channel "Optimize" checkbox: copies the checkbox
    state (``{channel_name}_selected``) into the shared
    ``optimization_channels`` mapping consumed by the optimizer.

    :param channel_name: key of the channel whose checkbox changed.
    """
    st.session_state["optimization_channels"][channel_name] = st.session_state[
        f"{channel_name}_selected"
    ]
def select_all_channels_for_optimization():
    """
    Marks all the channel for optimization.

    Callback for the "Optimize all Channels" checkbox: propagates its state
    to every per-channel checkbox and to the ``optimization_channels`` map.
    """
    # NOTE: "optimze_all_channels" (sic) is the live widget key elsewhere in
    # this page — do not "fix" the spelling without renaming the widget too.
    for channel_name in st.session_state["optimization_channels"].keys():
        st.session_state[f"{channel_name}_selected"] = st.session_state[
            "optimze_all_channels"
        ]
        st.session_state["optimization_channels"][channel_name] = st.session_state[
            "optimze_all_channels"
        ]
def update_penalty():
    """
    Updates the penalty flag for sales calculation.

    Callback for the "apply_penalty" checkbox: forwards its state to the
    scenario object, which applies/removes the penalty in its sales math.
    """
    st.session_state["scenario"].update_penalty(st.session_state["apply_penalty"])
def reset_scenario(panel_selected, file_selected, updated_rcs):
    """
    Reset the page to the untouched scenario for the current selection.

    Re-initializes the scenario data, clears every channel's optimization
    checkbox and percent-change widget, zeroes the overall change inputs,
    and refreshes the derived spends/sales widgets.

    :param panel_selected: panel name or "Aggregated".
    :param file_selected: path of the metrics workbook to load.
    :param updated_rcs: previously tuned response-curve params, or None.
    """
    # NOTE(review): both branches call initialize_data with identical
    # arguments; only the "Aggregated" branch also sets a (local, unused)
    # ``panel`` variable. Also relies on the module-level ``metrics_selected``.
    if panel_selected == "Aggregated":
        initialize_data(
            panel=panel_selected,
            target_file=file_selected,
            updated_rcs=updated_rcs,
            metrics=metrics_selected,
        )
        panel = None
    else:
        initialize_data(
            panel=panel_selected,
            target_file=file_selected,
            updated_rcs=updated_rcs,
            metrics=metrics_selected,
        )

    # Clear per-channel optimization selections and percent deltas.
    for channel_name in st.session_state["channels_list"]:
        st.session_state[f"{channel_name}_selected"] = False
        st.session_state[f"{channel_name}_change"] = 0
    st.session_state["optimze_all_channels"] = False

    st.session_state["total_sales_change"] = 0

    # Refresh the derived absolute widgets from the freshly loaded scenario.
    update_spends()
    update_sales()

    reset_inputs()
def format_number(num):
    """
    Render a numeric value as a compact human-readable string.

    Millions get two decimals and an ``M`` suffix, thousands are rounded to
    a whole ``K``, and anything smaller is shown with two decimals.

    :param num: the value to format.
    :return: formatted string.
    """
    million = 1_000_000
    thousand = 1_000
    if num >= million:
        return "{:.2f}M".format(num / million)
    if num >= thousand:
        return "{:.0f}K".format(num / thousand)
    return "{:.2f}".format(num)
def summary_plot(data, x, y, title, text_column):
    """
    Build a horizontal bar chart of per-channel values.

    :param data: DataFrame with a ``Channel_name`` column (used for color).
    :param x: column plotted along the value axis.
    :param y: column plotted along the category axis.
    :param title: chart title.
    :param text_column: column used for the bar labels.
    :return: a plotly Figure.
    """
    fig = px.bar(
        data,
        x=x,
        y=y,
        orientation="h",
        title=title,
        text=text_column,
        color="Channel_name",
    )

    # Convert text_column to numeric values
    # NOTE(review): this mutates the caller's DataFrame in place, and runs
    # only after the figure was built — confirm the side effect is intended.
    data[text_column] = pd.to_numeric(data[text_column], errors="coerce")

    # Update the format of the displayed text based on magnitude
    fig.update_traces(
        texttemplate="%{text:.2s}",
        textposition="outside",
        hovertemplate="%{x:.2s}",
    )

    fig.update_layout(xaxis_title=x, yaxis_title="Channel Name", showlegend=False)
    return fig
def s_curve(x, K, b, a, x0):
    """
    Generalized logistic response curve.

    Evaluates ``K / (1 + b * exp(-a * (x - x0)))``: ``K`` is the saturation
    ceiling, ``a`` the steepness, ``x0`` the inflection shift and ``b`` an
    additional scale on the exponential term.
    """
    exponent = -a * (x - x0)
    denominator = 1 + b * np.exp(exponent)
    return K / denominator
def find_segment_value(x, roi, mroi):
    """
    Locate the "green" (efficient) segment of a response curve.

    The green region is where both ROI and marginal ROI exceed 1.

    Fixes versus the original: the identical ``np.where`` mask was computed
    twice (once for the left edge, once for the right); it is now computed
    once. ``x[len(x) - 1]`` is simplified to ``x[-1]``. Behavior is unchanged.

    :param x: spend values (array-like, ordered).
    :param roi: ROI at each spend value.
    :param mroi: marginal ROI at each spend value.
    :return: tuple ``(start_value, end_value, left_value, right_value)`` —
        curve start/end plus the green segment's edges (both fall back to
        ``x[0]`` when no point is green).
    """
    start_value = x[0]
    end_value = x[-1]

    # Condition for green region: Both MROI and ROI > 1
    green_indices = np.where((roi > 1) & (mroi > 1))[0]
    if green_indices.size > 0:
        left_value = x[green_indices[0]]
        right_value = x[green_indices[-1]]
    else:
        left_value = x[0]
        right_value = x[0]

    return start_value, end_value, left_value, right_value
def calculate_rgba(
    start_value, end_value, left_value, right_value, current_channel_spends
):
    """
    Map a spend level onto an RGBA color for the response-curve histogram.

    The [start, left] span is yellow, (left, right] green and (right, end]
    red; within each span the alpha fades linearly (0.8 -> 0.2 for yellow
    and green, 0.2 -> 0.8 for red). Spends outside [start, end] get a fixed
    translucent grey.
    """
    rgb_by_color = {
        "yellow": "255, 255, 0",  # RGB for yellow
        "green": "0, 128, 0",  # RGB for green
        "red": "255, 0, 0",  # RGB for red
    }

    if start_value <= current_channel_spends <= left_value:
        color = "yellow"
        fraction = (current_channel_spends - start_value) / (
            left_value - start_value
        )
        alpha = 0.8 - (0.6 * fraction)  # fades out toward the green edge
    elif left_value < current_channel_spends <= right_value:
        color = "green"
        fraction = (current_channel_spends - left_value) / (
            right_value - left_value
        )
        alpha = 0.8 - (0.6 * fraction)  # fades out toward the red edge
    elif right_value < current_channel_spends <= end_value:
        color = "red"
        fraction = (current_channel_spends - right_value) / (
            end_value - right_value
        )
        alpha = 0.2 + (0.6 * fraction)  # intensifies toward the curve end
    else:
        # Spends outside the defined ranges: fixed translucent grey.
        return "rgba(136, 136, 136, 0.5)"

    # Clamp in case of any floating-point overshoot at the span edges.
    alpha = max(0.2, min(alpha, 0.8))

    return f"rgba({rgb_by_color[color]}, {alpha})"
def debug_temp(x_test, power, K, b, a, x0):
    """
    Debug helper: print how many entries of ``x_test`` fall into three bins.

    NOTE(review): the bin edges 2524 and 3377 are hard-coded magic numbers,
    presumably tuned while debugging one specific channel — confirm before
    reusing. The ``power``/``K``/``b``/``a``/``x0`` parameters are accepted
    but never used.
    """
    print("*" * 100)
    # Calculate the count of bins
    count_lower_bin = sum(1 for x in x_test if x <= 2524)
    count_center_bin = sum(1 for x in x_test if x > 2524 and x <= 3377)
    count_ = sum(1 for x in x_test if x > 3377)

    print(
        f"""
        lower : {count_lower_bin}
        center : {count_center_bin}
        upper : {count_}
        """
    )
495 |
+
# @st.cache
def plot_response_curves():
    """
    Build a grid of per-channel response-curve subplots.

    For every channel: sweep total spends from 0 to 5x actual, evaluate the
    fitted s-curve to get predicted sales, ROI and marginal ROI, draw the
    curve plus the current operating point with dashed guide lines, and
    shade the background yellow/green/red by efficiency segment.

    Reads ``channels_list``, ``target`` and ``st.session_state`` (scenario,
    fitted curve params in ``rcs``) from module scope.

    :return: a plotly Figure with one subplot per channel, 4 per row.
    """
    cols = 4
    # Enough rows to fit every channel at 4 subplots per row.
    rows = (
        len(channels_list) // cols
        if len(channels_list) % cols == 0
        else len(channels_list) // cols + 1
    )
    rcs = st.session_state["rcs"]
    shapes = []
    fig = make_subplots(rows=rows, cols=cols, subplot_titles=channels_list)
    for i in range(0, len(channels_list)):
        col = channels_list[i]
        x_actual = st.session_state["scenario"].channels[col].actual_spends
        # x_modified = st.session_state["scenario"].channels[col].modified_spends

        # Power-of-ten scale so spends are fed to the s-curve in the units
        # it was fitted on (presumably thousands below the max — TODO confirm).
        power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3

        # Fitted s-curve parameters for this channel.
        K = rcs[col]["K"]
        b = rcs[col]["b"]
        a = rcs[col]["a"]
        x0 = rcs[col]["x0"]

        # Sweep total spends from 0 to 5x actual over 50 sample points.
        x_plot = np.linspace(0, 5 * x_actual.sum(), 50)

        x, y, marginal_roi = [], [], []
        # Distribute each total across periods proportional to actual spends.
        for x_p in x_plot:
            x.append(x_p * x_actual / x_actual.sum())

        for index in range(len(x_plot)):
            y.append(s_curve(x[index] / 10**power, K, b, a, x0))

        for index in range(len(x_plot)):
            # Logistic marginal response a*y*(1 - y/K); eps guards K == 0.
            marginal_roi.append(
                a * y[index] * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
            )

        # Collapse per-period values to totals; x in display currency.
        x = (
            np.sum(x, axis=1)
            * st.session_state["scenario"].channels[col].conversion_rate
        )
        y = np.sum(y, axis=1)
        marginal_roi = (
            np.average(marginal_roi, axis=1)
            / st.session_state["scenario"].channels[col].conversion_rate
        )

        # eps guards the x == 0 sample point against division by zero.
        roi = y / np.maximum(x, np.finfo(float).eps)

        # Response curve trace for this channel's subplot.
        fig.add_trace(
            go.Scatter(
                x=x,
                y=y,
                name=col,
                customdata=np.stack((roi, marginal_roi), axis=-1),
                hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
                line=dict(color="blue"),
            ),
            row=1 + (i) // cols,
            col=i % cols + 1,
        )

        # Current (modified/optimized) operating point in display currency.
        x_optimal = (
            st.session_state["scenario"].channels[col].modified_total_spends
            * st.session_state["scenario"].channels[col].conversion_rate
        )
        y_optimal = st.session_state["scenario"].channels[col].modified_total_sales

        # if col == "Paid_social_others":
        #     debug_temp(x_optimal * x_actual / x_actual.sum(), power, K, b, a, x0)

        # Marker for the operating point.
        fig.add_trace(
            go.Scatter(
                x=[x_optimal],
                y=[y_optimal],
                name=col,
                legendgroup=col,
                showlegend=False,
                marker=dict(color=["black"]),
            ),
            row=1 + (i) // cols,
            col=i % cols + 1,
        )

        # Horizontal dashed guide from the y-axis to the operating point.
        shapes.append(
            go.layout.Shape(
                type="line",
                x0=0,
                y0=y_optimal,
                x1=x_optimal,
                y1=y_optimal,
                line_width=1,
                line_dash="dash",
                line_color="black",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Vertical dashed guide from the x-axis to the operating point.
        shapes.append(
            go.layout.Shape(
                type="line",
                x0=x_optimal,
                y0=0,
                x1=x_optimal,
                y1=y_optimal,
                line_width=1,
                line_dash="dash",
                line_color="black",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Segment boundaries: green where both ROI and MROI exceed 1.
        start_value, end_value, left_value, right_value = find_segment_value(
            x,
            roi,
            marginal_roi,
        )

        # Adding background colors
        y_max = y.max() * 1.3  # 30% extra space above the max

        # Yellow region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=start_value,
                y0=0,
                x1=left_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(255, 255, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Green region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=left_value,
                y0=0,
                x1=right_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(0, 255, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Red region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=right_value,
                y0=0,
                x1=end_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(255, 0, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

    fig.update_layout(
        # height=1000,
        # width=1000,
        title_text=f"Response Curves (X: Spends Vs Y: {target})",
        showlegend=False,
        shapes=shapes,
    )
    fig.update_annotations(font_size=10)
    # fig.update_xaxes(title="Spends")
    # fig.update_yaxes(title=target)
    fig.update_yaxes(
        gridcolor="rgba(136, 136, 136, 0.5)", gridwidth=0.5, griddash="dash"
    )

    return fig
682 |
+
|
683 |
+
# @st.cache
|
684 |
+
# def plot_response_curves():
|
685 |
+
# cols = 4
|
686 |
+
# rcs = st.session_state["rcs"]
|
687 |
+
# shapes = []
|
688 |
+
# fig = make_subplots(rows=6, cols=cols, subplot_titles=channels_list)
|
689 |
+
# for i in range(0, len(channels_list)):
|
690 |
+
# col = channels_list[i]
|
691 |
+
# x = st.session_state["actual_df"][col].values
|
692 |
+
# spends = x.sum()
|
693 |
+
# power = np.ceil(np.log(x.max()) / np.log(10)) - 3
|
694 |
+
# x = np.linspace(0, 3 * x.max(), 200)
|
695 |
+
|
696 |
+
# K = rcs[col]["K"]
|
697 |
+
# b = rcs[col]["b"]
|
698 |
+
# a = rcs[col]["a"]
|
699 |
+
# x0 = rcs[col]["x0"]
|
700 |
+
|
701 |
+
# y = s_curve(x / 10**power, K, b, a, x0)
|
702 |
+
# roi = y / x
|
703 |
+
# marginal_roi = a * (y) * (1 - y / K)
|
704 |
+
# fig.add_trace(
|
705 |
+
# go.Scatter(
|
706 |
+
# x=52
|
707 |
+
# * x
|
708 |
+
# * st.session_state["scenario"].channels[col].conversion_rate,
|
709 |
+
# y=52 * y,
|
710 |
+
# name=col,
|
711 |
+
# customdata=np.stack((roi, marginal_roi), axis=-1),
|
712 |
+
# hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
|
713 |
+
# ),
|
714 |
+
# row=1 + (i) // cols,
|
715 |
+
# col=i % cols + 1,
|
716 |
+
# )
|
717 |
+
|
718 |
+
# fig.add_trace(
|
719 |
+
# go.Scatter(
|
720 |
+
# x=[
|
721 |
+
# spends
|
722 |
+
# * st.session_state["scenario"]
|
723 |
+
# .channels[col]
|
724 |
+
# .conversion_rate
|
725 |
+
# ],
|
726 |
+
# y=[52 * s_curve(spends / (10**power * 52), K, b, a, x0)],
|
727 |
+
# name=col,
|
728 |
+
# legendgroup=col,
|
729 |
+
# showlegend=False,
|
730 |
+
# marker=dict(color=["black"]),
|
731 |
+
# ),
|
732 |
+
# row=1 + (i) // cols,
|
733 |
+
# col=i % cols + 1,
|
734 |
+
# )
|
735 |
+
|
736 |
+
# shapes.append(
|
737 |
+
# go.layout.Shape(
|
738 |
+
# type="line",
|
739 |
+
# x0=0,
|
740 |
+
# y0=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
|
741 |
+
# x1=spends
|
742 |
+
# * st.session_state["scenario"].channels[col].conversion_rate,
|
743 |
+
# y1=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
|
744 |
+
# line_width=1,
|
745 |
+
# line_dash="dash",
|
746 |
+
# line_color="black",
|
747 |
+
# xref=f"x{i+1}",
|
748 |
+
# yref=f"y{i+1}",
|
749 |
+
# )
|
750 |
+
# )
|
751 |
+
|
752 |
+
# shapes.append(
|
753 |
+
# go.layout.Shape(
|
754 |
+
# type="line",
|
755 |
+
# x0=spends
|
756 |
+
# * st.session_state["scenario"].channels[col].conversion_rate,
|
757 |
+
# y0=0,
|
758 |
+
# x1=spends
|
759 |
+
# * st.session_state["scenario"].channels[col].conversion_rate,
|
760 |
+
# y1=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
|
761 |
+
# line_width=1,
|
762 |
+
# line_dash="dash",
|
763 |
+
# line_color="black",
|
764 |
+
# xref=f"x{i+1}",
|
765 |
+
# yref=f"y{i+1}",
|
766 |
+
# )
|
767 |
+
# )
|
768 |
+
|
769 |
+
# fig.update_layout(
|
770 |
+
# height=1500,
|
771 |
+
# width=1000,
|
772 |
+
# title_text="Response Curves",
|
773 |
+
# showlegend=False,
|
774 |
+
# shapes=shapes,
|
775 |
+
# )
|
776 |
+
# fig.update_annotations(font_size=10)
|
777 |
+
# fig.update_xaxes(title="Spends")
|
778 |
+
# fig.update_yaxes(title=target)
|
779 |
+
# return fig
|
780 |
+
|
781 |
+
|
782 |
+
# ======================================================== #
|
783 |
+
# ==================== HTML Components =================== #
|
784 |
+
# ======================================================== #
|
785 |
+
|
786 |
+
|
787 |
+
def generate_spending_header(heading):
    """
    Render a spends-table column header as styled markdown.

    :param heading: text to display inside the header.
    :return: the value returned by ``st.markdown``.
    """
    markup = f"""<h2 class="spends-header">{heading}</h2>"""
    return st.markdown(markup, unsafe_allow_html=True)
792 |
+
|
793 |
+
# ======================================================== #
|
794 |
+
# =================== Session variables ================== #
|
795 |
+
# ======================================================== #
|
796 |
+
|
797 |
+
# Load the page config and set up cookie-based authentication.
with open("config.yaml") as file:
    config = yaml.load(file, Loader=SafeLoader)
    st.session_state["config"] = config

# NOTE(review): yaml.load with SafeLoader is safe; credentials/cookie
# settings come straight from config.yaml.
authenticator = stauth.Authenticate(
    config["credentials"],
    config["cookie"]["name"],
    config["cookie"]["key"],
    config["cookie"]["expiry_days"],
    config["preauthorized"],
)
st.session_state["authenticator"] = authenticator
# Render the login widget; streamlit-authenticator also writes the result
# into st.session_state["authentication_status"], which is read back below.
name, authentication_status, username = authenticator.login("Login", "main")
auth_status = st.session_state.get("authentication_status")
812 |
+
import os
|
813 |
+
import glob
|
814 |
+
|
815 |
+
|
816 |
+
def get_excel_names(directory):
    """
    List the metric names encoded in Excel filenames under *directory*.

    Matches ``*.xlsx``/``*.xls`` files whose name contains ``@#`` and
    returns the portion after the last ``@#`` with the extension removed.

    Fix versus the original: the extension is now stripped with
    ``os.path.splitext`` instead of ``str.replace``, which would also have
    removed ".xls"/".xlsx" occurring anywhere inside the metric name.

    :param directory: directory to scan (non-existent dirs yield []).
    :return: list of metric-name strings.
    """
    # Create a list to hold the final parts of the filenames
    last_portions = []

    # Patterns to match Excel files (.xlsx and .xls) that contain @#
    patterns = [
        os.path.join(directory, "*@#*.xlsx"),
        os.path.join(directory, "*@#*.xls"),
    ]

    # Process each pattern
    for pattern in patterns:
        for file in glob.glob(pattern):
            # Extract the last portion after @# and drop only the trailing
            # extension.
            base_name = os.path.basename(file)
            last_portion = base_name.split("@#")[-1]
            last_portion = os.path.splitext(last_portion)[0]
            last_portions.append(last_portion)

    return last_portions
842 |
+
def name_formating(channel_name):
    """
    Turn a snake_case identifier into a Title Case display label.

    :param channel_name: identifier such as ``"total_sales"``.
    :return: display string such as ``"Total Sales"``.
    """
    # Underscores become spaces, then each word is capitalized.
    return channel_name.replace("_", " ").title()
851 |
+
|
852 |
+
@st.cache_resource(show_spinner=False)
def panel_fetch(file_selected):
    """
    Read the unique Panel values from a metrics workbook.

    Loads the "RAW DATA MMM" sheet and, if it has a ``Panel`` column,
    returns its distinct values; otherwise returns None. Cached per file
    by Streamlit.

    :param file_selected: path to the Excel workbook.
    :return: list of panel names, or None when no Panel column exists.
    """
    raw_data_mmm_df = pd.read_excel(file_selected, sheet_name="RAW DATA MMM")

    if "Panel" in raw_data_mmm_df.columns:
        # NOTE(review): set() loses ordering, so the panel list order is
        # arbitrary between runs — confirm callers sort if order matters.
        panel = list(set(raw_data_mmm_df["Panel"]))
    else:
        raw_data_mmm_df = None
        panel = None

    return panel
+
|
865 |
+
def reset_inputs():
    """
    Clear all overall spends/sales widget state.

    Deleting the keys lets the widgets re-seed from the scenario defaults
    on the next rerun; clearing ``initialized`` forces the page-level data
    initialization to run again.
    """
    # Overall spends widgets (text box, percent input, slider).
    if "total_spends_change_abs" in st.session_state:
        del st.session_state.total_spends_change_abs
    if "total_spends_change" in st.session_state:
        del st.session_state.total_spends_change
    if "total_spends_change_abs_slider" in st.session_state:
        del st.session_state.total_spends_change_abs_slider

    # Overall sales widgets (text box, percent input, slider).
    if "total_sales_change_abs" in st.session_state:
        del st.session_state.total_sales_change_abs
    if "total_sales_change" in st.session_state:
        del st.session_state.total_sales_change
    if "total_sales_change_abs_slider" in st.session_state:
        del st.session_state.total_sales_change_abs_slider

    st.session_state["initialized"] = False
882 |
+
|
883 |
+
if auth_status == True:
|
884 |
+
authenticator.logout("Logout", "main")
|
885 |
+
|
886 |
+
st.header("Simulation")
|
887 |
+
col1, col2 = st.columns([1, 1])
|
888 |
+
|
889 |
+
# Response Metrics
|
890 |
+
directory = "metrics_level_data"
|
891 |
+
metrics_list = get_excel_names(directory)
|
892 |
+
metrics_selected = col1.selectbox(
|
893 |
+
"Response Metrics",
|
894 |
+
metrics_list,
|
895 |
+
format_func=name_formating,
|
896 |
+
index=0,
|
897 |
+
on_change=reset_inputs,
|
898 |
+
)
|
899 |
+
|
900 |
+
# Target
|
901 |
+
target = name_formating(metrics_selected)
|
902 |
+
|
903 |
+
file_selected = (
|
904 |
+
f".\metrics_level_data\Overview_data_test_panel@#{metrics_selected}.xlsx"
|
905 |
+
)
|
906 |
+
|
907 |
+
# Panel List
|
908 |
+
panel_list = panel_fetch(file_selected)
|
909 |
+
|
910 |
+
# Panel Selected
|
911 |
+
panel_selected = col2.selectbox(
|
912 |
+
"Panel",
|
913 |
+
["Aggregated"] + panel_list,
|
914 |
+
index=0,
|
915 |
+
on_change=reset_inputs,
|
916 |
+
)
|
917 |
+
|
918 |
+
if "update_rcs" in st.session_state:
|
919 |
+
updated_rcs = st.session_state["update_rcs"]
|
920 |
+
else:
|
921 |
+
updated_rcs = None
|
922 |
+
|
923 |
+
if "first_time" not in st.session_state:
|
924 |
+
st.session_state["first_time"] = True
|
925 |
+
|
926 |
+
# Check if state is initiaized
|
927 |
+
is_state_initiaized = st.session_state.get("initialized", False)
|
928 |
+
if not is_state_initiaized or st.session_state["first_time"]:
|
929 |
+
# initialize_data()
|
930 |
+
if panel_selected == "Aggregated":
|
931 |
+
initialize_data(
|
932 |
+
panel=panel_selected,
|
933 |
+
target_file=file_selected,
|
934 |
+
updated_rcs=updated_rcs,
|
935 |
+
metrics=metrics_selected,
|
936 |
+
)
|
937 |
+
panel = None
|
938 |
+
else:
|
939 |
+
initialize_data(
|
940 |
+
panel=panel_selected,
|
941 |
+
target_file=file_selected,
|
942 |
+
updated_rcs=updated_rcs,
|
943 |
+
metrics=metrics_selected,
|
944 |
+
)
|
945 |
+
st.session_state["initialized"] = True
|
946 |
+
st.session_state["first_time"] = False
|
947 |
+
|
948 |
+
# Channels List
|
949 |
+
channels_list = st.session_state["channels_list"]
|
950 |
+
|
951 |
+
# ======================================================== #
|
952 |
+
# ========================== UI ========================== #
|
953 |
+
# ======================================================== #
|
954 |
+
|
955 |
+
# print(list(st.session_state.keys()))
|
956 |
+
main_header = st.columns((2, 2))
|
957 |
+
sub_header = st.columns((1, 1, 1, 1))
|
958 |
+
_scenario = st.session_state["scenario"]
|
959 |
+
|
960 |
+
if "total_spends_change" not in st.session_state:
|
961 |
+
st.session_state.total_spends_change = 0
|
962 |
+
|
963 |
+
if "total_sales_change" not in st.session_state:
|
964 |
+
st.session_state.total_sales_change = 0
|
965 |
+
|
966 |
+
if "total_spends_change_abs" not in st.session_state:
|
967 |
+
st.session_state["total_spends_change_abs"] = numerize(
|
968 |
+
_scenario.actual_total_spends, 1
|
969 |
+
)
|
970 |
+
|
971 |
+
if "total_sales_change_abs" not in st.session_state:
|
972 |
+
st.session_state["total_sales_change_abs"] = numerize(
|
973 |
+
_scenario.actual_total_sales, 1
|
974 |
+
)
|
975 |
+
|
976 |
+
if "total_spends_change_abs_slider" not in st.session_state:
|
977 |
+
st.session_state.total_spends_change_abs_slider = numerize(
|
978 |
+
_scenario.actual_total_spends, 1
|
979 |
+
)
|
980 |
+
|
981 |
+
if "total_sales_change_abs_slider" not in st.session_state:
|
982 |
+
st.session_state.total_sales_change_abs_slider = numerize(
|
983 |
+
_scenario.actual_total_sales, 1
|
984 |
+
)
|
985 |
+
|
986 |
+
with main_header[0]:
|
987 |
+
st.subheader("Actual")
|
988 |
+
|
989 |
+
with main_header[-1]:
|
990 |
+
st.subheader("Simulated")
|
991 |
+
|
992 |
+
with sub_header[0]:
|
993 |
+
st.metric(label="Spends", value=format_numbers(_scenario.actual_total_spends))
|
994 |
+
|
995 |
+
with sub_header[1]:
|
996 |
+
st.metric(
|
997 |
+
label=target,
|
998 |
+
value=format_numbers(
|
999 |
+
float(_scenario.actual_total_sales), include_indicator=False
|
1000 |
+
),
|
1001 |
+
)
|
1002 |
+
|
1003 |
+
with sub_header[2]:
|
1004 |
+
st.metric(
|
1005 |
+
label="Spends",
|
1006 |
+
value=format_numbers(_scenario.modified_total_spends),
|
1007 |
+
delta=numerize(_scenario.delta_spends, 1),
|
1008 |
+
)
|
1009 |
+
|
1010 |
+
with sub_header[3]:
|
1011 |
+
st.metric(
|
1012 |
+
label=target,
|
1013 |
+
value=format_numbers(
|
1014 |
+
float(_scenario.modified_total_sales), include_indicator=False
|
1015 |
+
),
|
1016 |
+
delta=numerize(_scenario.delta_sales, 1),
|
1017 |
+
)
|
1018 |
+
|
1019 |
+
with st.expander("Channel Spends Simulator", expanded=True):
|
1020 |
+
_columns1 = st.columns((2, 2, 1, 1))
|
1021 |
+
with _columns1[0]:
|
1022 |
+
optimization_selection = st.selectbox(
|
1023 |
+
"Optimize", options=["Media Spends", target], key="optimization_key"
|
1024 |
+
)
|
1025 |
+
|
1026 |
+
with _columns1[1]:
|
1027 |
+
st.markdown("#")
|
1028 |
+
# if st.checkbox(
|
1029 |
+
# label="Optimize all Channels",
|
1030 |
+
# key="optimze_all_channels",
|
1031 |
+
# value=False,
|
1032 |
+
# # on_change=select_all_channels_for_optimization,
|
1033 |
+
# ):
|
1034 |
+
# select_all_channels_for_optimization()
|
1035 |
+
|
1036 |
+
st.checkbox(
|
1037 |
+
label="Optimize all Channels",
|
1038 |
+
key="optimze_all_channels",
|
1039 |
+
value=False,
|
1040 |
+
on_change=select_all_channels_for_optimization,
|
1041 |
+
)
|
1042 |
+
|
1043 |
+
with _columns1[2]:
|
1044 |
+
st.markdown("#")
|
1045 |
+
# st.button(
|
1046 |
+
# "Optimize",
|
1047 |
+
# on_click=optimize,
|
1048 |
+
# args=(st.session_state["optimization_key"]),
|
1049 |
+
# use_container_width=True,
|
1050 |
+
# )
|
1051 |
+
|
1052 |
+
optimize_placeholder = st.empty()
|
1053 |
+
|
1054 |
+
with _columns1[3]:
|
1055 |
+
st.markdown("#")
|
1056 |
+
st.button(
|
1057 |
+
"Reset",
|
1058 |
+
on_click=reset_scenario,
|
1059 |
+
args=(panel_selected, file_selected, updated_rcs),
|
1060 |
+
use_container_width=True,
|
1061 |
+
)
|
1062 |
+
|
1063 |
+
_columns2 = st.columns((2, 2, 2))
|
1064 |
+
if st.session_state["optimization_key"] == "Media Spends":
|
1065 |
+
with _columns2[0]:
|
1066 |
+
spend_input = st.text_input(
|
1067 |
+
"Absolute",
|
1068 |
+
key="total_spends_change_abs",
|
1069 |
+
# label_visibility="collapsed",
|
1070 |
+
on_change=update_all_spends_abs,
|
1071 |
+
)
|
1072 |
+
|
1073 |
+
with _columns2[1]:
|
1074 |
+
st.number_input(
|
1075 |
+
"Percent Change",
|
1076 |
+
key="total_spends_change",
|
1077 |
+
min_value=-50,
|
1078 |
+
max_value=50,
|
1079 |
+
step=1,
|
1080 |
+
on_change=update_spends,
|
1081 |
+
)
|
1082 |
+
|
1083 |
+
with _columns2[2]:
|
1084 |
+
min_value = round(_scenario.actual_total_spends * 0.5)
|
1085 |
+
max_value = round(_scenario.actual_total_spends * 1.5)
|
1086 |
+
st.session_state["total_spends_change_abs_slider_options"] = [
|
1087 |
+
numerize(value, 1)
|
1088 |
+
for value in range(min_value, max_value + 1, int(1e4))
|
1089 |
+
]
|
1090 |
+
|
1091 |
+
st.select_slider(
|
1092 |
+
"Absolute Slider",
|
1093 |
+
options=st.session_state["total_spends_change_abs_slider_options"],
|
1094 |
+
key="total_spends_change_abs_slider",
|
1095 |
+
on_change=update_all_spends_abs_slider,
|
1096 |
+
)
|
1097 |
+
|
1098 |
+
elif st.session_state["optimization_key"] == target:
|
1099 |
+
with _columns2[0]:
|
1100 |
+
sales_input = st.text_input(
|
1101 |
+
"Absolute",
|
1102 |
+
key="total_sales_change_abs",
|
1103 |
+
on_change=update_sales_abs,
|
1104 |
+
)
|
1105 |
+
|
1106 |
+
with _columns2[1]:
|
1107 |
+
st.number_input(
|
1108 |
+
"Percent Change",
|
1109 |
+
key="total_sales_change",
|
1110 |
+
min_value=-50,
|
1111 |
+
max_value=50,
|
1112 |
+
step=1,
|
1113 |
+
on_change=update_sales,
|
1114 |
+
)
|
1115 |
+
with _columns2[2]:
|
1116 |
+
min_value = round(_scenario.actual_total_sales * 0.5)
|
1117 |
+
max_value = round(_scenario.actual_total_sales * 1.5)
|
1118 |
+
st.session_state["total_sales_change_abs_slider_options"] = [
|
1119 |
+
numerize(value, 1)
|
1120 |
+
for value in range(min_value, max_value + 1, int(1e5))
|
1121 |
+
]
|
1122 |
+
|
1123 |
+
st.select_slider(
|
1124 |
+
"Absolute Slider",
|
1125 |
+
options=st.session_state["total_sales_change_abs_slider_options"],
|
1126 |
+
key="total_sales_change_abs_slider",
|
1127 |
+
on_change=update_sales_abs_slider,
|
1128 |
+
)
|
1129 |
+
|
1130 |
+
if (
|
1131 |
+
not st.session_state["allow_sales_update"]
|
1132 |
+
and optimization_selection == target
|
1133 |
+
):
|
1134 |
+
st.warning("Invalid Input")
|
1135 |
+
|
1136 |
+
if (
|
1137 |
+
not st.session_state["allow_spends_update"]
|
1138 |
+
and optimization_selection == "Media Spends"
|
1139 |
+
):
|
1140 |
+
st.warning("Invalid Input")
|
1141 |
+
|
1142 |
+
status_placeholder = st.empty()
|
1143 |
+
|
1144 |
+
# if optimize_placeholder.button("Optimize", use_container_width=True):
|
1145 |
+
# optimize(st.session_state["optimization_key"], status_placeholder)
|
1146 |
+
# st.rerun()
|
1147 |
+
|
1148 |
+
optimize_placeholder.button(
|
1149 |
+
"Optimize",
|
1150 |
+
on_click=optimize,
|
1151 |
+
args=(st.session_state["optimization_key"], status_placeholder),
|
1152 |
+
use_container_width=True,
|
1153 |
+
)
|
1154 |
+
|
1155 |
+
st.markdown("""<hr class="spends-heading-seperator">""", unsafe_allow_html=True)
|
1156 |
+
_columns = st.columns((2.5, 2, 1.5, 1.5, 1))
|
1157 |
+
with _columns[0]:
|
1158 |
+
generate_spending_header("Channel")
|
1159 |
+
with _columns[1]:
|
1160 |
+
generate_spending_header("Spends Input")
|
1161 |
+
with _columns[2]:
|
1162 |
+
generate_spending_header("Spends")
|
1163 |
+
with _columns[3]:
|
1164 |
+
generate_spending_header(target)
|
1165 |
+
with _columns[4]:
|
1166 |
+
generate_spending_header("Optimize")
|
1167 |
+
|
1168 |
+
st.markdown("""<hr class="spends-heading-seperator">""", unsafe_allow_html=True)
|
1169 |
+
|
1170 |
+
if "acutual_predicted" not in st.session_state:
|
1171 |
+
st.session_state["acutual_predicted"] = {
|
1172 |
+
"Channel_name": [],
|
1173 |
+
"Actual_spend": [],
|
1174 |
+
"Optimized_spend": [],
|
1175 |
+
"Delta": [],
|
1176 |
+
}
|
1177 |
+
for i, channel_name in enumerate(channels_list):
|
1178 |
+
_channel_class = st.session_state["scenario"].channels[channel_name]
|
1179 |
+
_columns = st.columns((2.5, 1.5, 1.5, 1.5, 1))
|
1180 |
+
with _columns[0]:
|
1181 |
+
st.write(channel_name_formating(channel_name))
|
1182 |
+
bin_placeholder = st.container()
|
1183 |
+
|
1184 |
+
with _columns[1]:
|
1185 |
+
channel_bounds = _channel_class.bounds
|
1186 |
+
channel_spends = float(_channel_class.actual_total_spends)
|
1187 |
+
min_value = float((1 + channel_bounds[0] / 100) * channel_spends)
|
1188 |
+
max_value = float((1 + channel_bounds[1] / 100) * channel_spends)
|
1189 |
+
##print(st.session_state[channel_name])
|
1190 |
+
spend_input = st.text_input(
|
1191 |
+
channel_name,
|
1192 |
+
key=channel_name,
|
1193 |
+
label_visibility="collapsed",
|
1194 |
+
on_change=partial(update_data, channel_name),
|
1195 |
+
)
|
1196 |
+
if not validate_input(spend_input):
|
1197 |
+
st.error("Invalid input")
|
1198 |
+
|
1199 |
+
channel_name_current = f"{channel_name}_change"
|
1200 |
+
|
1201 |
+
st.number_input(
|
1202 |
+
"Percent Change",
|
1203 |
+
key=channel_name_current,
|
1204 |
+
step=1,
|
1205 |
+
on_change=partial(update_data_by_percent, channel_name),
|
1206 |
+
)
|
1207 |
+
|
1208 |
+
with _columns[2]:
|
1209 |
+
# spends
|
1210 |
+
current_channel_spends = float(
|
1211 |
+
_channel_class.modified_total_spends
|
1212 |
+
* _channel_class.conversion_rate
|
1213 |
+
)
|
1214 |
+
actual_channel_spends = float(
|
1215 |
+
_channel_class.actual_total_spends * _channel_class.conversion_rate
|
1216 |
+
)
|
1217 |
+
spends_delta = float(
|
1218 |
+
_channel_class.delta_spends * _channel_class.conversion_rate
|
1219 |
+
)
|
1220 |
+
st.session_state["acutual_predicted"]["Channel_name"].append(
|
1221 |
+
channel_name
|
1222 |
+
)
|
1223 |
+
st.session_state["acutual_predicted"]["Actual_spend"].append(
|
1224 |
+
actual_channel_spends
|
1225 |
+
)
|
1226 |
+
st.session_state["acutual_predicted"]["Optimized_spend"].append(
|
1227 |
+
current_channel_spends
|
1228 |
+
)
|
1229 |
+
st.session_state["acutual_predicted"]["Delta"].append(spends_delta)
|
1230 |
+
## REMOVE
|
1231 |
+
st.metric(
|
1232 |
+
"Spends",
|
1233 |
+
format_numbers(current_channel_spends),
|
1234 |
+
delta=numerize(spends_delta, 1),
|
1235 |
+
label_visibility="collapsed",
|
1236 |
+
)
|
1237 |
+
|
1238 |
+
with _columns[3]:
|
1239 |
+
# sales
|
1240 |
+
current_channel_sales = float(_channel_class.modified_total_sales)
|
1241 |
+
actual_channel_sales = float(_channel_class.actual_total_sales)
|
1242 |
+
sales_delta = float(_channel_class.delta_sales)
|
1243 |
+
st.metric(
|
1244 |
+
target,
|
1245 |
+
format_numbers(current_channel_sales, include_indicator=False),
|
1246 |
+
delta=numerize(sales_delta, 1),
|
1247 |
+
label_visibility="collapsed",
|
1248 |
+
)
|
1249 |
+
|
1250 |
+
with _columns[4]:
|
1251 |
+
|
1252 |
+
# if st.checkbox(
|
1253 |
+
# label="select for optimization",
|
1254 |
+
# key=f"{channel_name}_selected",
|
1255 |
+
# value=False,
|
1256 |
+
# # on_change=partial(select_channel_for_optimization, channel_name),
|
1257 |
+
# label_visibility="collapsed",
|
1258 |
+
# ):
|
1259 |
+
# select_channel_for_optimization(channel_name)
|
1260 |
+
|
1261 |
+
st.checkbox(
|
1262 |
+
label="select for optimization",
|
1263 |
+
key=f"{channel_name}_selected",
|
1264 |
+
value=False,
|
1265 |
+
on_change=partial(select_channel_for_optimization, channel_name),
|
1266 |
+
label_visibility="collapsed",
|
1267 |
+
)
|
1268 |
+
|
1269 |
+
st.markdown(
|
1270 |
+
"""<hr class="spends-child-seperator">""",
|
1271 |
+
unsafe_allow_html=True,
|
1272 |
+
)
|
1273 |
+
|
1274 |
+
# Bins
|
1275 |
+
col = channels_list[i]
|
1276 |
+
x_actual = st.session_state["scenario"].channels[col].actual_spends
|
1277 |
+
x_modified = st.session_state["scenario"].channels[col].modified_spends
|
1278 |
+
|
1279 |
+
x_total = x_modified.sum()
|
1280 |
+
power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3
|
1281 |
+
|
1282 |
+
updated_rcs_key = f"{metrics_selected}#@{panel_selected}#@{channel_name}"
|
1283 |
+
|
1284 |
+
if updated_rcs and updated_rcs_key in list(updated_rcs.keys()):
|
1285 |
+
K = updated_rcs[updated_rcs_key]["K"]
|
1286 |
+
b = updated_rcs[updated_rcs_key]["b"]
|
1287 |
+
a = updated_rcs[updated_rcs_key]["a"]
|
1288 |
+
x0 = updated_rcs[updated_rcs_key]["x0"]
|
1289 |
+
else:
|
1290 |
+
K = st.session_state["rcs"][col]["K"]
|
1291 |
+
b = st.session_state["rcs"][col]["b"]
|
1292 |
+
a = st.session_state["rcs"][col]["a"]
|
1293 |
+
x0 = st.session_state["rcs"][col]["x0"]
|
1294 |
+
|
1295 |
+
x_plot = np.linspace(0, 5 * x_actual.sum(), 200)
|
1296 |
+
|
1297 |
+
# Append current_channel_spends to the end of x_plot
|
1298 |
+
x_plot = np.append(x_plot, current_channel_spends)
|
1299 |
+
|
1300 |
+
x, y, marginal_roi = [], [], []
|
1301 |
+
for x_p in x_plot:
|
1302 |
+
x.append(x_p * x_actual / x_actual.sum())
|
1303 |
+
|
1304 |
+
for index in range(len(x_plot)):
|
1305 |
+
y.append(s_curve(x[index] / 10**power, K, b, a, x0))
|
1306 |
+
|
1307 |
+
for index in range(len(x_plot)):
|
1308 |
+
marginal_roi.append(
|
1309 |
+
a * y[index] * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
|
1310 |
+
)
|
1311 |
+
|
1312 |
+
x = (
|
1313 |
+
np.sum(x, axis=1)
|
1314 |
+
* st.session_state["scenario"].channels[col].conversion_rate
|
1315 |
+
)
|
1316 |
+
y = np.sum(y, axis=1)
|
1317 |
+
marginal_roi = (
|
1318 |
+
np.average(marginal_roi, axis=1)
|
1319 |
+
/ st.session_state["scenario"].channels[col].conversion_rate
|
1320 |
+
)
|
1321 |
+
|
1322 |
+
roi = y / np.maximum(x, np.finfo(float).eps)
|
1323 |
+
|
1324 |
+
roi_current, marginal_roi_current = roi[-1], marginal_roi[-1]
|
1325 |
+
x, y, roi, marginal_roi = (
|
1326 |
+
x[:-1],
|
1327 |
+
y[:-1],
|
1328 |
+
roi[:-1],
|
1329 |
+
marginal_roi[:-1],
|
1330 |
+
) # Drop data for current spends
|
1331 |
+
|
1332 |
+
start_value, end_value, left_value, right_value = find_segment_value(
|
1333 |
+
x,
|
1334 |
+
roi,
|
1335 |
+
marginal_roi,
|
1336 |
+
)
|
1337 |
+
|
1338 |
+
rgba = calculate_rgba(
|
1339 |
+
start_value,
|
1340 |
+
end_value,
|
1341 |
+
left_value,
|
1342 |
+
right_value,
|
1343 |
+
current_channel_spends,
|
1344 |
+
)
|
1345 |
+
|
1346 |
+
with bin_placeholder:
|
1347 |
+
st.markdown(
|
1348 |
+
f"""
|
1349 |
+
<div style="
|
1350 |
+
border-radius: 12px;
|
1351 |
+
background-color: {rgba};
|
1352 |
+
padding: 10px;
|
1353 |
+
text-align: center;
|
1354 |
+
color: #006EC0;
|
1355 |
+
">
|
1356 |
+
<p style="margin: 0; font-size: 20px;">ROI: {round(roi_current,1)}</p>
|
1357 |
+
<p style="margin: 0; font-size: 20px;">Marginal ROI: {round(marginal_roi_current,1)}</p>
|
1358 |
+
</div>
|
1359 |
+
""",
|
1360 |
+
unsafe_allow_html=True,
|
1361 |
+
)
|
1362 |
+
|
1363 |
+
with st.expander("See Response Curves", expanded=True):
|
1364 |
+
fig = plot_response_curves()
|
1365 |
+
st.plotly_chart(fig, use_container_width=True)
|
1366 |
+
|
1367 |
+
_columns = st.columns(2)
|
1368 |
+
with _columns[0]:
|
1369 |
+
st.subheader("Save Scenario")
|
1370 |
+
scenario_name = st.text_input(
|
1371 |
+
"Scenario name",
|
1372 |
+
key="scenario_input",
|
1373 |
+
placeholder="Scenario name",
|
1374 |
+
label_visibility="collapsed",
|
1375 |
+
)
|
1376 |
+
st.button(
|
1377 |
+
"Save",
|
1378 |
+
on_click=lambda: save_scenario(scenario_name),
|
1379 |
+
disabled=len(st.session_state["scenario_input"]) == 0,
|
1380 |
+
)
|
1381 |
+
|
1382 |
+
summary_df = pd.DataFrame(st.session_state["acutual_predicted"])
|
1383 |
+
summary_df.drop_duplicates(subset="Channel_name", keep="last", inplace=True)
|
1384 |
+
|
1385 |
+
summary_df_sorted = summary_df.sort_values(by="Delta", ascending=False)
|
1386 |
+
summary_df_sorted["Delta_percent"] = np.round(
|
1387 |
+
((summary_df_sorted["Optimized_spend"] / summary_df_sorted["Actual_spend"]) - 1)
|
1388 |
+
* 100,
|
1389 |
+
2,
|
1390 |
+
)
|
1391 |
+
|
1392 |
+
with open("summary_df.pkl", "wb") as f:
|
1393 |
+
pickle.dump(summary_df_sorted, f)
|
1394 |
+
# st.dataframe(summary_df_sorted)
|
1395 |
+
# ___columns=st.columns(3)
|
1396 |
+
# with ___columns[2]:
|
1397 |
+
# fig=summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent')
|
1398 |
+
# st.plotly_chart(fig,use_container_width=True)
|
1399 |
+
# with ___columns[0]:
|
1400 |
+
# fig=summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend')
|
1401 |
+
# st.plotly_chart(fig,use_container_width=True)
|
1402 |
+
# with ___columns[1]:
|
1403 |
+
# fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend')
|
1404 |
+
# st.plotly_chart(fig,use_container_width=True)
|
1405 |
+
|
1406 |
+
elif auth_status == False:
|
1407 |
+
st.error("Username/Password is incorrect")
|
1408 |
+
|
1409 |
+
# Fallback for unauthenticated (or failed-login) users: a "forgot password"
# flow that generates a random password, stores its hash in the in-session
# credentials config, and emails the plaintext to the registered address.
if auth_status != True:
    try:
        username_forgot_pw, email_forgot_password, random_password = (
            authenticator.forgot_password("Forgot password")
        )
        if username_forgot_pw:
            # Replace the stored hash with the hash of the new random password.
            # NOTE(review): this mutates only st.session_state["config"]; the
            # YAML credentials file is presumably rewritten elsewhere -- confirm.
            st.session_state["config"]["credentials"]["usernames"][username_forgot_pw][
                "password"
            ] = stauth.Hasher([random_password]).generate()[0]
            send_email(email_forgot_password, random_password)
            st.success("New password sent securely")
            # Random password to be transferred to user securely
        elif username_forgot_pw == False:
            # forgot_password signals an unknown username with False.
            st.error("Username not found")
    except Exception as e:
        st.error(e)
|
pages/9_Saved_Scenarios.py
ADDED
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from numerize.numerize import numerize
|
3 |
+
import io
|
4 |
+
import pandas as pd
|
5 |
+
from utilities import (format_numbers,decimal_formater,
|
6 |
+
channel_name_formating,
|
7 |
+
load_local_css,set_header,
|
8 |
+
initialize_data,
|
9 |
+
load_authenticator)
|
10 |
+
from openpyxl import Workbook
|
11 |
+
from openpyxl.styles import Alignment,Font,PatternFill
|
12 |
+
import pickle
|
13 |
+
import streamlit_authenticator as stauth
|
14 |
+
import yaml
|
15 |
+
from yaml import SafeLoader
|
16 |
+
from classes import class_from_dict
|
17 |
+
|
18 |
+
# Page chrome: full-width layout, the shared stylesheet, and the common header.
st.set_page_config(layout='wide')
load_local_css('styles.css')
set_header()

# for k, v in st.session_state.items():
#     if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
#         st.session_state[k] = v
|
25 |
+
|
26 |
+
def create_scenario_summary(scenario_dict):
    """Build the Actual-vs-Simulated summary table for one saved scenario.

    One row per channel plus a trailing ``Total`` row, under a two-level
    column header ``(metric, Actual/Simulated)`` with metrics Spends, NRPU,
    ROI, MROI and Spend per NRPU. Channel spends are scaled by the channel's
    conversion rate before any ratio is taken.
    """
    rows = []
    for ch in scenario_dict['channels']:
        # Spends are kept in converted units throughout.
        actual_spend = ch.get('actual_total_spends') * ch.get('conversion_rate')
        simulated_spend = ch.get('modified_total_spends') * ch.get('conversion_rate')
        actual_sales = ch.get('actual_total_sales')
        simulated_sales = ch.get('modified_total_sales')
        rows.append([
            channel_name_formating(ch['name']),
            actual_spend,
            simulated_spend,
            actual_sales,
            simulated_sales,
            actual_sales / actual_spend,
            simulated_sales / simulated_spend,
            ch.get('actual_mroi'),
            ch.get('modified_mroi'),
            actual_spend / actual_sales,
            simulated_spend / simulated_sales,
        ])

    # Scenario-level totals; MROI has no meaningful aggregate, hence '-'.
    total_actual_spend = scenario_dict.get('actual_total_spends')
    total_sim_spend = scenario_dict.get('modified_total_spends')
    total_actual_sales = scenario_dict.get('actual_total_sales')
    total_sim_sales = scenario_dict.get('modified_total_sales')
    rows.append([
        'Total',
        total_actual_spend,
        total_sim_spend,
        total_actual_sales,
        total_sim_sales,
        total_actual_sales / total_actual_spend,
        total_sim_sales / total_sim_spend,
        '-',
        '-',
        total_actual_spend / total_actual_sales,
        total_sim_spend / total_sim_sales,
    ])

    header = pd.MultiIndex.from_product([[''], ['Channel']], names=["first", "second"])
    header = header.append(
        pd.MultiIndex.from_product(
            [['Spends', 'NRPU', 'ROI', 'MROI', 'Spend per NRPU'], ['Actual', 'Simulated']],
            names=["first", "second"],
        )
    )
    return pd.DataFrame(rows, columns=header)
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
def summary_df_to_worksheet(df, ws):
    """Write a summary DataFrame with a 2-level column header into worksheet *ws*.

    Worksheet rows 1-2 hold the two header levels (teal fill, bold teal font);
    data starts at row 3. Groups of top-level header cells are merged and
    centred, and spend-like columns get a currency number format.
    """
    heading_fill = PatternFill(fill_type='solid',start_color='FF11B6BD',end_color='FF11B6BD')
    for j,header in enumerate(df.columns.values):
        col = j + 1  # openpyxl columns are 1-based
        for i in range(1,3):
            # header is a (first, second) tuple from the column MultiIndex;
            # its two levels go to worksheet rows 1 and 2 respectively.
            ws.cell(row=i, column=j + 1, value=header[i - 1]).font = Font(bold=True, color='FF11B6BD')
            ws.cell(row=i,column=j+1).fill = heading_fill
        if col > 1 and (col - 6)%5==0:
            # Merge the tail of each scenario group and centre its title.
            # NOTE(review): the (col - 6) % 5 stride assumes the layout built by
            # download_scenarios (one label column followed by 5-column scenario
            # groups, merging 4 cells per group) -- confirm if the shape changes.
            ws.merge_cells(start_row=1, end_row=1, start_column = col-3, end_column=col)
            ws.cell(row=1,column=col).alignment = Alignment(horizontal='center')
    for i,row in enumerate(df.itertuples()):
        for j,value in enumerate(row):
            if j == 0:
                # Element 0 of itertuples() is the DataFrame index; skip it so
                # the first data value lands in worksheet column 1.
                continue
            elif (j-2)%4 == 0 or (j-3)%4 == 0:
                # Currency format for the spend-like column pairs.
                # NOTE(review): a 4-column stride against 5-column scenario
                # groups drifts after the first group -- confirm intent.
                ws.cell(row=i+3, column = j, value=value).number_format = '$#,##0.0'
            else:
                ws.cell(row=i+3, column = j, value=value)
|
78 |
+
|
79 |
+
from openpyxl.utils import get_column_letter
|
80 |
+
from openpyxl.styles import Font, PatternFill
|
81 |
+
import logging
|
82 |
+
|
83 |
+
def scenario_df_to_worksheet(df, ws):
    """Write a flat (single-level-header) scenario DataFrame into worksheet *ws*.

    Row 1 holds the styled column headers; data starts at row 2. Numeric cells
    get a currency format, strings are truncated to Excel's 32767-character
    cell limit, and anything else is stringified. A cell whose value openpyxl
    rejects is logged and left blank instead of aborting the export.
    """
    heading_fill = PatternFill(start_color='FF11B6BD', end_color='FF11B6BD', fill_type='solid')

    for j, header in enumerate(df.columns.values):
        cell = ws.cell(row=1, column=j + 1, value=header)
        cell.font = Font(bold=True, color='FF11B6BD')
        cell.fill = heading_fill

    for i, row in enumerate(df.itertuples()):
        for j, value in enumerate(row[1:], start=1):  # row[0] is the index; skip it
            try:
                cell = ws.cell(row=i + 2, column=j, value=value)
                if isinstance(value, (int, float)):
                    cell.number_format = '$#,##0.0'
                elif isinstance(value, str):
                    cell.value = value[:32767]  # Excel hard limit per cell
                else:
                    cell.value = str(value)
            except ValueError as e:
                logging.error(f"Error assigning value '{value}' to cell {get_column_letter(j)}{i+2}: {e}")
                # BUGFIX: the old code did `cell.value = None`, but if ws.cell()
                # itself raised, `cell` was unbound (NameError on the first cell)
                # or pointed at the PREVIOUS cell, clobbering good data. Blank
                # the failed coordinate explicitly instead.
                ws.cell(row=i + 2, column=j, value=None)

    return ws
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
+
|
110 |
+
|
111 |
+
|
112 |
+
def download_scenarios():
    """Build an xlsx workbook for every scenario in ``scenarios_to_download``.

    One sheet per scenario holds the date-level spends plus a summed NRPU
    column; a leading "Summary" sheet shows per-channel totals for all
    selected scenarios side by side. The finished workbook is written into
    ``st.session_state['xlsx_buffer']`` and the download button is re-enabled.
    """
    if len(scenarios_to_download) == 0:
        return
    wb = Workbook()
    wb.iso_dates = True
    wb.remove(wb.active)  # drop the default empty sheet
    st.session_state['xlsx_buffer'] = io.BytesIO()
    summary_df = None
    for scenario_name in scenarios_to_download:
        scenario_dict = st.session_state['saved_scenarios'][scenario_name]
        _spends = []
        column_names = ['Date']
        _sales = None
        dates = None
        summary_rows = []
        for channel in scenario_dict['channels']:
            if dates is None:
                # All channels share one date axis; take it from the first.
                dates = channel.get('dates')
                _spends.append(dates)
            if _sales is None:
                _sales = channel.get('modified_sales')
            else:
                _sales += channel.get('modified_sales')
            conversion_rate = channel.get('conversion_rate')
            total_spends = channel.get('modified_total_spends') * conversion_rate
            total_sales = channel.get('modified_total_sales')
            _spends.append(channel.get('modified_spends') * conversion_rate)
            column_names.append(channel.get('name'))

            name_mod = channel_name_formating(channel['name'])
            # BUGFIX: ROI is sales / (spends * conversion_rate); the previous
            # `sales / spends * rate` divided first and then MULTIPLIED by the
            # rate (cf. the parenthesized form in create_scenario_summary).
            # Likewise "Spends per NRPU" is spends / sales, not a copy of the
            # ROI expression as before (matching the Total row below).
            summary_rows.append([name_mod,
                                 total_spends,
                                 total_sales,
                                 total_sales / total_spends,
                                 channel.get('modified_mroi'),
                                 total_spends / total_sales])
        _spends.append(_sales)
        column_names.append('NRPU')
        scenario_df = pd.DataFrame(_spends).T
        scenario_df.columns = column_names
        # One sheet per scenario with the raw date-level data.
        ws = wb.create_sheet(scenario_name)
        scenario_df_to_worksheet(scenario_df, ws)
        # Scenario-level totals; MROI has no meaningful aggregate, hence '-'.
        summary_rows.append(['Total',
                             scenario_dict.get('modified_total_spends'),
                             scenario_dict.get('modified_total_sales'),
                             scenario_dict.get('modified_total_sales') / scenario_dict.get('modified_total_spends'),
                             '-',
                             scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')])
        columns_index = pd.MultiIndex.from_product([[''], ['Channel']], names=["first", "second"])
        columns_index = columns_index.append(
            pd.MultiIndex.from_product(
                [[scenario_name], ['Spends', 'NRPU', 'ROI', 'MROI', 'Spends per NRPU']],
                names=["first", "second"],
            )
        )
        if summary_df is None:
            summary_df = pd.DataFrame(summary_rows, columns=columns_index)
            summary_df = summary_df.set_index(('', 'Channel'))
        else:
            _df = pd.DataFrame(summary_rows, columns=columns_index)
            _df = _df.set_index(('', 'Channel'))
            # Align scenarios on channel name so they appear side by side.
            summary_df = summary_df.merge(_df, left_index=True, right_index=True)
    ws = wb.create_sheet('Summary', 0)  # Summary sheet goes first
    summary_df_to_worksheet(summary_df.reset_index(), ws)
    wb.save(st.session_state['xlsx_buffer'])
    st.session_state['disable_download_button'] = False
|
176 |
+
|
177 |
+
def disable_download_button():
    """Callback: grey out the download button until download_scenarios re-enables it."""
    st.session_state['disable_download_button'] =True
|
179 |
+
|
180 |
+
def transform(x):
    """Format one summary column for display.

    The ``('', 'Channel')`` label column passes through untouched; ROI/MROI
    columns are rendered with 4 decimal places and no currency indicator;
    every other column uses the default number formatting. String cells
    (e.g. the '-' placeholders in the Total row) are always left as-is.
    """
    if x.name == ("", 'Channel'):
        return x

    if x.name[0] in ('ROI', 'MROI'):
        def fmt(y):
            if isinstance(y, str):
                return y
            return decimal_formater(
                format_numbers(y, include_indicator=False, n_decimals=4),
                n_decimals=4,
            )
    else:
        def fmt(y):
            return y if isinstance(y, str) else format_numbers(y)

    return x.apply(fmt)
|
187 |
+
|
188 |
+
def delete_scenario():
    """Remove the sidebar-selected scenario and persist the rest to disk."""
    saved = st.session_state['saved_scenarios']
    if selected_scenario not in saved:
        return
    del saved[selected_scenario]
    with open('../saved_scenarios.pkl', 'wb') as f:
        pickle.dump(saved, f)
|
193 |
+
|
194 |
+
def load_scenario():
    """Make the selected saved scenario the active one for the planner pages."""
    saved = st.session_state['saved_scenarios']
    if selected_scenario in saved:
        # Rehydrate the Scenario object from its serialized dict form.
        st.session_state['scenario'] = class_from_dict(selected_scenario_details)
|
197 |
+
|
198 |
+
|
199 |
+
|
200 |
+
# ---- Authentication gate -------------------------------------------------
# Reuse an authenticator another page already stored in session state;
# otherwise build one via load_authenticator().
authenticator = st.session_state.get('authenticator')
if authenticator is None:
    authenticator = load_authenticator()

name, authentication_status, username = authenticator.login('Login', 'main')
auth_status = st.session_state.get('authentication_status')

if auth_status == True:
    # Initialize shared page state once per session.
    is_state_initiaized = st.session_state.get('initialized',False)
    if not is_state_initiaized:
        #print("Scenario page state reloaded")
        initialize_data()

    saved_scenarios = st.session_state['saved_scenarios']

    if len(saved_scenarios) ==0:
        st.header('No saved scenarios')

    else:

        # Sidebar: pick one scenario to inspect and optionally export several.
        with st.sidebar:
            selected_scenario = st.radio(
                'Pick a scenario to view details',
                list(saved_scenarios.keys())
            )
            st.markdown("""<hr>""", unsafe_allow_html=True)
            scenarios_to_download = st.multiselect('Select scenarios to download',
                                                   list(saved_scenarios.keys()))

            # "Prepare download" builds the workbook into
            # st.session_state['xlsx_buffer'] and re-enables the button below.
            # NOTE(review): 'xlsx_buffer' and 'disable_download_button' must be
            # pre-seeded (presumably by initialize_data), or the download_button
            # line raises KeyError before the first "Prepare download" -- confirm.
            st.button('Prepare download',on_click=download_scenarios)
            st.download_button(
                label="Download Scenarios",
                data=st.session_state['xlsx_buffer'].getvalue(),
                file_name="scenarios.xlsx",
                mime="application/vnd.ms-excel",
                disabled= st.session_state['disable_download_button'],
                on_click= disable_download_button
            )

        # Header row with delete / load actions for the selected scenario.
        column_1, column_2,column_3 = st.columns((6,1,1))
        with column_1:
            st.header(selected_scenario)
        with column_2:
            st.button('Delete scenarios', on_click=delete_scenario)
        with column_3:
            st.button('Load Scenario', on_click=load_scenario)

        selected_scenario_details = saved_scenarios[selected_scenario]

        pd.set_option('display.max_colwidth', 100)

        # Render the formatted summary table as styled HTML.
        st.markdown(create_scenario_summary(selected_scenario_details).transform(transform).style.set_table_styles(
        [{
            'selector': 'th',
            'props': [('background-color', '#11B6BD')]
        },
        {
            'selector' : 'tr:nth-child(even)',
            'props' : [('background-color', '#11B6BD')]
        }
        ]).to_html(),unsafe_allow_html=True)

elif auth_status == False:
    st.error('Username/Password is incorrect')

# Forgot-password flow for unauthenticated users. NOTE(review): unlike the
# analysis page's version, this one only reports success; it neither stores a
# new password hash nor sends an email -- confirm whether that is intended.
if auth_status != True:
    try:
        username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
        if username_forgot_pw:
            st.success('New password sent securely')
            # Random password to be transferred to user securely
        elif username_forgot_pw == False:
            st.error('Username not found')
    except Exception as e:
        st.error(e)
|