BlendMMM committed on
Commit
bd80083
·
verified ·
1 Parent(s): 317f3b2

Upload 10 files

Browse files
pages/10_Optimized_Result_Analysis.py ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from numerize.numerize import numerize
3
+ import pandas as pd
4
+ from utilities import (format_numbers,decimal_formater,
5
+ load_local_css,set_header,
6
+ initialize_data,
7
+ load_authenticator)
8
+ import pickle
9
+ import streamlit_authenticator as stauth
10
+ import yaml
11
+ from yaml import SafeLoader
12
+ from classes import class_from_dict
13
+ import plotly.express as px
14
+ import numpy as np
15
+ import plotly.graph_objects as go
16
+ import pandas as pd
17
+
18
+
19
def summary_plot(data, x, y, title, text_column, color, format_as_percent=False, format_as_decimal=False):
    """Horizontal bar chart of one metric per channel.

    Parameters
    ----------
    data : pd.DataFrame
        Source frame; not mutated (a copy is taken).
    x, y : str
        Column names for the value axis and the channel axis.
    title : str
        Chart title.
    text_column : str
        Column rendered as the bar labels.
    color : str
        Column used to colour the bars.
    format_as_percent, format_as_decimal : bool
        Mutually exclusive label formats; default is SI notation ('.2s').

    Returns
    -------
    plotly figure.
    """
    # BUG FIX: the original coerced `text_column` to numeric AFTER building
    # the figure (no effect on the rendered labels) and mutated the caller's
    # DataFrame. Convert on a copy, before plotting.
    data = data.copy()
    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')

    fig = px.bar(data, x=x, y=y, orientation='h',
                 title=title, text=text_column, color=color)

    # Update the format of the displayed text based on the chosen format
    if format_as_percent:
        fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
    elif format_as_decimal:
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
    else:
        fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')

    fig.update_layout(xaxis_title=x, yaxis_title='Channel Name', showlegend=False)
    return fig
35
+
36
+
37
def stacked_summary_plot(data, x, y, title, text_column, color_column, stack_column=None, format_as_percent=False, format_as_decimal=False):
    """Horizontal bar chart, optionally faceted by *stack_column*.

    Same contract as summary_plot, with an extra `color_column` for bar
    colours and an optional `stack_column` used as plotly facet columns.
    Returns a plotly figure; does not mutate *data*.
    """
    # BUG FIX: as in summary_plot, the numeric coercion originally happened
    # after the figure was built (useless for the plot) and mutated the
    # caller's frame. Do it first, on a copy.
    data = data.copy()
    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')

    fig = px.bar(data, x=x, y=y, orientation='h',
                 title=title, text=text_column, color=color_column, facet_col=stack_column)

    # Update the format of the displayed text based on the chosen format
    if format_as_percent:
        fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
    elif format_as_decimal:
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
    else:
        fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')

    fig.update_layout(xaxis_title=x, yaxis_title='', showlegend=False)
    return fig
53
+
54
+
55
+
56
def funnel_plot(data, x, y, title, text_column, color_column, format_as_percent=False, format_as_decimal=False):
    """Funnel chart of `x` values per `y` stage.

    NOTE: mutates *data* in place (numeric coercion and rounding of
    `text_column`) — callers should pass a copy if that matters.
    """
    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')

    # Round the numeric values in the text column to two decimal points
    data[text_column] = data[text_column].round(2)

    # Create a color map for categorical data — deterministic pseudo-colours
    # derived from each category's enumeration index
    color_map = {category: f'rgb({i * 30 % 255},{i * 50 % 255},{i * 70 % 255})' for i, category in enumerate(data[color_column].unique())}

    fig = go.Figure(go.Funnel(
        y=data[y],
        x=data[x],
        text=data[text_column],
        marker=dict(color=data[color_column].map(color_map)),
        textinfo="value",
        hoverinfo='y+x+text'
    ))

    # Update the format of the displayed text based on the chosen format
    # NOTE(review): these set funnelmode, not the text format — the
    # percent/decimal flags may not do what the name suggests; confirm.
    if format_as_percent:
        fig.update_layout(title=title, funnelmode="percent")
    elif format_as_decimal:
        fig.update_layout(title=title, funnelmode="overlay")
    else:
        fig.update_layout(title=title, funnelmode="group")

    return fig
83
+
84
+
85
# ---------------------------------------------------------------------------
# Page scaffolding and "Optimized Spends Overview" / "Budget Allocation"
# ---------------------------------------------------------------------------
st.set_page_config(layout='wide')
load_local_css('styles.css')
set_header()

# for k, v in st.session_state.items():
#     if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
#         st.session_state[k] = v

st.empty()
st.header('Model Result Analysis')
# Spend overview sheet; its Date column is used below to align the raw data
spends_data=pd.read_excel('Overview_data_test.xlsx')

# Optimizer output: one row per channel (Actual_spend, Optimized_spend, ...)
with open('summary_df.pkl', 'rb') as file:
    summary_df_sorted = pickle.load(file)

# NOTE(review): the selected scenario is never read below — confirm intended
selected_scenario= st.selectbox('Select Saved Scenarios',['S1','S2'])

st.header('Optimized Spends Overview')
___columns=st.columns(3)
with ___columns[2]:
    fig=summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent',color='Channel_name')
    st.plotly_chart(fig,use_container_width=True)
with ___columns[0]:
    fig=summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend',color='Channel_name')
    st.plotly_chart(fig,use_container_width=True)
with ___columns[1]:
    fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend',color='Channel_name')
    # NOTE(review): use_container_width=False here while the sibling charts
    # use True — confirm this inconsistency is intentional
    st.plotly_chart(fig,use_container_width=False)

st.header(' Budget Allocation')
# Share of total optimized spend per channel, rounded to 2 dp
summary_df_sorted['Perc_alloted']=np.round(summary_df_sorted['Optimized_spend']/summary_df_sorted['Optimized_spend'].sum(),2)
columns2=st.columns(2)
with columns2[0]:
    fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend',color='Channel_name')
    st.plotly_chart(fig,use_container_width=True)
with columns2[1]:
    fig=summary_plot(summary_df_sorted, x='Perc_alloted', y='Channel_name', title='% Split', text_column='Perc_alloted',color='Channel_name',format_as_percent=True)
    st.plotly_chart(fig,use_container_width=True)
123
+
124
+
125
# Load the raw channel-level data once per session, restricted to the
# channels and dates present in the optimizer output.
# NOTE(review): the two filters are assumed to sit inside the guard so they
# only run on first load — confirm against the original indentation.
if 'raw_data' not in st.session_state:
    st.session_state['raw_data']=pd.read_excel('raw_data_nov7_combined1.xlsx')
    st.session_state['raw_data']=st.session_state['raw_data'][st.session_state['raw_data']['MediaChannelName'].isin(summary_df_sorted['Channel_name'].unique())]
    st.session_state['raw_data']=st.session_state['raw_data'][st.session_state['raw_data']['Date'].isin(spends_data["Date"].unique())]
129
+
130
+
131
+
132
+ #st.write(st.session_state['raw_data']['ResponseMetricName'])
133
+ # st.write(st.session_state['raw_data'])
134
+
135
+
136
st.header('Response Forecast Overview')
raw_data=st.session_state['raw_data']
# Total response per metric across all channels/dates
effectiveness_overall=raw_data.groupby('ResponseMetricName').agg({'ResponseMetricValue': 'sum'}).reset_index()
# Efficiency = response units per total media dollar spent
effectiveness_overall['Efficiency']=effectiveness_overall['ResponseMetricValue'].map(lambda x: x/raw_data['Media Spend'].sum() )
# st.write(effectiveness_overall)

columns6=st.columns(3)

effectiveness_overall.sort_values(by=['ResponseMetricValue'],ascending=False,inplace=True)
effectiveness_overall=np.round(effectiveness_overall,2)
# Bucket each metric into a BAU / Gamified family (used below for colouring)
effectiveness_overall['ResponseMetric'] = effectiveness_overall['ResponseMetricName'].apply(lambda x: 'BAU' if 'BAU' in x else ('Gamified' if 'Gamified' in x else x))
# effectiveness_overall=np.where(effectiveness_overall[effectiveness_overall['ResponseMetricName']=="Adjusted Account Approval BAU"],"Adjusted Account Approval BAU",effectiveness_overall['ResponseMetricName'])

# Merge the BAU and Gamified approved-client rows under one display label
effectiveness_overall.replace({'ResponseMetricName':{'BAU approved clients - Appsflyer':'Approved clients - Appsflyer',
                                                     'Gamified approved clients - Appsflyer':'Approved clients - Appsflyer'}},inplace=True)

# st.write(effectiveness_overall.sort_values(by=['ResponseMetricValue'],ascending=False))


condition = effectiveness_overall['ResponseMetricName'] == "Adjusted Account Approval BAU"
condition1= effectiveness_overall['ResponseMetricName'] == "Approved clients - Appsflyer"
effectiveness_overall['ResponseMetric'] = np.where(condition, "Adjusted Account Approval BAU", effectiveness_overall['ResponseMetric'])

effectiveness_overall['ResponseMetricName'] = np.where(condition1, "Approved clients - Appsflyer (BAU, Gamified)", effectiveness_overall['ResponseMetricName'])
# effectiveness_overall=pd.DataFrame({'ResponseMetricName':["App Installs - Appsflyer",'Account Requests - Appsflyer',
#                                     'Total Adjusted Account Approval','Adjusted Account Approval BAU',
#                                     'Approved clients - Appsflyer','Approved clients - Appsflyer'],
#                                     'ResponseMetricValue':[683067,367020,112315,79768,36661,16834],
#                                     'Efficiency':[1.24,0.67,0.2,0.14,0.07,0.03],
# Fixed colour per metric / family. (Colour-name comments corrected to match
# the actual RGB values; the originals all said shades of blue.)
custom_colors = {
    'App Installs - Appsflyer': 'rgb(255, 135, 0)',        # orange
    'Account Requests - Appsflyer': 'rgb(125, 239, 161)',  # light green
    'Adjusted Account Approval': 'rgb(129, 200, 255)',     # light blue
    'Adjusted Account Approval BAU': 'rgb(255, 207, 98)',  # light amber
    'Approved clients - Appsflyer': 'rgb(0, 97, 198)',     # medium blue
    "BAU": 'rgb(41, 176, 157)',                            # teal
    "Gamified": 'rgb(213, 218, 229)'                       # light gray
    # Add more categories and their respective colours as needed
}
175
+
176
+
177
+
178
+
179
+
180
+
181
with columns6[0]:
    # Headline revenue in $M (the st.metric display is currently disabled)
    revenue=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='Total Approved Accounts - Revenue']['ResponseMetricValue']).iloc[0]
    revenue=round(revenue / 1_000_000, 2)

    # st.metric('Total Revenue', f"${revenue} M")
# with columns6[1]:
#     BAU=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='BAU approved clients - Revenue']['ResponseMetricValue']).iloc[0]
#     BAU=round(BAU / 1_000_000, 2)
#     st.metric('BAU approved clients - Revenue', f"${BAU} M")
# with columns6[2]:
#     Gam=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='Gamified approved clients - Revenue']['ResponseMetricValue']).iloc[0]
#     Gam=round(Gam / 1_000_000, 2)
#     st.metric('Gamified approved clients - Revenue', f"${Gam} M")

# st.write(effectiveness_overall)
# NOTE(review): hard-coded revenue split, not derived from the loaded data —
# confirm these figures are meant to be static
data = {'Revenue': ['BAU approved clients - Revenue', 'Gamified approved clients- Revenue'],
        'ResponseMetricValue': [70200000, 1770000],
        'Efficiency':[127.54,3.21]}
df = pd.DataFrame(data)


columns9=st.columns([0.60,0.40])
with columns9[0]:
    figd = px.pie(df,
                  names='Revenue',
                  values='ResponseMetricValue',
                  hole=0.3, # set the size of the hole in the donut
                  title='Effectiveness')
    figd.update_layout(
        margin=dict(l=0, r=0, b=0, t=0),width=100, height=180,legend=dict(
            orientation='v', # vertical legend (original comment said horizontal)
            x=0, # set x to 0 to move to the left
            y=0.8 # adjust y as needed
        )
    )

    st.plotly_chart(figd, use_container_width=True)

with columns9[1]:
    figd1 = px.pie(df,
                   names='Revenue',
                   values='Efficiency',
                   hole=0.3, # set the size of the hole in the donut
                   title='Efficiency')
    figd1.update_layout(
        margin=dict(l=0, r=0, b=0, t=0),width=100,height=180,showlegend=False
    )
    st.plotly_chart(figd1, use_container_width=True)
229
+
230
# Duplicate display column used as the y axis label in the charts below
effectiveness_overall['Response Metric Name']=effectiveness_overall['ResponseMetricName']



columns4= st.columns([0.55,0.45])
with columns4[0]:
    # Funnel of response volumes, excluding the revenue / total rows
    fig=px.funnel(effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
                                                                                            'BAU approved clients - Revenue',
                                                                                            'Gamified approved clients - Revenue',
                                                                                            "Total Approved Accounts - Appsflyer"]))],
                  x='ResponseMetricValue', y='Response Metric Name',color='ResponseMetric',
                  color_discrete_map=custom_colors,title='Effectiveness',
                  labels=None)
    custom_y_labels=['App Installs - Appsflyer','Account Requests - Appsflyer','Adjusted Account Approval','Adjusted Account Approval BAU',
                     "Approved clients - Appsflyer (BAU, Gamified)"
                     ]
    fig.update_layout(showlegend=False,
                      yaxis=dict(
                          tickmode='array',
                          ticktext=custom_y_labels,
                      )
                      )
    fig.update_traces(textinfo='value', textposition='inside', texttemplate='%{x:.2s} ', hoverinfo='y+x+percent initial')

    # NOTE(review): last_trace_index is computed but never used — confirm
    last_trace_index = len(fig.data) - 1
    fig.update_traces(marker=dict(line=dict(color='black', width=2)), selector=dict(marker=dict(color='blue')))

    st.plotly_chart(fig,use_container_width=True)





with columns4[1]:

    # Efficiency bar chart over the same filtered metric rows
    fig1 = px.bar((effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
                                                                                             'BAU approved clients - Revenue',
                                                                                             'Gamified approved clients - Revenue',
                                                                                             "Total Approved Accounts - Appsflyer"]))]).sort_values(by='ResponseMetricValue'),
                  x='Efficiency', y='Response Metric Name',
                  color_discrete_map=custom_colors, color='ResponseMetric',
                  labels=None,text_auto=True,title='Efficiency'
                  )

    # Update layout and traces
    fig1.update_traces(customdata=effectiveness_overall['Efficiency'],
                       textposition='auto')
    fig1.update_layout(showlegend=False)
    fig1.update_yaxes(title='',showticklabels=False)
    fig1.update_xaxes(title='',showticklabels=False)
    fig1.update_xaxes(tickfont=dict(size=20))
    fig1.update_yaxes(tickfont=dict(size=20))
    st.plotly_chart(fig1, use_container_width=True)
284
+
285
+
286
# Static BAU vs Gamified revenue split mirroring the donut-chart figures.
# NOTE(review): hard-coded and not referenced by any active chart below —
# confirm whether this frame is still needed.
effectiveness_overall_revenue=pd.DataFrame({'ResponseMetricName':['Approved Clients','Approved Clients'],
                                            'ResponseMetricValue':[70201070,1768900],
                                            'Efficiency':[127.54,3.21],
                                            'ResponseMetric':['BAU','Gamified']
                                            })
291
+ # from plotly.subplots import make_subplots
292
+ # fig = make_subplots(rows=1, cols=2,
293
+ # subplot_titles=["Effectiveness", "Efficiency"])
294
+
295
+ # # Add first plot as subplot
296
+ # fig.add_trace(go.Funnel(
297
+ # x = fig.data[0].x,
298
+ # y = fig.data[0].y,
299
+ # textinfo = 'value+percent initial',
300
+ # hoverinfo = 'x+y+percent initial'
301
+ # ), row=1, col=1)
302
+
303
+ # # Update layout for first subplot
304
+ # fig.update_xaxes(title_text="Response Metric Value", row=1, col=1)
305
+ # fig.update_yaxes(ticktext = custom_y_labels, row=1, col=1)
306
+
307
+ # # Add second plot as subplot
308
+ # fig.add_trace(go.Bar(
309
+ # x = fig1.data[0].x,
310
+ # y = fig1.data[0].y,
311
+ # customdata = fig1.data[0].customdata,
312
+ # textposition = 'auto'
313
+ # ), row=1, col=2)
314
+
315
+ # # Update layout for second subplot
316
+ # fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
317
+ # fig.update_yaxes(title='', showticklabels=False, row=1, col=2)
318
+
319
+ # fig.update_layout(height=600, width=800, title_text="Key Metrics")
320
+ # st.plotly_chart(fig)
321
+
322
+
323
st.header('Return Forecast by Media Channel')
with st.expander("Return Forecast by Media Channel"):
    # BUG FIX: `val != np.NaN` is always True (NaN never compares equal to
    # anything, including itself), so missing metric names were never
    # filtered out. Use pd.notna() instead.
    metric_data=[val for val in st.session_state['raw_data']['ResponseMetricName'].unique() if pd.notna(val)]
    metric=st.selectbox('Select Metric',metric_data,index=1)

    # Rows for the chosen response metric (duplicate assignment removed)
    selected_metric=st.session_state['raw_data'][st.session_state['raw_data']['ResponseMetricName']==metric]
    # Effectiveness: total response per media channel
    effectiveness=selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()
    effectiveness_df=pd.DataFrame({'Channel':effectiveness.index,"ResponseMetricValue":effectiveness.values})

    summary_df_sorted=summary_df_sorted.merge(effectiveness_df,left_on="Channel_name",right_on='Channel')

    # Efficiency: response per optimized dollar
    summary_df_sorted['Efficiency']=summary_df_sorted['ResponseMetricValue']/summary_df_sorted['Optimized_spend']

    columns= st.columns(3)
    with columns[0]:
        fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='', text_column='Optimized_spend',color='Channel_name')
        st.plotly_chart(fig,use_container_width=True)
    with columns[1]:
        fig=summary_plot(summary_df_sorted, x='ResponseMetricValue', y='Channel_name', title='Effectiveness', text_column='ResponseMetricValue',color='Channel_name')
        st.plotly_chart(fig,use_container_width=True)

    with columns[2]:
        fig=summary_plot(summary_df_sorted, x='Efficiency', y='Channel_name', title='Efficiency', text_column='Efficiency',color='Channel_name',format_as_decimal=True)
        st.plotly_chart(fig,use_container_width=True)
358
+
359
+ import plotly.express as px
360
+ import plotly.graph_objects as go
361
+ from plotly.subplots import make_subplots
362
+
363
+ # Create figure with subplots
364
+ # fig = make_subplots(rows=1, cols=2)
365
+
366
+ # # Add funnel plot to subplot 1
367
+ # fig.add_trace(
368
+ # go.Funnel(
369
+ # x=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricValue'],
370
+ # y=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricName'],
371
+ # textposition="inside",
372
+ # texttemplate="%{x:.2s}",
373
+ # customdata=effectiveness_overall['Efficiency'],
374
+ # hovertemplate="%{customdata:.2f}<extra></extra>"
375
+ # ),
376
+ # row=1, col=1
377
+ # )
378
+
379
+ # # Add bar plot to subplot 2
380
+ # fig.add_trace(
381
+ # go.Bar(
382
+ # x=effectiveness_overall.sort_values(by='ResponseMetricValue')['Efficiency'],
383
+ # y=effectiveness_overall.sort_values(by='ResponseMetricValue')['ResponseMetricName'],
384
+ # marker_color=effectiveness_overall['ResponseMetric'],
385
+ # customdata=effectiveness_overall['Efficiency'],
386
+ # hovertemplate="%{customdata:.2f}<extra></extra>",
387
+ # textposition="outside"
388
+ # ),
389
+ # row=1, col=2
390
+ # )
391
+
392
+ # # Update layout
393
+ # fig.update_layout(title_text="Effectiveness")
394
+ # fig.update_yaxes(title_text="", row=1, col=1)
395
+ # fig.update_yaxes(title_text="", showticklabels=False, row=1, col=2)
396
+ # fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
397
+
398
+ # # Show figure
399
+ # st.plotly_chart(fig)
pages/1_Data_Validation.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ from Eda_functions import *
6
+ import numpy as np
7
+ import pickle
8
+ from streamlit_pandas_profiling import st_profile_report
9
+ import streamlit as st
10
+ import streamlit.components.v1 as components
11
+ import sweetviz as sv
12
+ from utilities import set_header,load_local_css
13
+ from st_aggrid import GridOptionsBuilder,GridUpdateMode
14
+ from st_aggrid import GridOptionsBuilder
15
+ from st_aggrid import AgGrid
16
+ import base64
17
+ import os
18
+ import tempfile
19
+ from ydata_profiling import ProfileReport
20
+ import re
21
+
22
st.set_page_config(
    page_title="Data Validation",
    page_icon=":shark:",
    layout="wide",
    initial_sidebar_state='collapsed'
)
load_local_css('styles.css')
set_header()





# Cleaned data and column-category mapping produced by the data-import page
with open('data_import.pkl', 'rb') as f:
    data = pickle.load(f)

st.session_state['cleaned_data']= data['final_df']
# category_dict maps category name -> list of column names in that category
st.session_state['category_dict'] = data['bin_dict']

st.title('Data Validation and Insights')


# Response-metric columns are the candidate dependent variables
target_variables=[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Response Metrics']

target_column = st.selectbox('Select the Target Feature/Dependent Variable (will be used in all charts as reference)',list(*target_variables))
st.session_state['target_column']=target_column
panels=st.session_state['category_dict']['Panel Level 1'][0]
selected_panels=st.multiselect('Please choose the panels you wish to analyze.If no panels are selected, insights will be derived from the overall data.',st.session_state['cleaned_data'][panels].unique())
# Media columns are summed when aggregating across panels; all other
# categories are averaged. 'date' and 'Panel_1' are excluded.
aggregation_dict = {item: 'sum' if key == 'Media' else 'mean' for key, value in st.session_state['category_dict'].items() for item in value if item not in ['date','Panel_1']}
51
+
52
# Typo fix in the expander label: "Reponse" -> "Response"
with st.expander('**Response Metric Analysis**'):

    if len(selected_panels)>0:
        # Restrict to the chosen panels, then collapse to one row per date
        # using the per-category aggregation rules (sum media, mean others).
        st.session_state['Cleaned_data_panel']=st.session_state['cleaned_data'][st.session_state['cleaned_data']['Panel_1'].isin(selected_panels)]

        st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].groupby(by='date').agg(aggregation_dict)
        st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].reset_index()
    else:
        # No panel filter: aggregate the full dataset per date
        st.session_state['Cleaned_data_panel']=st.session_state['cleaned_data'].groupby(by='date').agg(aggregation_dict)
        st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].reset_index()


    fig=line_plot_target(st.session_state['Cleaned_data_panel'], target=target_column, title=f'{target_column} Over Time')
    st.plotly_chart(fig, use_container_width=True)


media_channel=list(*[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Media'])
# st.write(media_channel)

# BUG FIX: the original `list(*[...])` raises TypeError when BOTH
# 'Exogenous' and 'Internal' categories exist (list() takes one iterable).
# Flatten the category lists instead; result is identical when only one
# of the categories is present.
Non_media_variables=[item for key in ('Exogenous','Internal') for item in st.session_state['category_dict'].get(key,[])]


st.markdown('### Annual Data Summary')
st.dataframe(summary(st.session_state['Cleaned_data_panel'], media_channel+[target_column], spends=None,Target=True), use_container_width=True)

if st.checkbox('Show raw data'):
    st.write(pd.concat([pd.to_datetime(st.session_state['Cleaned_data_panel']['date']).dt.strftime('%m/%d/%Y'),st.session_state['Cleaned_data_panel'].select_dtypes(np.number).applymap(format_numbers)],axis=1))
col1 = st.columns(1)

if "selected_feature" not in st.session_state:
    st.session_state['selected_feature']=None
85
def generate_report_with_target(channel_data, target_feature):
    """Run a Sweetviz bivariate analysis of *channel_data* against
    *target_feature* and return the path of the generated HTML report."""
    analysis = sv.analyze([channel_data, "Dataset"], target_feat=target_feature)
    out_dir = tempfile.mkdtemp()
    out_path = os.path.join(out_dir, "report.html")
    # Write the report to disk without popping a browser window
    analysis.show_html(filepath=out_path, open_browser=False)
    return out_path
91
+
92
def generate_profile_report(df):
    """Build a ydata-profiling report for *df* and return the HTML file path."""
    report_dir = tempfile.mkdtemp()
    report_file = os.path.join(report_dir, "report.html")
    df.profile_report().to_file(report_file)
    return report_file
98
+
99
+
100
#st.header()
with st.expander('Univariate and Bivariate Report'):
    eda_columns=st.columns(2)
    with eda_columns[0]:
        # Typo fix in help text: "inlcudes" -> "includes"
        if st.button('Generate Profile Report',help='Univariate report which includes all statistical analysis'):
            with st.spinner('Generating Report'):
                report_file = generate_profile_report(st.session_state['Cleaned_data_panel'])

            if os.path.exists(report_file):
                with open(report_file, 'rb') as f:
                    st.success('Report Generated')
                    st.download_button(
                        label="Download EDA Report",
                        data=f.read(),
                        file_name="pandas_profiling_report.html",
                        mime="text/html"
                    )
            else:
                st.warning("Report generation failed. Unable to find the report file.")

    with eda_columns[1]:
        if st.button('Generate Sweetviz Report',help='Bivariate report for selected response metric'):
            with st.spinner('Generating Report'):
                report_file = generate_report_with_target(st.session_state['Cleaned_data_panel'], target_column)

            if os.path.exists(report_file):
                with open(report_file, 'rb') as f:
                    st.success('Report Generated')
                    st.download_button(
                        label="Download EDA Report",
                        data=f.read(),
                        file_name="report.html",
                        mime="text/html"
                    )
            else:
                st.warning("Report generation failed. Unable to find the report file.")
136
+
137
+
138
+
139
+ #st.warning('Work in Progress')
140
+ with st.expander('Media Variables Analysis'):
141
+ # Get the selected feature
142
+ st.session_state["selected_feature"]= st.selectbox('Select media', [col for col in media_channel if 'cost' not in col.lower() and 'spend' not in col.lower()])
143
+
144
+ # Filter spends features based on the selected feature
145
+ spends_features = [col for col in st.session_state['Cleaned_data_panel'].columns if any(keyword in col.lower() for keyword in ['cost', 'spend'])]
146
+ spends_feature = [col for col in spends_features if re.split(r'_cost|_spend', col.lower())[0] in st.session_state["selected_feature"]]
147
+
148
+ if 'validation' not in st.session_state:
149
+ st.session_state['validation']=[]
150
+
151
+
152
+ val_variables=[col for col in media_channel if col!='date']
153
+ if len(spends_feature)==0:
154
+ st.warning('No spends varaible available for the selected metric in data')
155
+
156
+ else:
157
+ fig_row1 = line_plot(st.session_state['Cleaned_data_panel'], x_col='date', y1_cols=[st.session_state["selected_feature"]], y2_cols=[target_column], title=f'Analysis of {st.session_state["selected_feature"]} and {[target_column][0]} Over Time')
158
+ st.plotly_chart(fig_row1, use_container_width=True)
159
+ st.markdown('### Summary')
160
+ st.dataframe(summary(st.session_state['cleaned_data'],[st.session_state["selected_feature"]],spends=spends_feature[0]),use_container_width=True)
161
+
162
+ cols2=st.columns(2)
163
+ with cols2[0]:
164
+ if st.button('Validate'):
165
+ st.session_state['validation'].append(st.session_state["selected_feature"])
166
+ with cols2[1]:
167
+ if st.checkbox('Validate all'):
168
+ st.session_state['validation'].extend(val_variables)
169
+ st.success('All media variables are validated ✅')
170
+
171
+ if len(set(st.session_state['validation']).intersection(val_variables))!=len(val_variables):
172
+ validation_data=pd.DataFrame({'Validate':[True if col in st.session_state['validation'] else False for col in val_variables],
173
+ 'Variables':val_variables
174
+ })
175
+ cols3=st.columns([1,30])
176
+ with cols3[1]:
177
+ validation_df=st.data_editor(validation_data,
178
+ # column_config={
179
+ # 'Validate':st.column_config.CheckboxColumn(wi)
180
+
181
+ # },
182
+ column_config={
183
+ "Validate": st.column_config.CheckboxColumn(
184
+ default=False,
185
+ width=100,
186
+ ),
187
+ 'Variables':st.column_config.TextColumn(
188
+ width=1000
189
+
190
+ )
191
+ },hide_index=True)
192
+
193
+ selected_rows = validation_df[validation_df['Validate']==True]['Variables']
194
+
195
+ #st.write(selected_rows)
196
+
197
+ st.session_state['validation'].extend(selected_rows)
198
+
199
+ not_validated_variables = [col for col in val_variables if col not in st.session_state["validation"]]
200
+ if not_validated_variables:
201
+ not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
202
+ st.warning(not_validated_message)
203
+
204
+
205
+
206
with st.expander('Non Media Variables Analysis'):
    selected_columns_row4 = st.selectbox('Select Channel',Non_media_variables,index=1)
    # Create the dual-axis line plot: selected variable vs the target metric
    fig_row4 = line_plot(st.session_state['Cleaned_data_panel'], x_col='date', y1_cols=[selected_columns_row4], y2_cols=[target_column], title=f'Analysis of {selected_columns_row4} and {target_column} Over Time')
    st.plotly_chart(fig_row4, use_container_width=True)
    selected_non_media=selected_columns_row4
    # Yearly sums of the selected variable and the target, plus a grand total
    sum_df = st.session_state['Cleaned_data_panel'][['date', selected_non_media,target_column]]
    sum_df['Year']=pd.to_datetime(st.session_state['Cleaned_data_panel']['date']).dt.year
    #st.dataframe(df)
    #st.dataframe(sum_df.head(2))
    sum_df=sum_df.groupby('Year').agg('sum')
    sum_df.loc['Grand Total']=sum_df.sum()
    sum_df=sum_df.applymap(format_numbers)
    # Replace empty / zero cells with a dash for display
    sum_df.fillna('-',inplace=True)
    sum_df=sum_df.replace({"0.0":'-','nan':'-'})
    st.markdown('### Summary')
    st.dataframe(sum_df,use_container_width=True)
223
+
224
+
225
with st.expander('Correlation Analysis'):
    # All numeric columns are candidates for the correlation plot
    options = list(st.session_state['Cleaned_data_panel'].select_dtypes(np.number).columns)

    # selected_options = []
    # num_columns = 4
    # num_rows = -(-len(options) // num_columns)  # Ceiling division to calculate rows

    # # Create a grid of checkboxes
    # st.header('Select Features for Correlation Plot')
    # tick=False
    # if st.checkbox('Select all'):
    #     tick=True
    # selected_options = []
    # for row in range(num_rows):
    #     cols = st.columns(num_columns)
    #     for col in cols:
    #         if options:
    #             option = options.pop(0)
    #             selected = col.checkbox(option,value=tick)
    #             if selected:
    #                 selected_options.append(option)
    # # Display selected options

    # NOTE(review): default=options[3] assumes at least 4 numeric columns
    # and may default to the target itself — confirm
    selected_options=st.multiselect('Select Variables For correlation plot',[var for var in options if var!= target_column],default=options[3])

    st.pyplot(correlation_plot(st.session_state['Cleaned_data_panel'],selected_options,target_column))
251
+
pages/2_Transformations.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importing necessary libraries
2
+ import streamlit as st
3
+
4
+ st.set_page_config(
5
+ page_title="Transformations",
6
+ page_icon=":shark:",
7
+ layout="wide",
8
+ initial_sidebar_state="collapsed",
9
+ )
10
+
11
+ import pickle
12
+ import numpy as np
13
+ import pandas as pd
14
+ from utilities import set_header, load_local_css
15
+ import streamlit_authenticator as stauth
16
+ import yaml
17
+ from yaml import SafeLoader
18
+
19
+ load_local_css("styles.css")
20
+ set_header()
21
+
22
+ # Check for authentication status
23
+ for k, v in st.session_state.items():
24
+ if k not in ["logout", "login", "config"] and not k.startswith(
25
+ "FormSubmitter"
26
+ ):
27
+ st.session_state[k] = v
28
+ with open("config.yaml") as file:
29
+ config = yaml.load(file, Loader=SafeLoader)
30
+ st.session_state["config"] = config
31
+ authenticator = stauth.Authenticate(
32
+ config["credentials"],
33
+ config["cookie"]["name"],
34
+ config["cookie"]["key"],
35
+ config["cookie"]["expiry_days"],
36
+ config["preauthorized"],
37
+ )
38
+ st.session_state["authenticator"] = authenticator
39
+ name, authentication_status, username = authenticator.login("Login", "main")
40
+ auth_status = st.session_state.get("authentication_status")
41
+
42
+ if auth_status == True:
43
+ authenticator.logout("Logout", "main")
44
+ is_state_initiaized = st.session_state.get("initialized", False)
45
+
46
+ if not is_state_initiaized:
47
+
48
+ if 'session_name' not in st.session_state:
49
+ st.session_state['session_name']=None
50
+
51
+
52
+ # Deserialize and load the objects from the pickle file
53
+ with open("data_import.pkl", "rb") as f:
54
+ data = pickle.load(f)
55
+
56
+ # Accessing the loaded objects
57
+ final_df_loaded = data["final_df"]
58
+ bin_dict_loaded = data["bin_dict"]
59
+
60
+ # Initialize session state
61
+ if "transformed_columns_dict" not in st.session_state:
62
+ st.session_state["transformed_columns_dict"] = {} # Default empty dictionary
63
+
64
+ if "final_df" not in st.session_state:
65
+ st.session_state["final_df"] = final_df_loaded # Default as original dataframe
66
+
67
+ if "summary_string" not in st.session_state:
68
+ st.session_state["summary_string"] = None # Default as None
69
+
70
+ # Extract original columns for specified categories
71
+ original_columns = {
72
+ category: bin_dict_loaded[category]
73
+ for category in ["Media", "Internal", "Exogenous"]
74
+ if category in bin_dict_loaded
75
+ }
76
+
77
+ # Retrive Panel columns
78
+ panel_1 = bin_dict_loaded.get("Panel Level 1")
79
+ panel_2 = bin_dict_loaded.get("Panel Level 2")
80
+
81
+ # # For testing on non panel level
82
+ # final_df_loaded = final_df_loaded.drop("Panel_1", axis=1)
83
+ # final_df_loaded = final_df_loaded.groupby("date").mean().reset_index()
84
+ # panel_1 = None
85
+
86
+ # Apply transformations on panel level
87
+ st.write("")
88
+ if panel_1:
89
+ panel = panel_1 + panel_2 if panel_2 else panel_1
90
+ else:
91
+ panel = []
92
+
93
+
94
# Function to build transformation widgets
def transformation_widgets(category, transform_params, date_granularity):
    """Render the Streamlit controls for one variable category and record the
    chosen transformation parameter ranges.

    Parameters
    ----------
    category : str
        One of "Media", "Internal" or "Exogenous"; selects which
        transformations are offered.
    transform_params : dict
        Mutated in place: ``transform_params[category][<transformation>]`` is
        set to the full range of parameter values the user selected.
    date_granularity : str
        Display-only label ("daily"/"weekly"/...) shown beside the
        period-based sliders.
    """
    # Transformation options per category; only Media gets the
    # shape-changing transforms (Saturation / Power / Adstock).
    transformation_options = {
        "Media": ["Lag", "Moving Average", "Saturation", "Power", "Adstock"],
        "Internal": ["Lead", "Lag", "Moving Average"],
        "Exogenous": ["Lead", "Lag", "Moving Average"],
    }

    with st.expander(f"{category} Transformations"):

        # Let users select which transformations to apply
        transformations_to_apply = st.multiselect(
            "Select transformations to apply",
            options=transformation_options[category],
            default=[],
            key=f"transformation_{category}",
        )

        # Determine the number of transformations to put in each column
        # (ceiling of half, so the first column gets the extra one).
        transformations_per_column = (
            len(transformations_to_apply) // 2 + len(transformations_to_apply) % 2
        )

        # Create two columns so the widgets sit side by side
        col1, col2 = st.columns(2)

        # Assign transformations to each column
        transformations_col1 = transformations_to_apply[:transformations_per_column]
        transformations_col2 = transformations_to_apply[transformations_per_column:]

        # Define a helper function to create widgets for each transformation.
        # Each branch renders a range slider and expands the chosen
        # (start, end) pair into the full list of parameter values to try.
        def create_transformation_widgets(column, transformations):
            with column:
                for transformation in transformations:
                    # Conditionally create widgets for selected transformations
                    if transformation == "Lead":
                        st.markdown(f"**Lead ({date_granularity})**")
                        lead = st.slider(
                            "Lead periods",
                            1,
                            10,
                            (1, 2),
                            1,
                            key=f"lead_{category}",
                            label_visibility="collapsed",
                        )
                        start = lead[0]
                        end = lead[1]
                        step = 1
                        transform_params[category]["Lead"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Lag":
                        st.markdown(f"**Lag ({date_granularity})**")
                        lag = st.slider(
                            "Lag periods",
                            1,
                            10,
                            (1, 2),
                            1,
                            key=f"lag_{category}",
                            label_visibility="collapsed",
                        )
                        start = lag[0]
                        end = lag[1]
                        step = 1
                        transform_params[category]["Lag"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Moving Average":
                        st.markdown(f"**Moving Average ({date_granularity})**")
                        window = st.slider(
                            "Window size for Moving Average",
                            1,
                            10,
                            (1, 2),
                            1,
                            key=f"ma_{category}",
                            label_visibility="collapsed",
                        )
                        start = window[0]
                        end = window[1]
                        step = 1
                        transform_params[category]["Moving Average"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Saturation":
                        st.markdown("**Saturation (%)**")
                        saturation_point = st.slider(
                            f"Saturation Percentage",
                            0,
                            100,
                            (10, 20),
                            10,
                            key=f"sat_{category}",
                            label_visibility="collapsed",
                        )
                        start = saturation_point[0]
                        end = saturation_point[1]
                        # Saturation is stepped in increments of 10 percent
                        step = 10
                        transform_params[category]["Saturation"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Power":
                        st.markdown("**Power**")
                        power = st.slider(
                            f"Power",
                            0,
                            10,
                            (2, 4),
                            1,
                            key=f"power_{category}",
                            label_visibility="collapsed",
                        )
                        start = power[0]
                        end = power[1]
                        step = 1
                        transform_params[category]["Power"] = np.arange(
                            start, end + step, step
                        )

                    if transformation == "Adstock":
                        st.markdown("**Adstock**")
                        rate = st.slider(
                            f"Factor ({category})",
                            0.0,
                            1.0,
                            (0.5, 0.7),
                            0.05,
                            key=f"adstock_{category}",
                            label_visibility="collapsed",
                        )
                        start = rate[0]
                        end = rate[1]
                        step = 0.05
                        # Round to 3 decimals to avoid float drift from np.arange
                        adstock_range = [
                            round(a, 3) for a in np.arange(start, end + step, step)
                        ]
                        transform_params[category]["Adstock"] = adstock_range

        # Create widgets in each column
        create_transformation_widgets(col1, transformations_col1)
        create_transformation_widgets(col2, transformations_col2)
242
+
243
+
244
# Lag transformation: push the series forward in time.
def apply_lag(df, lag):
    """Shift *df* forward by ``lag`` periods; the first ``lag`` entries become NaN."""
    shifted = df.shift(periods=lag)
    return shifted
247
+
248
+
249
# Lead transformation: pull the series backward in time.
def apply_lead(df, lead):
    """Shift *df* backward by ``lead`` periods; the last ``lead`` entries become NaN."""
    return df.shift(periods=-lead)
252
+
253
+
254
# Moving-average transformation (trailing window; first window-1 values are NaN).
def apply_moving_average(df, window_size):
    """Return the rolling mean of *df* over ``window_size`` observations."""
    rolling_window = df.rolling(window=window_size)
    return rolling_window.mean()
257
+
258
+
259
# Function to apply Saturation transformation
def apply_saturation(df, saturation_percent_100):
    """Apply an S-curve (logistic) saturation to a media series.

    ``saturation_percent_100`` is the saturation level as a 0-100 percentage;
    the midpoint of the series' min/max range is used as the inflection point.

    NOTE(review): ``saturation_percent_100 == 0`` makes the ``1 / ...`` term
    below a division by zero, and the UI slider for Saturation allows 0 —
    confirm the intended minimum.
    """
    # Convert saturation percentage from 100-based to fraction
    saturation_percent = saturation_percent_100 / 100.0

    # Calculate saturation point and steepness
    column_max = df.max()
    column_min = df.min()
    saturation_point = (column_min + column_max) / 2

    numerator = np.log(
        (1 / (saturation_percent if saturation_percent != 1 else 1 - 1e-9)) - 1
    )
    denominator = np.log(saturation_point / max(column_max, 1e-9))

    # NOTE(review): ``denominator`` is the log of a ratio <= 1, i.e. a value
    # <= 0, so ``max(denominator, 1e-9)`` always selects 1e-9 rather than
    # guarding a near-zero denominator — steepness is then a huge constant.
    # Possibly ``min(...)`` or a guard on ``abs(denominator)`` was intended;
    # behavior preserved here, flagged for confirmation.
    steepness = numerator / max(
        denominator, 1e-9
    )  # Avoid division by zero with a small constant

    # Apply the saturation transformation element-wise.
    # NOTE(review): an element x == 0 raises ZeroDivisionError here —
    # presumably inputs are strictly positive spends/impressions; verify.
    transformed_series = df.apply(
        lambda x: (1 / (1 + (saturation_point / x) ** steepness)) * x
    )

    return transformed_series
284
+
285
+
286
# Power transformation: element-wise exponentiation.
def apply_power(df, power):
    """Raise every value of *df* to the exponent ``power``."""
    return df.pow(power)
289
+
290
+
291
# Adstock transformation: geometric decay of carried-over media effect.
def apply_adstock(df, factor):
    """Geometric adstock: out[t] = factor * out[t-1] + df[t], out[-1] == 0.

    Each period retains ``factor`` of the accumulated effect from the
    previous period and adds the current observation.
    """
    carried = 0
    decayed_values = []
    for value in df:
        carried = carried * factor + value
        decayed_values.append(carried)
    return pd.Series(decayed_values, index=df.index)
298
+
299
+
300
# Function to generate transformed column names
@st.cache_resource(show_spinner=False)
def generate_transformed_columns(original_columns, transform_params):
    """Build the transformed-column name map and an HTML summary string.

    Parameters
    ----------
    original_columns : dict
        Category name -> list of original column names.
    transform_params : dict
        Category name -> {transformation -> iterable of parameter values}.

    Returns
    -------
    tuple(dict, str)
        ``transformed_columns`` maps each original column to its generated
        ``col@Transformation_value`` names; the summary string is a numbered,
        newline-separated list of per-column transformation descriptions
        (with ``<strong>`` markup for display via st.markdown).
    """
    transformed_columns = {}
    summary = {}

    for category, columns in original_columns.items():
        # Categories without any chosen transformations contribute empty lists.
        params_for_category = transform_params.get(category, {})
        for column in columns:
            generated_names = []
            details = []
            for transformation, values in params_for_category.items():
                generated_names.extend(
                    f"{column}@{transformation}_{value}" for value in values
                )
                # Render the value list as "a, b and c" (or just "a").
                if len(values) > 1:
                    shown = ", ".join(map(str, values[:-1])) + " and " + str(values[-1])
                else:
                    shown = str(values[0])
                details.append(f"{transformation} ({shown})")
            transformed_columns[column] = generated_names
            # Only columns that actually got transformations appear in the summary.
            if details:
                summary[column] = f"<strong>{column}</strong>: " + "⮕ ".join(details)

    # Number the per-column summaries 1., 2., ... and join into one string.
    summary_string = "\n".join(
        f"{idx + 1}. {text}" for idx, text in enumerate(summary.values())
    )

    return transformed_columns, summary_string
344
+
345
+
346
# Function to apply transformations to DataFrame slices based on specified categories and parameters
@st.cache_resource(show_spinner=False)
def apply_category_transformations(df, bin_dict, transform_params, panel):
    """Apply every requested transformation to each variable category.

    Parameters
    ----------
    df : pd.DataFrame
        Input data containing the raw variables.
    bin_dict : dict
        Category name ("Media"/"Internal"/"Exogenous") -> its column list.
    transform_params : dict
        Category -> {transformation name -> iterable of parameter values}.
    panel : list
        Panel identifier column names; empty list for non-panel data.

    Returns
    -------
    pd.DataFrame
        ``df`` with transformed columns appended (suffixed
        ``@Transformation_param``), or ``df`` itself if nothing was applied.

    NOTE(review): because ``df_slice`` is rebuilt after each transformation,
    later transformations are applied on top of earlier ones, producing
    chained names such as ``x@Lag_1@Moving Average_2`` — this appears
    intentional (the summary joins steps with "⮕") but confirm.
    """
    # Dictionary for function mapping
    transformation_functions = {
        "Lead": apply_lead,
        "Lag": apply_lag,
        "Moving Average": apply_moving_average,
        "Saturation": apply_saturation,
        "Power": apply_power,
        "Adstock": apply_adstock,
    }

    # Initialize category_df as an empty DataFrame
    category_df = pd.DataFrame()

    # Iterate through each category specified in transform_params
    for category in ["Media", "Internal", "Exogenous"]:
        if (
            category not in transform_params
            or category not in bin_dict
            or not transform_params[category]
        ):
            continue  # Skip categories without transformations

        # Slice the DataFrame based on the columns specified in bin_dict for the current category
        df_slice = df[bin_dict[category] + panel]

        # Iterate through each transformation and its parameters for the current category
        for transformation, parameters in transform_params[category].items():
            transformation_function = transformation_functions[transformation]

            # Check if there is panel data to group by
            if len(panel) > 0:
                # Apply the transformation within each panel group.
                # NOTE(review): this rebinds ``category_df`` on every
                # transformation (and every category), so only the output of
                # the final pass survives into the final concat — verify that
                # earlier categories' outputs are not meant to be kept too.
                category_df = pd.concat(
                    [
                        df_slice.groupby(panel)
                        .transform(transformation_function, p)
                        .add_suffix(f"@{transformation}_{p}")
                        for p in parameters
                    ],
                    axis=1,
                )

                # Replace all NaN or null values in category_df with 0
                category_df.fillna(0, inplace=True)

                # Update df_slice so the next transformation chains on this one
                df_slice = pd.concat(
                    [df[panel], category_df],
                    axis=1,
                )

            else:
                for p in parameters:
                    # Apply the transformation function to each column
                    temp_df = df_slice.apply(
                        lambda x: transformation_function(x, p), axis=0
                    ).rename(lambda x: f"{x}@{transformation}_{p}", axis="columns")
                    # Concatenate the transformed DataFrame slice to the category DataFrame
                    category_df = pd.concat([category_df, temp_df], axis=1)

                # Replace all NaN or null values in category_df with 0
                category_df.fillna(0, inplace=True)

                # Update df_slice so the next transformation chains on this one
                df_slice = pd.concat(
                    [df[panel], category_df],
                    axis=1,
                )

    # If category_df has been modified, concatenate it with the panel and response metrics from the original DataFrame
    if not category_df.empty:
        final_df = pd.concat([df, category_df], axis=1)
    else:
        # If no transformations were applied, use the original DataFrame
        final_df = df

    return final_df
426
+
427
+
428
# Function to infer the granularity of the date column in a DataFrame
@st.cache_resource(show_spinner=False)
def infer_date_granularity(df):
    """Infer the reporting frequency of ``df["date"]``.

    Returns one of "daily", "weekly", "monthly" or "irregular", based on the
    most common gap (in days) between consecutive *distinct* dates.
    """
    # FIX: ``unique()`` preserves order of appearance, so on unsorted input
    # the pairwise differences could be negative or meaningless. Sort the
    # distinct dates before diffing; to_datetime is a no-op on datetime data.
    unique_dates = pd.Series(pd.to_datetime(df["date"]).unique()).sort_values()

    # Find the most common difference between consecutive dates
    common_freq = unique_dates.diff().dt.days.dropna().mode()[0]

    # Map the most common difference to a granularity
    if common_freq == 1:
        return "daily"
    elif common_freq == 7:
        return "weekly"
    elif 28 <= common_freq <= 31:
        return "monthly"
    else:
        return "irregular"
443
+
444
+
445
+ #########################################################################################################################################################
446
+ # User input for transformations
447
+ #########################################################################################################################################################
448
+
449
+
450
+ # Infer date granularity
451
+ date_granularity = infer_date_granularity(final_df_loaded)
452
+
453
+ # Initialize the main dictionary to store the transformation parameters for each category
454
+ transform_params = {"Media": {}, "Internal": {}, "Exogenous": {}}
455
+
456
+ # User input for transformations
457
+ st.markdown("### Select Transformations to Apply")
458
+ for category in ["Media", "Internal", "Exogenous"]:
459
+ # Skip Internal
460
+ if category == "Internal":
461
+ continue
462
+
463
+ transformation_widgets(category, transform_params, date_granularity)
464
+
465
+
466
+ #########################################################################################################################################################
467
+ # Apply transformations
468
+ #########################################################################################################################################################
469
+
470
+
471
+ # Apply category-based transformations to the DataFrame
472
+ if st.button("Accept and Proceed", use_container_width=True):
473
+ with st.spinner("Applying transformations..."):
474
+ final_df = apply_category_transformations(
475
+ final_df_loaded, bin_dict_loaded, transform_params, panel
476
+ )
477
+
478
+ # Generate a dictionary mapping original column names to lists of transformed column names
479
+ transformed_columns_dict, summary_string = generate_transformed_columns(
480
+ original_columns, transform_params
481
+ )
482
+
483
+ # Store into transformed dataframe and summary session state
484
+ st.session_state["final_df"] = final_df
485
+ st.session_state["summary_string"] = summary_string
486
+
487
+
488
+ #########################################################################################################################################################
489
+ # Display the transformed DataFrame and summary
490
+ #########################################################################################################################################################
491
+
492
+
493
+ # Display the transformed DataFrame in the Streamlit app
494
+ st.markdown("### Transformed DataFrame")
495
+ st.dataframe(st.session_state["final_df"], hide_index=True)
496
+
497
+ # Total rows and columns
498
+ total_rows, total_columns = st.session_state["final_df"].shape
499
+ st.markdown(
500
+ f"<p style='text-align: justify;'>The transformed DataFrame contains <strong>{total_rows}</strong> rows and <strong>{total_columns}</strong> columns.</p>",
501
+ unsafe_allow_html=True,
502
+ )
503
+
504
+ # Display the summary of transformations as markdown
505
+ if st.session_state["summary_string"]:
506
+ with st.expander("Summary of Transformations"):
507
+ st.markdown("### Summary of Transformations")
508
+ st.markdown(st.session_state["summary_string"], unsafe_allow_html=True)
509
+
510
def save_to_pickle(file_path, final_df):
    """Serialize the transformed DataFrame to ``file_path``.

    The pickle payload is ``{"final_df_transformed": final_df}``, the shape
    expected by the model-build page.

    FIX: the original was decorated with ``@st.cache_resource``, which
    memoizes on the arguments — a repeated save of identical data silently
    skipped the write. A save function must always perform its side effect,
    so the decorator is removed.
    """
    # Open the file in write-binary mode and dump the objects
    with open(file_path, "wb") as f:
        pickle.dump({"final_df_transformed": final_df}, f)
    # Data is now saved to file
516
+
517
+ if st.button("Accept and Save", use_container_width=True):
518
+
519
+ save_to_pickle(
520
+ "final_df_transformed.pkl", st.session_state["final_df"]
521
+ )
522
+ st.toast("💾 Saved Successfully!")
pages/4_Model_Build.py ADDED
@@ -0,0 +1,826 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ MMO Build Sprint 3
3
+ additions : adding more variables to session state for saved model : random effect, predicted train & test
4
+
5
+ MMO Build Sprint 4
6
+ additions : ability to run models for different response metrics
7
+ '''
8
+
9
+ import streamlit as st
10
+ import pandas as pd
11
+ import plotly.express as px
12
+ import plotly.graph_objects as go
13
+ from Eda_functions import format_numbers
14
+ import numpy as np
15
+ import pickle
16
+ from st_aggrid import AgGrid
17
+ from st_aggrid import GridOptionsBuilder, GridUpdateMode
18
+ from utilities import set_header, load_local_css
19
+ from st_aggrid import GridOptionsBuilder
20
+ import time
21
+ import itertools
22
+ import statsmodels.api as sm
23
+ import numpy as npc
24
+ import re
25
+ import itertools
26
+ from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error
27
+ from sklearn.preprocessing import MinMaxScaler
28
+ import os
29
+ import matplotlib.pyplot as plt
30
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
31
+
32
+ st.set_option('deprecation.showPyplotGlobalUse', False)
33
+ import statsmodels.api as sm
34
+ import statsmodels.formula.api as smf
35
+
36
+ from datetime import datetime
37
+ import seaborn as sns
38
+ from Data_prep_functions import *
39
+
40
+
41
+
42
def get_random_effects(media_data, panel_col, mdf):
    """Collect the fitted random-effect intercept of each panel/market.

    Parameters
    ----------
    media_data : pd.DataFrame
        Data containing the panel identifier column ``panel_col``.
    panel_col : str
        Name of the panel/market column.
    mdf : fitted mixed-effects result
        Must expose ``random_effects`` mapping each group label to its
        random-effect vector (first element is the intercept) — e.g. a
        statsmodels ``MixedLMResults``.

    Returns
    -------
    pd.DataFrame
        Columns ``[panel_col, "random_effect"]``, one row per unique market.
    """
    # FIX: removed the leftover debug ``print(i, end='\r')`` and the O(n)
    # row-by-row ``.loc`` construction; build the frame in one shot instead.
    markets = media_data[panel_col].unique()
    intercepts = [mdf.random_effects[market].values[0] for market in markets]
    return pd.DataFrame({panel_col: markets, "random_effect": intercepts})
52
+
53
+
54
def mdf_predict(X_df, mdf, random_eff_df):
    """Predict: fixed-effect prediction plus the market's random intercept.

    Parameters
    ----------
    X_df : pd.DataFrame
        Feature frame including the panel identifier column; not mutated.
    mdf : fitted mixed-effects result
        Must expose ``predict(X)`` returning the fixed-effect prediction.
    random_eff_df : pd.DataFrame
        Output of ``get_random_effects``: the panel column plus
        ``"random_effect"``.

    Returns
    -------
    pd.Series
        Predictions, named "pred", aligned with the rows of ``X_df``.
    """
    X = X_df.copy()
    X["fixed_effect"] = mdf.predict(X)
    # FIX: the original merged on the module-level global ``panel_col``, a
    # hidden dependency that broke the function outside this script. The
    # merge key is recoverable from random_eff_df itself: every column
    # except "random_effect".
    merge_keys = [c for c in random_eff_df.columns if c != "random_effect"]
    X = pd.merge(X, random_eff_df, on=merge_keys, how="left")
    return (X["fixed_effect"] + X["random_effect"]).rename("pred")
62
+
63
+
64
+ st.set_page_config(
65
+ page_title="Model Build",
66
+ page_icon=":shark:",
67
+ layout="wide",
68
+ initial_sidebar_state='collapsed'
69
+ )
70
+
71
+ load_local_css('styles.css')
72
+ set_header()
73
+
74
+ st.title('1. Build Your Model')
75
+
76
+ with open("data_import.pkl", "rb") as f:
77
+ data = pickle.load(f)
78
+
79
+ st.session_state['bin_dict'] = data["bin_dict"]
80
+
81
+ #st.write(data["bin_dict"])
82
+
83
+ with open("final_df_transformed.pkl", "rb") as f:
84
+ data = pickle.load(f)
85
+
86
+ # Accessing the loaded objects
87
+ media_data = data["final_df_transformed"]
88
+
89
+ # Sprint4 - available response metrics is a list of all reponse metrics in the data
90
+ ## these will be put in a drop down
91
+
92
+ st.session_state['media_data']=media_data
93
+
94
+ if 'available_response_metrics' not in st.session_state:
95
+ # st.session_state['available_response_metrics'] = ['Total Approved Accounts - Revenue',
96
+ # 'Total Approved Accounts - Appsflyer',
97
+ # 'Account Requests - Appsflyer',
98
+ # 'App Installs - Appsflyer']
99
+
100
+ st.session_state['available_response_metrics']= st.session_state['bin_dict']["Response Metrics"]
101
+ # Sprint4
102
+ if "is_tuned_model" not in st.session_state:
103
+ st.session_state["is_tuned_model"] = {}
104
+ for resp_metric in st.session_state['available_response_metrics'] :
105
+ resp_metric=resp_metric.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
106
+ st.session_state["is_tuned_model"][resp_metric] = False
107
+
108
+ # Sprint4 - used_response_metrics is a list of resp metrics for which user has created & saved a model
109
+ if 'used_response_metrics' not in st.session_state:
110
+ st.session_state['used_response_metrics'] = []
111
+
112
+ # Sprint4 - saved_model_names
113
+ if 'saved_model_names' not in st.session_state:
114
+ st.session_state['saved_model_names'] = []
115
+
116
+ # if "model_save_flag" not in st.session_state:
117
+ # st.session_state["model_save_flag"]=False
118
+ # def reset_save():
119
+ # st.session_state["model_save_flag"]=False
120
+ # def set_save():
121
+ # st.session_state["model_save_flag"]=True
122
+ # Sprint4 - select a response metric
123
+
124
+
125
+ sel_target_col = st.selectbox("Select the response metric",
126
+ st.session_state['available_response_metrics'])
127
+ # , on_change=reset_save())
128
+ target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
129
+
130
+ new_name_dct={col:col.lower().replace('.','_').lower().replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in media_data.columns}
131
+
132
+ media_data.columns=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in media_data.columns]
133
+
134
+ #st.write(st.session_state['bin_dict'])
135
+ panel_col = [col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['bin_dict']['Panel Level 1'] ] [0]# set the panel column
136
+ date_col = 'date'
137
+
138
+ #st.write(media_data)
139
+
140
+ is_panel = True if len(panel_col)>0 else False
141
+
142
+ if 'is_panel' not in st.session_state:
143
+ st.session_state['is_panel']=False
144
+
145
+
146
+
147
+ # if st.toggle('Apply Transformations on DMA/Panel Level'):
148
+ # media_data = pd.read_csv(r'C:\Users\SrishtiVerma\Mastercard\Sprint2\upf_data_converted_randomized_resp_metrics.csv')
149
+ # media_data.columns = [i.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for i in
150
+ # media_data.columns]
151
+ # dma = st.selectbox('Select the Level of data ',
152
+ # [col for col in media_data.columns if col.lower() in ['dma', 'panel', 'markets']])
153
+ # # is_panel = True
154
+ # # st.session_state['is_panel']=True
155
+ #
156
+ # else:
157
+ # # """ code to aggregate data on date """
158
+ # media_data = pd.read_excel(r'C:\Users\SrishtiVerma\Mastercard\Sprint1\Tactic Level Models\Tactic_level_data_imp_clicks_spends.xlsx')
159
+ # media_data.columns = [i.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for i in
160
+ # media_data.columns]
161
+ # dma = None
162
+ # # is_panel = False
163
+ # # st.session_state['is_panel']=False
164
+
165
+ #media_data = st.session_state["final_df"]
166
+
167
+
168
+
169
+ # st.write(media_data.columns)
170
+
171
+ media_data.sort_values(date_col, inplace=True)
172
+ media_data.reset_index(drop=True, inplace=True)
173
+
174
+ date = media_data[date_col]
175
+ st.session_state['date'] = date
176
+ # revenue=media_data[target_col]
177
+ y = media_data[target_col]
178
+
179
+ if is_panel:
180
+ spends_data = media_data[
181
+ [c for c in media_data.columns if "_cost" in c.lower() or "_spend" in c.lower()] + [date_col, panel_col]]
182
+ # Sprint3 - spends for resp curves
183
+ else:
184
+ spends_data = media_data[
185
+ [c for c in media_data.columns if "_cost" in c.lower() or "_spend" in c.lower()] + [date_col]]
186
+
187
+ y = media_data[target_col]
188
+ # media_data.drop([target_col],axis=1,inplace=True)
189
+ media_data.drop([date_col], axis=1, inplace=True)
190
+ media_data.reset_index(drop=True, inplace=True)
191
+
192
+ # dma_dict={ dm:media_data[media_data[dma]==dm] for dm in media_data[dma].unique()}
193
+
194
+ # st.markdown('## Select the Range of Transformations')
195
+ columns = st.columns(2)
196
+
197
+ old_shape = media_data.shape
198
+
199
+ if "old_shape" not in st.session_state:
200
+ st.session_state['old_shape'] = old_shape
201
+
202
+ # with columns[0]:
203
+ # slider_value_adstock = st.slider('Select Adstock Range (only applied to media)', 0.0, 1.0, (0.2, 0.4), step=0.1,
204
+ # format="%.2f")
205
+ # with columns[1]:
206
+ # slider_value_lag = st.slider('Select Lag Range (applied to media, seasonal, macroeconomic variables)', 1, 7, (1, 3),
207
+ # step=1)
208
+
209
+
210
+ # with columns[2]:
211
+ # slider_value_power=st.slider('Select Power range (only applied to media )',0,4,(1,2),step=1)
212
+
213
+ # with columns[1]:
214
+ # st.number_input('Select the range of half saturation point ',min_value=1,max_value=5)
215
+ # st.number_input('Select the range of ')
216
+
217
+ # Section 1 - Transformations Functions
218
+ # def lag(data, features, lags, dma=None):
219
+ # if dma:
220
+ #
221
+ # transformed_data = pd.concat(
222
+ # [data.groupby([dma])[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags], axis=1)
223
+ # # transformed_data = transformed_data.fillna(method='bfill')
224
+ # transformed_data = transformed_data.bfill() # Sprint4 - fillna getting deprecated
225
+ # return pd.concat([transformed_data, data], axis=1)
226
+ #
227
+ # else:
228
+ #
229
+ # # ''' data should be aggregated on date'''
230
+ #
231
+ # transformed_data = pd.concat([data[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags], axis=1)
232
+ # # transformed_data = transformed_data.fillna(method='bfill')
233
+ # transformed_data = transformed_data.bfill()
234
+ #
235
+ # return pd.concat([transformed_data, data], axis=1)
236
+ #
237
+ #
238
+ # # adstock
239
+ # def adstock(df, alphas, cutoff, features, dma=None):
240
+ # if dma:
241
+ # transformed_data = pd.DataFrame()
242
+ # for d in df[dma].unique():
243
+ # dma_sub_df = df[df[dma] == d]
244
+ # n = len(dma_sub_df)
245
+ #
246
+ # weights = np.array(
247
+ # [[[alpha ** (i - j) if i >= j and j >= i - cutoff else 0. for j in range(n)] for i in range(n)] for
248
+ # alpha in alphas])
249
+ # X = dma_sub_df[features].to_numpy()
250
+ #
251
+ # res = pd.DataFrame(np.hstack(weights @ X),
252
+ # columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
253
+ #
254
+ # transformed_data = pd.concat([transformed_data, res], axis=0)
255
+ # transformed_data.reset_index(drop=True, inplace=True)
256
+ # return pd.concat([transformed_data, df], axis=1)
257
+ #
258
+ # else:
259
+ #
260
+ # n = len(df)
261
+ #
262
+ # weights = np.array(
263
+ # [[[alpha ** (i - j) if i >= j and j >= i - cutoff else 0. for j in range(n)] for i in range(n)] for alpha in
264
+ # alphas])
265
+ #
266
+ # X = df[features].to_numpy()
267
+ # res = pd.DataFrame(np.hstack(weights @ X),
268
+ # columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
269
+ # return pd.concat([res, df], axis=1)
270
+
271
+
272
+ # Section 2 - Begin Transformations
273
+
274
+ if 'media_data' not in st.session_state:
275
+ st.session_state['media_data'] = pd.DataFrame()
276
+
277
+ # Sprint3
278
+ if "orig_media_data" not in st.session_state:
279
+ st.session_state['orig_media_data'] = pd.DataFrame()
280
+
281
+ # Sprint3 additions
282
+ if 'random_effects' not in st.session_state:
283
+ st.session_state['random_effects'] = pd.DataFrame()
284
+ if 'pred_train' not in st.session_state:
285
+ st.session_state['pred_train'] = []
286
+ if 'pred_test' not in st.session_state:
287
+ st.session_state['pred_test'] = []
288
+ # end of Sprint3 additions
289
+
290
+ # variables_to_be_transformed=[col for col in media_data.columns if col.lower() not in ['dma','panel'] ] # change for buckets
291
+ # variables_to_be_transformed = [col for col in media_data.columns if
292
+ # '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
293
+ #
294
+ # with columns[0]:
295
+ # if st.button('Apply Transformations'):
296
+ # with st.spinner('Applying Transformations'):
297
+ # transformed_data_lag = lag(media_data, features=variables_to_be_transformed,
298
+ # lags=np.arange(slider_value_lag[0], slider_value_lag[1] + 1, 1), dma=dma)
299
+ #
300
+ # # variables_to_be_transformed=[col for col in list(transformed_data_lag.columns) if col not in ['Date','DMA','Panel']] #change for buckets
301
+ # variables_to_be_transformed = [col for col in media_data.columns if
302
+ # '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
303
+ #
304
+ # transformed_data_adstock = adstock(df=transformed_data_lag,
305
+ # alphas=np.arange(slider_value_adstock[0], slider_value_adstock[1], 0.1),
306
+ # cutoff=8, features=variables_to_be_transformed, dma=dma)
307
+ #
308
+ # # st.success('Done')
309
+ # st.success("Transformations complete!")
310
+ #
311
+ # st.write(f'old shape {old_shape}, new shape {transformed_data_adstock.shape}')
312
+ #
313
+ # transformed_data_adstock.columns = [c.replace(".", "_") for c in
314
+ # transformed_data_adstock.columns] # srishti
315
+ # st.session_state['media_data'] = transformed_data_adstock # srishti
316
+ # # Sprint3
317
+ # orig_media_data = media_data.copy()
318
+ # orig_media_data[date_col] = date
319
+ # orig_media_data[target_col] = y
320
+ # st.session_state['orig_media_data'] = orig_media_data # srishti
321
+ #
322
+ # # with st.spinner('Applying Transformations'):
323
+ # # time.sleep(2)
324
+ # # st.success("Transformations complete!")
325
+ #
326
+ # # if st.session_state['media_data'].shape[1]>old_shape[1]:
327
+ # # with columns[0]:
328
+ # # st.write(f'Total no.of variables before transformation: {old_shape[1]}, Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
329
+ # # st.write(f'Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
330
+
331
+ # Section 3 - Create combinations
332
+
333
+ # bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','FB: Level Achieved - Tier 1 Impressions',
334
+ # ' FB: Level Achieved - Tier 2 Impressions','paid_social_others',
335
+ # ' GA App: Will And Cid Pequena Baixo Risco Clicks',
336
+ # 'digital_tactic_others',"programmatic"
337
+ # ]
338
+
339
+ # srishti - bucket names changed
340
+ bucket = ['paid_search', 'kwai', 'indicacao', 'infleux', 'influencer', 'fb_level_achieved_tier_2',
341
+ 'fb_level_achieved_tier_1', 'paid_social_others',
342
+ 'ga_app',
343
+ 'digital_tactic_others', "programmatic"
344
+ ]
345
+
346
+ with columns[0]:
347
+ if st.button('Create Combinations of Variables'):
348
+
349
+ top_3_correlated_features = []
350
+ # # for col in st.session_state['media_data'].columns[:19]:
351
+ # original_cols = [c for c in st.session_state['media_data'].columns if
352
+ # "_clicks" in c.lower() or "_impressions" in c.lower()]
353
+ #original_cols = [c for c in original_cols if "_lag" not in c.lower() and "_adstock" not in c.lower()]
354
+
355
+ original_cols=st.session_state['bin_dict']['Media'] + st.session_state['bin_dict']['Internal']
356
+
357
+ original_cols=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in original_cols]
358
+
359
+ #st.write(original_cols)
360
+ # for col in st.session_state['media_data'].columns[:19]:
361
+ for col in original_cols: # srishti - new
362
+ corr_df = pd.concat([st.session_state['media_data'].filter(regex=col),
363
+ y], axis=1).corr()[target_col].iloc[:-1]
364
+ top_3_correlated_features.append(list(corr_df.sort_values(ascending=False).head(2).index))
365
+ flattened_list = [item for sublist in top_3_correlated_features for item in sublist]
366
+ # all_features_set={var:[col for col in flattened_list if var in col] for var in bucket}
367
+ all_features_set = {var: [col for col in flattened_list if var in col] for var in bucket if
368
+ len([col for col in flattened_list if var in col]) > 0} # srishti
369
+
370
+ channels_all = [values for values in all_features_set.values()]
371
+ st.session_state['combinations'] = list(itertools.product(*channels_all))
372
+ # if 'combinations' not in st.session_state:
373
+ # st.session_state['combinations']=combinations_all
374
+
375
+ st.session_state['final_selection'] = st.session_state['combinations']
376
+ st.success('Done')
377
+
378
+ # revenue.reset_index(drop=True,inplace=True)
379
+ y.reset_index(drop=True, inplace=True)
380
+ if 'Model_results' not in st.session_state:
381
+ st.session_state['Model_results'] = {'Model_object': [],
382
+ 'Model_iteration': [],
383
+ 'Feature_set': [],
384
+ 'MAPE': [],
385
+ 'R2': [],
386
+ 'ADJR2': [],
387
+ 'pos_count': []
388
+ }
389
+
390
+
391
+ def reset_model_result_dct():
392
+ st.session_state['Model_results'] = {'Model_object': [],
393
+ 'Model_iteration': [],
394
+ 'Feature_set': [],
395
+ 'MAPE': [],
396
+ 'R2': [],
397
+ 'ADJR2': [],
398
+ 'pos_count': []
399
+ }
400
+
401
+ # if st.button('Build Model'):
402
+
403
+
404
+ if 'iterations' not in st.session_state:
405
+ st.session_state['iterations'] = 0
406
+
407
+ if 'final_selection' not in st.session_state:
408
+ st.session_state['final_selection'] = False
409
+
410
+ save_path = r"Model/"
411
+ with columns[1]:
412
+ if st.session_state['final_selection']:
413
+ st.write(f'Total combinations created {format_numbers(len(st.session_state["final_selection"]))}')
414
+
415
+ if st.checkbox('Build all iterations'):
416
+ iterations = len(st.session_state['final_selection'])
417
+ else:
418
+ iterations = st.number_input('Select the number of iterations to perform', min_value=0, step=100,
419
+ value=st.session_state['iterations'], on_change=reset_model_result_dct)
420
+ # st.write("iterations=", iterations)
421
+
422
+
423
+ if st.button('Build Model', on_click=reset_model_result_dct):
424
+ st.session_state['iterations'] = iterations
425
+
426
+ # Section 4 - Model
427
+ # st.session_state['media_data'] = st.session_state['media_data'].fillna(method='ffill')
428
+ st.session_state['media_data'] = st.session_state['media_data'].ffill()
429
+ st.markdown(
430
+ 'Data Split -- Training Period: May 9th, 2023 - October 5th,2023 , Testing Period: October 6th, 2023 - November 7th, 2023 ')
431
+ progress_bar = st.progress(0) # Initialize the progress bar
432
+ # time_remaining_text = st.empty() # Create an empty space for time remaining text
433
+ start_time = time.time() # Record the start time
434
+ progress_text = st.empty()
435
+
436
+ # time_elapsed_text = st.empty()
437
+ # for i, selected_features in enumerate(st.session_state["final_selection"][40000:40000 + int(iterations)]):
438
+ # st.write(st.session_state["final_selection"])
439
+ # for i, selected_features in enumerate(st.session_state["final_selection"]):
440
+
441
+ if is_panel == True:
442
+ for i, selected_features in enumerate(st.session_state["final_selection"][0:int(iterations)]): # srishti
443
+ df = st.session_state['media_data']
444
+
445
+ fet = [var for var in selected_features if len(var) > 0]
446
+ inp_vars_str = " + ".join(fet) # new
447
+
448
+ X = df[fet]
449
+ y = df[target_col]
450
+ ss = MinMaxScaler()
451
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
452
+
453
+ X[target_col] = y # Sprint2
454
+ X[panel_col] = df[panel_col] # Sprint2
455
+
456
+ X_train = X.iloc[:8000]
457
+ X_test = X.iloc[8000:]
458
+ y_train = y.iloc[:8000]
459
+ y_test = y.iloc[8000:]
460
+
461
+ print(X_train.shape)
462
+ # model = sm.OLS(y_train, X_train).fit()
463
+ md_str = target_col + " ~ " + inp_vars_str
464
+ # md = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
465
+ # data=X_train[[target_col] + fet],
466
+ # groups=X_train[panel_col])
467
+ md = smf.mixedlm(md_str,
468
+ data=X_train[[target_col] + fet],
469
+ groups=X_train[panel_col])
470
+ mdf = md.fit()
471
+ predicted_values = mdf.fittedvalues
472
+
473
+ coefficients = mdf.fe_params.to_dict()
474
+ model_positive = [col for col in coefficients.keys() if coefficients[col] > 0]
475
+
476
+ pvalues = [var for var in list(mdf.pvalues) if var <= 0.06]
477
+
478
+ if (len(model_positive) / len(selected_features)) > 0 and (
479
+ len(pvalues) / len(selected_features)) >= 0: # srishti - changed just for testing, revert later
480
+ # predicted_values = model.predict(X_train)
481
+ mape = mean_absolute_percentage_error(y_train, predicted_values)
482
+ r2 = r2_score(y_train, predicted_values)
483
+ adjr2 = 1 - (1 - r2) * (len(y_train) - 1) / (len(y_train) - len(selected_features) - 1)
484
+
485
+ filename = os.path.join(save_path, f"model_{i}.pkl")
486
+ with open(filename, "wb") as f:
487
+ pickle.dump(mdf, f)
488
+ # with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
489
+ # model = pickle.load(file)
490
+
491
+ st.session_state['Model_results']['Model_object'].append(filename)
492
+ st.session_state['Model_results']['Model_iteration'].append(i)
493
+ st.session_state['Model_results']['Feature_set'].append(fet)
494
+ st.session_state['Model_results']['MAPE'].append(mape)
495
+ st.session_state['Model_results']['R2'].append(r2)
496
+ st.session_state['Model_results']['pos_count'].append(len(model_positive))
497
+ st.session_state['Model_results']['ADJR2'].append(adjr2)
498
+
499
+ current_time = time.time()
500
+ time_taken = current_time - start_time
501
+ time_elapsed_minutes = time_taken / 60
502
+ completed_iterations_text = f"{i + 1}/{iterations}"
503
+ progress_bar.progress((i + 1) / int(iterations))
504
+ progress_text.text(
505
+ f'Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}')
506
+ st.write(
507
+ f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models')
508
+
509
+ else:
510
+
511
+ for i, selected_features in enumerate(st.session_state["final_selection"][0:int(iterations)]): # srishti
512
+ df = st.session_state['media_data']
513
+
514
+ fet = [var for var in selected_features if len(var) > 0]
515
+ inp_vars_str = " + ".join(fet)
516
+
517
+ X = df[fet]
518
+ y = df[target_col]
519
+ ss = MinMaxScaler()
520
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
521
+ X = sm.add_constant(X)
522
+ X_train = X.iloc[:130]
523
+ X_test = X.iloc[130:]
524
+ y_train = y.iloc[:130]
525
+ y_test = y.iloc[130:]
526
+
527
+ model = sm.OLS(y_train, X_train).fit()
528
+
529
+
530
+ coefficients = model.params.to_list()
531
+ model_positive = [coef for coef in coefficients if coef > 0]
532
+ predicted_values = model.predict(X_train)
533
+ pvalues = [var for var in list(model.pvalues) if var <= 0.06]
534
+
535
+ # if (len(model_possitive) / len(selected_features)) > 0.9 and (len(pvalues) / len(selected_features)) >= 0.8:
536
+ if (len(model_positive) / len(selected_features)) > 0 and (len(pvalues) / len(
537
+ selected_features)) >= 0.5: # srishti - changed just for testing, revert later VALID MODEL CRITERIA
538
+ # predicted_values = model.predict(X_train)
539
+ mape = mean_absolute_percentage_error(y_train, predicted_values)
540
+ adjr2 = model.rsquared_adj
541
+ r2 = model.rsquared
542
+
543
+ filename = os.path.join(save_path, f"model_{i}.pkl")
544
+ with open(filename, "wb") as f:
545
+ pickle.dump(model, f)
546
+ # with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
547
+ # model = pickle.load(file)
548
+
549
+ st.session_state['Model_results']['Model_object'].append(filename)
550
+ st.session_state['Model_results']['Model_iteration'].append(i)
551
+ st.session_state['Model_results']['Feature_set'].append(fet)
552
+ st.session_state['Model_results']['MAPE'].append(mape)
553
+ st.session_state['Model_results']['R2'].append(r2)
554
+ st.session_state['Model_results']['ADJR2'].append(adjr2)
555
+ st.session_state['Model_results']['pos_count'].append(len(model_positive))
556
+
557
+ current_time = time.time()
558
+ time_taken = current_time - start_time
559
+ time_elapsed_minutes = time_taken / 60
560
+ completed_iterations_text = f"{i + 1}/{iterations}"
561
+ progress_bar.progress((i + 1) / int(iterations))
562
+ progress_text.text(
563
+ f'Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}')
564
+ st.write(
565
+ f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models')
566
+
567
+ pd.DataFrame(st.session_state['Model_results']).to_csv('model_output.csv')
568
+
569
+
570
+ def to_percentage(value):
571
+ return f'{value * 100:.1f}%'
572
+
573
+ ## Section 5 - Select Model
574
+ st.title('2. Select Models')
575
+ if 'tick' not in st.session_state:
576
+ st.session_state['tick'] = False
577
+ if st.checkbox('Show results of top 10 models (based on MAPE and Adj. R2)', value=st.session_state['tick']):
578
+ st.session_state['tick'] = True
579
+ st.write('Select one model iteration to generate performance metrics for it:')
580
+ data = pd.DataFrame(st.session_state['Model_results'])
581
+ data = data[data['pos_count']==data['pos_count'].max()].reset_index(drop=True) # Sprint4 -- Srishti -- only show models with the lowest num of neg coeffs
582
+ data.sort_values(by=['ADJR2'], ascending=False, inplace=True)
583
+ data.drop_duplicates(subset='Model_iteration', inplace=True)
584
+ top_10 = data.head(10)
585
+ top_10['Rank'] = np.arange(1, len(top_10) + 1, 1)
586
+ top_10[['MAPE', 'R2', 'ADJR2']] = np.round(top_10[['MAPE', 'R2', 'ADJR2']], 4).applymap(to_percentage)
587
+ top_10_table = top_10[['Rank', 'Model_iteration', 'MAPE', 'ADJR2', 'R2']]
588
+ # top_10_table.columns=[['Rank','Model Iteration Index','MAPE','Adjusted R2','R2']]
589
+ gd = GridOptionsBuilder.from_dataframe(top_10_table)
590
+ gd.configure_pagination(enabled=True)
591
+
592
+ gd.configure_selection(
593
+ use_checkbox=True,
594
+ selection_mode="single",
595
+ pre_select_all_rows=False,
596
+ pre_selected_rows=[1],
597
+ )
598
+
599
+ gridoptions = gd.build()
600
+
601
+ table = AgGrid(top_10, gridOptions=gridoptions, update_mode=GridUpdateMode.SELECTION_CHANGED)
602
+
603
+ selected_rows = table.selected_rows
604
+ # if st.session_state["selected_rows"] != selected_rows:
605
+ # st.session_state["build_rc_cb"] = False
606
+ st.session_state["selected_rows"] = selected_rows
607
+ if 'Model' not in st.session_state:
608
+ st.session_state['Model'] = {}
609
+
610
+ # Section 6 - Display Results
611
+
612
+ if len(selected_rows) > 0:
613
+ st.header('2.1 Results Summary')
614
+
615
+ model_object = data[data['Model_iteration'] == selected_rows[0]['Model_iteration']]['Model_object']
616
+ features_set = data[data['Model_iteration'] == selected_rows[0]['Model_iteration']]['Feature_set']
617
+
618
+ with open(str(model_object.values[0]), 'rb') as file:
619
+ # print(file)
620
+ model = pickle.load(file)
621
+ st.write(model.summary())
622
+ st.header('2.2 Actual vs. Predicted Plot')
623
+
624
+ if is_panel :
625
+ df = st.session_state['media_data']
626
+ X = df[features_set.values[0]]
627
+ y = df[target_col]
628
+
629
+ ss = MinMaxScaler()
630
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
631
+
632
+ # Sprint2 changes
633
+ X[target_col] = y # new
634
+ X[panel_col] = df[panel_col]
635
+ X[date_col] = date
636
+
637
+ X_train = X.iloc[:8000]
638
+ X_test = X.iloc[8000:].reset_index(drop=True)
639
+ y_train = y.iloc[:8000]
640
+ y_test = y.iloc[8000:].reset_index(drop=True)
641
+
642
+ test_spends = spends_data[8000:] # Sprint3 - test spends for resp curves
643
+ random_eff_df = get_random_effects(media_data, panel_col, model)
644
+ train_pred = model.fittedvalues
645
+ test_pred = mdf_predict(X_test, model, random_eff_df)
646
+ print("__" * 20, test_pred.isna().sum())
647
+
648
+ else :
649
+ df = st.session_state['media_data']
650
+ X = df[features_set.values[0]]
651
+ y = df[target_col]
652
+
653
+ ss = MinMaxScaler()
654
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
655
+ X = sm.add_constant(X)
656
+
657
+ X[date_col] = date
658
+
659
+ X_train = X.iloc[:130]
660
+ X_test = X.iloc[130:].reset_index(drop=True)
661
+ y_train = y.iloc[:130]
662
+ y_test = y.iloc[130:].reset_index(drop=True)
663
+
664
+ test_spends = spends_data[130:] # Sprint3 - test spends for resp curves
665
+ train_pred = model.predict(X_train[features_set.values[0]+['const']])
666
+ test_pred = model.predict(X_test[features_set.values[0]+['const']])
667
+
668
+
669
+ # save x test to test - srishti
670
+ x_test_to_save = X_test.copy()
671
+ x_test_to_save['Actuals'] = y_test
672
+ x_test_to_save['Predictions'] = test_pred
673
+
674
+ x_train_to_save = X_train.copy()
675
+ x_train_to_save['Actuals'] = y_train
676
+ x_train_to_save['Predictions'] = train_pred
677
+
678
+ x_train_to_save.to_csv('Test/x_train_to_save.csv', index=False)
679
+ x_test_to_save.to_csv('Test/x_test_to_save.csv', index=False)
680
+
681
+ st.session_state['X'] = X_train
682
+ st.session_state['features_set'] = features_set.values[0]
683
+ print("**" * 20, "selected model features : ", features_set.values[0])
684
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train, train_pred,
685
+ model, target_column=sel_target_col,
686
+ is_panel=is_panel) # Sprint2
687
+
688
+ st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
689
+
690
+ st.markdown('## 2.3 Residual Analysis')
691
+ columns = st.columns(2)
692
+ with columns[0]:
693
+ fig = plot_residual_predicted(y_train, train_pred, X_train) # Sprint2
694
+ st.plotly_chart(fig)
695
+
696
+ with columns[1]:
697
+ st.empty()
698
+ fig = qqplot(y_train, train_pred) # Sprint2
699
+ st.plotly_chart(fig)
700
+
701
+ with columns[0]:
702
+ fig = residual_distribution(y_train, train_pred) # Sprint2
703
+ st.pyplot(fig)
704
+
705
+ vif_data = pd.DataFrame()
706
+ # X=X.drop('const',axis=1)
707
+ X_train_orig = X_train.copy() # Sprint2 -- creating a copy of xtrain. Later deleting panel, target & date from xtrain
708
+ del_col_list = list(set([target_col, panel_col, date_col]).intersection(list(X_train.columns)))
709
+ X_train.drop(columns=del_col_list, inplace=True) # Sprint2
710
+
711
+ vif_data["Variable"] = X_train.columns
712
+ vif_data["VIF"] = [variance_inflation_factor(X_train.values, i) for i in range(X_train.shape[1])]
713
+ vif_data.sort_values(by=['VIF'], ascending=False, inplace=True)
714
+ vif_data = np.round(vif_data)
715
+ vif_data['VIF'] = vif_data['VIF'].astype(float)
716
+ st.header('2.4 Variance Inflation Factor (VIF)')
717
+ # st.dataframe(vif_data)
718
+ color_mapping = {
719
+ 'darkgreen': (vif_data['VIF'] < 3),
720
+ 'orange': (vif_data['VIF'] >= 3) & (vif_data['VIF'] <= 10),
721
+ 'darkred': (vif_data['VIF'] > 10)
722
+ }
723
+
724
+ # Create a horizontal bar plot
725
+ fig, ax = plt.subplots()
726
+ fig.set_figwidth(10) # Adjust the width of the figure as needed
727
+
728
+ # Sort the bars by descending VIF values
729
+ vif_data = vif_data.sort_values(by='VIF', ascending=False)
730
+
731
+ # Iterate through the color mapping and plot bars with corresponding colors
732
+ for color, condition in color_mapping.items():
733
+ subset = vif_data[condition]
734
+ bars = ax.barh(subset["Variable"], subset["VIF"], color=color, label=color)
735
+
736
+ # Add text annotations on top of the bars
737
+ for bar in bars:
738
+ width = bar.get_width()
739
+ ax.annotate(f'{width:}', xy=(width, bar.get_y() + bar.get_height() / 2), xytext=(5, 0),
740
+ textcoords='offset points', va='center')
741
+
742
+ # Customize the plot
743
+ ax.set_xlabel('VIF Values')
744
+ # ax.set_title('2.4 Variance Inflation Factor (VIF)')
745
+ # ax.legend(loc='upper right')
746
+
747
+ # Display the plot in Streamlit
748
+ st.pyplot(fig)
749
+
750
+ with st.expander('Results Summary Test data'):
751
+ # ss = MinMaxScaler()
752
+ # X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.columns)
753
+ st.header('2.2 Actual vs. Predicted Plot')
754
+
755
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_test[date_col], y_test,
756
+ test_pred, model,
757
+ target_column=sel_target_col,
758
+ is_panel=is_panel) # Sprint2
759
+
760
+ st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
761
+
762
+ st.markdown('## 2.3 Residual Analysis')
763
+ columns = st.columns(2)
764
+ with columns[0]:
765
+ fig = plot_residual_predicted(y, test_pred, X_test) # Sprint2
766
+ st.plotly_chart(fig)
767
+
768
+ with columns[1]:
769
+ st.empty()
770
+ fig = qqplot(y, test_pred) # Sprint2
771
+ st.plotly_chart(fig)
772
+
773
+ with columns[0]:
774
+ fig = residual_distribution(y, test_pred) # Sprint2
775
+ st.pyplot(fig)
776
+
777
+ value = False
778
+ save_button_model = st.checkbox('Save this model to tune', key='build_rc_cb') # , on_click=set_save())
779
+
780
+ if save_button_model:
781
+ mod_name = st.text_input('Enter model name')
782
+ if len(mod_name) > 0:
783
+ mod_name = mod_name + "__" + target_col # Sprint4 - adding target col to model name
784
+ if is_panel :
785
+ pred_train= model.fittedvalues
786
+ pred_test= mdf_predict(X_test, model, random_eff_df)
787
+ else :
788
+ st.session_state['features_set'] = st.session_state['features_set'] + ['const']
789
+ pred_train= model.predict(X_train_orig[st.session_state['features_set']])
790
+ pred_test= model.predict(X_test[st.session_state['features_set']])
791
+
792
+ st.session_state['Model'][mod_name] = {"Model_object": model,
793
+ 'feature_set': st.session_state['features_set'],
794
+ 'X_train': X_train_orig,
795
+ 'X_test': X_test,
796
+ 'y_train': y_train,
797
+ 'y_test': y_test,
798
+ 'pred_train':pred_train,
799
+ 'pred_test': pred_test
800
+ }
801
+ st.session_state['X_train'] = X_train_orig
802
+ # st.session_state['X_test'] = X_test
803
+ # st.session_state['y_train'] = y_train
804
+ # st.session_state['y_test'] = y_test
805
+ st.session_state['X_test_spends'] = test_spends
806
+ # st.session_state['base_model'] = model
807
+ # st.session_state['base_model_feature_set'] = st.session_state['features_set']
808
+ st.session_state['saved_model_names'].append(mod_name)
809
+ # Sprint3 additions
810
+ if is_panel :
811
+ random_eff_df = get_random_effects(media_data, panel_col, model)
812
+ st.session_state['random_effects'] = random_eff_df
813
+
814
+ # st.session_state['pred_train'] = model.fittedvalues
815
+ # st.session_state['pred_test'] = mdf_predict(X_test, model, random_eff_df)
816
+ # # End of Sprint3 additions
817
+
818
+ with open("best_models.pkl", "wb") as f:
819
+ pickle.dump(st.session_state['Model'], f)
820
+ st.success(mod_name + ' model saved! Proceed to the next page to tune the model')
821
+ urm = st.session_state['used_response_metrics']
822
+ urm.append(sel_target_col)
823
+ st.session_state['used_response_metrics'] = list(set(urm))
824
+ mod_name = ""
825
+ # Sprint4 - add the formatted name of the target col to used resp metrics
826
+ value = False
pages/4_Saved_Model_Results.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import plotly.express as px
2
+ import numpy as np
3
+ import plotly.graph_objects as go
4
+ import streamlit as st
5
+ import pandas as pd
6
+ import statsmodels.api as sm
7
+ from sklearn.metrics import mean_absolute_percentage_error
8
+ import sys
9
+ import os
10
+ from utilities import set_header, load_local_css, load_authenticator
11
+ import seaborn as sns
12
+ import matplotlib.pyplot as plt
13
+ import sweetviz as sv
14
+ import tempfile
15
+ from sklearn.preprocessing import MinMaxScaler
16
+ from st_aggrid import AgGrid
17
+ from st_aggrid import GridOptionsBuilder, GridUpdateMode
18
+ from st_aggrid import GridOptionsBuilder
19
+ import sys
20
+ import re
21
+
22
+ sys.setrecursionlimit(10**6)
23
+
24
+ original_stdout = sys.stdout
25
+ sys.stdout = open("temp_stdout.txt", "w")
26
+ sys.stdout.close()
27
+ sys.stdout = original_stdout
28
+
29
+ st.set_page_config(layout="wide")
30
+ load_local_css("styles.css")
31
+ set_header()
32
+
33
+ for k, v in st.session_state.items():
34
+ if k not in ["logout", "login", "config"] and not k.startswith("FormSubmitter"):
35
+ st.session_state[k] = v
36
+
37
+ authenticator = st.session_state.get("authenticator")
38
+ if authenticator is None:
39
+ authenticator = load_authenticator()
40
+
41
+ name, authentication_status, username = authenticator.login("Login", "main")
42
+ auth_status = st.session_state.get("authentication_status")
43
+
44
+ if auth_status == True:
45
+ is_state_initiaized = st.session_state.get("initialized", False)
46
+ if not is_state_initiaized:
47
+ a = 1
48
+
49
+ def plot_residual_predicted(actual, predicted, df_):
50
+ df_["Residuals"] = actual - pd.Series(predicted)
51
+ df_["StdResidual"] = (df_["Residuals"] - df_["Residuals"].mean()) / df_[
52
+ "Residuals"
53
+ ].std()
54
+
55
+ # Create a Plotly scatter plot
56
+ fig = px.scatter(
57
+ df_,
58
+ x=predicted,
59
+ y="StdResidual",
60
+ opacity=0.5,
61
+ color_discrete_sequence=["#11B6BD"],
62
+ )
63
+
64
+ # Add horizontal lines
65
+ fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
66
+ fig.add_hline(y=2, line_color="red")
67
+ fig.add_hline(y=-2, line_color="red")
68
+
69
+ fig.update_xaxes(title="Predicted")
70
+ fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")
71
+
72
+ # Set the same width and height for both figures
73
+ fig.update_layout(
74
+ title="Residuals over Predicted Values",
75
+ autosize=False,
76
+ width=600,
77
+ height=400,
78
+ )
79
+
80
+ return fig
81
+
82
+ def residual_distribution(actual, predicted):
83
+ Residuals = actual - pd.Series(predicted)
84
+
85
+ # Create a Seaborn distribution plot
86
+ sns.set(style="whitegrid")
87
+ plt.figure(figsize=(6, 4))
88
+ sns.histplot(Residuals, kde=True, color="#11B6BD")
89
+
90
+ plt.title(" Distribution of Residuals")
91
+ plt.xlabel("Residuals")
92
+ plt.ylabel("Probability Density")
93
+
94
+ return plt
95
+
96
+ def qqplot(actual, predicted):
97
+ Residuals = actual - pd.Series(predicted)
98
+ Residuals = pd.Series(Residuals)
99
+ Resud_std = (Residuals - Residuals.mean()) / Residuals.std()
100
+
101
+ # Create a QQ plot using Plotly with custom colors
102
+ fig = go.Figure()
103
+ fig.add_trace(
104
+ go.Scatter(
105
+ x=sm.ProbPlot(Resud_std).theoretical_quantiles,
106
+ y=sm.ProbPlot(Resud_std).sample_quantiles,
107
+ mode="markers",
108
+ marker=dict(size=5, color="#11B6BD"),
109
+ name="QQ Plot",
110
+ )
111
+ )
112
+
113
+ # Add the 45-degree reference line
114
+ diagonal_line = go.Scatter(
115
+ x=[-2, 2], # Adjust the x values as needed to fit the range of your data
116
+ y=[-2, 2], # Adjust the y values accordingly
117
+ mode="lines",
118
+ line=dict(color="red"), # Customize the line color and style
119
+ name=" ",
120
+ )
121
+ fig.add_trace(diagonal_line)
122
+
123
+ # Customize the layout
124
+ fig.update_layout(
125
+ title="QQ Plot of Residuals",
126
+ title_x=0.5,
127
+ autosize=False,
128
+ width=600,
129
+ height=400,
130
+ xaxis_title="Theoretical Quantiles",
131
+ yaxis_title="Sample Quantiles",
132
+ )
133
+
134
+ return fig
135
+
136
+ def plot_actual_vs_predicted(date, y, predicted_values, model):
137
+
138
+ fig = go.Figure()
139
+
140
+ fig.add_trace(
141
+ go.Scatter(
142
+ x=date, y=y, mode="lines", name="Actual", line=dict(color="blue")
143
+ )
144
+ )
145
+ fig.add_trace(
146
+ go.Scatter(
147
+ x=date,
148
+ y=predicted_values,
149
+ mode="lines",
150
+ name="Predicted",
151
+ line=dict(color="orange"),
152
+ )
153
+ )
154
+
155
+ # Calculate MAPE
156
+ mape = mean_absolute_percentage_error(y, predicted_values) * 100
157
+
158
+ # Calculate R-squared
159
+ rss = np.sum((y - predicted_values) ** 2)
160
+ tss = np.sum((y - np.mean(y)) ** 2)
161
+ r_squared = 1 - (rss / tss)
162
+
163
+ # Get the number of predictors
164
+ num_predictors = model.df_model
165
+
166
+ # Get the number of samples
167
+ num_samples = len(y)
168
+
169
+ # Calculate Adjusted R-squared
170
+ adj_r_squared = 1 - (
171
+ (1 - r_squared) * ((num_samples - 1) / (num_samples - num_predictors - 1))
172
+ )
173
+ metrics_table = pd.DataFrame(
174
+ {
175
+ "Metric": ["MAPE", "R-squared", "AdjR-squared"],
176
+ "Value": [mape, r_squared, adj_r_squared],
177
+ }
178
+ )
179
+ fig.update_layout(
180
+ xaxis=dict(title="Date"),
181
+ yaxis=dict(title="Value"),
182
+ title=f"MAPE : {mape:.2f}%, AdjR2: {adj_r_squared:.2f}",
183
+ xaxis_tickangle=-30,
184
+ )
185
+
186
+ return metrics_table, fig
187
+
188
+ def contributions(X, model):
189
+ X1 = X.copy()
190
+ for j, col in enumerate(X1.columns):
191
+ X1[col] = X1[col] * model.params.values[j]
192
+
193
+ return np.round(
194
+ (X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2
195
+ )
196
+
197
+ transformed_data = pd.read_csv("transformed_data.csv")
198
+
199
+ # hard coded for now, need to get features set from model
200
+
201
+ feature_set_dct = {
202
+ "app_installs_-_appsflyer": [
203
+ "paid_search_clicks",
204
+ "fb:_level_achieved_-_tier_1_impressions_lag2",
205
+ "fb:_level_achieved_-_tier_2_clicks_lag2",
206
+ "paid_social_others_impressions_adst.1",
207
+ "ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag2",
208
+ "digital_tactic_others_clicks",
209
+ "kwai_clicks_adst.3",
210
+ "programmaticclicks",
211
+ "indicacao_clicks_adst.1",
212
+ "infleux_clicks_adst.4",
213
+ "influencer_clicks",
214
+ ],
215
+ "account_requests_-_appsflyer": [
216
+ "paid_search_impressions",
217
+ "fb:_level_achieved_-_tier_1_clicks_adst.1",
218
+ "fb:_level_achieved_-_tier_2_clicks_adst.1",
219
+ "paid_social_others_clicks_lag2",
220
+ "ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag5_adst.1",
221
+ "digital_tactic_others_clicks_adst.1",
222
+ "kwai_clicks_adst.2",
223
+ "programmaticimpressions_lag4_adst.1",
224
+ "indicacao_clicks",
225
+ "infleux_clicks_adst.2",
226
+ "influencer_clicks",
227
+ ],
228
+ "total_approved_accounts_-_appsflyer": [
229
+ "paid_search_clicks",
230
+ "fb:_level_achieved_-_tier_1_impressions_lag2_adst.1",
231
+ "fb:_level_achieved_-_tier_2_impressions_lag2",
232
+ "paid_social_others_clicks_lag2_adst.2",
233
+ "ga_app:_will_and_cid_pequena_baixo_risco_impressions_lag4",
234
+ "digital_tactic_others_clicks",
235
+ "kwai_impressions_adst.2",
236
+ "programmaticclicks_adst.5",
237
+ "indicacao_clicks_adst.1",
238
+ "infleux_clicks_adst.3",
239
+ "influencer_clicks",
240
+ ],
241
+ "total_approved_accounts_-_revenue": [
242
+ "paid_search_impressions_adst.5",
243
+ "kwai_impressions_lag2_adst.3",
244
+ "indicacao_clicks_adst.3",
245
+ "infleux_clicks_adst.3",
246
+ "programmaticclicks_adst.4",
247
+ "influencer_clicks_adst.3",
248
+ "fb:_level_achieved_-_tier_1_impressions_adst.2",
249
+ "fb:_level_achieved_-_tier_2_impressions_lag3_adst.5",
250
+ "paid_social_others_impressions_adst.3",
251
+ "ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag3_adst.5",
252
+ "digital_tactic_others_clicks_adst.2",
253
+ ],
254
+ }
255
+
256
+ # """ the above part should be modified so that we are fetching features set from the saved model"""
257
+
258
+ def contributions(X, model, target):
259
+ X1 = X.copy()
260
+ for j, col in enumerate(X1.columns):
261
+ X1[col] = X1[col] * model.params.values[j]
262
+
263
+ contributions = np.round(
264
+ (X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2
265
+ )
266
+ contributions = (
267
+ pd.DataFrame(contributions, columns=target)
268
+ .reset_index()
269
+ .rename(columns={"index": "Channel"})
270
+ )
271
+ contributions["Channel"] = [
272
+ re.split(r"_imp|_cli", col)[0] for col in contributions["Channel"]
273
+ ]
274
+
275
+ return contributions
276
+
277
+ def model_fit(features_set, target):
278
+ X = transformed_data[features_set]
279
+ y = transformed_data[target]
280
+ ss = MinMaxScaler()
281
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
282
+ X = sm.add_constant(X)
283
+ X_train = X.iloc[:150]
284
+ X_test = X.iloc[150:]
285
+ y_train = y.iloc[:150]
286
+ y_test = y.iloc[150:]
287
+ model = sm.OLS(y_train, X_train).fit()
288
+ predicted_values_train = model.predict(X_train)
289
+ r2 = model.rsquared
290
+ adjr2 = model.rsquared_adj
291
+ train_mape = mean_absolute_percentage_error(y_train, predicted_values_train)
292
+ test_mape = mean_absolute_percentage_error(y_test, model.predict(X_test))
293
+ summary = model.summary()
294
+ train_contributions = contributions(X_train, model, [target])
295
+ return (
296
+ pd.DataFrame(
297
+ {
298
+ "Model": target,
299
+ "R2": np.round(r2, 2),
300
+ "ADJr2": np.round(adjr2, 2),
301
+ "Train Mape": np.round(train_mape, 2),
302
+ "Test Mape": np.round(test_mape, 2),
303
+ "Summary": summary,
304
+ "Model_object": model,
305
+ },
306
+ index=[0],
307
+ ),
308
+ train_contributions,
309
+ )
310
+
311
+ metrics_table = pd.DataFrame()
312
+
313
+ if "contribution_df" not in st.session_state:
314
+ st.session_state["contribution_df"] = pd.DataFrame()
315
+
316
+ for target, feature_set in feature_set_dct.items():
317
+ metrics_table = pd.concat(
318
+ [metrics_table, model_fit(features_set=feature_set, target=target)[0]]
319
+ )
320
+ if st.session_state["contribution_df"].empty:
321
+ st.session_state["contribution_df"] = model_fit(
322
+ features_set=feature_set, target=target
323
+ )[1]
324
+ else:
325
+ st.session_state["contribution_df"] = pd.merge(
326
+ st.session_state["contribution_df"],
327
+ model_fit(features_set=feature_set, target=target)[1],
328
+ )
329
+
330
+ # st.write(st.session_state["contribution_df"])
331
+
332
+ metrics_table.reset_index(drop=True, inplace=True)
333
+
334
+ eda_columns = st.columns(2)
335
+ with eda_columns[1]:
336
+ eda = st.button(
337
+ "Generate EDA Report",
338
+ help="Click to generate a bivariate report for the selected response metric from the table below.",
339
+ )
340
+
341
+ # st.markdown('Model Metrics')
342
+
343
+ st.title("Contribution Overview")
344
+
345
+ contribution_selections = st.multiselect(
346
+ "Select the models to compare contributions",
347
+ [
348
+ col
349
+ for col in st.session_state["contribution_df"].columns
350
+ if col.lower() != "channel"
351
+ ],
352
+ default=[
353
+ col
354
+ for col in st.session_state["contribution_df"].columns
355
+ if col.lower() != "channel"
356
+ ][-1],
357
+ )
358
+ trace_data = []
359
+
360
+ for selection in contribution_selections:
361
+
362
+ trace = go.Bar(
363
+ x=st.session_state["contribution_df"]["Channel"],
364
+ y=st.session_state["contribution_df"][selection],
365
+ name=selection,
366
+ text=np.round(st.session_state["contribution_df"][selection], 0)
367
+ .astype(int)
368
+ .astype(str)
369
+ + "%",
370
+ textposition="outside",
371
+ )
372
+ trace_data.append(trace)
373
+
374
+ layout = go.Layout(
375
+ title="Metrics Contribution by Channel",
376
+ xaxis=dict(title="Channel Name"),
377
+ yaxis=dict(title="Metrics Contribution"),
378
+ barmode="group",
379
+ )
380
+ fig = go.Figure(data=trace_data, layout=layout)
381
+ st.plotly_chart(fig, use_container_width=True)
382
+
383
+ ############################################ Waterfall Chart ############################################
384
+ # import plotly.graph_objects as go
385
+
386
+ # # Initialize a Plotly figure
387
+ # fig = go.Figure()
388
+
389
+ # for selection in contribution_selections:
390
+ # # Ensure y_values are numeric
391
+ # y_values = st.session_state["contribution_df"][selection].values.astype(float)
392
+
393
+ # # Generating text labels for each bar, ensuring operations are compatible with string formats
394
+ # text_values = [f"{val}%" for val in np.round(y_values, 0).astype(int)]
395
+
396
+ # fig.add_trace(
397
+ # go.Waterfall(
398
+ # name=selection,
399
+ # orientation="v",
400
+ # measure=["relative"]
401
+ # * len(y_values), # Adjust if you have absolute values at certain points
402
+ # x=st.session_state["contribution_df"]["Channel"].tolist(),
403
+ # text=text_values,
404
+ # textposition="outside",
405
+ # y=y_values,
406
+ # increasing={"marker": {"color": "green"}},
407
+ # decreasing={"marker": {"color": "red"}},
408
+ # totals={"marker": {"color": "blue"}},
409
+ # )
410
+ # )
411
+
412
+ # fig.update_layout(
413
+ # title="Metrics Contribution by Channel",
414
+ # xaxis={"title": "Channel Name"},
415
+ # yaxis={"title": "Metrics Contribution"},
416
+ # height=600,
417
+ # )
418
+
419
+ # # Displaying the waterfall chart in Streamlit
420
+ # st.plotly_chart(fig, use_container_width=True)
421
+
422
+ import plotly.graph_objects as go
423
+
424
+ # Initialize a Plotly figure
425
+ fig = go.Figure()
426
+
427
+ for selection in contribution_selections:
428
+ # Ensure contributions are numeric
429
+ contributions = (
430
+ st.session_state["contribution_df"][selection].values.astype(float).tolist()
431
+ )
432
+ channel_names = st.session_state["contribution_df"]["Channel"].tolist()
433
+
434
+ display_name, display_contribution, base_contribution = [], [], 0
435
+ for channel_name, contribution in zip(channel_names, contributions):
436
+ if channel_name != "const":
437
+ display_name.append(channel_name)
438
+ display_contribution.append(contribution)
439
+ else:
440
+ base_contribution = contribution
441
+
442
+ display_name = ["Base Sales"] + display_name
443
+ display_contribution = [base_contribution] + display_contribution
444
+
445
+ # Generating text labels for each bar, ensuring operations are compatible with string formats
446
+ text_values = [
447
+ f"{val}%" for val in np.round(display_contribution, 0).astype(int)
448
+ ]
449
+
450
+ fig.add_trace(
451
+ go.Waterfall(
452
+ orientation="v",
453
+ measure=["relative"]
454
+ * len(
455
+ display_contribution
456
+ ), # Adjust if you have absolute values at certain points
457
+ x=display_name,
458
+ text=text_values,
459
+ textposition="outside",
460
+ y=display_contribution,
461
+ increasing={"marker": {"color": "green"}},
462
+ decreasing={"marker": {"color": "red"}},
463
+ totals={"marker": {"color": "blue"}},
464
+ )
465
+ )
466
+
467
+ fig.update_layout(
468
+ title="Metrics Contribution by Channel",
469
+ xaxis={"title": "Channel Name"},
470
+ yaxis={"title": "Metrics Contribution"},
471
+ height=600,
472
+ )
473
+
474
+ # Displaying the waterfall chart in Streamlit
475
+ st.plotly_chart(fig, use_container_width=True)
476
+
477
+ ############################################ Waterfall Chart ############################################
478
+
479
+ st.title("Analysis of Models Result")
480
+ # st.markdown()
481
+ gd_table = metrics_table.iloc[:, :-2]
482
+
483
+ gd = GridOptionsBuilder.from_dataframe(gd_table)
484
+ # gd.configure_pagination(enabled=True)
485
+ gd.configure_selection(
486
+ use_checkbox=True,
487
+ selection_mode="single",
488
+ pre_select_all_rows=False,
489
+ pre_selected_rows=[1],
490
+ )
491
+
492
+ gridoptions = gd.build()
493
+ table = AgGrid(
494
+ gd_table, gridOptions=gridoptions, fit_columns_on_grid_load=True, height=200
495
+ )
496
+ # table=metrics_table.iloc[:,:-2]
497
+ # table.insert(0, "Select", False)
498
+ # selection_table=st.data_editor(table,column_config={"Select": st.column_config.CheckboxColumn(required=True)})
499
+
500
+ if len(table.selected_rows) == 0:
501
+ st.warning(
502
+ "Click on the checkbox to view comprehensive results of the selected model."
503
+ )
504
+ st.stop()
505
+ else:
506
+ target_column = table.selected_rows[0]["Model"]
507
+ feature_set = feature_set_dct[target_column]
508
+
509
+ with eda_columns[1]:
510
+ if eda:
511
+
512
+ def generate_report_with_target(channel_data, target_feature):
513
+ report = sv.analyze(
514
+ [channel_data, "Dataset"], target_feat=target_feature, verbose=False
515
+ )
516
+ temp_dir = tempfile.mkdtemp()
517
+ report_path = os.path.join(temp_dir, "report.html")
518
+ report.show_html(
519
+ filepath=report_path, open_browser=False
520
+ ) # Generate the report as an HTML file
521
+ return report_path
522
+
523
+ report_data = transformed_data[feature_set]
524
+ report_data[target_column] = transformed_data[target_column]
525
+ report_file = generate_report_with_target(report_data, target_column)
526
+
527
+ if os.path.exists(report_file):
528
+ with open(report_file, "rb") as f:
529
+ st.download_button(
530
+ label="Download EDA Report",
531
+ data=f.read(),
532
+ file_name="report.html",
533
+ mime="text/html",
534
+ )
535
+ else:
536
+ st.warning("Report generation failed. Unable to find the report file.")
537
+
538
+ model = metrics_table[metrics_table["Model"] == target_column]["Model_object"].iloc[
539
+ 0
540
+ ]
541
+ st.header("Model Summary")
542
+ st.write(model.summary())
543
+ X = transformed_data[feature_set]
544
+ ss = MinMaxScaler()
545
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
546
+ X = sm.add_constant(X)
547
+ y = transformed_data[target_column]
548
+ X_train = X.iloc[:150]
549
+ X_test = X.iloc[150:]
550
+ y_train = y.iloc[:150]
551
+ y_test = y.iloc[150:]
552
+ X.index = transformed_data["date"]
553
+ y.index = transformed_data["date"]
554
+
555
+ metrics_table_train, fig_train = plot_actual_vs_predicted(
556
+ X_train.index, y_train, model.predict(X_train), model
557
+ )
558
+ metrics_table_test, fig_test = plot_actual_vs_predicted(
559
+ X_test.index, y_test, model.predict(X_test), model
560
+ )
561
+
562
+ metrics_table_train = metrics_table_train.set_index("Metric").transpose()
563
+ metrics_table_train.index = ["Train"]
564
+ metrics_table_test = metrics_table_test.set_index("Metric").transpose()
565
+ metrics_table_test.index = ["test"]
566
+ metrics_table = np.round(pd.concat([metrics_table_train, metrics_table_test]), 2)
567
+
568
+ st.markdown("Result Overview")
569
+ st.dataframe(np.round(metrics_table, 2), use_container_width=True)
570
+
571
+ st.subheader("Actual vs Predicted Plot Train")
572
+
573
+ st.plotly_chart(fig_train, use_container_width=True)
574
+ st.subheader("Actual vs Predicted Plot Test")
575
+ st.plotly_chart(fig_test, use_container_width=True)
576
+
577
+ st.markdown("## Residual Analysis")
578
+ columns = st.columns(2)
579
+
580
+ Xtrain1 = X_train.copy()
581
+ with columns[0]:
582
+ fig = plot_residual_predicted(y_train, model.predict(Xtrain1), Xtrain1)
583
+ st.plotly_chart(fig)
584
+
585
+ with columns[1]:
586
+ st.empty()
587
+ fig = qqplot(y_train, model.predict(X_train))
588
+ st.plotly_chart(fig)
589
+
590
+ with columns[0]:
591
+ fig = residual_distribution(y_train, model.predict(X_train))
592
+ st.pyplot(fig)
593
+
594
+
595
+ elif auth_status == False:
596
+ st.error("Username/Password is incorrect")
597
+ try:
598
+ username_forgot_pw, email_forgot_password, random_password = (
599
+ authenticator.forgot_password("Forgot password")
600
+ )
601
+ if username_forgot_pw:
602
+ st.success("New password sent securely")
603
+ # Random password to be transferred to the user securely
604
+ elif username_forgot_pw == False:
605
+ st.error("Username not found")
606
+ except Exception as e:
607
+ st.error(e)
pages/5_Model_Tuning_with_panel.py ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ MMO Build Sprint 3
3
+ date :
4
+ changes : capability to tune MixedLM as well as simple LR in the same page
5
+ '''
6
+
7
+ import streamlit as st
8
+ import pandas as pd
9
+ from Eda_functions import format_numbers
10
+ import pickle
11
+ from utilities import set_header, load_local_css
12
+ import statsmodels.api as sm
13
+ import re
14
+ from sklearn.preprocessing import MinMaxScaler
15
+ import matplotlib.pyplot as plt
16
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
17
+
18
+ st.set_option('deprecation.showPyplotGlobalUse', False)
19
+ import statsmodels.formula.api as smf
20
+ from Data_prep_functions import *
21
+
22
+ # for i in ["model_tuned", "X_train_tuned", "X_test_tuned", "tuned_model_features", "tuned_model", "tuned_model_dict"] :
23
+
24
+ st.set_page_config(
25
+ page_title="Model Tuning",
26
+ page_icon=":shark:",
27
+ layout="wide",
28
+ initial_sidebar_state='collapsed'
29
+ )
30
+ load_local_css('styles.css')
31
+ set_header()
32
+
33
+ # Sprint3
34
+ # is_panel = st.session_state['is_panel']
35
+ # panel_col = 'markets' # set the panel column
36
+ date_col = 'date'
37
+
38
+ panel_col = [col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['bin_dict']['Panel Level 1'] ] [0]# set the panel column
39
+ is_panel = True if len(panel_col)>0 else False
40
+
41
+
42
+ # flag indicating there is not tuned model till now
43
+
44
+ # Sprint4 - model tuned dict
45
+ if 'Model_Tuned' not in st.session_state:
46
+ st.session_state['Model_Tuned'] = {}
47
+
48
+ st.title('1. Model Tuning')
49
+ # st.write(st.session_state['base_model_feature_set'])
50
+
51
+ if "X_train" not in st.session_state:
52
+ st.error(
53
+ "Oops! It seems there are no saved models available. Please build and save a model from the previous page to proceed.")
54
+ st.stop()
55
+ # X_train=st.session_state['X_train']
56
+ # X_test=st.session_state['X_test']
57
+ # y_train=st.session_state['y_train']
58
+ # y_test=st.session_state['y_test']
59
+ # df=st.session_state['media_data']
60
+
61
+
62
+ # st.write(X_train.columns)
63
+ # st.write(X_test.columns)
64
+ if "is_tuned_model" not in st.session_state:
65
+ st.session_state["is_tuned_model"] = {}
66
+ # Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
67
+ if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics'] != []:
68
+ sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
69
+ target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
70
+
71
+ else:
72
+ sel_target_col = 'Total Approved Accounts - Revenue'
73
+ target_col = 'total_approved_accounts_revenue'
74
+
75
+ # Sprint4 - Look through all saved models, only show saved models of the sel resp metric (target_col)
76
+ saved_models = st.session_state['saved_model_names']
77
+ required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
78
+ sel_model = st.selectbox("Select the model to tune", required_saved_models)
79
+
80
+ with open("best_models.pkl", 'rb') as file:
81
+ model_dict = pickle.load(file)
82
+
83
+ sel_model_dict = model_dict[sel_model + "__" + target_col] # Sprint4 - get the model obj of the selected model
84
+ # st.write(sel_model_dict)
85
+
86
+ X_train = sel_model_dict['X_train']
87
+ X_test = sel_model_dict['X_test']
88
+ y_train = sel_model_dict['y_train']
89
+ y_test = sel_model_dict['y_test']
90
+ df = st.session_state['media_data']
91
+
92
+ if 'selected_model' not in st.session_state:
93
+ st.session_state['selected_model'] = 0
94
+
95
+ # st.write(model_dict[st.session_state["selected_model"]]['X_train'].columns)
96
+
97
+ st.markdown('### 1.1 Event Flags')
98
+ st.markdown('Helps in quantifying the impact of specific occurrences of events')
99
+ with st.expander('Apply Event Flags'):
100
+ # st.session_state["selected_model"]=st.selectbox('Select Model to apply flags',model_dict.keys())
101
+ model = sel_model_dict['Model_object']
102
+ date = st.session_state['date']
103
+ date = pd.to_datetime(date)
104
+ X_train = sel_model_dict['X_train']
105
+
106
+ # features_set= model_dict[st.session_state["selected_model"]]['feature_set']
107
+ features_set = sel_model_dict["feature_set"]
108
+
109
+ col = st.columns(3)
110
+ min_date = min(date)
111
+ max_date = max(date)
112
+ with col[0]:
113
+ start_date = st.date_input('Select Start Date', min_date, min_value=min_date, max_value=max_date)
114
+ with col[1]:
115
+ end_date = st.date_input('Select End Date', max_date, min_value=min_date, max_value=max_date)
116
+ with col[2]:
117
+ repeat = st.selectbox('Repeat Annually', ['Yes', 'No'], index=1)
118
+ if repeat == 'Yes':
119
+ repeat = True
120
+ else:
121
+ repeat = False
122
+
123
+ if 'Flags' not in st.session_state:
124
+ st.session_state['Flags'] = {}
125
+ # print("**"*50)
126
+ # print(y_train)
127
+ # print("**"*50)
128
+ # print(model.fittedvalues)
129
+ if is_panel: # Sprint3
130
+ met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train,
131
+ model.fittedvalues, model,
132
+ target_column=sel_target_col,
133
+ flag=(start_date, end_date),
134
+ repeat_all_years=repeat, is_panel=True)
135
+ st.plotly_chart(fig_flag, use_container_width=True)
136
+
137
+ # create flag on test
138
+ met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test,
139
+ sel_model_dict['pred_test'], model,
140
+ target_column=sel_target_col,
141
+ flag=(start_date, end_date),
142
+ repeat_all_years=repeat, is_panel=True)
143
+
144
+ else:
145
+ pred_train=model.predict(X_train[features_set])
146
+ met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train, pred_train, model,
147
+ flag=(start_date, end_date), repeat_all_years=repeat,is_panel=False)
148
+ st.plotly_chart(fig_flag, use_container_width=True)
149
+
150
+ pred_test=model.predict(X_test[features_set])
151
+ met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test, pred_test, model,
152
+ flag=(start_date, end_date), repeat_all_years=repeat,is_panel=False)
153
+ flag_name = 'f1_flag'
154
+ flag_name = st.text_input('Enter Flag Name')
155
+ # Sprint4 - add selected target col to flag name
156
+ if st.button('Update flag'):
157
+ st.session_state['Flags'][flag_name + '__'+ target_col] = {}
158
+ st.session_state['Flags'][flag_name + '__'+ target_col]['train'] = line_values
159
+ st.session_state['Flags'][flag_name + '__'+ target_col]['test'] = test_line_values
160
+ # st.write(st.session_state['Flags'][flag_name])
161
+ st.success(f'{flag_name + "__" + target_col} stored')
162
+
163
+ # Sprint4 - only show flag created for the particular target col
164
+ st.write(st.session_state['Flags'].keys() )
165
+ target_model_flags = [f.split("__")[0] for f in st.session_state['Flags'].keys() if f.split("__")[1] == target_col]
166
+ options = list(target_model_flags)
167
+ selected_options = []
168
+ num_columns = 4
169
+ num_rows = -(-len(options) // num_columns)
170
+
171
+ tick = False
172
+ if st.checkbox('Select all'):
173
+ tick = True
174
+ selected_options = []
175
+ for row in range(num_rows):
176
+ cols = st.columns(num_columns)
177
+ for col in cols:
178
+ if options:
179
+ option = options.pop(0)
180
+ selected = col.checkbox(option, value=tick)
181
+ if selected:
182
+ selected_options.append(option)
183
+
184
+ st.markdown('### 1.2 Select Parameters to Apply')
185
+ parameters = st.columns(3)
186
+ with parameters[0]:
187
+ Trend = st.checkbox("**Trend**")
188
+ st.markdown('Helps account for long-term trends or seasonality that could influence advertising effectiveness')
189
+ with parameters[1]:
190
+ week_number = st.checkbox('**Week_number**')
191
+ st.markdown('Assists in detecting and incorporating weekly patterns or seasonality')
192
+ with parameters[2]:
193
+ sine_cosine = st.checkbox('**Sine and Cosine Waves**')
194
+ st.markdown('Helps in capturing cyclical patterns or seasonality in the data')
195
+ #
196
+ # def get_tuned_model():
197
+ # st.session_state['build_tuned_model']=True
198
+
199
+ if st.button('Build model with Selected Parameters and Flags', key='build_tuned_model'):
200
+ new_features = features_set
201
+ st.header('2.1 Results Summary')
202
+ # date=list(df.index)
203
+ # df = df.reset_index(drop=True)
204
+ # st.write(df.head(2))
205
+ # X_train=df[features_set]
206
+ ss = MinMaxScaler()
207
+ if is_panel == True:
208
+ X_train_tuned = X_train[features_set]
209
+ # X_train_tuned = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
210
+ X_train_tuned[target_col] = X_train[target_col]
211
+ X_train_tuned[date_col] = X_train[date_col]
212
+ X_train_tuned[panel_col] = X_train[panel_col]
213
+
214
+ X_test_tuned = X_test[features_set]
215
+ # X_test_tuned = pd.DataFrame(ss.transform(X), columns=X.columns)
216
+ X_test_tuned[target_col] = X_test[target_col]
217
+ X_test_tuned[date_col] = X_test[date_col]
218
+ X_test_tuned[panel_col] = X_test[panel_col]
219
+
220
+ else:
221
+ X_train_tuned = X_train[features_set]
222
+ # X_train_tuned = pd.DataFrame(ss.fit_transform(X_train_tuned), columns=X_train_tuned.columns)
223
+
224
+ X_test_tuned = X_test[features_set]
225
+ # X_test_tuned = pd.DataFrame(ss.transform(X_test_tuned), columns=X_test_tuned.columns)
226
+
227
+ for flag in selected_options:
228
+ # Spirnt4 - added target_col in flag name
229
+ X_train_tuned[flag] = st.session_state['Flags'][flag + "__" + target_col]['train']
230
+ X_test_tuned[flag] = st.session_state['Flags'][flag + "__" + target_col]['test']
231
+
232
+ # test
233
+ # X_train_tuned.to_csv("Test/X_train_tuned_flag.csv",index=False)
234
+ # X_test_tuned.to_csv("Test/X_test_tuned_flag.csv",index=False)
235
+
236
+ # print("()()"*20,flag, len(st.session_state['Flags'][flag]))
237
+ if Trend:
238
+ # Sprint3 - group by panel, calculate trend of each panel spearately. Add trend to new feature set
239
+ if is_panel:
240
+ newdata = pd.DataFrame()
241
+ panel_wise_end_point_train = {}
242
+ for panel, groupdf in X_train_tuned.groupby(panel_col):
243
+ groupdf.sort_values(date_col, inplace=True)
244
+ groupdf['Trend'] = np.arange(1, len(groupdf) + 1, 1)
245
+ newdata = pd.concat([newdata, groupdf])
246
+ panel_wise_end_point_train[panel] = len(groupdf)
247
+ X_train_tuned = newdata.copy()
248
+
249
+ test_newdata = pd.DataFrame()
250
+ for panel, test_groupdf in X_test_tuned.groupby(panel_col):
251
+ test_groupdf.sort_values(date_col, inplace=True)
252
+ start = panel_wise_end_point_train[panel] + 1
253
+ end = start + len(test_groupdf) # should be + 1? - Sprint4
254
+ # print("??"*20, panel, len(test_groupdf), len(np.arange(start, end, 1)), start)
255
+ test_groupdf['Trend'] = np.arange(start, end, 1)
256
+ test_newdata = pd.concat([test_newdata, test_groupdf])
257
+ X_test_tuned = test_newdata.copy()
258
+
259
+ new_features = new_features + ['Trend']
260
+
261
+ else:
262
+ X_train_tuned['Trend'] = np.arange(1, len(X_train_tuned) + 1, 1)
263
+ X_test_tuned['Trend'] = np.arange(len(X_train_tuned) + 1, len(X_train_tuned) + len(X_test_tuned) + 1, 1)
264
+ new_features = new_features + ['Trend']
265
+
266
+
267
+ if week_number:
268
+ # Sprint3 - create weeknumber from date column in xtrain tuned. add week num to new feature set
269
+ if is_panel:
270
+ X_train_tuned[date_col] = pd.to_datetime(X_train_tuned[date_col])
271
+ X_train_tuned['Week_number'] = X_train_tuned[date_col].dt.day_of_week
272
+ if X_train_tuned['Week_number'].nunique() == 1:
273
+ st.write("All dates in the data are of the same week day. Hence Week number can't be used.")
274
+ else:
275
+ X_test_tuned[date_col] = pd.to_datetime(X_test_tuned[date_col])
276
+ X_test_tuned['Week_number'] = X_test_tuned[date_col].dt.day_of_week
277
+ new_features = new_features + ['Week_number']
278
+
279
+ else:
280
+ date = pd.to_datetime(date.values)
281
+ X_train_tuned['Week_number'] = pd.to_datetime(X_train[date_col]).dt.day_of_week
282
+ X_test_tuned['Week_number'] = pd.to_datetime(X_test[date_col]).dt.day_of_week
283
+ new_features = new_features + ['Week_number']
284
+
285
+ if sine_cosine:
286
+ # Sprint3 - create panel wise sine cosine waves in xtrain tuned. add to new feature set
287
+ if is_panel:
288
+ new_features = new_features + ['sine_wave', 'cosine_wave']
289
+ newdata = pd.DataFrame()
290
+ newdata_test = pd.DataFrame()
291
+ groups = X_train_tuned.groupby(panel_col)
292
+ frequency = 2 * np.pi / 365 # Adjust the frequency as needed
293
+
294
+ train_panel_wise_end_point = {}
295
+ for panel, groupdf in groups:
296
+ num_samples = len(groupdf)
297
+ train_panel_wise_end_point[panel] = num_samples
298
+ days_since_start = np.arange(num_samples)
299
+ sine_wave = np.sin(frequency * days_since_start)
300
+ cosine_wave = np.cos(frequency * days_since_start)
301
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
302
+ assert len(sine_cosine_df) == len(groupdf)
303
+ # groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
304
+ groupdf['sine_wave'] = sine_wave
305
+ groupdf['cosine_wave'] = cosine_wave
306
+ newdata = pd.concat([newdata, groupdf])
307
+
308
+ X_train_tuned = newdata.copy()
309
+
310
+ test_groups = X_test_tuned.groupby(panel_col)
311
+ for panel, test_groupdf in test_groups:
312
+ num_samples = len(test_groupdf)
313
+ start = train_panel_wise_end_point[panel]
314
+ days_since_start = np.arange(start, start + num_samples, 1)
315
+ # print("##", panel, num_samples, start, len(np.arange(start, start+num_samples, 1)))
316
+ sine_wave = np.sin(frequency * days_since_start)
317
+ cosine_wave = np.cos(frequency * days_since_start)
318
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
319
+ assert len(sine_cosine_df) == len(test_groupdf)
320
+ # groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
321
+ test_groupdf['sine_wave'] = sine_wave
322
+ test_groupdf['cosine_wave'] = cosine_wave
323
+ newdata_test = pd.concat([newdata_test, test_groupdf])
324
+
325
+ X_test_tuned = newdata_test.copy()
326
+
327
+
328
+ else:
329
+ new_features = new_features + ['sine_wave', 'cosine_wave']
330
+
331
+ num_samples = len(X_train_tuned)
332
+ frequency = 2 * np.pi / 365 # Adjust the frequency as needed
333
+ days_since_start = np.arange(num_samples)
334
+ sine_wave = np.sin(frequency * days_since_start)
335
+ cosine_wave = np.cos(frequency * days_since_start)
336
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
337
+ # Concatenate the sine and cosine waves with the scaled X DataFrame
338
+ X_train_tuned = pd.concat([X_train_tuned, sine_cosine_df], axis=1)
339
+
340
+ test_num_samples = len(X_test_tuned)
341
+ start = num_samples
342
+ days_since_start = np.arange(start, start + test_num_samples, 1)
343
+ sine_wave = np.sin(frequency * days_since_start)
344
+ cosine_wave = np.cos(frequency * days_since_start)
345
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
346
+ # Concatenate the sine and cosine waves with the scaled X DataFrame
347
+ X_test_tuned = pd.concat([X_test_tuned, sine_cosine_df], axis=1)
348
+
349
+ # model
350
+ if selected_options:
351
+ new_features = new_features + selected_options
352
+ if is_panel:
353
+ inp_vars_str = " + ".join(new_features)
354
+ new_features=list(set(new_features))
355
+ # X_train_tuned.to_csv("Test/X_train_tuned.csv",index=False)
356
+ # st.write(X_train_tuned[['total_approved_accounts_revenue'] + new_features].dtypes)
357
+ # st.write(X_train_tuned[['total_approved_accounts_revenue', panel_col] + new_features].isna().sum())
358
+ md_str = target_col + " ~ " + inp_vars_str
359
+ md_tuned = smf.mixedlm(md_str,
360
+ data=X_train_tuned[[target_col] + new_features],
361
+ groups=X_train_tuned[panel_col])
362
+ model_tuned = md_tuned.fit()
363
+
364
+ # plot act v pred for original model and tuned model
365
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train,
366
+ model.fittedvalues, model,
367
+ target_column=sel_target_col,
368
+ is_panel=True)
369
+ metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(X_train_tuned[date_col],
370
+ X_train_tuned[target_col],
371
+ model_tuned.fittedvalues,
372
+ model_tuned,
373
+ target_column=sel_target_col,
374
+ is_panel=True)
375
+
376
+ else:
377
+ new_features=list(set(new_features))
378
+ # st.write(new_features)
379
+ model_tuned = sm.OLS(y_train, X_train_tuned[new_features]).fit()
380
+ # st.write(X_train_tuned.columns)
381
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date[:130], y_train,
382
+ model.predict(X_train[features_set]), model,
383
+ target_column=sel_target_col)
384
+ metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(date[:130], y_train,
385
+ model_tuned.predict(
386
+ X_train_tuned),
387
+ model_tuned,
388
+ target_column=sel_target_col)
389
+
390
+ # st.write(metrics_table_tuned)
391
+ mape = np.round(metrics_table.iloc[0, 1], 2)
392
+ r2 = np.round(metrics_table.iloc[1, 1], 2)
393
+ adjr2 = np.round(metrics_table.iloc[2, 1], 2)
394
+
395
+ mape_tuned = np.round(metrics_table_tuned.iloc[0, 1], 2)
396
+ r2_tuned = np.round(metrics_table_tuned.iloc[1, 1], 2)
397
+ adjr2_tuned = np.round(metrics_table_tuned.iloc[2, 1], 2)
398
+
399
+ parameters_ = st.columns(3)
400
+ with parameters_[0]:
401
+ st.metric('R2', r2_tuned, np.round(r2_tuned - r2, 2))
402
+ with parameters_[1]:
403
+ st.metric('Adjusted R2', adjr2_tuned, np.round(adjr2_tuned - adjr2, 2))
404
+ with parameters_[2]:
405
+ st.metric('MAPE', mape_tuned, np.round(mape_tuned - mape, 2), 'inverse')
406
+ st.write(model_tuned.summary())
407
+
408
+ X_train_tuned[date_col] = X_train[date_col]
409
+ X_test_tuned[date_col] = X_test[date_col]
410
+ X_train_tuned[target_col] = y_train
411
+ X_test_tuned[target_col] = y_test
412
+
413
+ st.header('2.2 Actual vs. Predicted Plot')
414
+ # if is_panel:
415
+ # metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date, y_train, model.predict(X_train),
416
+ # model, target_column='Revenue',is_panel=True)
417
+ # else:
418
+ # metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(date, y_train, model.predict(X_train), model,target_column='Revenue')
419
+ if is_panel :
420
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train_tuned[date_col],
421
+ X_train_tuned[target_col],
422
+ model_tuned.fittedvalues, model_tuned,
423
+ target_column=sel_target_col,
424
+ is_panel=True)
425
+ else :
426
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train_tuned[date_col],
427
+ X_train_tuned[target_col],
428
+ model_tuned.predict(X_train_tuned[new_features]),
429
+ model_tuned,
430
+ target_column=sel_target_col,
431
+ is_panel=False)
432
+ # plot_actual_vs_predicted(X_train[date_col], y_train,
433
+ # model.fittedvalues, model,
434
+ # target_column='Revenue',
435
+ # is_panel=is_panel)
436
+
437
+ st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
438
+
439
+ st.markdown('## 2.3 Residual Analysis')
440
+ if is_panel :
441
+ columns = st.columns(2)
442
+ with columns[0]:
443
+ fig = plot_residual_predicted(y_train, model_tuned.fittedvalues, X_train_tuned)
444
+ st.plotly_chart(fig)
445
+
446
+ with columns[1]:
447
+ st.empty()
448
+ fig = qqplot(y_train, model_tuned.fittedvalues)
449
+ st.plotly_chart(fig)
450
+
451
+ with columns[0]:
452
+ fig = residual_distribution(y_train, model_tuned.fittedvalues)
453
+ st.pyplot(fig)
454
+ else:
455
+ columns = st.columns(2)
456
+ with columns[0]:
457
+ fig = plot_residual_predicted(y_train, model_tuned.predict(X_train_tuned[new_features]), X_train)
458
+ st.plotly_chart(fig)
459
+
460
+ with columns[1]:
461
+ st.empty()
462
+ fig = qqplot(y_train, model_tuned.predict(X_train_tuned[new_features]))
463
+ st.plotly_chart(fig)
464
+
465
+ with columns[0]:
466
+ fig = residual_distribution(y_train, model_tuned.predict(X_train_tuned[new_features]))
467
+ st.pyplot(fig)
468
+
469
+ st.session_state['is_tuned_model'][target_col] = True
470
+ # Sprint4 - saved tuned model in a dict
471
+ st.session_state['Model_Tuned'][sel_model + "__" + target_col] = {
472
+ "Model_object": model_tuned,
473
+ 'feature_set': new_features,
474
+ 'X_train_tuned': X_train_tuned,
475
+ 'X_test_tuned': X_test_tuned
476
+ }
477
+
478
+ # Pending
479
+ # if st.session_state['build_tuned_model']==True:
480
+ if st.session_state['Model_Tuned'] is not None :
481
+ if st.checkbox('Use this model to build response curves', key='save_model'):
482
+ # save_model = st.button('Use this model to build response curves', key='saved_tuned_model')
483
+ # if save_model:
484
+ st.session_state["is_tuned_model"][target_col]=True
485
+ with open("tuned_model.pkl", "wb") as f:
486
+ # pickle.dump(st.session_state['tuned_model'], f)
487
+ pickle.dump(st.session_state['Model_Tuned'], f) # Sprint4
488
+
489
+ # X_test_tuned.to_csv("Test/X_test_tuned_final.csv", index=False)
490
+ # X_train_tuned.to_csv("Test/X_train_tuned.csv", index=False)
491
+ st.success(sel_model + "__" + target_col + ' Tuned saved!')
492
+
493
+
494
+ # if is_panel:
495
+ # # st.session_state["tuned_model_features"] = new_features
496
+ # with open("tuned_model.pkl", "wb") as f:
497
+ # # pickle.dump(st.session_state['tuned_model'], f)
498
+ # pickle.dump(st.session_state['Model_Tuned'], f) # Sprint4
499
+ # st.success(sel_model + "__" + target_col + ' Tuned saved!')
500
+
501
+ # raw_data=df[features_set]
502
+ # columns_raw=[re.split(r"(_lag|_adst)",col)[0] for col in raw_data.columns]
503
+ # raw_data.columns=columns_raw
504
+ # columns_media=[col for col in columns_raw if Categorised_data[col]['BB']=='Media']
505
+ # raw_data=raw_data[columns_media]
506
+
507
+ # raw_data['Date']=list(df.index)
508
+
509
+ # spends_var=[col for col in df.columns if "spends" in col.lower() and 'adst' not in col.lower() and 'lag' not in col.lower()]
510
+ # spends_df=df[spends_var]
511
+ # spends_df['Week']=list(df.index)
512
+
513
+
514
+ # j=0
515
+ # X1=X.copy()
516
+ # col=X1.columns
517
+ # for i in model.params.values:
518
+ # X1[col[j]]=X1.iloc[:,j]*i
519
+ # j+=1
520
+ # contribution_df=X1
521
+ # contribution_df['Date']=list(df.index)
522
+ # excel_file='Overview_data.xlsx'
523
+
524
+ # with pd.ExcelWriter(excel_file,engine='xlsxwriter') as writer:
525
+ # raw_data.to_excel(writer,sheet_name='RAW DATA MMM',index=False)
526
+ # spends_df.to_excel(writer,sheet_name='SPEND INPUT',index=False)
527
+ # contribution_df.to_excel(writer,sheet_name='CONTRIBUTION MMM')
pages/6_Model_Result_Overview.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ MMO Build Sprint 3
3
+ additions : contributions calculated using tuned Mixed LM model
4
+ pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model
5
+
6
+ MMO Build Sprint 4
7
+ additions : response metrics selection
8
+ pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model
9
+ '''
10
+
11
+ import streamlit as st
12
+ import pandas as pd
13
+ from sklearn.preprocessing import MinMaxScaler
14
+ import pickle
15
+
16
+
17
+
18
+ from utilities_with_panel import (set_header,
19
+ overview_test_data_prep_panel,
20
+ overview_test_data_prep_nonpanel,
21
+ initialize_data,
22
+ load_local_css,
23
+ create_channel_summary,
24
+ create_contribution_pie,
25
+ create_contribuion_stacked_plot,
26
+ create_channel_spends_sales_plot,
27
+ format_numbers,
28
+ channel_name_formating)
29
+
30
+ import plotly.graph_objects as go
31
+ import streamlit_authenticator as stauth
32
+ import yaml
33
+ from yaml import SafeLoader
34
+ import time
35
+
36
+ st.set_page_config(layout='wide')
37
+ load_local_css('styles.css')
38
+ set_header()
39
+
40
+
41
+ def get_random_effects(media_data, panel_col, mdf):
42
+ random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
43
+
44
+ for i, market in enumerate(media_data[panel_col].unique()):
45
+ print(i, end='\r')
46
+ intercept = mdf.random_effects[market].values[0]
47
+ random_eff_df.loc[i, 'random_effect'] = intercept
48
+ random_eff_df.loc[i, panel_col] = market
49
+
50
+ return random_eff_df
51
+
52
+
53
+ def process_train_and_test(train, test, features, panel_col, target_col):
54
+ X1 = train[features]
55
+
56
+ ss = MinMaxScaler()
57
+ X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns)
58
+
59
+ X1[panel_col] = train[panel_col]
60
+ X1[target_col] = train[target_col]
61
+
62
+ if test is not None:
63
+ X2 = test[features]
64
+ X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns)
65
+ X2[panel_col] = test[panel_col]
66
+ X2[target_col] = test[target_col]
67
+ return X1, X2
68
+ return X1
69
+
70
+ def mdf_predict(X_df, mdf, random_eff_df) :
71
+ X=X_df.copy()
72
+ X=pd.merge(X, random_eff_df[[panel_col,'random_effect']], on=panel_col, how='left')
73
+ X['pred_fixed_effect'] = mdf.predict(X)
74
+
75
+ X['pred'] = X['pred_fixed_effect'] + X['random_effect']
76
+ X.to_csv('Test/merged_df_contri.csv',index=False)
77
+ X.drop(columns=['pred_fixed_effect', 'random_effect'], inplace=True)
78
+
79
+ return X
80
+
81
+
82
+ target='Revenue'
83
+
84
+ # is_panel=False
85
+ # is_panel = st.session_state['is_panel']
86
+ panel_col = [col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['bin_dict']['Panel Level 1'] ] [0]# set the panel column
87
+ date_col = 'date'
88
+
89
+ #st.write(media_data)
90
+
91
+ is_panel = True if len(panel_col)>0 else False
92
+
93
+ # panel_col='markets'
94
+ date_col = 'date'
95
+
96
+ # Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
97
+ if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics']!=[]:
98
+ sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
99
+ target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
100
+ else :
101
+ sel_target_col = 'Total Approved Accounts - Revenue'
102
+ target_col = 'total_approved_accounts_revenue'
103
+
104
+ # Sprint4 - Look through all saved tuned models, only show saved models of the sel resp metric (target_col)
105
+ # saved_models = st.session_state['saved_model_names']
106
+ # Sprint4 - get the model obj of the selected model
107
+ # st.write(sel_model_dict)
108
+
109
+ # Sprint3 - Contribution
110
+ if is_panel:
111
+ # read tuned mixedLM model
112
+ # if st.session_state["tuned_model"] is not None :
113
+
114
+ if st.session_state["is_tuned_model"][target_col]==True: #Sprint4
115
+ with open("tuned_model.pkl", 'rb') as file:
116
+ model_dict = pickle.load(file)
117
+ saved_models = list(model_dict.keys())
118
+ required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
119
+ sel_model = st.selectbox("Select the model to review", required_saved_models)
120
+ sel_model_dict = model_dict[sel_model + "__" + target_col]
121
+
122
+ # model=st.session_state["tuned_model"]
123
+ # X_train=st.session_state["X_train_tuned"]
124
+ # X_test=st.session_state["X_test_tuned"]
125
+ # best_feature_set=st.session_state["tuned_model_features"]
126
+ model=sel_model_dict["Model_object"]
127
+ X_train=sel_model_dict["X_train_tuned"]
128
+ X_test=sel_model_dict["X_test_tuned"]
129
+ best_feature_set=sel_model_dict["feature_set"]
130
+
131
+ # st.write("features", best_feature_set)
132
+ # st.write(X_test.columns)
133
+
134
+ else : # if non tuned model to be used # Pending
135
+ with open("best_models.pkl", 'rb') as file:
136
+ model_dict = pickle.load(file)
137
+ saved_models = list(model_dict.keys())
138
+ required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
139
+ sel_model = st.selectbox("Select the model to review", required_saved_models)
140
+ sel_model_dict = model_dict[sel_model + "__" + target_col]
141
+ model=st.session_state["base_model"]
142
+ X_train = st.session_state['X_train']
143
+ X_test = st.session_state['X_test']
144
+ # y_train = st.session_state['y_train']
145
+ # y_test = st.session_state['y_test']
146
+ best_feature_set = st.session_state['base_model_feature_set']
147
+ # st.write(best_feature_set)
148
+ # st.write(X_test.columns)
149
+
150
+ # Calculate contributions
151
+
152
+ with open("data_import.pkl", "rb") as f:
153
+ data = pickle.load(f)
154
+
155
+ # Accessing the loaded objects
156
+ st.session_state['orig_media_data'] = data["final_df"]
157
+
158
+ st.session_state['orig_media_data'].columns=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['orig_media_data'].columns]
159
+
160
+ media_data = st.session_state["media_data"]
161
+
162
+
163
+ # st.session_state['orig_media_data']=st.session_state["media_data"]
164
+
165
+ #st.write(media_data)
166
+
167
+ contri_df = pd.DataFrame()
168
+
169
+ y = []
170
+ y_pred = []
171
+
172
+ random_eff_df = get_random_effects(media_data, panel_col, model)
173
+ random_eff_df['fixed_effect'] = model.fe_params['Intercept']
174
+ random_eff_df['panel_effect'] = random_eff_df['random_effect'] + random_eff_df['fixed_effect']
175
+ # random_eff_df.to_csv("Test/random_eff_df_contri.csv", index=False)
176
+
177
+ coef_df = pd.DataFrame(model.fe_params)
178
+ coef_df.columns = ['coef']
179
+
180
+ # coef_df.reset_index().to_csv("Test/coef_df_contri1.csv",index=False)
181
+ # print(model.fe_params)
182
+
183
+ x_train_contribution = X_train.copy()
184
+ x_test_contribution = X_test.copy()
185
+
186
+ # preprocessing not needed since X_train is already preprocessed
187
+ # X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col)
188
+ # x_train_contribution[best_feature_set] = X1[best_feature_set]
189
+ # x_test_contribution[best_feature_set] = X2[best_feature_set]
190
+
191
+ x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df)
192
+ x_test_contribution = mdf_predict(x_test_contribution, model, random_eff_df)
193
+
194
+ x_train_contribution = pd.merge(x_train_contribution, random_eff_df[[panel_col, 'panel_effect']], on=panel_col,
195
+ how='left')
196
+ x_test_contribution = pd.merge(x_test_contribution, random_eff_df[[panel_col, 'panel_effect']], on=panel_col,
197
+ how='left')
198
+
199
+ inp_coef = coef_df['coef'][1:].tolist() # 0th index is intercept
200
+
201
+ for i in range(len(inp_coef)):
202
+ x_train_contribution[str(best_feature_set[i]) + "_contr"] = inp_coef[i] * x_train_contribution[best_feature_set[i]]
203
+ x_test_contribution[str(best_feature_set[i]) + "_contr"] = inp_coef[i] * x_test_contribution[best_feature_set[i]]
204
+
205
+ x_train_contribution['sum_contributions'] = x_train_contribution.filter(regex="contr").sum(axis=1)
206
+ x_train_contribution['sum_contributions'] = x_train_contribution['sum_contributions'] + x_train_contribution['panel_effect']
207
+
208
+ x_test_contribution['sum_contributions'] = x_test_contribution.filter(regex="contr").sum(axis=1)
209
+ x_test_contribution['sum_contributions'] = x_test_contribution['sum_contributions'] + x_test_contribution['panel_effect']
210
+
211
+ # # test
212
+ x_train_contribution.to_csv("Test/x_train_contribution.csv",index=False)
213
+ x_test_contribution.to_csv("Test/x_test_contribution.csv",index=False)
214
+ #
215
+ # st.session_state['orig_media_data'].to_csv("Test/transformed_data.csv",index=False)
216
+ # st.session_state['X_test_spends'].to_csv("Test/test_spends.csv",index=False)
217
+ # # st.write(st.session_state['orig_media_data'].columns)
218
+
219
+ st.write(date_col,panel_col)
220
+ # st.write(x_test_contribution)
221
+
222
+ overview_test_data_prep_panel(x_test_contribution, st.session_state['orig_media_data'], st.session_state['X_test_spends'],
223
+ date_col, panel_col, target_col)
224
+
225
+ else : # NON PANEL
226
+ if st.session_state["is_tuned_model"][target_col]==True: #Sprint4
227
+ with open("tuned_model.pkl", 'rb') as file:
228
+ model_dict = pickle.load(file)
229
+ saved_models = list(model_dict.keys())
230
+ required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
231
+ sel_model = st.selectbox("Select the model to review", required_saved_models)
232
+ sel_model_dict = model_dict[sel_model + "__" + target_col]
233
+
234
+ model=sel_model_dict["Model_object"]
235
+ X_train=sel_model_dict["X_train_tuned"]
236
+ X_test=sel_model_dict["X_test_tuned"]
237
+ best_feature_set=sel_model_dict["feature_set"]
238
+
239
+ else : #Sprint4
240
+ with open("best_models.pkl", 'rb') as file:
241
+ model_dict = pickle.load(file)
242
+ saved_models = list(model_dict.keys())
243
+ required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
244
+ sel_model = st.selectbox("Select the model to review", required_saved_models)
245
+ sel_model_dict = model_dict[sel_model + "__" + target_col]
246
+
247
+ model=sel_model_dict["Model_object"]
248
+ X_train=sel_model_dict["X_train"]
249
+ X_test=sel_model_dict["X_test"]
250
+ best_feature_set=sel_model_dict["feature_set"]
251
+
252
+ x_train_contribution = X_train.copy()
253
+ x_test_contribution = X_test.copy()
254
+
255
+ x_train_contribution['pred'] = model.predict(x_train_contribution[best_feature_set])
256
+ x_test_contribution['pred'] = model.predict(x_test_contribution[best_feature_set])
257
+
258
+ for num,i in enumerate(model.params.values):
259
+ col=best_feature_set[num]
260
+ x_train_contribution[col + "_contr"] = X_train[col] * i
261
+ x_test_contribution[col + "_contr"] = X_test[col] * i
262
+
263
+ x_test_contribution.to_csv("Test/x_test_contribution_non_panel.csv",index=False)
264
+ overview_test_data_prep_nonpanel(x_test_contribution, st.session_state['orig_media_data'].copy(), st.session_state['X_test_spends'].copy(), date_col, target_col)
265
+ # for k, v in st.session_sta
266
+ # te.items():
267
+
268
+ # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
269
+ # st.session_state[k] = v
270
+
271
+ # authenticator = st.session_state.get('authenticator')
272
+
273
+ # if authenticator is None:
274
+ # authenticator = load_authenticator()
275
+
276
+ # name, authentication_status, username = authenticator.login('Login', 'main')
277
+ # auth_status = st.session_state['authentication_status']
278
+
279
+ # if auth_status:
280
+ # authenticator.logout('Logout', 'main')
281
+
282
+ # is_state_initiaized = st.session_state.get('initialized',False)
283
+ # if not is_state_initiaized:
284
+
285
+ initialize_data(target_col)
286
+ scenario = st.session_state['scenario']
287
+ raw_df = st.session_state['raw_df']
288
+ st.header('Overview of previous spends')
289
+
290
+ # st.write(scenario.actual_total_spends)
291
+ # st.write(scenario.actual_total_sales)
292
+ columns = st.columns((1,1,3))
293
+
294
+ with columns[0]:
295
+ st.metric(label='Spends', value=format_numbers(float(scenario.actual_total_spends)))
296
+ ###print(f"##################### {scenario.actual_total_sales} ##################")
297
+ with columns[1]:
298
+ st.metric(label=target, value=format_numbers(float(scenario.actual_total_sales),include_indicator=False))
299
+
300
+
301
+ actual_summary_df = create_channel_summary(scenario)
302
+ actual_summary_df['Channel'] = actual_summary_df['Channel'].apply(channel_name_formating)
303
+
304
+ columns = st.columns((2,1))
305
+ with columns[0]:
306
+ with st.expander('Channel wise overview'):
307
+ st.markdown(actual_summary_df.style.set_table_styles(
308
+ [{
309
+ 'selector': 'th',
310
+ 'props': [('background-color', '#11B6BD')]
311
+ },
312
+ {
313
+ 'selector' : 'tr:nth-child(even)',
314
+ 'props' : [('background-color', '#11B6BD')]
315
+ }]).to_html(), unsafe_allow_html=True)
316
+
317
+ st.markdown("<hr>",unsafe_allow_html=True)
318
+ ##############################
319
+
320
+ st.plotly_chart(create_contribution_pie(scenario),use_container_width=True)
321
+ st.markdown("<hr>",unsafe_allow_html=True)
322
+
323
+
324
+ ################################3
325
+ st.plotly_chart(create_contribuion_stacked_plot(scenario),use_container_width=True)
326
+ st.markdown("<hr>",unsafe_allow_html=True)
327
+ #######################################
328
+
329
+ selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['non media'], format_func=channel_name_formating)
330
+ selected_channel = scenario.channels.get(selected_channel_name,None)
331
+
332
+ st.plotly_chart(create_channel_spends_sales_plot(selected_channel), use_container_width=True)
333
+
334
+ st.markdown("<hr>",unsafe_allow_html=True)
335
+
336
+ # elif auth_status == False:
337
+ # st.error('Username/Password is incorrect')
338
+
339
+ # if auth_status != True:
340
+ # try:
341
+ # username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
342
+ # if username_forgot_pw:
343
+ # st.success('New password sent securely')
344
+ # # Random password to be transferred to user securely
345
+ # elif username_forgot_pw == False:
346
+ # st.error('Username not found')
347
+ # except Exception as e:
348
+ # st.error(e)
pages/7_Build_Response_Curves.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import plotly.express as px
3
+ import numpy as np
4
+ import plotly.graph_objects as go
5
+ from utilities_with_panel import channel_name_formating, load_authenticator, initialize_data
6
+ from sklearn.metrics import r2_score
7
+ from collections import OrderedDict
8
+ from classes import class_from_dict,class_to_dict
9
+ import pickle
10
+ import json
11
+ from utilities import (
12
+ load_local_css,
13
+ set_header,
14
+ channel_name_formating,
15
+ )
16
+
17
+ for k, v in st.session_state.items():
18
+ if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
19
+ st.session_state[k] = v
20
+
21
+ def s_curve(x,K,b,a,x0):
22
+ return K / (1 + b*np.exp(-a*(x-x0)))
23
+
24
+ def save_scenario(scenario_name):
25
+ """
26
+ Save the current scenario with the mentioned name in the session state
27
+
28
+ Parameters
29
+ ----------
30
+ scenario_name
31
+ Name of the scenario to be saved
32
+ """
33
+ if 'saved_scenarios' not in st.session_state:
34
+ st.session_state = OrderedDict()
35
+
36
+ #st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
37
+ st.session_state['saved_scenarios'][scenario_name] = class_to_dict(st.session_state['scenario'])
38
+ st.session_state['scenario_input'] = ""
39
+ print(type(st.session_state['saved_scenarios']))
40
+ with open('../saved_scenarios.pkl', 'wb') as f:
41
+ pickle.dump(st.session_state['saved_scenarios'],f)
42
+
43
+
44
+ def reset_curve_parameters():
45
+ del st.session_state['K']
46
+ del st.session_state['b']
47
+ del st.session_state['a']
48
+ del st.session_state['x0']
49
+
50
+ def update_response_curve():
51
+ # st.session_state['rcs'][selected_channel_name]['K'] = st.session_state['K']
52
+ # st.session_state['rcs'][selected_channel_name]['b'] = st.session_state['b']
53
+ # st.session_state['rcs'][selected_channel_name]['a'] = st.session_state['a']
54
+ # st.session_state['rcs'][selected_channel_name]['x0'] = st.session_state['x0']
55
+ # rcs = st.session_state['rcs']
56
+ _channel_class = st.session_state['scenario'].channels[selected_channel_name]
57
+ _channel_class.update_response_curves({
58
+ 'K' : st.session_state['K'],
59
+ 'b' : st.session_state['b'],
60
+ 'a' : st.session_state['a'],
61
+ 'x0' : st.session_state['x0']})
62
+
63
+
64
+ # authenticator = st.session_state.get('authenticator')
65
+ # if authenticator is None:
66
+ # authenticator = load_authenticator()
67
+
68
+ # name, authentication_status, username = authenticator.login('Login', 'main')
69
+ # auth_status = st.session_state.get('authentication_status')
70
+
71
+ # if auth_status == True:
72
+ # is_state_initiaized = st.session_state.get('initialized',False)
73
+ # if not is_state_initiaized:
74
+ # print("Scenario page state reloaded")
75
+
76
+ # Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
77
+ st.set_page_config(layout='wide')
78
+ load_local_css('styles.css')
79
+ set_header()
80
+
81
+ if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics']!=[]:
82
+ sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
83
+ target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
84
+ else :
85
+ sel_target_col = 'Total Approved Accounts - Revenue'
86
+ target_col = 'total_approved_accounts_revenue'
87
+
88
+ initialize_data(target_col)
89
+
90
+ st.subheader("Build response curves")
91
+
92
+ channels_list = st.session_state['channels_list']
93
+ selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['Others'], format_func=channel_name_formating,on_change=reset_curve_parameters)
94
+
95
+ rcs = {}
96
+ for channel_name in channels_list:
97
+ rcs[channel_name] = st.session_state['scenario'].channels[channel_name].response_curve_params
98
+ # rcs = st.session_state['rcs']
99
+
100
+
101
+ if 'K' not in st.session_state:
102
+ st.session_state['K'] = rcs[selected_channel_name]['K']
103
+ if 'b' not in st.session_state:
104
+ st.session_state['b'] = rcs[selected_channel_name]['b']
105
+ if 'a' not in st.session_state:
106
+ st.session_state['a'] = rcs[selected_channel_name]['a']
107
+ if 'x0' not in st.session_state:
108
+ st.session_state['x0'] = rcs[selected_channel_name]['x0']
109
+
110
+ x = st.session_state['actual_input_df'][selected_channel_name].values
111
+ y = st.session_state['actual_contribution_df'][selected_channel_name].values
112
+
113
+ power = (np.ceil(np.log(x.max()) / np.log(10) )- 3)
114
+
115
+ # fig = px.scatter(x, s_curve(x/10**power,
116
+ # st.session_state['K'],
117
+ # st.session_state['b'],
118
+ # st.session_state['a'],
119
+ # st.session_state['x0']))
120
+
121
+ fig = px.scatter(x=x, y=y)
122
+ fig.add_trace(go.Scatter(x=sorted(x), y=s_curve(sorted(x)/10**power,st.session_state['K'],
123
+ st.session_state['b'],
124
+ st.session_state['a'],
125
+ st.session_state['x0']),
126
+ line=dict(color='red')))
127
+
128
+ fig.update_layout(title_text="Response Curve",showlegend=False)
129
+ fig.update_annotations(font_size=10)
130
+ fig.update_xaxes(title='Spends')
131
+ fig.update_yaxes(title=sel_target_col)
132
+
133
+ st.plotly_chart(fig,use_container_width=True)
134
+
135
+ r2 = r2_score(y, s_curve(x / 10**power,
136
+ st.session_state['K'],
137
+ st.session_state['b'],
138
+ st.session_state['a'],
139
+ st.session_state['x0']))
140
+
141
+ st.metric('R2',round(r2,2))
142
+ columns = st.columns(4)
143
+
144
+ with columns[0]:
145
+ st.number_input('K',key='K',format="%0.5f")
146
+ with columns[1]:
147
+ st.number_input('b',key='b',format="%0.5f")
148
+ with columns[2]:
149
+ st.number_input('a',key='a',step=0.0001,format="%0.5f")
150
+ with columns[3]:
151
+ st.number_input('x0',key='x0',format="%0.5f")
152
+
153
+
154
+ st.button('Update parameters',on_click=update_response_curve)
155
+ st.button('Reset parameters',on_click=reset_curve_parameters)
156
+ scenario_name = st.text_input('Scenario name', key='scenario_input',placeholder='Scenario name',label_visibility='collapsed')
157
+ st.button('Save', on_click=lambda : save_scenario(scenario_name),disabled=len(st.session_state['scenario_input']) == 0)
158
+
159
+ file_name = st.text_input('rcs download file name', key='file_name_input',placeholder='file name',label_visibility='collapsed')
160
+ st.download_button(
161
+ label="Download response curves",
162
+ data=json.dumps(rcs),
163
+ file_name=f"{file_name}.json",
164
+ mime="application/json",
165
+ disabled= len(file_name) == 0,
166
+ )
167
+
168
+
169
+ def s_curve_derivative(x, K, b, a, x0):
170
+ # Derivative of the S-curve function
171
+ return a * b * K * np.exp(-a * (x - x0)) / ((1 + b * np.exp(-a * (x - x0))) ** 2)
172
+
173
+ # Parameters of the S-curve
174
+ K = st.session_state['K']
175
+ b = st.session_state['b']
176
+ a = st.session_state['a']
177
+ x0 = st.session_state['x0']
178
+
179
+ # Optimized spend value obtained from the tool
180
+ optimized_spend = st.number_input('value of x') # Replace this with your optimized spend value
181
+
182
+ # Calculate the slope at the optimized spend value
183
+ slope_at_optimized_spend = s_curve_derivative(optimized_spend, K, b, a, x0)
184
+
185
+ st.write("Slope ", slope_at_optimized_spend)
pages/8_Scenario_Planner.py ADDED
@@ -0,0 +1,1424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from numerize.numerize import numerize
3
+ import numpy as np
4
+ from functools import partial
5
+ from collections import OrderedDict
6
+ from plotly.subplots import make_subplots
7
+ import plotly.graph_objects as go
8
+ from utilities import (
9
+ format_numbers,
10
+ load_local_css,
11
+ set_header,
12
+ initialize_data,
13
+ load_authenticator,
14
+ send_email,
15
+ channel_name_formating,
16
+ )
17
+ from classes import class_from_dict, class_to_dict
18
+ import pickle
19
+ import streamlit_authenticator as stauth
20
+ import yaml
21
+ from yaml import SafeLoader
22
+ import re
23
+ import pandas as pd
24
+ import plotly.express as px
25
+
26
+
27
+ st.set_page_config(layout="wide")
28
+ load_local_css("styles.css")
29
+ set_header()
30
+
31
+ for k, v in st.session_state.items():
32
+ if k not in ["logout", "login", "config"] and not k.startswith("FormSubmitter"):
33
+ st.session_state[k] = v
34
+ # ======================================================== #
35
+ # ======================= Functions ====================== #
36
+ # ======================================================== #
37
+
38
+
39
+ def optimize(key, status_placeholder):
40
+ """
41
+ Optimize the spends for the sales
42
+ """
43
+
44
+ channel_list = [
45
+ key for key, value in st.session_state["optimization_channels"].items() if value
46
+ ]
47
+
48
+ if len(channel_list) > 0:
49
+ scenario = st.session_state["scenario"]
50
+ if key.lower() == "media spends":
51
+ with status_placeholder:
52
+ with st.spinner("Optimizing"):
53
+ result = st.session_state["scenario"].optimize(
54
+ st.session_state["total_spends_change"], channel_list
55
+ )
56
+ # elif key.lower() == "revenue":
57
+ else:
58
+ with status_placeholder:
59
+ with st.spinner("Optimizing"):
60
+
61
+ result = st.session_state["scenario"].optimize_spends(
62
+ st.session_state["total_sales_change"], channel_list
63
+ )
64
+ for channel_name, modified_spends in result:
65
+
66
+ st.session_state[channel_name] = numerize(
67
+ modified_spends * scenario.channels[channel_name].conversion_rate,
68
+ 1,
69
+ )
70
+ prev_spends = (
71
+ st.session_state["scenario"].channels[channel_name].actual_total_spends
72
+ )
73
+ st.session_state[f"{channel_name}_change"] = round(
74
+ 100 * (modified_spends - prev_spends) / prev_spends, 2
75
+ )
76
+
77
+
78
+ def save_scenario(scenario_name):
79
+ """
80
+ Save the current scenario with the mentioned name in the session state
81
+
82
+ Parameters
83
+ ----------
84
+ scenario_name
85
+ Name of the scenario to be saved
86
+ """
87
+ if "saved_scenarios" not in st.session_state:
88
+ st.session_state = OrderedDict()
89
+
90
+ # st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
91
+ st.session_state["saved_scenarios"][scenario_name] = class_to_dict(
92
+ st.session_state["scenario"]
93
+ )
94
+ st.session_state["scenario_input"] = ""
95
+ # print(type(st.session_state['saved_scenarios']))
96
+ with open("../saved_scenarios.pkl", "wb") as f:
97
+ pickle.dump(st.session_state["saved_scenarios"], f)
98
+
99
+
100
+ if "allow_spends_update" not in st.session_state:
101
+ st.session_state["allow_spends_update"] = True
102
+
103
+ if "allow_sales_update" not in st.session_state:
104
+ st.session_state["allow_sales_update"] = True
105
+
106
+
107
+ def update_sales_abs_slider():
108
+ actual_sales = _scenario.actual_total_sales
109
+ if validate_input(st.session_state["total_sales_change_abs_slider"]):
110
+ modified_sales = extract_number_for_string(
111
+ st.session_state["total_sales_change_abs_slider"]
112
+ )
113
+ st.session_state["total_sales_change"] = round(
114
+ ((modified_sales / actual_sales) - 1) * 100
115
+ )
116
+ st.session_state["total_sales_change_abs"] = numerize(modified_sales, 1)
117
+
118
+
119
+ def update_sales_abs():
120
+ if (
121
+ st.session_state["total_sales_change_abs"]
122
+ in st.session_state["total_sales_change_abs_slider_options"]
123
+ ):
124
+ st.session_state["allow_sales_update"] = True
125
+ else:
126
+ st.session_state["allow_sales_update"] = False
127
+
128
+ actual_sales = _scenario.actual_total_sales
129
+ if (
130
+ validate_input(st.session_state["total_sales_change_abs"])
131
+ and st.session_state["allow_sales_update"]
132
+ ):
133
+ modified_sales = extract_number_for_string(
134
+ st.session_state["total_sales_change_abs"]
135
+ )
136
+ st.session_state["total_sales_change"] = round(
137
+ ((modified_sales / actual_sales) - 1) * 100
138
+ )
139
+ st.session_state["total_sales_change_abs_slider"] = numerize(modified_sales, 1)
140
+
141
+
142
+ def update_sales():
143
+ st.session_state["total_sales_change_abs"] = numerize(
144
+ (1 + st.session_state["total_sales_change"] / 100)
145
+ * _scenario.actual_total_sales,
146
+ 1,
147
+ )
148
+ st.session_state["total_sales_change_abs_slider"] = numerize(
149
+ (1 + st.session_state["total_sales_change"] / 100)
150
+ * _scenario.actual_total_sales,
151
+ 1,
152
+ )
153
+
154
+
155
+ def update_all_spends_abs_slider():
156
+ actual_spends = _scenario.actual_total_spends
157
+ if validate_input(st.session_state["total_spends_change_abs_slider"]):
158
+ modified_spends = extract_number_for_string(
159
+ st.session_state["total_spends_change_abs_slider"]
160
+ )
161
+ st.session_state["total_spends_change"] = round(
162
+ ((modified_spends / actual_spends) - 1) * 100
163
+ )
164
+ st.session_state["total_spends_change_abs"] = numerize(modified_spends, 1)
165
+
166
+ update_all_spends()
167
+
168
+
169
+ # def update_all_spends_abs_slider():
170
+ # actual_spends = _scenario.actual_total_spends
171
+ # if validate_input(st.session_state["total_spends_change_abs_slider"]):
172
+ # print("#" * 100)
173
+ # print(st.session_state["total_spends_change_abs_slider"])
174
+ # print("#" * 100)
175
+
176
+ # modified_spends = extract_number_for_string(
177
+ # st.session_state["total_spends_change_abs_slider"]
178
+ # )
179
+ # st.session_state["total_spends_change"] = (
180
+ # (modified_spends / actual_spends) - 1
181
+ # ) * 100
182
+ # st.session_state["total_spends_change_abs"] = st.session_state[
183
+ # "total_spends_change_abs_slider"
184
+ # ]
185
+
186
+ # update_all_spends()
187
+
188
+
189
def update_all_spends_abs():
    """Handle an edit of the absolute total-spends text box.

    The entered value is accepted only when it matches one of the
    precomputed slider options AND parses as a K/M/B figure; the
    ``allow_spends_update`` flag is stored so the UI can show an
    "Invalid Input" warning when it is False.
    """
    raw = st.session_state["total_spends_change_abs"]
    st.session_state["allow_spends_update"] = (
        raw in st.session_state["total_spends_change_abs_slider_options"]
    )

    if st.session_state["allow_spends_update"] and validate_input(raw):
        modified_spends = extract_number_for_string(raw)
        actual_spends = _scenario.actual_total_spends
        st.session_state["total_spends_change"] = (
            (modified_spends / actual_spends) - 1
        ) * 100
        # Keep the slider in lock-step with the (already option-valid) text.
        st.session_state["total_spends_change_abs_slider"] = raw

    update_all_spends()
214
+
215
+
216
def update_spends():
    """Sync both absolute total-spends widgets after a percent-change edit,
    then re-apply the change to every channel."""
    scaled = numerize(
        (1 + st.session_state["total_spends_change"] / 100)
        * _scenario.actual_total_spends,
        1,
    )
    st.session_state["total_spends_change_abs"] = scaled
    st.session_state["total_spends_change_abs_slider"] = scaled

    update_all_spends()
229
+
230
+
231
def update_all_spends():
    """Apply the overall percent change to every channel's spends.

    For each channel: updates the scenario model, refreshes the channel's
    formatted spends text-box value, and sets its per-channel percent
    widget to the shared overall percentage.
    """
    pct = st.session_state["total_spends_change"]
    scenario = st.session_state["scenario"]

    for name in st.session_state["channels_list"]:
        channel = scenario.channels[name]
        new_spends = (1 + pct / 100) * channel.actual_total_spends
        scenario.update(name, new_spends)
        # The text box shows spends in display currency (after conversion).
        st.session_state[name] = numerize(new_spends * channel.conversion_rate, 1)
        st.session_state[f"{name}_change"] = pct
246
+
247
+
248
def extract_number_for_string(string_input):
    """Parse a human-readable figure such as ``"1.2M"`` into a float.

    Supports the suffixes K (thousand), M (million) and B (billion),
    case-insensitively, with surrounding whitespace tolerated. A plain
    numeric string (no suffix) is also accepted and returned as a float;
    previously such input silently yielded ``None``, which crashed the
    caller's arithmetic downstream.

    Raises:
        ValueError: if the string is not a number with an optional
            K/M/B suffix.
    """
    text = string_input.strip().upper()
    multipliers = {"K": 10**3, "M": 10**6, "B": 10**9}
    if text and text[-1] in multipliers:
        return float(text[:-1]) * multipliers[text[-1]]
    # No recognised suffix: fall back to a plain float parse instead of
    # silently returning None.
    return float(text)
256
+
257
+
258
def validate_input(string_input):
    """Return True when *string_input* looks like ``"<number><K|M|B>"``.

    The previous pattern used the character class ``[K|M|B]``, which also
    matched a literal ``"|"`` — so input such as ``"5|"`` validated and
    then crashed ``extract_number_for_string`` downstream. The class is
    now ``[KMB]``.
    """
    return re.match(r"\d+\.?\d*[KMB]$", string_input) is not None
264
+
265
+
266
def update_data_by_percent(channel_name):
    """Handle a per-channel percent-change edit.

    Scales the channel's actual (display-currency) spends by the entered
    percentage, refreshes the formatted text box, and pushes the raw
    (pre-conversion) figure into the scenario model.
    """
    scenario = st.session_state["scenario"]
    channel = scenario.channels[channel_name]

    base_spends = channel.actual_total_spends * channel.conversion_rate
    modified_spends = base_spends * (
        1 + st.session_state[f"{channel_name}_change"] / 100
    )

    st.session_state[channel_name] = numerize(modified_spends, 1)
    # The scenario stores spends before currency conversion.
    scenario.update(channel_name, modified_spends / channel.conversion_rate)
280
+
281
+
282
def update_data(channel_name):
    """Handle a manual edit of a single channel's spends text box.

    Silently ignores input that does not validate (the UI shows its own
    error); otherwise derives the percent change versus actual spends
    and updates the scenario model.
    """
    raw = st.session_state[channel_name]
    if not validate_input(raw):
        return

    scenario = st.session_state["scenario"]
    channel = scenario.channels[channel_name]

    modified_spends = extract_number_for_string(raw)
    prev_spends = channel.actual_total_spends * channel.conversion_rate
    st.session_state[f"{channel_name}_change"] = round(
        100 * (modified_spends - prev_spends) / prev_spends, 2
    )
    # The scenario stores spends before currency conversion.
    scenario.update(channel_name, modified_spends / channel.conversion_rate)
301
+ # st.session_state['scenario'].update(channel_name, modified_spends)
302
+ # else:
303
+ # try:
304
+ # modified_spends = float(st.session_state[channel_name])
305
+ # prev_spends = st.session_state['scenario'].channels[channel_name].actual_total_spends * st.session_state['scenario'].channels[channel_name].conversion_rate
306
+ # st.session_state[f'{channel_name}_change'] = round(100*(modified_spends - prev_spends) / prev_spends,2)
307
+ # st.session_state['scenario'].update(channel_name, modified_spends/st.session_state['scenario'].channels[channel_name].conversion_rate)
308
+ # st.session_state[f'{channel_name}'] = numerize(modified_spends,1)
309
+ # except ValueError:
310
+ # st.write('Invalid input')
311
+
312
+
313
def select_channel_for_optimization(channel_name):
    """Mirror the channel's checkbox state into the optimization map."""
    selected = st.session_state[f"{channel_name}_selected"]
    st.session_state["optimization_channels"][channel_name] = selected
320
+
321
+
322
def select_all_channels_for_optimization():
    """Propagate the "Optimize all Channels" checkbox to every channel.

    Sets both each channel's own checkbox key and the shared
    optimization map to the master checkbox's value.
    """
    flag = st.session_state["optimze_all_channels"]
    for name in st.session_state["optimization_channels"]:
        st.session_state[f"{name}_selected"] = flag
        st.session_state["optimization_channels"][name] = flag
333
+
334
+
335
def update_penalty():
    """Push the "apply penalty" checkbox state into the scenario's sales model."""
    apply_penalty = st.session_state["apply_penalty"]
    st.session_state["scenario"].update_penalty(apply_penalty)
340
+
341
+
342
def reset_scenario(panel_selected, file_selected, updated_rcs):
    """Rebuild the scenario from disk and clear every per-channel widget.

    Args:
        panel_selected: the panel chosen in the UI ("Aggregated" or a
            panel name); forwarded to ``initialize_data``.
        file_selected: path of the metrics workbook to reload.
        updated_rcs: user-adjusted response-curve parameters, or None.

    Relies on the module-level ``metrics_selected`` for the metric name.
    """
    # The original code duplicated this call in identical "Aggregated"
    # and per-panel branches (differing only in a dead local `panel`);
    # a single unconditional call is equivalent.
    initialize_data(
        panel=panel_selected,
        target_file=file_selected,
        updated_rcs=updated_rcs,
        metrics=metrics_selected,
    )

    # Clear every channel's optimization checkbox and percent input.
    for channel_name in st.session_state["channels_list"]:
        st.session_state[f"{channel_name}_selected"] = False
        st.session_state[f"{channel_name}_change"] = 0
    st.session_state["optimze_all_channels"] = False

    st.session_state["total_sales_change"] = 0

    # Re-seed the absolute spends/sales widgets from the fresh scenario.
    update_spends()
    update_sales()

    reset_inputs()
376
+
377
+ # st.rerun()
378
+
379
+
380
def format_number(num):
    """Render *num* compactly: millions as "x.xxM", thousands as "xK",
    anything smaller with two decimals."""
    thresholds = (
        (1_000_000, lambda v: f"{v / 1_000_000:.2f}M"),
        (1_000, lambda v: f"{v / 1_000:.0f}K"),
    )
    for limit, render in thresholds:
        if num >= limit:
            return render(num)
    return f"{num:.2f}"
387
+
388
+
389
def summary_plot(data, x, y, title, text_column):
    """Horizontal bar chart of per-channel figures with SI-formatted labels.

    Args:
        data: DataFrame with a ``Channel_name`` column (used for color).
        x, y: column names for the bar length and category axis.
        title: chart title.
        text_column: column rendered as the bar label (``%{text:.2s}``).

    Returns:
        A plotly Figure with the legend hidden.

    Note: the previous implementation converted ``text_column`` to
    numeric *after* building the figure, which both mutated the caller's
    frame and had no effect on the already-captured plot data. The
    conversion now happens on a copy, before the figure is created, so
    the ``.2s`` text formatting actually applies.
    """
    data = data.copy()
    data[text_column] = pd.to_numeric(data[text_column], errors="coerce")

    fig = px.bar(
        data,
        x=x,
        y=y,
        orientation="h",
        title=title,
        text=text_column,
        color="Channel_name",
    )
    fig.update_traces(
        texttemplate="%{text:.2s}",
        textposition="outside",
        hovertemplate="%{x:.2s}",
    )
    fig.update_layout(xaxis_title=x, yaxis_title="Channel Name", showlegend=False)
    return fig
412
+
413
+
414
def s_curve(x, K, b, a, x0):
    """Logistic (S-shaped) response curve.

    K is the carrying capacity, a the growth rate, x0 the midpoint
    offset, and b scales the initial denominator.
    """
    exponent = -a * (x - x0)
    return K / (1 + b * np.exp(exponent))
416
+
417
+
418
def find_segment_value(x, roi, mroi):
    """Locate the profitable ("green") sub-range of a response curve.

    Returns ``(start, end, left, right)`` where ``[left, right]`` is the
    span in which both ROI and marginal ROI exceed 1. When no point
    qualifies, both ``left`` and ``right`` fall back to ``x[0]``.
    """
    # Single pass: the original computed the same np.where twice.
    green_indices = np.where((roi > 1) & (mroi > 1))[0]
    if green_indices.size > 0:
        left_value = x[green_indices[0]]
        right_value = x[green_indices[-1]]
    else:
        left_value = x[0]
        right_value = x[0]

    return x[0], x[len(x) - 1], left_value, right_value
431
+
432
+
433
def calculate_rgba(
    start_value, end_value, left_value, right_value, current_channel_spends
):
    """Color-code a spend level within the yellow/green/red response bands.

    Yellow (start..left) and green (left..right) fade from alpha 0.8 down
    toward 0.2 across the band; red (right..end) fades from 0.2 up toward
    0.8. A spend outside [start, end] gets a neutral grey.
    """
    point = current_channel_spends
    if not (start_value <= point <= end_value):
        return "rgba(136, 136, 136, 0.5)"  # outside the modelled range

    if point <= left_value:
        rgb, lo, hi, rising = "255, 255, 0", start_value, left_value, False
    elif point <= right_value:
        rgb, lo, hi, rising = "0, 128, 0", left_value, right_value, False
    else:
        rgb, lo, hi, rising = "255, 0, 0", right_value, end_value, True

    fraction = (point - lo) / (hi - lo)
    alpha = 0.2 + (0.6 * fraction) if rising else 0.8 - (0.6 * fraction)

    # Clamp against any floating-point overshoot.
    alpha = max(0.2, min(alpha, 0.8))

    return f"rgba({rgb}, {alpha})"
477
+
478
+
479
def debug_temp(x_test, power, K, b, a, x0):
    """Print how many points of *x_test* fall into three hard-coded spend bins.

    Ad-hoc debugging helper; the curve parameters are accepted but unused.
    """
    print("*" * 100)

    lower = center = upper = 0
    for value in x_test:
        if value <= 2524:
            lower += 1
        elif value <= 3377:
            center += 1
        else:
            upper += 1

    print(
        f"""
    lower : {lower}
    center : {center}
    upper : {upper}
    """
    )
493
+
494
+
495
# @st.cache
def plot_response_curves():
    """Build a grid of per-channel response curves (spends vs. target).

    For every channel in the module-level ``channels_list``: fits nothing —
    it evaluates the stored s-curve parameters from
    ``st.session_state["rcs"]`` over a spend sweep, plots the curve, marks
    the currently-simulated operating point, and shades the subplot
    background by profitability (yellow / green / red from
    ``find_segment_value``).

    Returns a plotly Figure of ``rows x 4`` subplots.
    """
    cols = 4
    # Enough rows to hold every channel at 4 per row.
    rows = (
        len(channels_list) // cols
        if len(channels_list) % cols == 0
        else len(channels_list) // cols + 1
    )
    rcs = st.session_state["rcs"]
    shapes = []
    fig = make_subplots(rows=rows, cols=cols, subplot_titles=channels_list)
    for i in range(0, len(channels_list)):
        col = channels_list[i]
        x_actual = st.session_state["scenario"].channels[col].actual_spends
        # x_modified = st.session_state["scenario"].channels[col].modified_spends

        # Decimal scale of the largest weekly spend, minus 3; the s-curve
        # parameters were fitted on inputs divided by 10**power.
        power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3

        # Fitted s-curve parameters for this channel.
        K = rcs[col]["K"]
        b = rcs[col]["b"]
        a = rcs[col]["a"]
        x0 = rcs[col]["x0"]

        # Sweep total spends from 0 to 5x the actual total, in 50 steps.
        x_plot = np.linspace(0, 5 * x_actual.sum(), 50)

        x, y, marginal_roi = [], [], []
        # Distribute each total across weeks proportionally to actuals.
        for x_p in x_plot:
            x.append(x_p * x_actual / x_actual.sum())

        # Evaluate the response curve per week at each sweep point.
        for index in range(len(x_plot)):
            y.append(s_curve(x[index] / 10**power, K, b, a, x0))

        # Marginal ROI is the s-curve derivative factor a*y*(1 - y/K);
        # eps guards against division by a zero capacity.
        for index in range(len(x_plot)):
            marginal_roi.append(
                a * y[index] * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
            )

        # Collapse the weekly axis: total display-currency spends,
        # total sales, and average marginal ROI per sweep point.
        x = (
            np.sum(x, axis=1)
            * st.session_state["scenario"].channels[col].conversion_rate
        )
        y = np.sum(y, axis=1)
        marginal_roi = (
            np.average(marginal_roi, axis=1)
            / st.session_state["scenario"].channels[col].conversion_rate
        )

        # eps guards the zero-spend point of the sweep.
        roi = y / np.maximum(x, np.finfo(float).eps)

        # The response curve itself, with ROI/MROI in the hover.
        fig.add_trace(
            go.Scatter(
                x=x,
                y=y,
                name=col,
                customdata=np.stack((roi, marginal_roi), axis=-1),
                hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
                line=dict(color="blue"),
            ),
            row=1 + (i) // cols,
            col=i % cols + 1,
        )

        # Current simulated operating point for this channel.
        x_optimal = (
            st.session_state["scenario"].channels[col].modified_total_spends
            * st.session_state["scenario"].channels[col].conversion_rate
        )
        y_optimal = st.session_state["scenario"].channels[col].modified_total_sales

        # if col == "Paid_social_others":
        #     debug_temp(x_optimal * x_actual / x_actual.sum(), power, K, b, a, x0)

        fig.add_trace(
            go.Scatter(
                x=[x_optimal],
                y=[y_optimal],
                name=col,
                legendgroup=col,
                showlegend=False,
                marker=dict(color=["black"]),
            ),
            row=1 + (i) // cols,
            col=i % cols + 1,
        )

        # Dashed cross-hairs from the axes to the operating point.
        shapes.append(
            go.layout.Shape(
                type="line",
                x0=0,
                y0=y_optimal,
                x1=x_optimal,
                y1=y_optimal,
                line_width=1,
                line_dash="dash",
                line_color="black",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        shapes.append(
            go.layout.Shape(
                type="line",
                x0=x_optimal,
                y0=0,
                x1=x_optimal,
                y1=y_optimal,
                line_width=1,
                line_dash="dash",
                line_color="black",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Profitability band boundaries along the spend axis.
        start_value, end_value, left_value, right_value = find_segment_value(
            x,
            roi,
            marginal_roi,
        )

        # Adding background colors
        y_max = y.max() * 1.3  # 30% extra space above the max

        # Yellow region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=start_value,
                y0=0,
                x1=left_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(255, 255, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Green region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=left_value,
                y0=0,
                x1=right_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(0, 255, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

        # Red region
        shapes.append(
            go.layout.Shape(
                type="rect",
                x0=right_value,
                y0=0,
                x1=end_value,
                y1=y_max,
                line=dict(width=0),
                fillcolor="rgba(255, 0, 0, 0.3)",
                layer="below",
                xref=f"x{i+1}",
                yref=f"y{i+1}",
            )
        )

    fig.update_layout(
        # height=1000,
        # width=1000,
        title_text=f"Response Curves (X: Spends Vs Y: {target})",
        showlegend=False,
        shapes=shapes,
    )
    fig.update_annotations(font_size=10)
    # fig.update_xaxes(title="Spends")
    # fig.update_yaxes(title=target)
    fig.update_yaxes(
        gridcolor="rgba(136, 136, 136, 0.5)", gridwidth=0.5, griddash="dash"
    )

    return fig
681
+
682
+
683
+ # @st.cache
684
+ # def plot_response_curves():
685
+ # cols = 4
686
+ # rcs = st.session_state["rcs"]
687
+ # shapes = []
688
+ # fig = make_subplots(rows=6, cols=cols, subplot_titles=channels_list)
689
+ # for i in range(0, len(channels_list)):
690
+ # col = channels_list[i]
691
+ # x = st.session_state["actual_df"][col].values
692
+ # spends = x.sum()
693
+ # power = np.ceil(np.log(x.max()) / np.log(10)) - 3
694
+ # x = np.linspace(0, 3 * x.max(), 200)
695
+
696
+ # K = rcs[col]["K"]
697
+ # b = rcs[col]["b"]
698
+ # a = rcs[col]["a"]
699
+ # x0 = rcs[col]["x0"]
700
+
701
+ # y = s_curve(x / 10**power, K, b, a, x0)
702
+ # roi = y / x
703
+ # marginal_roi = a * (y) * (1 - y / K)
704
+ # fig.add_trace(
705
+ # go.Scatter(
706
+ # x=52
707
+ # * x
708
+ # * st.session_state["scenario"].channels[col].conversion_rate,
709
+ # y=52 * y,
710
+ # name=col,
711
+ # customdata=np.stack((roi, marginal_roi), axis=-1),
712
+ # hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
713
+ # ),
714
+ # row=1 + (i) // cols,
715
+ # col=i % cols + 1,
716
+ # )
717
+
718
+ # fig.add_trace(
719
+ # go.Scatter(
720
+ # x=[
721
+ # spends
722
+ # * st.session_state["scenario"]
723
+ # .channels[col]
724
+ # .conversion_rate
725
+ # ],
726
+ # y=[52 * s_curve(spends / (10**power * 52), K, b, a, x0)],
727
+ # name=col,
728
+ # legendgroup=col,
729
+ # showlegend=False,
730
+ # marker=dict(color=["black"]),
731
+ # ),
732
+ # row=1 + (i) // cols,
733
+ # col=i % cols + 1,
734
+ # )
735
+
736
+ # shapes.append(
737
+ # go.layout.Shape(
738
+ # type="line",
739
+ # x0=0,
740
+ # y0=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
741
+ # x1=spends
742
+ # * st.session_state["scenario"].channels[col].conversion_rate,
743
+ # y1=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
744
+ # line_width=1,
745
+ # line_dash="dash",
746
+ # line_color="black",
747
+ # xref=f"x{i+1}",
748
+ # yref=f"y{i+1}",
749
+ # )
750
+ # )
751
+
752
+ # shapes.append(
753
+ # go.layout.Shape(
754
+ # type="line",
755
+ # x0=spends
756
+ # * st.session_state["scenario"].channels[col].conversion_rate,
757
+ # y0=0,
758
+ # x1=spends
759
+ # * st.session_state["scenario"].channels[col].conversion_rate,
760
+ # y1=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
761
+ # line_width=1,
762
+ # line_dash="dash",
763
+ # line_color="black",
764
+ # xref=f"x{i+1}",
765
+ # yref=f"y{i+1}",
766
+ # )
767
+ # )
768
+
769
+ # fig.update_layout(
770
+ # height=1500,
771
+ # width=1000,
772
+ # title_text="Response Curves",
773
+ # showlegend=False,
774
+ # shapes=shapes,
775
+ # )
776
+ # fig.update_annotations(font_size=10)
777
+ # fig.update_xaxes(title="Spends")
778
+ # fig.update_yaxes(title=target)
779
+ # return fig
780
+
781
+
782
+ # ======================================================== #
783
+ # ==================== HTML Components =================== #
784
+ # ======================================================== #
785
+
786
+
787
def generate_spending_header(heading):
    """Render *heading* as a styled column header and return the element."""
    html = f"""<h2 class="spends-header">{heading}</h2>"""
    return st.markdown(html, unsafe_allow_html=True)
791
+
792
+
793
+ # ======================================================== #
794
+ # =================== Session variables ================== #
795
+ # ======================================================== #
796
+
797
# --- Authentication bootstrap: load the YAML config and render login. ---
with open("config.yaml") as file:
    config = yaml.load(file, Loader=SafeLoader)
    st.session_state["config"] = config

# Build the streamlit-authenticator instance from the config; the login
# widget populates st.session_state["authentication_status"], which gates
# the rest of the page below.
authenticator = stauth.Authenticate(
    config["credentials"],
    config["cookie"]["name"],
    config["cookie"]["key"],
    config["cookie"]["expiry_days"],
    config["preauthorized"],
)
st.session_state["authenticator"] = authenticator
name, authentication_status, username = authenticator.login("Login", "main")
auth_status = st.session_state.get("authentication_status")

# NOTE(review): mid-file imports — these conventionally belong at the top
# of the module with the other imports.
import os
import glob
814
+
815
+
816
def get_excel_names(directory):
    """Collect the metric names embedded after "@#" in Excel filenames.

    Scans *directory* for ``.xlsx``/``.xls`` files whose names contain
    the ``@#`` marker and returns the portion after the marker with the
    extension stripped, e.g. ``"data@#sales.xlsx"`` -> ``"sales"``.
    """
    names = []
    for extension in ("xlsx", "xls"):
        pattern = os.path.join(directory, f"*@#*.{extension}")
        for path in glob.glob(pattern):
            # Keep only the text after the last "@#" marker.
            stem = os.path.basename(path).split("@#")[-1]
            stem = stem.replace(".xlsx", "").replace(".xls", "")
            names.append(stem)
    return names
840
+
841
+
842
def name_formating(channel_name):
    """Turn an identifier like "total_sales" into the label "Total Sales"."""
    return channel_name.replace("_", " ").title()
850
+
851
+
852
@st.cache_resource(show_spinner=False)
def panel_fetch(file_selected):
    """Read the "RAW DATA MMM" sheet and return its distinct Panel values.

    Returns None when the sheet has no "Panel" column. Note the list is
    built from a set, so its ordering is arbitrary.
    """
    raw_data_mmm_df = pd.read_excel(file_selected, sheet_name="RAW DATA MMM")
    if "Panel" not in raw_data_mmm_df.columns:
        return None
    return list(set(raw_data_mmm_df["Panel"]))
863
+
864
+
865
def reset_inputs():
    """Drop every total-spends / total-sales widget key from session state
    and flag the page for re-initialization, so the widgets re-seed from
    the freshly loaded scenario on the next run."""
    for key in (
        "total_spends_change_abs",
        "total_spends_change",
        "total_spends_change_abs_slider",
        "total_sales_change_abs",
        "total_sales_change",
        "total_sales_change_abs_slider",
    ):
        st.session_state.pop(key, None)

    st.session_state["initialized"] = False
881
+
882
+
883
+ if auth_status == True:
884
+ authenticator.logout("Logout", "main")
885
+
886
+ st.header("Simulation")
887
+ col1, col2 = st.columns([1, 1])
888
+
889
+ # Response Metrics
890
+ directory = "metrics_level_data"
891
+ metrics_list = get_excel_names(directory)
892
+ metrics_selected = col1.selectbox(
893
+ "Response Metrics",
894
+ metrics_list,
895
+ format_func=name_formating,
896
+ index=0,
897
+ on_change=reset_inputs,
898
+ )
899
+
900
+ # Target
901
+ target = name_formating(metrics_selected)
902
+
903
+ file_selected = (
904
+ f".\metrics_level_data\Overview_data_test_panel@#{metrics_selected}.xlsx"
905
+ )
906
+
907
+ # Panel List
908
+ panel_list = panel_fetch(file_selected)
909
+
910
+ # Panel Selected
911
+ panel_selected = col2.selectbox(
912
+ "Panel",
913
+ ["Aggregated"] + panel_list,
914
+ index=0,
915
+ on_change=reset_inputs,
916
+ )
917
+
918
+ if "update_rcs" in st.session_state:
919
+ updated_rcs = st.session_state["update_rcs"]
920
+ else:
921
+ updated_rcs = None
922
+
923
+ if "first_time" not in st.session_state:
924
+ st.session_state["first_time"] = True
925
+
926
+ # Check if state is initiaized
927
+ is_state_initiaized = st.session_state.get("initialized", False)
928
+ if not is_state_initiaized or st.session_state["first_time"]:
929
+ # initialize_data()
930
+ if panel_selected == "Aggregated":
931
+ initialize_data(
932
+ panel=panel_selected,
933
+ target_file=file_selected,
934
+ updated_rcs=updated_rcs,
935
+ metrics=metrics_selected,
936
+ )
937
+ panel = None
938
+ else:
939
+ initialize_data(
940
+ panel=panel_selected,
941
+ target_file=file_selected,
942
+ updated_rcs=updated_rcs,
943
+ metrics=metrics_selected,
944
+ )
945
+ st.session_state["initialized"] = True
946
+ st.session_state["first_time"] = False
947
+
948
+ # Channels List
949
+ channels_list = st.session_state["channels_list"]
950
+
951
+ # ======================================================== #
952
+ # ========================== UI ========================== #
953
+ # ======================================================== #
954
+
955
+ # print(list(st.session_state.keys()))
956
+ main_header = st.columns((2, 2))
957
+ sub_header = st.columns((1, 1, 1, 1))
958
+ _scenario = st.session_state["scenario"]
959
+
960
+ if "total_spends_change" not in st.session_state:
961
+ st.session_state.total_spends_change = 0
962
+
963
+ if "total_sales_change" not in st.session_state:
964
+ st.session_state.total_sales_change = 0
965
+
966
+ if "total_spends_change_abs" not in st.session_state:
967
+ st.session_state["total_spends_change_abs"] = numerize(
968
+ _scenario.actual_total_spends, 1
969
+ )
970
+
971
+ if "total_sales_change_abs" not in st.session_state:
972
+ st.session_state["total_sales_change_abs"] = numerize(
973
+ _scenario.actual_total_sales, 1
974
+ )
975
+
976
+ if "total_spends_change_abs_slider" not in st.session_state:
977
+ st.session_state.total_spends_change_abs_slider = numerize(
978
+ _scenario.actual_total_spends, 1
979
+ )
980
+
981
+ if "total_sales_change_abs_slider" not in st.session_state:
982
+ st.session_state.total_sales_change_abs_slider = numerize(
983
+ _scenario.actual_total_sales, 1
984
+ )
985
+
986
+ with main_header[0]:
987
+ st.subheader("Actual")
988
+
989
+ with main_header[-1]:
990
+ st.subheader("Simulated")
991
+
992
+ with sub_header[0]:
993
+ st.metric(label="Spends", value=format_numbers(_scenario.actual_total_spends))
994
+
995
+ with sub_header[1]:
996
+ st.metric(
997
+ label=target,
998
+ value=format_numbers(
999
+ float(_scenario.actual_total_sales), include_indicator=False
1000
+ ),
1001
+ )
1002
+
1003
+ with sub_header[2]:
1004
+ st.metric(
1005
+ label="Spends",
1006
+ value=format_numbers(_scenario.modified_total_spends),
1007
+ delta=numerize(_scenario.delta_spends, 1),
1008
+ )
1009
+
1010
+ with sub_header[3]:
1011
+ st.metric(
1012
+ label=target,
1013
+ value=format_numbers(
1014
+ float(_scenario.modified_total_sales), include_indicator=False
1015
+ ),
1016
+ delta=numerize(_scenario.delta_sales, 1),
1017
+ )
1018
+
1019
+ with st.expander("Channel Spends Simulator", expanded=True):
1020
+ _columns1 = st.columns((2, 2, 1, 1))
1021
+ with _columns1[0]:
1022
+ optimization_selection = st.selectbox(
1023
+ "Optimize", options=["Media Spends", target], key="optimization_key"
1024
+ )
1025
+
1026
+ with _columns1[1]:
1027
+ st.markdown("#")
1028
+ # if st.checkbox(
1029
+ # label="Optimize all Channels",
1030
+ # key="optimze_all_channels",
1031
+ # value=False,
1032
+ # # on_change=select_all_channels_for_optimization,
1033
+ # ):
1034
+ # select_all_channels_for_optimization()
1035
+
1036
+ st.checkbox(
1037
+ label="Optimize all Channels",
1038
+ key="optimze_all_channels",
1039
+ value=False,
1040
+ on_change=select_all_channels_for_optimization,
1041
+ )
1042
+
1043
+ with _columns1[2]:
1044
+ st.markdown("#")
1045
+ # st.button(
1046
+ # "Optimize",
1047
+ # on_click=optimize,
1048
+ # args=(st.session_state["optimization_key"]),
1049
+ # use_container_width=True,
1050
+ # )
1051
+
1052
+ optimize_placeholder = st.empty()
1053
+
1054
+ with _columns1[3]:
1055
+ st.markdown("#")
1056
+ st.button(
1057
+ "Reset",
1058
+ on_click=reset_scenario,
1059
+ args=(panel_selected, file_selected, updated_rcs),
1060
+ use_container_width=True,
1061
+ )
1062
+
1063
+ _columns2 = st.columns((2, 2, 2))
1064
+ if st.session_state["optimization_key"] == "Media Spends":
1065
+ with _columns2[0]:
1066
+ spend_input = st.text_input(
1067
+ "Absolute",
1068
+ key="total_spends_change_abs",
1069
+ # label_visibility="collapsed",
1070
+ on_change=update_all_spends_abs,
1071
+ )
1072
+
1073
+ with _columns2[1]:
1074
+ st.number_input(
1075
+ "Percent Change",
1076
+ key="total_spends_change",
1077
+ min_value=-50,
1078
+ max_value=50,
1079
+ step=1,
1080
+ on_change=update_spends,
1081
+ )
1082
+
1083
+ with _columns2[2]:
1084
+ min_value = round(_scenario.actual_total_spends * 0.5)
1085
+ max_value = round(_scenario.actual_total_spends * 1.5)
1086
+ st.session_state["total_spends_change_abs_slider_options"] = [
1087
+ numerize(value, 1)
1088
+ for value in range(min_value, max_value + 1, int(1e4))
1089
+ ]
1090
+
1091
+ st.select_slider(
1092
+ "Absolute Slider",
1093
+ options=st.session_state["total_spends_change_abs_slider_options"],
1094
+ key="total_spends_change_abs_slider",
1095
+ on_change=update_all_spends_abs_slider,
1096
+ )
1097
+
1098
+ elif st.session_state["optimization_key"] == target:
1099
+ with _columns2[0]:
1100
+ sales_input = st.text_input(
1101
+ "Absolute",
1102
+ key="total_sales_change_abs",
1103
+ on_change=update_sales_abs,
1104
+ )
1105
+
1106
+ with _columns2[1]:
1107
+ st.number_input(
1108
+ "Percent Change",
1109
+ key="total_sales_change",
1110
+ min_value=-50,
1111
+ max_value=50,
1112
+ step=1,
1113
+ on_change=update_sales,
1114
+ )
1115
+ with _columns2[2]:
1116
+ min_value = round(_scenario.actual_total_sales * 0.5)
1117
+ max_value = round(_scenario.actual_total_sales * 1.5)
1118
+ st.session_state["total_sales_change_abs_slider_options"] = [
1119
+ numerize(value, 1)
1120
+ for value in range(min_value, max_value + 1, int(1e5))
1121
+ ]
1122
+
1123
+ st.select_slider(
1124
+ "Absolute Slider",
1125
+ options=st.session_state["total_sales_change_abs_slider_options"],
1126
+ key="total_sales_change_abs_slider",
1127
+ on_change=update_sales_abs_slider,
1128
+ )
1129
+
1130
+ if (
1131
+ not st.session_state["allow_sales_update"]
1132
+ and optimization_selection == target
1133
+ ):
1134
+ st.warning("Invalid Input")
1135
+
1136
+ if (
1137
+ not st.session_state["allow_spends_update"]
1138
+ and optimization_selection == "Media Spends"
1139
+ ):
1140
+ st.warning("Invalid Input")
1141
+
1142
+ status_placeholder = st.empty()
1143
+
1144
+ # if optimize_placeholder.button("Optimize", use_container_width=True):
1145
+ # optimize(st.session_state["optimization_key"], status_placeholder)
1146
+ # st.rerun()
1147
+
1148
+ optimize_placeholder.button(
1149
+ "Optimize",
1150
+ on_click=optimize,
1151
+ args=(st.session_state["optimization_key"], status_placeholder),
1152
+ use_container_width=True,
1153
+ )
1154
+
1155
+ st.markdown("""<hr class="spends-heading-seperator">""", unsafe_allow_html=True)
1156
+ _columns = st.columns((2.5, 2, 1.5, 1.5, 1))
1157
+ with _columns[0]:
1158
+ generate_spending_header("Channel")
1159
+ with _columns[1]:
1160
+ generate_spending_header("Spends Input")
1161
+ with _columns[2]:
1162
+ generate_spending_header("Spends")
1163
+ with _columns[3]:
1164
+ generate_spending_header(target)
1165
+ with _columns[4]:
1166
+ generate_spending_header("Optimize")
1167
+
1168
+ st.markdown("""<hr class="spends-heading-seperator">""", unsafe_allow_html=True)
1169
+
1170
+ if "acutual_predicted" not in st.session_state:
1171
+ st.session_state["acutual_predicted"] = {
1172
+ "Channel_name": [],
1173
+ "Actual_spend": [],
1174
+ "Optimized_spend": [],
1175
+ "Delta": [],
1176
+ }
1177
+ for i, channel_name in enumerate(channels_list):
1178
+ _channel_class = st.session_state["scenario"].channels[channel_name]
1179
+ _columns = st.columns((2.5, 1.5, 1.5, 1.5, 1))
1180
+ with _columns[0]:
1181
+ st.write(channel_name_formating(channel_name))
1182
+ bin_placeholder = st.container()
1183
+
1184
+ with _columns[1]:
1185
+ channel_bounds = _channel_class.bounds
1186
+ channel_spends = float(_channel_class.actual_total_spends)
1187
+ min_value = float((1 + channel_bounds[0] / 100) * channel_spends)
1188
+ max_value = float((1 + channel_bounds[1] / 100) * channel_spends)
1189
+ ##print(st.session_state[channel_name])
1190
+ spend_input = st.text_input(
1191
+ channel_name,
1192
+ key=channel_name,
1193
+ label_visibility="collapsed",
1194
+ on_change=partial(update_data, channel_name),
1195
+ )
1196
+ if not validate_input(spend_input):
1197
+ st.error("Invalid input")
1198
+
1199
+ channel_name_current = f"{channel_name}_change"
1200
+
1201
+ st.number_input(
1202
+ "Percent Change",
1203
+ key=channel_name_current,
1204
+ step=1,
1205
+ on_change=partial(update_data_by_percent, channel_name),
1206
+ )
1207
+
1208
+ with _columns[2]:
1209
+ # spends
1210
+ current_channel_spends = float(
1211
+ _channel_class.modified_total_spends
1212
+ * _channel_class.conversion_rate
1213
+ )
1214
+ actual_channel_spends = float(
1215
+ _channel_class.actual_total_spends * _channel_class.conversion_rate
1216
+ )
1217
+ spends_delta = float(
1218
+ _channel_class.delta_spends * _channel_class.conversion_rate
1219
+ )
1220
+ st.session_state["acutual_predicted"]["Channel_name"].append(
1221
+ channel_name
1222
+ )
1223
+ st.session_state["acutual_predicted"]["Actual_spend"].append(
1224
+ actual_channel_spends
1225
+ )
1226
+ st.session_state["acutual_predicted"]["Optimized_spend"].append(
1227
+ current_channel_spends
1228
+ )
1229
+ st.session_state["acutual_predicted"]["Delta"].append(spends_delta)
1230
+ ## REMOVE
1231
+ st.metric(
1232
+ "Spends",
1233
+ format_numbers(current_channel_spends),
1234
+ delta=numerize(spends_delta, 1),
1235
+ label_visibility="collapsed",
1236
+ )
1237
+
1238
+ with _columns[3]:
1239
+ # sales
1240
+ current_channel_sales = float(_channel_class.modified_total_sales)
1241
+ actual_channel_sales = float(_channel_class.actual_total_sales)
1242
+ sales_delta = float(_channel_class.delta_sales)
1243
+ st.metric(
1244
+ target,
1245
+ format_numbers(current_channel_sales, include_indicator=False),
1246
+ delta=numerize(sales_delta, 1),
1247
+ label_visibility="collapsed",
1248
+ )
1249
+
1250
+ with _columns[4]:
1251
+
1252
+ # if st.checkbox(
1253
+ # label="select for optimization",
1254
+ # key=f"{channel_name}_selected",
1255
+ # value=False,
1256
+ # # on_change=partial(select_channel_for_optimization, channel_name),
1257
+ # label_visibility="collapsed",
1258
+ # ):
1259
+ # select_channel_for_optimization(channel_name)
1260
+
1261
+ st.checkbox(
1262
+ label="select for optimization",
1263
+ key=f"{channel_name}_selected",
1264
+ value=False,
1265
+ on_change=partial(select_channel_for_optimization, channel_name),
1266
+ label_visibility="collapsed",
1267
+ )
1268
+
1269
+ st.markdown(
1270
+ """<hr class="spends-child-seperator">""",
1271
+ unsafe_allow_html=True,
1272
+ )
1273
+
1274
+ # Bins
1275
+ col = channels_list[i]
1276
+ x_actual = st.session_state["scenario"].channels[col].actual_spends
1277
+ x_modified = st.session_state["scenario"].channels[col].modified_spends
1278
+
1279
+ x_total = x_modified.sum()
1280
+ power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3
1281
+
1282
+ updated_rcs_key = f"{metrics_selected}#@{panel_selected}#@{channel_name}"
1283
+
1284
+ if updated_rcs and updated_rcs_key in list(updated_rcs.keys()):
1285
+ K = updated_rcs[updated_rcs_key]["K"]
1286
+ b = updated_rcs[updated_rcs_key]["b"]
1287
+ a = updated_rcs[updated_rcs_key]["a"]
1288
+ x0 = updated_rcs[updated_rcs_key]["x0"]
1289
+ else:
1290
+ K = st.session_state["rcs"][col]["K"]
1291
+ b = st.session_state["rcs"][col]["b"]
1292
+ a = st.session_state["rcs"][col]["a"]
1293
+ x0 = st.session_state["rcs"][col]["x0"]
1294
+
1295
+ x_plot = np.linspace(0, 5 * x_actual.sum(), 200)
1296
+
1297
+ # Append current_channel_spends to the end of x_plot
1298
+ x_plot = np.append(x_plot, current_channel_spends)
1299
+
1300
+ x, y, marginal_roi = [], [], []
1301
+ for x_p in x_plot:
1302
+ x.append(x_p * x_actual / x_actual.sum())
1303
+
1304
+ for index in range(len(x_plot)):
1305
+ y.append(s_curve(x[index] / 10**power, K, b, a, x0))
1306
+
1307
+ for index in range(len(x_plot)):
1308
+ marginal_roi.append(
1309
+ a * y[index] * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
1310
+ )
1311
+
1312
+ x = (
1313
+ np.sum(x, axis=1)
1314
+ * st.session_state["scenario"].channels[col].conversion_rate
1315
+ )
1316
+ y = np.sum(y, axis=1)
1317
+ marginal_roi = (
1318
+ np.average(marginal_roi, axis=1)
1319
+ / st.session_state["scenario"].channels[col].conversion_rate
1320
+ )
1321
+
1322
+ roi = y / np.maximum(x, np.finfo(float).eps)
1323
+
1324
+ roi_current, marginal_roi_current = roi[-1], marginal_roi[-1]
1325
+ x, y, roi, marginal_roi = (
1326
+ x[:-1],
1327
+ y[:-1],
1328
+ roi[:-1],
1329
+ marginal_roi[:-1],
1330
+ ) # Drop data for current spends
1331
+
1332
+ start_value, end_value, left_value, right_value = find_segment_value(
1333
+ x,
1334
+ roi,
1335
+ marginal_roi,
1336
+ )
1337
+
1338
+ rgba = calculate_rgba(
1339
+ start_value,
1340
+ end_value,
1341
+ left_value,
1342
+ right_value,
1343
+ current_channel_spends,
1344
+ )
1345
+
1346
+ with bin_placeholder:
1347
+ st.markdown(
1348
+ f"""
1349
+ <div style="
1350
+ border-radius: 12px;
1351
+ background-color: {rgba};
1352
+ padding: 10px;
1353
+ text-align: center;
1354
+ color: #006EC0;
1355
+ ">
1356
+ <p style="margin: 0; font-size: 20px;">ROI: {round(roi_current,1)}</p>
1357
+ <p style="margin: 0; font-size: 20px;">Marginal ROI: {round(marginal_roi_current,1)}</p>
1358
+ </div>
1359
+ """,
1360
+ unsafe_allow_html=True,
1361
+ )
1362
+
1363
+ with st.expander("See Response Curves", expanded=True):
1364
+ fig = plot_response_curves()
1365
+ st.plotly_chart(fig, use_container_width=True)
1366
+
1367
+ _columns = st.columns(2)
1368
+ with _columns[0]:
1369
+ st.subheader("Save Scenario")
1370
+ scenario_name = st.text_input(
1371
+ "Scenario name",
1372
+ key="scenario_input",
1373
+ placeholder="Scenario name",
1374
+ label_visibility="collapsed",
1375
+ )
1376
+ st.button(
1377
+ "Save",
1378
+ on_click=lambda: save_scenario(scenario_name),
1379
+ disabled=len(st.session_state["scenario_input"]) == 0,
1380
+ )
1381
+
1382
+ summary_df = pd.DataFrame(st.session_state["acutual_predicted"])
1383
+ summary_df.drop_duplicates(subset="Channel_name", keep="last", inplace=True)
1384
+
1385
+ summary_df_sorted = summary_df.sort_values(by="Delta", ascending=False)
1386
+ summary_df_sorted["Delta_percent"] = np.round(
1387
+ ((summary_df_sorted["Optimized_spend"] / summary_df_sorted["Actual_spend"]) - 1)
1388
+ * 100,
1389
+ 2,
1390
+ )
1391
+
1392
+ with open("summary_df.pkl", "wb") as f:
1393
+ pickle.dump(summary_df_sorted, f)
1394
+ # st.dataframe(summary_df_sorted)
1395
+ # ___columns=st.columns(3)
1396
+ # with ___columns[2]:
1397
+ # fig=summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent')
1398
+ # st.plotly_chart(fig,use_container_width=True)
1399
+ # with ___columns[0]:
1400
+ # fig=summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend')
1401
+ # st.plotly_chart(fig,use_container_width=True)
1402
+ # with ___columns[1]:
1403
+ # fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend')
1404
+ # st.plotly_chart(fig,use_container_width=True)
1405
+
1406
+ elif auth_status == False:
1407
+ st.error("Username/Password is incorrect")
1408
+
1409
+ if auth_status != True:
1410
+ try:
1411
+ username_forgot_pw, email_forgot_password, random_password = (
1412
+ authenticator.forgot_password("Forgot password")
1413
+ )
1414
+ if username_forgot_pw:
1415
+ st.session_state["config"]["credentials"]["usernames"][username_forgot_pw][
1416
+ "password"
1417
+ ] = stauth.Hasher([random_password]).generate()[0]
1418
+ send_email(email_forgot_password, random_password)
1419
+ st.success("New password sent securely")
1420
+ # Random password to be transferred to user securely
1421
+ elif username_forgot_pw == False:
1422
+ st.error("Username not found")
1423
+ except Exception as e:
1424
+ st.error(e)
pages/9_Saved_Scenarios.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from numerize.numerize import numerize
3
+ import io
4
+ import pandas as pd
5
+ from utilities import (format_numbers,decimal_formater,
6
+ channel_name_formating,
7
+ load_local_css,set_header,
8
+ initialize_data,
9
+ load_authenticator)
10
+ from openpyxl import Workbook
11
+ from openpyxl.styles import Alignment,Font,PatternFill
12
+ import pickle
13
+ import streamlit_authenticator as stauth
14
+ import yaml
15
+ from yaml import SafeLoader
16
+ from classes import class_from_dict
17
+
18
+ st.set_page_config(layout='wide')
19
+ load_local_css('styles.css')
20
+ set_header()
21
+
22
+ # for k, v in st.session_state.items():
23
+ # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
24
+ # st.session_state[k] = v
25
+
26
+ def create_scenario_summary(scenario_dict):
27
+ summary_rows = []
28
+ for channel_dict in scenario_dict['channels']:
29
+ name_mod = channel_name_formating(channel_dict['name'])
30
+ summary_rows.append([name_mod,
31
+ channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate'),
32
+ channel_dict.get('modified_total_spends') * channel_dict.get('conversion_rate'),
33
+ channel_dict.get('actual_total_sales') ,
34
+ channel_dict.get('modified_total_sales'),
35
+ channel_dict.get('actual_total_sales') / (channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate')),
36
+ channel_dict.get('modified_total_sales') / (channel_dict.get('modified_total_spends') * channel_dict.get('conversion_rate')),
37
+ channel_dict.get('actual_mroi'),
38
+ channel_dict.get('modified_mroi'),
39
+ channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate') / channel_dict.get('actual_total_sales'),
40
+ channel_dict.get('modified_total_spends') * channel_dict.get('conversion_rate') / channel_dict.get('modified_total_sales')])
41
+
42
+ summary_rows.append(['Total',
43
+ scenario_dict.get('actual_total_spends'),
44
+ scenario_dict.get('modified_total_spends'),
45
+ scenario_dict.get('actual_total_sales'),
46
+ scenario_dict.get('modified_total_sales'),
47
+ scenario_dict.get('actual_total_sales') / scenario_dict.get('actual_total_spends'),
48
+ scenario_dict.get('modified_total_sales') / scenario_dict.get('modified_total_spends'),
49
+ '-',
50
+ '-',
51
+ scenario_dict.get('actual_total_spends') / scenario_dict.get('actual_total_sales'),
52
+ scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')])
53
+
54
+ columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
55
+ columns_index = columns_index.append(pd.MultiIndex.from_product([['Spends','NRPU','ROI','MROI','Spend per NRPU'],['Actual','Simulated']], names=["first", "second"]))
56
+ return pd.DataFrame(summary_rows, columns=columns_index)
57
+
58
+
59
+
60
+ def summary_df_to_worksheet(df, ws):
61
+ heading_fill = PatternFill(fill_type='solid',start_color='FF11B6BD',end_color='FF11B6BD')
62
+ for j,header in enumerate(df.columns.values):
63
+ col = j + 1
64
+ for i in range(1,3):
65
+ ws.cell(row=i, column=j + 1, value=header[i - 1]).font = Font(bold=True, color='FF11B6BD')
66
+ ws.cell(row=i,column=j+1).fill = heading_fill
67
+ if col > 1 and (col - 6)%5==0:
68
+ ws.merge_cells(start_row=1, end_row=1, start_column = col-3, end_column=col)
69
+ ws.cell(row=1,column=col).alignment = Alignment(horizontal='center')
70
+ for i,row in enumerate(df.itertuples()):
71
+ for j,value in enumerate(row):
72
+ if j == 0:
73
+ continue
74
+ elif (j-2)%4 == 0 or (j-3)%4 == 0:
75
+ ws.cell(row=i+3, column = j, value=value).number_format = '$#,##0.0'
76
+ else:
77
+ ws.cell(row=i+3, column = j, value=value)
78
+
79
+ from openpyxl.utils import get_column_letter
80
+ from openpyxl.styles import Font, PatternFill
81
+ import logging
82
+
83
+ def scenario_df_to_worksheet(df, ws):
84
+ heading_fill = PatternFill(start_color='FF11B6BD', end_color='FF11B6BD', fill_type='solid')
85
+
86
+ for j, header in enumerate(df.columns.values):
87
+ cell = ws.cell(row=1, column=j + 1, value=header)
88
+ cell.font = Font(bold=True, color='FF11B6BD')
89
+ cell.fill = heading_fill
90
+
91
+ for i, row in enumerate(df.itertuples()):
92
+ for j, value in enumerate(row[1:], start=1): # Start from index 1 to skip the index column
93
+ try:
94
+ cell = ws.cell(row=i + 2, column=j, value=value)
95
+ if isinstance(value, (int, float)):
96
+ cell.number_format = '$#,##0.0'
97
+ elif isinstance(value, str):
98
+ cell.value = value[:32767]
99
+ else:
100
+ cell.value = str(value)
101
+ except ValueError as e:
102
+ logging.error(f"Error assigning value '{value}' to cell {get_column_letter(j)}{i+2}: {e}")
103
+ cell.value = None # Assign None to the cell where the error occurred
104
+
105
+ return ws
106
+
107
+
108
+
109
+
110
+
111
+
112
+ def download_scenarios():
113
+ """
114
+ Makes a excel with all saved scenarios and saves it locally
115
+ """
116
+ ## create summary page
117
+ if len(scenarios_to_download) == 0:
118
+ return
119
+ wb = Workbook()
120
+ wb.iso_dates = True
121
+ wb.remove(wb.active)
122
+ st.session_state['xlsx_buffer'] = io.BytesIO()
123
+ summary_df = None
124
+ #print(scenarios_to_download)
125
+ for scenario_name in scenarios_to_download:
126
+ scenario_dict = st.session_state['saved_scenarios'][scenario_name]
127
+ _spends = []
128
+ column_names = ['Date']
129
+ _sales = None
130
+ dates = None
131
+ summary_rows = []
132
+ for channel in scenario_dict['channels']:
133
+ if dates is None:
134
+ dates = channel.get('dates')
135
+ _spends.append(dates)
136
+ if _sales is None:
137
+ _sales = channel.get('modified_sales')
138
+ else:
139
+ _sales += channel.get('modified_sales')
140
+ _spends.append(channel.get('modified_spends') * channel.get('conversion_rate'))
141
+ column_names.append(channel.get('name'))
142
+
143
+ name_mod = channel_name_formating(channel['name'])
144
+ summary_rows.append([name_mod,
145
+ channel.get('modified_total_spends') * channel.get('conversion_rate') ,
146
+ channel.get('modified_total_sales'),
147
+ channel.get('modified_total_sales') / channel.get('modified_total_spends') * channel.get('conversion_rate'),
148
+ channel.get('modified_mroi'),
149
+ channel.get('modified_total_sales') / channel.get('modified_total_spends') * channel.get('conversion_rate')])
150
+ _spends.append(_sales)
151
+ column_names.append('NRPU')
152
+ scenario_df = pd.DataFrame(_spends).T
153
+ scenario_df.columns = column_names
154
+ ## write to sheet
155
+ ws = wb.create_sheet(scenario_name)
156
+ scenario_df_to_worksheet(scenario_df, ws)
157
+ summary_rows.append(['Total',
158
+ scenario_dict.get('modified_total_spends') ,
159
+ scenario_dict.get('modified_total_sales'),
160
+ scenario_dict.get('modified_total_sales') / scenario_dict.get('modified_total_spends'),
161
+ '-',
162
+ scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')])
163
+ columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
164
+ columns_index = columns_index.append(pd.MultiIndex.from_product([[scenario_name],['Spends','NRPU','ROI','MROI','Spends per NRPU']], names=["first", "second"]))
165
+ if summary_df is None:
166
+ summary_df = pd.DataFrame(summary_rows, columns = columns_index)
167
+ summary_df = summary_df.set_index(('','Channel'))
168
+ else:
169
+ _df = pd.DataFrame(summary_rows, columns = columns_index)
170
+ _df = _df.set_index(('','Channel'))
171
+ summary_df = summary_df.merge(_df, left_index=True, right_index=True)
172
+ ws = wb.create_sheet('Summary',0)
173
+ summary_df_to_worksheet(summary_df.reset_index(), ws)
174
+ wb.save(st.session_state['xlsx_buffer'])
175
+ st.session_state['disable_download_button'] = False
176
+
177
+ def disable_download_button():
178
+ st.session_state['disable_download_button'] =True
179
+
180
+ def transform(x):
181
+ if x.name == ("",'Channel'):
182
+ return x
183
+ elif x.name[0] == 'ROI' or x.name[0] == 'MROI':
184
+ return x.apply(lambda y : y if isinstance(y,str) else decimal_formater(format_numbers(y,include_indicator=False,n_decimals=4),n_decimals=4))
185
+ else:
186
+ return x.apply(lambda y : y if isinstance(y,str) else format_numbers(y))
187
+
188
+ def delete_scenario():
189
+ if selected_scenario in st.session_state['saved_scenarios']:
190
+ del st.session_state['saved_scenarios'][selected_scenario]
191
+ with open('../saved_scenarios.pkl', 'wb') as f:
192
+ pickle.dump(st.session_state['saved_scenarios'],f)
193
+
194
+ def load_scenario():
195
+ if selected_scenario in st.session_state['saved_scenarios']:
196
+ st.session_state['scenario'] = class_from_dict(selected_scenario_details)
197
+
198
+
199
+
200
+ authenticator = st.session_state.get('authenticator')
201
+ if authenticator is None:
202
+ authenticator = load_authenticator()
203
+
204
+ name, authentication_status, username = authenticator.login('Login', 'main')
205
+ auth_status = st.session_state.get('authentication_status')
206
+
207
+ if auth_status == True:
208
+ is_state_initiaized = st.session_state.get('initialized',False)
209
+ if not is_state_initiaized:
210
+ #print("Scenario page state reloaded")
211
+ initialize_data()
212
+
213
+
214
+ saved_scenarios = st.session_state['saved_scenarios']
215
+
216
+
217
+ if len(saved_scenarios) ==0:
218
+ st.header('No saved scenarios')
219
+
220
+ else:
221
+
222
+ with st.sidebar:
223
+ selected_scenario = st.radio(
224
+ 'Pick a scenario to view details',
225
+ list(saved_scenarios.keys())
226
+ )
227
+ st.markdown("""<hr>""", unsafe_allow_html=True)
228
+ scenarios_to_download = st.multiselect('Select scenarios to download',
229
+ list(saved_scenarios.keys()))
230
+
231
+ st.button('Prepare download',on_click=download_scenarios)
232
+ st.download_button(
233
+ label="Download Scenarios",
234
+ data=st.session_state['xlsx_buffer'].getvalue(),
235
+ file_name="scenarios.xlsx",
236
+ mime="application/vnd.ms-excel",
237
+ disabled= st.session_state['disable_download_button'],
238
+ on_click= disable_download_button
239
+ )
240
+
241
+ column_1, column_2,column_3 = st.columns((6,1,1))
242
+ with column_1:
243
+ st.header(selected_scenario)
244
+ with column_2:
245
+ st.button('Delete scenarios', on_click=delete_scenario)
246
+ with column_3:
247
+ st.button('Load Scenario', on_click=load_scenario)
248
+
249
+ selected_scenario_details = saved_scenarios[selected_scenario]
250
+
251
+ pd.set_option('display.max_colwidth', 100)
252
+
253
+ st.markdown(create_scenario_summary(selected_scenario_details).transform(transform).style.set_table_styles(
254
+ [{
255
+ 'selector': 'th',
256
+ 'props': [('background-color', '#11B6BD')]
257
+ },
258
+ {
259
+ 'selector' : 'tr:nth-child(even)',
260
+ 'props' : [('background-color', '#11B6BD')]
261
+ }
262
+ ]).to_html(),unsafe_allow_html=True)
263
+
264
+ elif auth_status == False:
265
+ st.error('Username/Password is incorrect')
266
+
267
+ if auth_status != True:
268
+ try:
269
+ username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
270
+ if username_forgot_pw:
271
+ st.success('New password sent securely')
272
+ # Random password to be transferred to user securely
273
+ elif username_forgot_pw == False:
274
+ st.error('Username not found')
275
+ except Exception as e:
276
+ st.error(e)