Pragya Jatav committed on
Commit
110db19
·
2 Parent(s): 8dc7735 fff1872
Files changed (1) hide show
  1. response_curves_model_quality.py +0 -244
response_curves_model_quality.py DELETED
@@ -1,244 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import matplotlib.pyplot as plt
4
- from scipy.optimize import curve_fit
5
- from sklearn.preprocessing import MinMaxScaler
6
- import warnings
7
- warnings.filterwarnings("ignore")
8
- import plotly.graph_objects as go
9
-
10
# Load the modelling input once at import time. Rows with any missing
# value are dropped, 'Date' is parsed to datetime, and the index is
# rebuilt so it is contiguous again (the previous index is kept as a column).
df = pd.read_csv('response_curves_input_file.csv').dropna()
df['Date'] = pd.to_datetime(df['Date'])
df = df.reset_index()
15
-
16
# The three lists below are parallel: position ``i`` in each one refers to
# the same media channel, and the fitting code indexes them together.

# Display names used to build the output column names
# ('<channel>_Spends' and 'Fit_Data_<channel>').
channel_cols = [
    'BroadcastTV',
    'CableTV',
    'Connected&OTTTV',
    'DisplayProspecting',
    'DisplayRetargeting',
    'Video',
    'SocialProspecting',
    'SocialRetargeting',
    'SearchBrand',
    'SearchNon-brand',
    'DigitalPartners',
    'Audio',
    'Email',
]

# Spend column for each channel, as named in the input data.
spend_cols = [
    'tv_broadcast_spend',
    'tv_cable_spend',
    'stream_video_spend',
    'disp_prospect_spend',
    'disp_retarget_spend',
    'olv_spend',
    'social_prospect_spend',
    'social_retarget_spend',
    'search_brand_spend',
    'search_nonbrand_spend',
    'cm_spend',
    'audio_spend',
    'email_spend',
]

# Response (prospects) column for each channel, as named in the input data.
prospect_cols = [
    'Broadcast TV_Prospects',
    'Cable TV_Prospects',
    'Connected & OTT TV_Prospects',
    'Display Prospecting_Prospects',
    'Display Retargeting_Prospects',
    'Video_Prospects',
    'Social Prospecting_Prospects',
    'Social Retargeting_Prospects',
    'Search Brand_Prospects',
    'Search Non-brand_Prospects',
    'Digital Partners_Prospects',
    'Audio_Prospects',
    'Email_Prospects',
]
58
-
59
def hill_equation(x, Kd, n):
    """Evaluate the Hill saturation curve ``x**n / (Kd**n + x**n)``.

    Args:
        x: Scalar or array-like input. The callers in this file pass
            min-max-scaled spend values, sometimes as plain lists.
        Kd: Half-saturation constant; for positive ``Kd`` the curve equals
            0.5 at ``x == Kd``.
        n: Hill exponent applied to both ``x`` and ``Kd``.

    Returns:
        The curve value(s): a NumPy float for scalar ``x``, otherwise an
        ``ndarray`` shaped like ``x``.
    """
    # Coerce up front: ``list ** float`` raises TypeError, and list inputs
    # only worked before when ``n`` happened to be a NumPy scalar.
    x = np.asarray(x, dtype=float)
    x_pow_n = x ** n
    return x_pow_n / (Kd ** n + x_pow_n)
61
-
62
-
63
def hill_func(x_data,y_data,x_minmax,y_minmax):
    """Fit the Hill equation to scaled data and return the fitted curve.

    Args:
        x_data: Sequence of scaled predictor values.
        y_data: Sequence of scaled response values, same length as
            ``x_data``.
        x_minmax: Scaler used for the x values. Not needed by the fit;
            kept so the signature stays compatible with existing callers.
        y_minmax: Fitted scaler whose ``inverse_transform`` maps scaled
            responses back to their original units.

    Returns:
        Tuple ``(y_fit, y_fit_inv, Kd_fit, n_fit)``: the fitted curve on
        the scaled axis, the same curve mapped back through ``y_minmax``
        as an ``(n, 1)`` array, and the two fitted Hill parameters.

    Raises:
        RuntimeError: Propagated from ``curve_fit`` when the optimisation
            does not converge within ``maxfev`` evaluations.
    """
    initial_guess = [1, 1]  # starting values for Kd and n
    # Only the parameter estimates are used; the covariance is discarded.
    params, _ = curve_fit(hill_equation, x_data, y_data, p0=initial_guess, maxfev=1000)
    Kd_fit, n_fit = params

    # Evaluate on an array so the call does not depend on how
    # hill_equation treats plain lists.
    y_fit = hill_equation(np.asarray(x_data, dtype=float), Kd_fit, n_fit)
    y_fit_inv = y_minmax.inverse_transform(np.array(y_fit).reshape(-1, 1))

    return y_fit, y_fit_inv, Kd_fit, n_fit
90
-
91
def data_output(channel, X, y, y_fit_inv, x_ext_data, y_fit_inv_ext, source_df=None):
    """Assemble the per-channel plotting table (observed rows + extension rows).

    Args:
        channel: Channel display name; used to build the
            ``'<channel>_Spends'`` and ``'Fit_Data_<channel>'`` columns.
        X: Observed spend values, one per output row.
        y: Observed responses. Unused; kept for signature compatibility.
        y_fit_inv: Fitted responses as a 2-D column (read as
            ``y_fit_inv[i][0]``), one row per element of ``X``.
        x_ext_data: Spend values of the extrapolated points. Must contain
            exactly three values to match the three placeholder dates.
        y_fit_inv_ext: Fitted responses for ``x_ext_data``, same 2-D
            column layout as ``y_fit_inv``.
        source_df: Frame supplying the ``'Date'`` and ``'MAT'`` columns.
            Values are aligned on index, so it must be indexed
            ``0..len(X)-1`` in the same row order as ``X``. Defaults to the
            module-level ``df``; pass the filtered frame explicitly when
            ``X`` was taken from a subset of rows.

    Returns:
        DataFrame with columns ``'<channel>_Spends'``, ``'Date'``,
        ``'MAT'``, ``'Fit_Data_<channel>'``. The three extension rows come
        last, carry placeholder 1950 dates and ``MAT == "ext"``, and keep
        their own ``0..2`` index labels (the index is not renumbered).
    """
    if source_df is None:
        source_df = df

    spend_name = f'{channel}_Spends'
    fit_col = 'Fit_Data_' + channel

    plot_df = pd.DataFrame()
    plot_df[spend_name] = X
    plot_df['Date'] = source_df['Date']
    plot_df['MAT'] = source_df['MAT']
    # Flatten the (n, 1) column returned by the scaler into a flat list.
    plot_df[fit_col] = [row[0] for row in y_fit_inv]

    # Extension rows: points beyond the observed spend range. They get
    # fixed placeholder keys so every channel's extension rows share the
    # same (Date, MAT) values.
    ext_df = pd.DataFrame()
    ext_df[spend_name] = x_ext_data
    ext_df[fit_col] = [row[0] for row in y_fit_inv_ext]
    ext_df['Date'] = [
        np.datetime64('1950-01-01'),
        np.datetime64('1950-06-15'),
        np.datetime64('1950-12-31'),
    ]
    ext_df['MAT'] = ["ext", "ext", "ext"]

    print(ext_df)
    # DataFrame.append was removed in pandas 2.0; concat is the supported
    # equivalent and likewise keeps the original index labels.
    return pd.concat([plot_df, ext_df])
130
-
131
def input_data(df,spend_col,prospect_col):
    """Return raw and min-max-scaled spend/response data for one channel.

    Args:
        df: Frame containing both columns.
        spend_col: Name of the spend column.
        prospect_col: Name of the response column.

    Returns:
        Tuple ``(X, y, x_data, y_data, x_minmax, y_minmax)``: the raw
        spend and response values as arrays, the scaled values as flat
        lists, and the two fitted ``MinMaxScaler`` objects (needed later
        to map values back to original units).
    """
    def _fit_scaler(column):
        # Fit a dedicated scaler on a single column and flatten its
        # (n, 1) output into a plain list.
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(df[[column]])
        return scaler, [row[0] for row in scaled]

    x_minmax, x_data = _fit_scaler(spend_col)
    y_minmax, y_data = _fit_scaler(prospect_col)

    X = np.array(df[spend_col].tolist())
    y = np.array(df[prospect_col].tolist())

    return X, y, x_data, y_data, x_minmax, y_minmax
148
-
149
def extend_s_curve(x_max,x_minmax,y_minmax, Kd_fit, n_fit):
    """Evaluate the fitted curve at three spends beyond the observed maximum.

    Args:
        x_max: Largest observed spend, in original units.
        x_minmax: Fitted scaler used to map spends onto the scaled axis.
        y_minmax: Fitted scaler used to map responses back to original
            units.
        Kd_fit: Fitted half-saturation parameter of the Hill curve.
        n_fit: Fitted Hill exponent.

    Returns:
        Tuple ``(x_ext_data, y_fit_inv)``: the three extended spends
        (1.2x, 1.3x and 1.5x of ``x_max``) and the matching curve values
        in original units as a ``(3, 1)`` array.
    """
    print(x_max)
    x_ext_data = [x_max * factor for factor in (1.2, 1.3, 1.5)]

    # Map the new spends onto the axis the curve was fitted on.
    scaled_ext = x_minmax.transform(pd.DataFrame(x_ext_data))
    x_data = [row[0] for row in scaled_ext]
    print(x_data)

    curve_scaled = hill_equation(x_data, Kd_fit, n_fit)
    y_fit_inv = y_minmax.inverse_transform(np.array(curve_scaled).reshape(-1, 1))

    return x_ext_data, y_fit_inv
164
-
165
def fit_data(spend_col,prospect_col,channel):
    """Fit a Hill response curve for one channel and return its plotting table.

    Only rows of the module-level ``df`` with positive spend take part in
    the fit. The fitted ``Kd`` and ``n`` are printed.

    Args:
        spend_col: Name of the channel's spend column in ``df``.
        prospect_col: Name of the channel's response column in ``df``.
        channel: Channel display name used in the output column names.

    Returns:
        The frame built by ``data_output``: one row per positive-spend
        observation followed by the extension rows produced by
        ``extend_s_curve``.
    """
    # Build an independent, re-indexed frame instead of resetting the
    # index in place on a filtered slice of ``df``.
    temp_df = df[df[spend_col] > 0].reset_index(drop=True)

    X, y, x_data, y_data, x_minmax, y_minmax = input_data(temp_df, spend_col, prospect_col)
    y_fit, y_fit_inv, Kd_fit, n_fit = hill_func(x_data, y_data, x_minmax, y_minmax)
    print('k: ',Kd_fit)
    print('n: ', n_fit)

    # Points beyond the largest observed spend.
    x_ext_data, y_fit_inv_ext = extend_s_curve(
        temp_df[spend_col].max(), x_minmax, y_minmax, Kd_fit, n_fit
    )

    plot_df = data_output(channel, X, y, y_fit_inv, x_ext_data, y_fit_inv_ext)

    # data_output takes 'Date'/'MAT' from the unfiltered module-level ``df``
    # by index position, so once zero-spend rows are filtered out the
    # observed rows would carry the keys of the wrong observations.
    # Re-stamp them from the filtered frame. The observed rows come first,
    # and positional indexing is used because the index labels repeat.
    n_obs = len(temp_df)
    for key_col in ('Date', 'MAT'):
        plot_df.iloc[:n_obs, plot_df.columns.get_loc(key_col)] = temp_df[key_col].to_numpy()

    return plot_df
180
-
181
# Build the combined plotting table. Start from the first channel, then
# left-join every other channel's fitted table on (Date, MAT). Because the
# join is a left join, the first channel's rows define which (Date, MAT)
# keys survive.
plotly_data = fit_data(spend_cols[0],prospect_cols[0],channel_cols[0])

# The bound comes from the list length rather than a hard-coded 13, so
# adding or removing a channel does not require touching this loop.
for i in range(1, len(channel_cols)):
    print(i)
    pdf = fit_data(spend_cols[i],prospect_cols[i],channel_cols[i])
    plotly_data = plotly_data.merge(pdf,on = ["Date","MAT"],how = "left")
188
-
189
def response_curves(channel,x_modified,y_modified):
    """Build the Plotly response-curve figure for one channel.

    Reads the module-level ``plotly_data`` table and draws three traces:
    the fitted curve ordered by spend, the point(s) at the latest date
    ("Current Spends"), and the supplied point ("Optimised Spends").

    Args:
        channel: Channel display name; non-breaking spaces are stripped
            before it is used to look up the ``'<channel>_Spends'`` and
            ``'Fit_Data_<channel>'`` columns.
        x_modified: Spend value of the "Optimised Spends" marker.
        y_modified: Response value of the "Optimised Spends" marker.

    Returns:
        The assembled ``plotly.graph_objects.Figure`` (not shown here).
    """
    x_col = (channel+"_Spends").replace('\xa0', '')
    y_col = ("Fit_Data_"+channel).replace('\xa0', '')

    fig = go.Figure()

    # Fitted curve: sort once and reuse the ordered frame for both axes.
    ordered = plotly_data.sort_values(by=x_col, ascending=True)
    fig.add_trace(go.Scatter(
        x=ordered[x_col],
        y=ordered[y_col],
        mode='lines+markers',
        name=x_col.replace('_Spends', '')
    ))

    # Current spend: rows at the most recent date. Filtering needs no
    # copy of the full table, and is done once for both axes.
    latest_rows = plotly_data[plotly_data['Date'] == plotly_data['Date'].max()]
    fig.add_trace(go.Scatter(
        x=latest_rows[x_col],
        y=latest_rows[y_col],
        mode='markers',
        marker=dict(size=13, color='green'),
        name="Current Spends"
    ))

    # Caller-supplied point.
    fig.add_trace(go.Scatter(
        x=[x_modified],
        y=[y_modified],
        mode='markers',
        marker=dict(size=13, color='blue'),
        name="Optimised Spends"
    ))

    fig.update_layout(
        title=channel+' Response Curve',
        xaxis_title='Weekly Spends',
        yaxis_title='Prospects'
    )

    return fig
244
-