howardroark committed on
Commit
dcbe801
·
1 Parent(s): 45da72a

charts update

Browse files
Files changed (2) hide show
  1. app.py +207 -45
  2. requirements.txt +1 -1
app.py CHANGED
@@ -5,6 +5,9 @@ from models_utils.ml_models import ModelTraining
5
  from eval_utils.evaluation import ModelEvaluator
6
 
7
  import matplotlib.pyplot as plt
 
 
 
8
  import numpy as np
9
  import pandas as pd
10
  import streamlit as st
@@ -104,9 +107,12 @@ if tabs == "Exploratory analysis":
104
  st.write('We can also visualize the tradeoff between conversions and platform benefit by plotting the mean benefit per user on the y-axis and the mean conversion rate on the x-axis, for each treatment group.')
105
  mean_benefit_vs_conversion = eda.compute_mean_benefit_vs_conversion()
106
 
107
- fig, ax = plt.subplots()
108
- mean_benefit_vs_conversion.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
109
- st.pyplot(fig)
 
 
 
110
 
111
  st.write('''
112
  We further compute the Average Treatment Effect (ATE) for both the mean conversion rate and the mean benefit per user:
@@ -116,9 +122,12 @@ if tabs == "Exploratory analysis":
116
  ''')
117
  mean_conversions_ate = eda.compute_ate()
118
 
119
- fig, ax = plt.subplots()
120
- mean_conversions_ate.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
121
- st.pyplot(fig)
 
 
 
122
 
123
  st.subheader('Feature importance')
124
 
@@ -131,10 +140,13 @@ if tabs == "Exploratory analysis":
131
 
132
  feature_importance = FeatureImportance(uplift_sim.df, X_names, y_name = 'conversion', treatment_group = treatment_group)
133
  fi = feature_importance.compute_feature_importance()
134
- fig, ax = plt.subplots()
135
  di_df_sorted = fi.sort_values(by='score', ascending=False)
136
- di_df_sorted[['feature', 'score']].plot.barh(x='feature', y='score', ax=ax)
137
- st.pyplot(fig)
 
 
 
138
 
139
  st.write("""
140
  - AccountLifetimeIndex: Longer-standing accounts are key predictors of customer response to promotions \n
@@ -194,13 +206,26 @@ if tabs == "Model training":
194
  st.subheader('Feature Importances')
195
  fig, ax = plt.subplots()
196
 
 
 
 
 
 
 
 
 
197
  for k, v in feature_importances.items():
198
- st.write(f"Feature importance for {k}")
199
- v.plot(kind='barh', ax=ax)
200
- ax.set_xlabel("Importance")
201
- ax.set_ylabel("Feature")
202
- ax.set_title(f"Feature Importance for {model_type}")
203
- st.pyplot(fig)
 
 
 
 
 
204
 
205
  else:
206
  st.error("Please generate and preprocess the dataset first.")
@@ -245,47 +270,184 @@ if tabs == "Economic effects":
245
 
246
  # Plotting CATE Conversion
247
  st.subheader("CATE Conversion vs Targeted Population")
248
- fig, ax_conversion = plt.subplots()
249
- for discount, color in zip(discounts, ['b', 'g', 'y']):
250
- qini_conversions[discount].plot(ax=ax_conversion, x='index', y='S', color=color)
251
- qini_conversions[discount].plot(ax=ax_conversion, x='index', y='Random', color='r', ls='--')
252
 
253
- ax_conversion.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
254
- ax_conversion.set_xlabel('Fraction of Targeted Users')
255
- ax_conversion.set_ylabel('CATE Conversion')
256
- ax_conversion.set_title('CATE Conversion vs Targeted Population')
257
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  # Plotting CATE Benefit
260
  st.subheader("CATE Benefit vs Targeted Population")
261
- fig, ax_benefit = plt.subplots()
262
- for discount, color in zip(discounts, ['b', 'g', 'y']):
263
- qini_benefits[discount].plot(ax=ax_benefit, x='index', y='S', color=color)
264
- qini_benefits[discount].plot(ax=ax_benefit, x='index', y='Random', color='r', ls='--')
265
 
266
- ax_benefit.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
267
- ax_benefit.set_xlabel('Fraction of Targeted Users')
268
- ax_benefit.set_ylabel('CATE Benefit')
269
- ax_benefit.set_title('CATE Benefit vs Targeted Population')
270
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  # Plotting CATE Benefit vs CATE Conversion
273
  st.subheader("CATE Benefit vs CATE Conversion")
274
- fig, ax_comp = plt.subplots()
275
- colors = ['b', 'g', 'y']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  for i, discount in enumerate(discounts):
277
- qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
278
  qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
279
- qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
280
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
282
- st.write('In the last plot for example we can see that there is a region where offering 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact in overall conversion uplift while reducing our benefit loss considerably.')
283
 
284
- ax_comp.legend(prop={'size': 10})
285
- ax_comp.set_xlabel('CATE Conversion')
286
- ax_comp.set_ylabel('CATE Benefit')
287
- ax_comp.set_title('CATE Benefit vs CATE Conversion')
288
- st.pyplot(fig)
289
 
290
  else:
291
- st.error("Please ensure the model is trained and the dataset is prepared.")
 
5
  from eval_utils.evaluation import ModelEvaluator
6
 
7
  import matplotlib.pyplot as plt
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+
11
  import numpy as np
12
  import pandas as pd
13
  import streamlit as st
 
107
  st.write('We can also visualize the tradeoff between conversions and platform benefit by plotting the mean benefit per user on the y-axis and the mean conversion rate on the x-axis, for each treatment group.')
108
  mean_benefit_vs_conversion = eda.compute_mean_benefit_vs_conversion()
109
 
110
+ # fig, ax = plt.subplots()
111
+ # mean_benefit_vs_conversion.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
112
+ # st.pyplot(fig)
113
+
114
+ fig = px.scatter(mean_benefit_vs_conversion, x='conversion', y='benefit', color_discrete_sequence=['LightBlue'], size_max=50)
115
+ st.plotly_chart(fig)
116
 
117
  st.write('''
118
  We further compute the Average Treatment Effect (ATE) for both the mean conversion rate and the mean benefit per user:
 
122
  ''')
123
  mean_conversions_ate = eda.compute_ate()
124
 
125
+ # fig, ax = plt.subplots()
126
+ # mean_conversions_ate.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
127
+ # st.pyplot(fig)
128
+
129
+ fig = px.scatter(mean_conversions_ate, x='conversion', y='benefit', color_discrete_sequence=['LightBlue'], size_max=50)
130
+ st.plotly_chart(fig)
131
 
132
  st.subheader('Feature importance')
133
 
 
140
 
141
  feature_importance = FeatureImportance(uplift_sim.df, X_names, y_name = 'conversion', treatment_group = treatment_group)
142
  fi = feature_importance.compute_feature_importance()
143
+ # fig, ax = plt.subplots()
144
  di_df_sorted = fi.sort_values(by='score', ascending=False)
145
+ # di_df_sorted[['feature', 'score']].plot.barh(x='feature', y='score', ax=ax)
146
+ # st.pyplot(fig)
147
+ fig = px.bar(di_df_sorted, y='feature', x='score', orientation='h')
148
+ st.plotly_chart(fig)
149
+
150
 
151
  st.write("""
152
  - AccountLifetimeIndex: Longer-standing accounts are key predictors of customer response to promotions \n
 
206
  st.subheader('Feature Importances')
207
  fig, ax = plt.subplots()
208
 
209
+ # for k, v in feature_importances.items():
210
+ # st.write(f"Feature importance for {k}")
211
+ # v.plot(kind='barh', ax=ax)
212
+ # ax.set_xlabel("Importance")
213
+ # ax.set_ylabel("Feature")
214
+ # ax.set_title(f"Feature Importance for {model_type}")
215
+ # st.pyplot(fig)
216
+
217
  for k, v in feature_importances.items():
218
+ # Reset index if 'v' is a Series or its index contains the feature names
219
+ if isinstance(v, pd.Series) or 'feature' not in v.columns:
220
+ v = v.reset_index()
221
+ v.columns = ['feature', 'score'] # Adjust column names accordingly
222
+
223
+ # Assuming 'v' now has columns ['feature', 'score']
224
+ fig = px.bar(v, y='feature', x='score', orientation='h',
225
+ title=f"Feature Importance for {model_type} ({k})",
226
+ labels={'score': 'Importance', 'feature': 'Feature'})
227
+ fig.update_layout(yaxis={'categoryorder':'total ascending'}) # Optional: This sorts the bars
228
+ st.plotly_chart(fig)
229
 
230
  else:
231
  st.error("Please generate and preprocess the dataset first.")
 
270
 
271
  # Plotting CATE Conversion
272
  st.subheader("CATE Conversion vs Targeted Population")
 
 
 
 
273
 
274
+ # fig, ax_conversion = plt.subplots()
275
+ # for discount, color in zip(discounts, ['b', 'g', 'y']):
276
+ # qini_conversions[discount].plot(ax=ax_conversion, x='index', y='S', color=color)
277
+ # qini_conversions[discount].plot(ax=ax_conversion, x='index', y='Random', color='r', ls='--')
278
+
279
+ # ax_conversion.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
280
+ # ax_conversion.set_xlabel('Fraction of Targeted Users')
281
+ # ax_conversion.set_ylabel('CATE Conversion')
282
+ # ax_conversion.set_title('CATE Conversion vs Targeted Population')
283
+ # st.pyplot(fig)
284
+
285
+ # Initialize a figure object
286
+ fig = go.Figure()
287
+
288
+ # Define colors for each discount level and the random baseline
289
+ colors = ['blue', 'green', 'yellow']
290
+ random_line_dash = 'dash'
291
+
292
+ # Iterate over each discount to add its line to the plot
293
+ for i, discount in enumerate(discounts):
294
+ # Add the model line
295
+ fig.add_trace(go.Scatter(x=qini_conversions[discount]['index'],
296
+ y=qini_conversions[discount]['S'],
297
+ mode='lines',
298
+ name=f'{discount} model',
299
+ line=dict(color=colors[i])))
300
+
301
+ # Add the random baseline line
302
+ fig.add_trace(go.Scatter(x=qini_conversions[discount]['index'],
303
+ y=qini_conversions[discount]['Random'],
304
+ mode='lines',
305
+ name=f'{discount} random',
306
+ line=dict(color='red', dash=random_line_dash)))
307
+
308
+ # Update the layout of the figure
309
+ fig.update_layout(
310
+ title='CATE Conversion vs Targeted Population',
311
+ xaxis_title='Fraction of Targeted Users',
312
+ yaxis_title='CATE Conversion',
313
+ legend_title='Legend',
314
+ legend=dict(
315
+ x=0,
316
+ y=1,
317
+ traceorder='normal',
318
+ font=dict(
319
+ size=10,
320
+ )
321
+ )
322
+ )
323
+
324
+ # Display the figure in Streamlit
325
+ st.plotly_chart(fig)
326
 
327
  # Plotting CATE Benefit
328
  st.subheader("CATE Benefit vs Targeted Population")
 
 
 
 
329
 
330
+ # fig, ax_benefit = plt.subplots()
331
+ # for discount, color in zip(discounts, ['b', 'g', 'y']):
332
+ # qini_benefits[discount].plot(ax=ax_benefit, x='index', y='S', color=color)
333
+ # qini_benefits[discount].plot(ax=ax_benefit, x='index', y='Random', color='r', ls='--')
334
+
335
+ # ax_benefit.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
336
+ # ax_benefit.set_xlabel('Fraction of Targeted Users')
337
+ # ax_benefit.set_ylabel('CATE Benefit')
338
+ # ax_benefit.set_title('CATE Benefit vs Targeted Population')
339
+ # st.pyplot(fig)
340
+
341
+
342
+ # Initialize a figure object
343
+ fig = go.Figure()
344
+
345
+ # Define colors for each discount level and the random baseline
346
+ colors = ['blue', 'green', 'yellow']
347
+ random_line_dash = 'dash'
348
+
349
+ # Iterate over each discount to add its line to the plot
350
+ for i, discount in enumerate(discounts):
351
+ # Add the model line
352
+ fig.add_trace(go.Scatter(x=qini_benefits[discount]['index'],
353
+ y=qini_benefits[discount]['S'],
354
+ mode='lines',
355
+ name=f'{discount} model',
356
+ line=dict(color=colors[i])))
357
+
358
+ # Add the random baseline line
359
+ fig.add_trace(go.Scatter(x=qini_benefits[discount]['index'],
360
+ y=qini_benefits[discount]['Random'],
361
+ mode='lines',
362
+ name=f'{discount} random',
363
+ line=dict(color='red', dash=random_line_dash)))
364
+
365
+ # Update the layout of the figure
366
+ fig.update_layout(
367
+ title='CATE Benefit vs Targeted Population',
368
+ xaxis_title='Fraction of Targeted Users',
369
+ yaxis_title='CATE Benefit',
370
+ legend_title='Legend',
371
+ legend=dict(
372
+ x=0,
373
+ y=1,
374
+ traceorder='normal',
375
+ font=dict(
376
+ size=10,
377
+ )
378
+ )
379
+ )
380
+
381
+ # Display the figure in Streamlit
382
+ st.plotly_chart(fig)
383
+
384
 
385
  # Plotting CATE Benefit vs CATE Conversion
386
  st.subheader("CATE Benefit vs CATE Conversion")
387
+
388
+ # fig, ax_comp = plt.subplots()
389
+ # colors = ['b', 'g', 'y']
390
+ # for i, discount in enumerate(discounts):
391
+ # qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
392
+ # qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
393
+ # qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
394
+
395
+ # st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
396
+ # st.write('In the last plot for example we can see that there is a region where offering 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact in overall conversion uplift while reducing our benefit loss considerably.')
397
+
398
+ # ax_comp.legend(prop={'size': 10})
399
+ # ax_comp.set_xlabel('CATE Conversion')
400
+ # ax_comp.set_ylabel('CATE Benefit')
401
+ # ax_comp.set_title('CATE Benefit vs CATE Conversion')
402
+ # st.pyplot(fig)
403
+
404
+ # Initialize a figure object
405
+ fig = go.Figure()
406
+
407
+ # Define colors for each discount level
408
+ colors = ['blue', 'green', 'yellow']
409
+
410
+ # Iterate over each discount to add its scatter plot to the figure
411
  for i, discount in enumerate(discounts):
412
+ qini_conc_test = pd.concat([qini_conversions[discount]['S'], qini_benefits[discount]['S']], axis=1)
413
  qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
414
+
415
+ # Add the scatter plot for each discount level
416
+ # Adjust marker size with `size` and line width with `line=dict(width=2)`
417
+ fig.add_trace(go.Scatter(x=qini_conc_test['cate_conversion'],
418
+ y=qini_conc_test['cate_benefit'],
419
+ mode='markers+lines',
420
+ name=f'{discount} model',
421
+ marker=dict(color=colors[i], size=6), # Adjust marker size here
422
+ line=dict(width=2))) # Adjust line width here
423
+
424
+ # Update the layout of the figure to adjust aspect ratio and margins if needed
425
+ fig.update_layout(
426
+ title='CATE Benefit vs CATE Conversion',
427
+ xaxis_title='CATE Conversion',
428
+ yaxis_title='CATE Benefit',
429
+ legend_title='Legend',
430
+ legend=dict(
431
+ x=0,
432
+ y=1,
433
+ traceorder='normal',
434
+ font=dict(
435
+ size=10,
436
+ )
437
+ ),
438
+ # Optionally adjust plot and margin size for a "thinner" appearance
439
+ margin=dict(l=20, r=20, t=50, b=20), # Adjust margins to change plot boundary
440
+ height=400, # Adjust height for overall "thinness"
441
+ width=600 # Adjust width as needed
442
+ )
443
+
444
+ # Display the figure in Streamlit
445
+ st.plotly_chart(fig)
446
+
447
+
448
  st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
449
+ st.write('In the last plot for example, we can see that there is a region where offering a 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact on overall conversion uplift while reducing our benefit loss considerably.')
450
 
 
 
 
 
 
451
 
452
  else:
453
+ st.error("Please ensure the model is trained and the dataset is prepared.")
requirements.txt CHANGED
@@ -3,4 +3,4 @@ matplotlib==3.8.3
3
  numpy==1.23.5
4
  scikit_learn==1.4.1.post1
5
  streamlit==1.32.2
6
- xgboost==2.0.3
 
3
  numpy==1.23.5
4
  scikit_learn==1.4.1.post1
5
  streamlit==1.32.2
6
+ xgboost==2.0.3