Spaces:
Sleeping
Sleeping
Commit
·
dcbe801
1
Parent(s):
45da72a
charts update
Browse files
- app.py +207 -45
- requirements.txt +1 -1
app.py
CHANGED
@@ -5,6 +5,9 @@ from models_utils.ml_models import ModelTraining
|
|
5 |
from eval_utils.evaluation import ModelEvaluator
|
6 |
|
7 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
8 |
import numpy as np
|
9 |
import pandas as pd
|
10 |
import streamlit as st
|
@@ -104,9 +107,12 @@ if tabs == "Exploratory analysis":
|
|
104 |
st.write('We can also visualize the tradeoff between conversions and platform benefit by plotting the mean benefit per user on the y-axis and the mean conversion rate on the x-axis, for each treatment group.')
|
105 |
mean_benefit_vs_conversion = eda.compute_mean_benefit_vs_conversion()
|
106 |
|
107 |
-
fig, ax = plt.subplots()
|
108 |
-
mean_benefit_vs_conversion.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
|
109 |
-
st.pyplot(fig)
|
|
|
|
|
|
|
110 |
|
111 |
st.write('''
|
112 |
We further compute the Average Treatment Effect (ATE) for both the mean conversion rate and the mean benefit per user:
|
@@ -116,9 +122,12 @@ if tabs == "Exploratory analysis":
|
|
116 |
''')
|
117 |
mean_conversions_ate = eda.compute_ate()
|
118 |
|
119 |
-
fig, ax = plt.subplots()
|
120 |
-
mean_conversions_ate.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
|
121 |
-
st.pyplot(fig)
|
|
|
|
|
|
|
122 |
|
123 |
st.subheader('Feature importance')
|
124 |
|
@@ -131,10 +140,13 @@ if tabs == "Exploratory analysis":
|
|
131 |
|
132 |
feature_importance = FeatureImportance(uplift_sim.df, X_names, y_name = 'conversion', treatment_group = treatment_group)
|
133 |
fi = feature_importance.compute_feature_importance()
|
134 |
-
fig, ax = plt.subplots()
|
135 |
di_df_sorted = fi.sort_values(by='score', ascending=False)
|
136 |
-
di_df_sorted[['feature', 'score']].plot.barh(x='feature', y='score', ax=ax)
|
137 |
-
st.pyplot(fig)
|
|
|
|
|
|
|
138 |
|
139 |
st.write("""
|
140 |
- AccountLifetimeIndex: Longer-standing accounts are key predictors of customer response to promotions \n
|
@@ -194,13 +206,26 @@ if tabs == "Model training":
|
|
194 |
st.subheader('Feature Importances')
|
195 |
fig, ax = plt.subplots()
|
196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
for k, v in feature_importances.items():
|
198 |
-
|
199 |
-
v.
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
|
|
|
|
|
|
|
|
|
|
204 |
|
205 |
else:
|
206 |
st.error("Please generate and preprocess the dataset first.")
|
@@ -245,47 +270,184 @@ if tabs == "Economic effects":
|
|
245 |
|
246 |
# Plotting CATE Conversion
|
247 |
st.subheader("CATE Conversion vs Targeted Population")
|
248 |
-
fig, ax_conversion = plt.subplots()
|
249 |
-
for discount, color in zip(discounts, ['b', 'g', 'y']):
|
250 |
-
qini_conversions[discount].plot(ax=ax_conversion, x='index', y='S', color=color)
|
251 |
-
qini_conversions[discount].plot(ax=ax_conversion, x='index', y='Random', color='r', ls='--')
|
252 |
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
# Plotting CATE Benefit
|
260 |
st.subheader("CATE Benefit vs Targeted Population")
|
261 |
-
fig, ax_benefit = plt.subplots()
|
262 |
-
for discount, color in zip(discounts, ['b', 'g', 'y']):
|
263 |
-
qini_benefits[discount].plot(ax=ax_benefit, x='index', y='S', color=color)
|
264 |
-
qini_benefits[discount].plot(ax=ax_benefit, x='index', y='Random', color='r', ls='--')
|
265 |
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
|
272 |
# Plotting CATE Benefit vs CATE Conversion
|
273 |
st.subheader("CATE Benefit vs CATE Conversion")
|
274 |
-
|
275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
for i, discount in enumerate(discounts):
|
277 |
-
qini_conc_test = pd.concat([qini_conversions[discount][
|
278 |
qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
|
279 |
-
|
280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
|
282 |
-
st.write('In the last plot for example we can see that there is a region where offering 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact in overall conversion uplift while reducing our benefit loss considerably.')
|
283 |
|
284 |
-
ax_comp.legend(prop={'size': 10})
|
285 |
-
ax_comp.set_xlabel('CATE Conversion')
|
286 |
-
ax_comp.set_ylabel('CATE Benefit')
|
287 |
-
ax_comp.set_title('CATE Benefit vs CATE Conversion')
|
288 |
-
st.pyplot(fig)
|
289 |
|
290 |
else:
|
291 |
-
st.error("Please ensure the model is trained and the dataset is prepared.")
|
|
|
5 |
from eval_utils.evaluation import ModelEvaluator
|
6 |
|
7 |
import matplotlib.pyplot as plt
|
8 |
+
import plotly.express as px
|
9 |
+
import plotly.graph_objects as go
|
10 |
+
|
11 |
import numpy as np
|
12 |
import pandas as pd
|
13 |
import streamlit as st
|
|
|
107 |
st.write('We can also visualize the tradeoff between conversions and platform benefit by plotting the mean benefit per user on the y-axis and the mean conversion rate on the x-axis, for each treatment group.')
|
108 |
mean_benefit_vs_conversion = eda.compute_mean_benefit_vs_conversion()
|
109 |
|
110 |
+
# fig, ax = plt.subplots()
|
111 |
+
# mean_benefit_vs_conversion.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
|
112 |
+
# st.pyplot(fig)
|
113 |
+
|
114 |
+
fig = px.scatter(mean_benefit_vs_conversion, x='conversion', y='benefit', color_discrete_sequence=['LightBlue'], size_max=50)
|
115 |
+
st.plotly_chart(fig)
|
116 |
|
117 |
st.write('''
|
118 |
We further compute the Average Treatment Effect (ATE) for both the mean conversion rate and the mean benefit per user:
|
|
|
122 |
''')
|
123 |
mean_conversions_ate = eda.compute_ate()
|
124 |
|
125 |
+
# fig, ax = plt.subplots()
|
126 |
+
# mean_conversions_ate.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
|
127 |
+
# st.pyplot(fig)
|
128 |
+
|
129 |
+
fig = px.scatter(mean_conversions_ate, x='conversion', y='benefit', color_discrete_sequence=['LightBlue'], size_max=50)
|
130 |
+
st.plotly_chart(fig)
|
131 |
|
132 |
st.subheader('Feature importance')
|
133 |
|
|
|
140 |
|
141 |
feature_importance = FeatureImportance(uplift_sim.df, X_names, y_name = 'conversion', treatment_group = treatment_group)
|
142 |
fi = feature_importance.compute_feature_importance()
|
143 |
+
# fig, ax = plt.subplots()
|
144 |
di_df_sorted = fi.sort_values(by='score', ascending=False)
|
145 |
+
# di_df_sorted[['feature', 'score']].plot.barh(x='feature', y='score', ax=ax)
|
146 |
+
# st.pyplot(fig)
|
147 |
+
fig = px.bar(di_df_sorted, y='feature', x='score', orientation='h')
|
148 |
+
st.plotly_chart(fig)
|
149 |
+
|
150 |
|
151 |
st.write("""
|
152 |
- AccountLifetimeIndex: Longer-standing accounts are key predictors of customer response to promotions \n
|
|
|
206 |
st.subheader('Feature Importances')
|
207 |
fig, ax = plt.subplots()
|
208 |
|
209 |
+
# for k, v in feature_importances.items():
|
210 |
+
# st.write(f"Feature importance for {k}")
|
211 |
+
# v.plot(kind='barh', ax=ax)
|
212 |
+
# ax.set_xlabel("Importance")
|
213 |
+
# ax.set_ylabel("Feature")
|
214 |
+
# ax.set_title(f"Feature Importance for {model_type}")
|
215 |
+
# st.pyplot(fig)
|
216 |
+
|
217 |
for k, v in feature_importances.items():
|
218 |
+
# Reset index if 'v' is a Series or its index contains the feature names
|
219 |
+
if isinstance(v, pd.Series) or 'feature' not in v.columns:
|
220 |
+
v = v.reset_index()
|
221 |
+
v.columns = ['feature', 'score'] # Adjust column names accordingly
|
222 |
+
|
223 |
+
# Assuming 'v' now has columns ['feature', 'score']
|
224 |
+
fig = px.bar(v, y='feature', x='score', orientation='h',
|
225 |
+
title=f"Feature Importance for {model_type} ({k})",
|
226 |
+
labels={'score': 'Importance', 'feature': 'Feature'})
|
227 |
+
fig.update_layout(yaxis={'categoryorder':'total ascending'}) # Optional: This sorts the bars
|
228 |
+
st.plotly_chart(fig)
|
229 |
|
230 |
else:
|
231 |
st.error("Please generate and preprocess the dataset first.")
|
|
|
270 |
|
271 |
# Plotting CATE Conversion
|
272 |
st.subheader("CATE Conversion vs Targeted Population")
|
|
|
|
|
|
|
|
|
273 |
|
274 |
+
# fig, ax_conversion = plt.subplots()
|
275 |
+
# for discount, color in zip(discounts, ['b', 'g', 'y']):
|
276 |
+
# qini_conversions[discount].plot(ax=ax_conversion, x='index', y='S', color=color)
|
277 |
+
# qini_conversions[discount].plot(ax=ax_conversion, x='index', y='Random', color='r', ls='--')
|
278 |
+
|
279 |
+
# ax_conversion.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
|
280 |
+
# ax_conversion.set_xlabel('Fraction of Targeted Users')
|
281 |
+
# ax_conversion.set_ylabel('CATE Conversion')
|
282 |
+
# ax_conversion.set_title('CATE Conversion vs Targeted Population')
|
283 |
+
# st.pyplot(fig)
|
284 |
+
|
285 |
+
# Initialize a figure object
|
286 |
+
fig = go.Figure()
|
287 |
+
|
288 |
+
# Define colors for each discount level and the random baseline
|
289 |
+
colors = ['blue', 'green', 'yellow']
|
290 |
+
random_line_dash = 'dash'
|
291 |
+
|
292 |
+
# Iterate over each discount to add its line to the plot
|
293 |
+
for i, discount in enumerate(discounts):
|
294 |
+
# Add the model line
|
295 |
+
fig.add_trace(go.Scatter(x=qini_conversions[discount]['index'],
|
296 |
+
y=qini_conversions[discount]['S'],
|
297 |
+
mode='lines',
|
298 |
+
name=f'{discount} model',
|
299 |
+
line=dict(color=colors[i])))
|
300 |
+
|
301 |
+
# Add the random baseline line
|
302 |
+
fig.add_trace(go.Scatter(x=qini_conversions[discount]['index'],
|
303 |
+
y=qini_conversions[discount]['Random'],
|
304 |
+
mode='lines',
|
305 |
+
name=f'{discount} random',
|
306 |
+
line=dict(color='red', dash=random_line_dash)))
|
307 |
+
|
308 |
+
# Update the layout of the figure
|
309 |
+
fig.update_layout(
|
310 |
+
title='CATE Conversion vs Targeted Population',
|
311 |
+
xaxis_title='Fraction of Targeted Users',
|
312 |
+
yaxis_title='CATE Conversion',
|
313 |
+
legend_title='Legend',
|
314 |
+
legend=dict(
|
315 |
+
x=0,
|
316 |
+
y=1,
|
317 |
+
traceorder='normal',
|
318 |
+
font=dict(
|
319 |
+
size=10,
|
320 |
+
)
|
321 |
+
)
|
322 |
+
)
|
323 |
+
|
324 |
+
# Display the figure in Streamlit
|
325 |
+
st.plotly_chart(fig)
|
326 |
|
327 |
# Plotting CATE Benefit
|
328 |
st.subheader("CATE Benefit vs Targeted Population")
|
|
|
|
|
|
|
|
|
329 |
|
330 |
+
# fig, ax_benefit = plt.subplots()
|
331 |
+
# for discount, color in zip(discounts, ['b', 'g', 'y']):
|
332 |
+
# qini_benefits[discount].plot(ax=ax_benefit, x='index', y='S', color=color)
|
333 |
+
# qini_benefits[discount].plot(ax=ax_benefit, x='index', y='Random', color='r', ls='--')
|
334 |
+
|
335 |
+
# ax_benefit.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
|
336 |
+
# ax_benefit.set_xlabel('Fraction of Targeted Users')
|
337 |
+
# ax_benefit.set_ylabel('CATE Benefit')
|
338 |
+
# ax_benefit.set_title('CATE Benefit vs Targeted Population')
|
339 |
+
# st.pyplot(fig)
|
340 |
+
|
341 |
+
|
342 |
+
# Initialize a figure object
|
343 |
+
fig = go.Figure()
|
344 |
+
|
345 |
+
# Define colors for each discount level and the random baseline
|
346 |
+
colors = ['blue', 'green', 'yellow']
|
347 |
+
random_line_dash = 'dash'
|
348 |
+
|
349 |
+
# Iterate over each discount to add its line to the plot
|
350 |
+
for i, discount in enumerate(discounts):
|
351 |
+
# Add the model line
|
352 |
+
fig.add_trace(go.Scatter(x=qini_benefits[discount]['index'],
|
353 |
+
y=qini_benefits[discount]['S'],
|
354 |
+
mode='lines',
|
355 |
+
name=f'{discount} model',
|
356 |
+
line=dict(color=colors[i])))
|
357 |
+
|
358 |
+
# Add the random baseline line
|
359 |
+
fig.add_trace(go.Scatter(x=qini_benefits[discount]['index'],
|
360 |
+
y=qini_benefits[discount]['Random'],
|
361 |
+
mode='lines',
|
362 |
+
name=f'{discount} random',
|
363 |
+
line=dict(color='red', dash=random_line_dash)))
|
364 |
+
|
365 |
+
# Update the layout of the figure
|
366 |
+
fig.update_layout(
|
367 |
+
title='CATE Benefit vs Targeted Population',
|
368 |
+
xaxis_title='Fraction of Targeted Users',
|
369 |
+
yaxis_title='CATE Benefit',
|
370 |
+
legend_title='Legend',
|
371 |
+
legend=dict(
|
372 |
+
x=0,
|
373 |
+
y=1,
|
374 |
+
traceorder='normal',
|
375 |
+
font=dict(
|
376 |
+
size=10,
|
377 |
+
)
|
378 |
+
)
|
379 |
+
)
|
380 |
+
|
381 |
+
# Display the figure in Streamlit
|
382 |
+
st.plotly_chart(fig)
|
383 |
+
|
384 |
|
385 |
# Plotting CATE Benefit vs CATE Conversion
|
386 |
st.subheader("CATE Benefit vs CATE Conversion")
|
387 |
+
|
388 |
+
# fig, ax_comp = plt.subplots()
|
389 |
+
# colors = ['b', 'g', 'y']
|
390 |
+
# for i, discount in enumerate(discounts):
|
391 |
+
# qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
|
392 |
+
# qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
|
393 |
+
# qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
|
394 |
+
|
395 |
+
# st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
|
396 |
+
# st.write('In the last plot for example we can see that there is a region where offering 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact in overall conversion uplift while reducing our benefit loss considerably.')
|
397 |
+
|
398 |
+
# ax_comp.legend(prop={'size': 10})
|
399 |
+
# ax_comp.set_xlabel('CATE Conversion')
|
400 |
+
# ax_comp.set_ylabel('CATE Benefit')
|
401 |
+
# ax_comp.set_title('CATE Benefit vs CATE Conversion')
|
402 |
+
# st.pyplot(fig)
|
403 |
+
|
404 |
+
# Initialize a figure object
|
405 |
+
fig = go.Figure()
|
406 |
+
|
407 |
+
# Define colors for each discount level
|
408 |
+
colors = ['blue', 'green', 'yellow']
|
409 |
+
|
410 |
+
# Iterate over each discount to add its scatter plot to the figure
|
411 |
for i, discount in enumerate(discounts):
|
412 |
+
qini_conc_test = pd.concat([qini_conversions[discount]['S'], qini_benefits[discount]['S']], axis=1)
|
413 |
qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
|
414 |
+
|
415 |
+
# Add the scatter plot for each discount level
|
416 |
+
# Adjust marker size with `size` and line width with `line=dict(width=2)`
|
417 |
+
fig.add_trace(go.Scatter(x=qini_conc_test['cate_conversion'],
|
418 |
+
y=qini_conc_test['cate_benefit'],
|
419 |
+
mode='markers+lines',
|
420 |
+
name=f'{discount} model',
|
421 |
+
marker=dict(color=colors[i], size=6), # Adjust marker size here
|
422 |
+
line=dict(width=2))) # Adjust line width here
|
423 |
+
|
424 |
+
# Update the layout of the figure to adjust aspect ratio and margins if needed
|
425 |
+
fig.update_layout(
|
426 |
+
title='CATE Benefit vs CATE Conversion',
|
427 |
+
xaxis_title='CATE Conversion',
|
428 |
+
yaxis_title='CATE Benefit',
|
429 |
+
legend_title='Legend',
|
430 |
+
legend=dict(
|
431 |
+
x=0,
|
432 |
+
y=1,
|
433 |
+
traceorder='normal',
|
434 |
+
font=dict(
|
435 |
+
size=10,
|
436 |
+
)
|
437 |
+
),
|
438 |
+
# Optionally adjust plot and margin size for a "thinner" appearance
|
439 |
+
margin=dict(l=20, r=20, t=50, b=20), # Adjust margins to change plot boundary
|
440 |
+
height=400, # Adjust height for overall "thinness"
|
441 |
+
width=600 # Adjust width as needed
|
442 |
+
)
|
443 |
+
|
444 |
+
# Display the figure in Streamlit
|
445 |
+
st.plotly_chart(fig)
|
446 |
+
|
447 |
+
|
448 |
st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
|
449 |
+
st.write('In the last plot for example, we can see that there is a region where offering a 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact on overall conversion uplift while reducing our benefit loss considerably.')
|
450 |
|
|
|
|
|
|
|
|
|
|
|
451 |
|
452 |
else:
|
453 |
+
st.error("Please ensure the model is trained and the dataset is prepared.")
|
requirements.txt
CHANGED
@@ -3,4 +3,4 @@ matplotlib==3.8.3
|
|
3 |
numpy==1.23.5
|
4 |
scikit_learn==1.4.1.post1
|
5 |
streamlit==1.32.2
|
6 |
-
xgboost==2.0.3
|
|
|
3 |
numpy==1.23.5
|
4 |
scikit_learn==1.4.1.post1
|
5 |
streamlit==1.32.2
|
6 |
+
xgboost==2.0.3
|