Spaces:

neuronslabs
/

uplift_modeling

Sleeping

App Files Community

howardroark committed on Mar 21, 2024

Commit

dcbe801

1 Parent(s): 45da72a

charts update

Browse files

Files changed (2) hide show

app.py +207 -45
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -5,6 +5,9 @@ from models_utils.ml_models import ModelTraining
 from eval_utils.evaluation import ModelEvaluator
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import streamlit as st
@@ -104,9 +107,12 @@ if tabs == "Exploratory analysis":
         st.write('We can also visualize the tradeoff between conversions and platform benefit by plotting the mean benefit per user on the y-axis and the mean conversion rate on the x-axis, for each treatment group.')
         mean_benefit_vs_conversion = eda.compute_mean_benefit_vs_conversion()
-        fig, ax = plt.subplots()
-        mean_benefit_vs_conversion.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
-        st.pyplot(fig)
         st.write('''
                  We further compute the Average Treatment Effect (ATE) for both the mean conversion rate and the mean benefit per user:
@@ -116,9 +122,12 @@ if tabs == "Exploratory analysis":
                  ''')
         mean_conversions_ate = eda.compute_ate()
-        fig, ax = plt.subplots()
-        mean_conversions_ate.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
-        st.pyplot(fig)
         st.subheader('Feature importance')
@@ -131,10 +140,13 @@ if tabs == "Exploratory analysis":
         feature_importance = FeatureImportance(uplift_sim.df, X_names, y_name = 'conversion', treatment_group = treatment_group)
         fi = feature_importance.compute_feature_importance()
-        fig, ax = plt.subplots()
         di_df_sorted = fi.sort_values(by='score', ascending=False)
-        di_df_sorted[['feature', 'score']].plot.barh(x='feature', y='score', ax=ax)
-        st.pyplot(fig)
         st.write("""
                     - AccountLifetimeIndex: Longer-standing accounts are key predictors of customer response to promotions \n
@@ -194,13 +206,26 @@ if tabs == "Model training":
             st.subheader('Feature Importances')
             fig, ax = plt.subplots()
             for k, v in feature_importances.items():
-                st.write(f"Feature importance for {k}")
-                v.plot(kind='barh', ax=ax)
-                ax.set_xlabel("Importance")
-                ax.set_ylabel("Feature")
-                ax.set_title(f"Feature Importance for {model_type}")
-                st.pyplot(fig)
     else:
         st.error("Please generate and preprocess the dataset first.")
@@ -245,47 +270,184 @@ if tabs == "Economic effects":
         # Plotting CATE Conversion
         st.subheader("CATE Conversion vs Targeted Population")
-        fig, ax_conversion = plt.subplots()
-        for discount, color in zip(discounts, ['b', 'g', 'y']):
-            qini_conversions[discount].plot(ax=ax_conversion, x='index', y='S', color=color)
-            qini_conversions[discount].plot(ax=ax_conversion, x='index', y='Random', color='r', ls='--')
-        ax_conversion.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
-        ax_conversion.set_xlabel('Fraction of Targeted Users')
-        ax_conversion.set_ylabel('CATE Conversion')
-        ax_conversion.set_title('CATE Conversion vs Targeted Population')
-        st.pyplot(fig)
         # Plotting CATE Benefit
         st.subheader("CATE Benefit vs Targeted Population")
-        fig, ax_benefit = plt.subplots()
-        for discount, color in zip(discounts, ['b', 'g', 'y']):
-            qini_benefits[discount].plot(ax=ax_benefit, x='index', y='S', color=color)
-            qini_benefits[discount].plot(ax=ax_benefit, x='index', y='Random', color='r', ls='--')
-        ax_benefit.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
-        ax_benefit.set_xlabel('Fraction of Targeted Users')
-        ax_benefit.set_ylabel('CATE Benefit')
-        ax_benefit.set_title('CATE Benefit vs Targeted Population')
-        st.pyplot(fig)
         # Plotting CATE Benefit vs CATE Conversion
         st.subheader("CATE Benefit vs CATE Conversion")
-        fig, ax_comp = plt.subplots()
-        colors = ['b', 'g', 'y']
         for i, discount in enumerate(discounts):
-            qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
             qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
-            qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
         st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
-        st.write('In the last plot for example we can see that there is a region where offering 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact in overall conversion uplift while reducing our benefit loss considerably.')
-        ax_comp.legend(prop={'size': 10})
-        ax_comp.set_xlabel('CATE Conversion')
-        ax_comp.set_ylabel('CATE Benefit')
-        ax_comp.set_title('CATE Benefit vs CATE Conversion')
-        st.pyplot(fig)
     else:
-        st.error("Please ensure the model is trained and the dataset is prepared.")

 from eval_utils.evaluation import ModelEvaluator
 import matplotlib.pyplot as plt
+import plotly.express as px
+import plotly.graph_objects as go
 import numpy as np
 import pandas as pd
 import streamlit as st
         st.write('We can also visualize the tradeoff between conversions and platform benefit by plotting the mean benefit per user on the y-axis and the mean conversion rate on the x-axis, for each treatment group.')
         mean_benefit_vs_conversion = eda.compute_mean_benefit_vs_conversion()
+        # fig, ax = plt.subplots()
+        # mean_benefit_vs_conversion.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
+        # st.pyplot(fig)
+        fig = px.scatter(mean_benefit_vs_conversion, x='conversion', y='benefit', color_discrete_sequence=['LightBlue'], size_max=50)
+        st.plotly_chart(fig)
         st.write('''
                  We further compute the Average Treatment Effect (ATE) for both the mean conversion rate and the mean benefit per user:
                  ''')
         mean_conversions_ate = eda.compute_ate()
+        # fig, ax = plt.subplots()
+        # mean_conversions_ate.plot.scatter(x='conversion', y='benefit', c='DarkBlue', s=50, ax=ax)
+        # st.pyplot(fig)
+        fig = px.scatter(mean_conversions_ate, x='conversion', y='benefit', color_discrete_sequence=['LightBlue'], size_max=50)
+        st.plotly_chart(fig)
         st.subheader('Feature importance')
         feature_importance = FeatureImportance(uplift_sim.df, X_names, y_name = 'conversion', treatment_group = treatment_group)
         fi = feature_importance.compute_feature_importance()
+        # fig, ax = plt.subplots()
         di_df_sorted = fi.sort_values(by='score', ascending=False)
+        # di_df_sorted[['feature', 'score']].plot.barh(x='feature', y='score', ax=ax)
+        # st.pyplot(fig)
+        fig = px.bar(di_df_sorted, y='feature', x='score', orientation='h')
+        st.plotly_chart(fig)
         st.write("""
                     - AccountLifetimeIndex: Longer-standing accounts are key predictors of customer response to promotions \n
             st.subheader('Feature Importances')
             fig, ax = plt.subplots()
+            # for k, v in feature_importances.items():
+            #     st.write(f"Feature importance for {k}")
+            #     v.plot(kind='barh', ax=ax)
+            #     ax.set_xlabel("Importance")
+            #     ax.set_ylabel("Feature")
+            #     ax.set_title(f"Feature Importance for {model_type}")
+            #     st.pyplot(fig)
             for k, v in feature_importances.items():
+                # Reset index if 'v' is a Series or its index contains the feature names
+                if isinstance(v, pd.Series) or 'feature' not in v.columns:
+                    v = v.reset_index()
+                    v.columns = ['feature', 'score']  # Adjust column names accordingly
+                # Assuming 'v' now has columns ['feature', 'score']
+                fig = px.bar(v, y='feature', x='score', orientation='h',
+                            title=f"Feature Importance for {model_type} ({k})",
+                            labels={'score': 'Importance', 'feature': 'Feature'})
+                fig.update_layout(yaxis={'categoryorder':'total ascending'})  # Optional: This sorts the bars
+                st.plotly_chart(fig)
     else:
         st.error("Please generate and preprocess the dataset first.")
         # Plotting CATE Conversion
         st.subheader("CATE Conversion vs Targeted Population")
+        # fig, ax_conversion = plt.subplots()
+        # for discount, color in zip(discounts, ['b', 'g', 'y']):
+        #     qini_conversions[discount].plot(ax=ax_conversion, x='index', y='S', color=color)
+        #     qini_conversions[discount].plot(ax=ax_conversion, x='index', y='Random', color='r', ls='--')
+        # ax_conversion.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
+        # ax_conversion.set_xlabel('Fraction of Targeted Users')
+        # ax_conversion.set_ylabel('CATE Conversion')
+        # ax_conversion.set_title('CATE Conversion vs Targeted Population')
+        # st.pyplot(fig)
+        # Initialize a figure object
+        fig = go.Figure()
+        # Define colors for each discount level and the random baseline
+        colors = ['blue', 'green', 'yellow']
+        random_line_dash = 'dash'
+        # Iterate over each discount to add its line to the plot
+        for i, discount in enumerate(discounts):
+            # Add the model line
+            fig.add_trace(go.Scatter(x=qini_conversions[discount]['index'],
+                                    y=qini_conversions[discount]['S'],
+                                    mode='lines',
+                                    name=f'{discount} model',
+                                    line=dict(color=colors[i])))
+            # Add the random baseline line
+            fig.add_trace(go.Scatter(x=qini_conversions[discount]['index'],
+                                    y=qini_conversions[discount]['Random'],
+                                    mode='lines',
+                                    name=f'{discount} random',
+                                    line=dict(color='red', dash=random_line_dash)))
+        # Update the layout of the figure
+        fig.update_layout(
+            title='CATE Conversion vs Targeted Population',
+            xaxis_title='Fraction of Targeted Users',
+            yaxis_title='CATE Conversion',
+            legend_title='Legend',
+            legend=dict(
+                x=0,
+                y=1,
+                traceorder='normal',
+                font=dict(
+                    size=10,
+                )
+            )
+        )
+        # Display the figure in Streamlit
+        st.plotly_chart(fig)
         # Plotting CATE Benefit
         st.subheader("CATE Benefit vs Targeted Population")
+        # fig, ax_benefit = plt.subplots()
+        # for discount, color in zip(discounts, ['b', 'g', 'y']):
+        #     qini_benefits[discount].plot(ax=ax_benefit, x='index', y='S', color=color)
+        #     qini_benefits[discount].plot(ax=ax_benefit, x='index', y='Random', color='r', ls='--')
+        # ax_benefit.legend([f'{d} model' for d in discounts] + [f'{d} random' for d in discounts], prop={'size': 10})
+        # ax_benefit.set_xlabel('Fraction of Targeted Users')
+        # ax_benefit.set_ylabel('CATE Benefit')
+        # ax_benefit.set_title('CATE Benefit vs Targeted Population')
+        # st.pyplot(fig)
+        # Initialize a figure object
+        fig = go.Figure()
+        # Define colors for each discount level and the random baseline
+        colors = ['blue', 'green', 'yellow']
+        random_line_dash = 'dash'
+        # Iterate over each discount to add its line to the plot
+        for i, discount in enumerate(discounts):
+            # Add the model line
+            fig.add_trace(go.Scatter(x=qini_benefits[discount]['index'],
+                                    y=qini_benefits[discount]['S'],
+                                    mode='lines',
+                                    name=f'{discount} model',
+                                    line=dict(color=colors[i])))
+            # Add the random baseline line
+            fig.add_trace(go.Scatter(x=qini_benefits[discount]['index'],
+                                    y=qini_benefits[discount]['Random'],
+                                    mode='lines',
+                                    name=f'{discount} random',
+                                    line=dict(color='red', dash=random_line_dash)))
+        # Update the layout of the figure
+        fig.update_layout(
+            title='CATE Benefit vs Targeted Population',
+            xaxis_title='Fraction of Targeted Users',
+            yaxis_title='CATE Benefit',
+            legend_title='Legend',
+            legend=dict(
+                x=0,
+                y=1,
+                traceorder='normal',
+                font=dict(
+                    size=10,
+                )
+            )
+        )
+        # Display the figure in Streamlit
+        st.plotly_chart(fig)
         # Plotting CATE Benefit vs CATE Conversion
         st.subheader("CATE Benefit vs CATE Conversion")
+        # fig, ax_comp = plt.subplots()
+        # colors = ['b', 'g', 'y']
+        # for i, discount in enumerate(discounts):
+        #     qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
+        #     qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
+        #     qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
+        # st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
+        # st.write('In the last plot for example we can see that there is a region where offering 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact in overall conversion uplift while reducing our benefit loss considerably.')
+        # ax_comp.legend(prop={'size': 10})
+        # ax_comp.set_xlabel('CATE Conversion')
+        # ax_comp.set_ylabel('CATE Benefit')
+        # ax_comp.set_title('CATE Benefit vs CATE Conversion')
+        # st.pyplot(fig)
+        # Initialize a figure object
+        fig = go.Figure()
+        # Define colors for each discount level
+        colors = ['blue', 'green', 'yellow']
+        # Iterate over each discount to add its scatter plot to the figure
         for i, discount in enumerate(discounts):
+            qini_conc_test = pd.concat([qini_conversions[discount]['S'], qini_benefits[discount]['S']], axis=1)
             qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
+            # Add the scatter plot for each discount level
+            # Adjust marker size with `size` and line width with `line=dict(width=2)`
+            fig.add_trace(go.Scatter(x=qini_conc_test['cate_conversion'],
+                                    y=qini_conc_test['cate_benefit'],
+                                    mode='markers+lines',
+                                    name=f'{discount} model',
+                                    marker=dict(color=colors[i], size=6),  # Adjust marker size here
+                                    line=dict(width=2)))  # Adjust line width here
+        # Update the layout of the figure to adjust aspect ratio and margins if needed
+        fig.update_layout(
+            title='CATE Benefit vs CATE Conversion',
+            xaxis_title='CATE Conversion',
+            yaxis_title='CATE Benefit',
+            legend_title='Legend',
+            legend=dict(
+                x=0,
+                y=1,
+                traceorder='normal',
+                font=dict(
+                    size=10,
+                )
+            ),
+            # Optionally adjust plot and margin size for a "thinner" appearance
+            margin=dict(l=20, r=20, t=50, b=20),  # Adjust margins to change plot boundary
+            height=400,  # Adjust height for overall "thinness"
+            width=600   # Adjust width as needed
+        )
+        # Display the figure in Streamlit
+        st.plotly_chart(fig)
         st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
+        st.write('In the last plot for example, we can see that there is a region where offering a 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact on overall conversion uplift while reducing our benefit loss considerably.')
     else:
+        st.error("Please ensure the model is trained and the dataset is prepared.")

requirements.txt CHANGED Viewed

@@ -3,4 +3,4 @@ matplotlib==3.8.3
 numpy==1.23.5
 scikit_learn==1.4.1.post1
 streamlit==1.32.2
-xgboost==2.0.3

 numpy==1.23.5
 scikit_learn==1.4.1.post1
 streamlit==1.32.2
+xgboost==2.0.3