import pandas as pd
import matplotlib.pyplot as plt


def calculate_metrics(df: pd.DataFrame) -> pd.Series:
    """Summarise a transactions table into headline purchase metrics.

    Args:
        df: Frame with one row per purchase. The columns read are
            ``customer_id``, ``discounted_price`` and ``profit``.

    Returns:
        A Series with the entries ``Total Converted Customers``,
        ``Total Purchases``, ``Total Revenue``, ``Total Profit``,
        ``Conversion Rate`` (purchases divided by distinct customers) and
        ``Average Order Value`` (revenue divided by purchases). Both ratios
        are 0 when their denominator is 0, so an empty frame is handled
        without raising.
    """
    # dropna=False counts a missing customer_id as one distinct value, which
    # is what len(Series.unique()) would report.
    total_customers = df['customer_id'].nunique(dropna=False)
    total_purchases = len(df)
    total_revenue = df['discounted_price'].sum()
    total_profit = df['profit'].sum()

    # Guard both ratios: an empty frame has zero customers and zero purchases.
    conversion_rate = (
        total_purchases / total_customers if total_customers > 0 else 0
    )
    average_order_value = (
        total_revenue / total_purchases if total_purchases > 0 else 0
    )

    return pd.Series({
        'Total Converted Customers': total_customers,
        'Total Purchases': total_purchases,
        'Total Revenue': total_revenue,
        'Total Profit': total_profit,
        'Conversion Rate': conversion_rate,
        'Average Order Value': average_order_value,
    })


def analyze_rct_results(
    transactions_df: pd.DataFrame,
    variant_assignments_df: pd.DataFrame,
) -> "tuple[pd.DataFrame, pd.DataFrame, plt.Figure]":
    """Compute overall and per-variant metrics and plot incremental profit.

    Args:
        transactions_df: Purchases table. Needs a ``variant`` column plus the
            columns read by ``calculate_metrics``.
        variant_assignments_df: Not used by this function; kept in the
            signature so existing callers continue to work.

    Returns:
        A tuple ``(overall_df, variant_metrics, fig)``:

        * ``overall_df`` - one-row frame of metrics over all transactions.
        * ``variant_metrics`` - one row per variant with the same metrics
          plus ``Incremental Purchases``, ``Incremental Profit`` and
          ``Profit per Incremental Purchase`` (each relative to the
          ``Control`` row), sorted in the fixed variant order.
        * ``fig`` - matplotlib figure with a scatter of profit per
          incremental purchase against incremental purchases for the
          non-control variants.

    Raises:
        IndexError: If ``transactions_df`` has no ``Control`` variant rows.
    """
    overall_metrics = calculate_metrics(transactions_df)
    variant_metrics = (
        transactions_df.groupby('variant').apply(calculate_metrics).reset_index()
    )

    # Incremental metrics are measured against the Control row; .iloc[0]
    # raises IndexError when that row is missing.
    control_metrics = variant_metrics[variant_metrics['variant'] == 'Control'].iloc[0]
    variant_metrics['Incremental Purchases'] = (
        variant_metrics['Total Purchases'] - control_metrics['Total Purchases']
    )
    variant_metrics['Incremental Profit'] = (
        variant_metrics['Total Profit'] - control_metrics['Total Profit']
    )
    # The Control row is 0 / 0 here and therefore NaN; a variant with zero
    # incremental purchases yields inf (or NaN if its profit delta is also 0).
    variant_metrics['Profit per Incremental Purchase'] = (
        variant_metrics['Incremental Profit'] / variant_metrics['Incremental Purchases']
    )

    # Overall metrics table (single row).
    overall_df = pd.DataFrame([overall_metrics])

    # Order variants by discount depth rather than alphabetically. Variant
    # labels outside this list become NaN in the categorical column.
    variant_order = ['Control', '5% discount', '10% discount', '15% discount']
    variant_metrics['variant'] = pd.Categorical(
        variant_metrics['variant'], categories=variant_order, ordered=True
    )
    variant_metrics = variant_metrics.sort_values('variant')

    # Plot: incremental profit per incremental purchase vs incremental purchases.
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    non_control = variant_metrics[variant_metrics['variant'] != 'Control']

    ax.scatter(
        non_control['Incremental Purchases'],
        non_control['Profit per Incremental Purchase'],
    )
    for _, row in non_control.iterrows():
        ax.annotate(
            row['variant'],
            (row['Incremental Purchases'], row['Profit per Incremental Purchase']),
        )
    ax.set_xlabel('Incremental Total Purchases')
    ax.set_ylabel('Incremental Total Profit per Incremental Purchase')
    ax.set_title('Incremental Profit per Purchase vs Incremental Purchases')
    # Zero reference lines separate gains from losses relative to Control.
    ax.axhline(y=0, color='r', linestyle='--')
    ax.axvline(x=0, color='r', linestyle='--')
    ax.grid(True, linestyle=':', alpha=0.7)

    plt.tight_layout()
    return overall_df, variant_metrics, fig