Spaces:
Sleeping
Sleeping
File size: 3,504 Bytes
5fc7138 2130e8d 5fc7138 2130e8d 5fc7138 2130e8d 5fc7138 2130e8d 5fc7138 2130e8d 5fc7138 2130e8d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import pandas as pd
import matplotlib.pyplot as plt
def calculate_metrics(df: pd.DataFrame) -> pd.Series:
    """
    Calculate key metrics from a table of RCT transactions.

    Each row of ``df`` is counted as one purchase. The frame must provide the
    columns ``customer_id``, ``discounted_price`` and ``profit``.

    Args:
        df (pandas.DataFrame): Transaction rows to summarise.

    Returns:
        pandas.Series: A series with the entries
            'Total Converted Customers' (number of distinct ``customer_id``
            values), 'Total Purchases' (row count), 'Total Revenue' (sum of
            ``discounted_price``), 'Total Profit' (sum of ``profit``),
            'Conversion Rate' (purchases divided by distinct customers) and
            'Average Order Value' (revenue divided by purchases).
            Both ratios are reported as 0 when their denominator is 0, so an
            empty frame yields a valid all-zero result instead of raising.
    """
    # dropna=False keeps a missing customer_id counted as one distinct value,
    # matching the count produced by len(Series.unique()).
    total_customers = df['customer_id'].nunique(dropna=False)
    total_purchases = len(df)
    total_revenue = df['discounted_price'].sum()
    total_profit = df['profit'].sum()

    # Guard both ratios: an empty frame has zero customers and zero purchases,
    # and plain int division would raise ZeroDivisionError.
    conversion_rate = total_purchases / total_customers if total_customers > 0 else 0
    average_order_value = total_revenue / total_purchases if total_purchases > 0 else 0

    return pd.Series({
        'Total Converted Customers': total_customers,
        'Total Purchases': total_purchases,
        'Total Revenue': total_revenue,
        'Total Profit': total_profit,
        'Conversion Rate': conversion_rate,
        'Average Order Value': average_order_value,
    })
def analyze_rct_results(transactions_df, variant_assignments_df):
    """
    Summarise a Randomized Control Trial (RCT) and plot variant performance.

    Metrics are computed once over all transactions and once per value of the
    ``variant`` column. Each variant is then compared against the row whose
    variant is 'Control' to obtain incremental purchases and profit, and the
    non-control variants are drawn on a scatter plot of incremental profit
    against incremental purchases.

    Args:
        transactions_df (pandas.DataFrame): Transaction data; must contain a
            ``variant`` column plus the columns calculate_metrics reads.
        variant_assignments_df (pandas.DataFrame): Variant assignments.
            Accepted for interface compatibility; not read by this function.

    Returns:
        tuple: (one-row overall metrics DataFrame, per-variant metrics
        DataFrame sorted by variant, matplotlib Figure).

    Raises:
        IndexError: If no row has the variant 'Control'.
    """
    # Whole-experiment summary, wrapped as a single-row table.
    overall_table = pd.DataFrame([calculate_metrics(transactions_df)])

    # One row of metrics per variant.
    per_variant = (
        transactions_df
        .groupby('variant')
        .apply(calculate_metrics)
        .reset_index()
    )

    # Incremental metrics: each variant minus the control baseline.
    is_control = per_variant['variant'] == 'Control'
    baseline = per_variant[is_control].iloc[0]
    extra_purchases = per_variant['Total Purchases'] - baseline['Total Purchases']
    extra_profit = per_variant['Total Profit'] - baseline['Total Profit']
    per_variant['Incremental Purchases'] = extra_purchases
    per_variant['Incremental Profit'] = extra_profit
    per_variant['Profit per Incremental Purchase'] = extra_profit / extra_purchases

    # Order rows by discount depth rather than alphabetically.
    display_order = ['Control', '5% discount', '10% discount', '15% discount']
    per_variant['variant'] = pd.Categorical(
        per_variant['variant'],
        categories=display_order,
        ordered=True,
    )
    per_variant = per_variant.sort_values('variant')

    # Scatter of incremental profit vs incremental purchases (control excluded,
    # since it would always sit at the origin).
    fig, ax = plt.subplots(figsize=(10, 6))
    treated = per_variant[per_variant['variant'] != 'Control']
    xs = treated['Incremental Purchases']
    ys = treated['Incremental Profit']
    ax.scatter(xs, ys)
    for label, x, y in zip(treated['variant'], xs, ys):
        ax.annotate(label, (x, y))

    ax.set(
        xlabel='Incremental Total Purchases',
        ylabel='Incremental Total Profit',
        title='Incremental Total Profit vs Incremental Total Purchases',
    )
    # Dashed red lines mark the control baseline on both axes.
    ax.axhline(y=0, color='r', linestyle='--')
    ax.axvline(x=0, color='r', linestyle='--')
    ax.grid(True, linestyle=':', alpha=0.7)
    plt.tight_layout()

    return overall_table, per_variant, fig
|