File size: 3,504 Bytes
5fc7138
 
 
 
2130e8d
 
 
 
 
 
 
 
 
 
 
 
5fc7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2130e8d
 
 
 
 
 
 
 
 
 
 
 
 
5fc7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2130e8d
 
5fc7138
 
 
2130e8d
5fc7138
2130e8d
 
 
 
 
 
 
 
5fc7138
2130e8d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
import matplotlib.pyplot as plt

def calculate_metrics(df):
    """
    Calculate key metrics from the RCT results.

    Each row of ``df`` is counted as one purchase. The function reads the
    ``customer_id``, ``discounted_price`` and ``profit`` columns.

    Args:
        df (pandas.DataFrame): The DataFrame containing RCT results

    Returns:
        pandas.Series: A series with the following entries:
            - 'Total Converted Customers': number of distinct ``customer_id`` values
            - 'Total Purchases': number of rows in ``df``
            - 'Total Revenue': sum of ``discounted_price``
            - 'Total Profit': sum of ``profit``
            - 'Conversion Rate': rows divided by distinct customers
              (0 when there are no customers)
            - 'Average Order Value': revenue divided by rows
              (0 when there are no rows)
    """
    # dropna=False keeps the count identical to len(df['customer_id'].unique()).
    total_customers = df['customer_id'].nunique(dropna=False)
    total_purchases = len(df)
    total_revenue = df['discounted_price'].sum()
    total_profit = df['profit'].sum()

    # Guard both ratios so an empty frame yields 0 instead of ZeroDivisionError.
    conversion_rate = total_purchases / total_customers if total_customers > 0 else 0
    average_order_value = total_revenue / total_purchases if total_purchases > 0 else 0

    return pd.Series({
        'Total Converted Customers': total_customers,
        'Total Purchases': total_purchases,
        'Total Revenue': total_revenue,
        'Total Profit': total_profit,
        'Conversion Rate': conversion_rate,
        'Average Order Value': average_order_value,
    })

def analyze_rct_results(transactions_df, variant_assignments_df):
    """
    Analyze the results of the Randomized Control Trial (RCT).

    This function calculates overall metrics, metrics per variant, incremental
    metrics relative to the 'Control' variant, and creates a scatter plot of
    incremental profit against incremental purchases for the non-control
    variants.

    Args:
        transactions_df (pandas.DataFrame): DataFrame containing transaction data.
            Must have a 'variant' column plus the columns read by
            ``calculate_metrics``.
        variant_assignments_df (pandas.DataFrame): DataFrame containing variant
            assignments. Currently not used by this function; kept so the
            signature stays stable for callers.

    Returns:
        tuple: Contains overall metrics DataFrame, variant metrics DataFrame, and a matplotlib Figure

    Raises:
        IndexError: If ``transactions_df`` contains no rows for the 'Control' variant.
    """
    overall_metrics = calculate_metrics(transactions_df)
    variant_metrics = transactions_df.groupby('variant').apply(calculate_metrics).reset_index()

    # Calculate incremental metrics relative to the Control row
    control_metrics = variant_metrics[variant_metrics['variant'] == 'Control'].iloc[0]
    variant_metrics['Incremental Purchases'] = variant_metrics['Total Purchases'] - control_metrics['Total Purchases']
    variant_metrics['Incremental Profit'] = variant_metrics['Total Profit'] - control_metrics['Total Profit']
    # The Control row is 0/0 here and therefore NaN; a variant with zero
    # incremental purchases yields +/-inf (pandas float division does not raise).
    variant_metrics['Profit per Incremental Purchase'] = variant_metrics['Incremental Profit'] / variant_metrics['Incremental Purchases']

    # Prepare overall metrics table
    overall_df = pd.DataFrame([overall_metrics])

    # Prepare variant metrics table
    variant_order = ['Control', '5% discount', '10% discount', '15% discount']
    # Labels missing from the category list would be coerced to NaN by
    # pd.Categorical, so append any unexpected variants after the known ones.
    extra_variants = [v for v in variant_metrics['variant'].unique() if v not in variant_order]
    variant_metrics['variant'] = pd.Categorical(
        variant_metrics['variant'],
        categories=variant_order + extra_variants,
        ordered=True,
    )
    variant_metrics = variant_metrics.sort_values('variant')

    # Create plots
    fig, ax1 = plt.subplots(1, 1, figsize=(10, 6))

    # Incremental Total Profit vs Incremental Total Purchases
    non_control = variant_metrics[variant_metrics['variant'] != 'Control']
    ax1.scatter(non_control['Incremental Purchases'], non_control['Incremental Profit'])
    for _, row in non_control.iterrows():
        ax1.annotate(row['variant'], (row['Incremental Purchases'], row['Incremental Profit']))
    ax1.set_xlabel('Incremental Total Purchases')
    ax1.set_ylabel('Incremental Total Profit')
    ax1.set_title('Incremental Total Profit vs Incremental Total Purchases')
    # Red dashed lines mark the Control baseline (zero increment) on both axes.
    ax1.axhline(y=0, color='r', linestyle='--')
    ax1.axvline(x=0, color='r', linestyle='--')
    ax1.grid(True, linestyle=':', alpha=0.7)

    # Lay out this figure explicitly rather than pyplot's implicit current figure.
    fig.tight_layout()
    return overall_df, variant_metrics, fig