File size: 7,657 Bytes
5fc7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2130e8d
 
 
 
 
 
 
 
 
 
 
 
 
 
5fc7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2130e8d
5fc7138
2130e8d
5fc7138
2130e8d
5fc7138
2130e8d
 
 
 
 
5fc7138
 
2130e8d
 
 
 
 
 
 
 
 
 
 
 
 
 
5fc7138
 
 
 
2130e8d
 
 
 
 
 
 
 
5fc7138
 
 
 
 
 
2130e8d
 
5fc7138
 
 
 
2130e8d
 
 
 
 
 
 
 
 
 
 
 
 
5fc7138
 
 
 
 
 
 
 
 
2130e8d
5fc7138
 
2130e8d
 
 
 
 
5fc7138
 
 
 
 
 
 
2130e8d
 
 
5fc7138
 
 
 
 
 
 
 
 
 
 
 
2130e8d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Product catalogue for the simulation. Every entry carries the product's
# display name, its unit cost and its undiscounted list price.
electronics_products = [
    dict(name="4K Smart TV", cost=500, price=699),
    dict(name="Wireless Headphones", cost=100, price=139),
    dict(name="Gaming Console", cost=300, price=419),
    dict(name="Digital Camera", cost=400, price=559),
    dict(name="Bluetooth Speaker", cost=50, price=69),
    dict(name="Smartwatch", cost=150, price=209),
    dict(name="Laptop", cost=600, price=839),
    dict(name="Tablet", cost=200, price=279),
    dict(name="Drone", cost=250, price=349),
    dict(name="Home Theater System", cost=350, price=489),
    dict(name="E-reader", cost=80, price=109),
    dict(name="Portable Power Bank", cost=30, price=41),
    dict(name="Wireless Earbuds", cost=80, price=109),
    dict(name="Action Camera", cost=150, price=209),
    dict(name="Smart Home Hub", cost=70, price=97),
    dict(name="Gaming Mouse", cost=40, price=55),
    dict(name="External Hard Drive", cost=60, price=83),
    dict(name="Graphic Tablet", cost=180, price=249),
    dict(name="Noise-Canceling Headphones", cost=200, price=279),
    dict(name="Portable Projector", cost=300, price=419),
]

# Experiment arms. `variants` and `discount_rates` are parallel lists: the
# label at position i corresponds to the discount rate at position i.
variants = [
    'Control',
    '5% discount',
    '10% discount',
    '15% discount',
]
discount_rates = [
    0,
    0.05,
    0.10,
    0.15,
]

def calculate_purchase_probability(customer, discount, base_prob=0.1):
    """
    Calculate the probability of a customer making a purchase.

    Starting from ``base_prob``, additive adjustments are applied for age,
    loyalty level, order history, newsletter subscription, browsing device,
    average order value, gender and preferred payment method. If the discount
    matches one of the recognised tiers (5%, 10%, 15%) the running probability
    is then scaled by a tier-specific uplift that depends on the customer's
    discount sensitivity. Finally Gaussian noise (mean 0, std dev 0.02) is
    added and the result is clamped to the range [0, 1].

    Args:
        customer (dict): Customer attributes. The keys read here are 'age',
            'loyalty_level', 'total_orders', 'newsletter_subscription',
            'main_browsing_device', 'average_order_value', 'gender' and
            'preferred_payment_method'.
        discount (float): The discount rate applied (e.g., 0.05 for 5% discount).
            Rates that do not match a recognised tier receive no uplift.
        base_prob (float): The base probability of purchase (default: 0.1)

    Returns:
        float: The calculated probability of purchase, between 0 and 1.
            One draw is taken from NumPy's global random state per call.
    """
    prob = base_prob

    # Age factor: decreases linearly with age and becomes negative above 60.
    age_factor = (60 - customer['age']) / 60
    prob += 0.02 * age_factor

    # Loyalty factor: larger for LOWER loyalty levels (level 1 -> 1.0, level 5 -> 0.2).
    # NOTE(review): assuming a higher loyalty_level means a more loyal customer,
    # this factor is subtracted in discount_sensitivity below, so more loyal
    # customers end up MORE discount-sensitive. The original comment claimed
    # the opposite; behaviour is left unchanged - confirm the intended direction.
    loyalty_factor = (6 - customer['loyalty_level']) / 5
    prob += 0.02 * loyalty_factor

    # Past behavior factor: grows with order count and saturates at 20 orders.
    order_factor = min(customer['total_orders'] / 20, 1)
    prob += 0.03 * order_factor

    # Newsletter subscribers get a flat boost (and extra sensitivity below).
    if customer['newsletter_subscription']:
        prob += 0.03

    # Browsing device factor: Mobile and App users get a flat boost.
    if customer['main_browsing_device'] == 'Mobile':
        prob += 0.02
    elif customer['main_browsing_device'] == 'App':
        prob += 0.03

    # Average order value factor: higher AOV lowers the probability,
    # saturating at an AOV of 1000.
    aov_factor = min(customer['average_order_value'] / 1000, 1)
    prob -= 0.02 * aov_factor

    # Gender factor: small symmetric shift; other values are left unchanged.
    if customer['gender'] == 'Female':
        prob += 0.01
    elif customer['gender'] == 'Male':
        prob -= 0.01

    # Preferred payment method factor
    if customer['preferred_payment_method'] == 'Credit Card':
        prob += 0.02  # Credit card users might be more likely to make impulse purchases

    # Scale the probability by a tier-specific uplift. Tiers are matched with
    # a tight absolute tolerance instead of exact float equality, so a rate
    # produced by arithmetic (e.g. 0.1 + 0.05) still lands on its tier rather
    # than silently receiving no uplift.
    discount_sensitivity = 1 + age_factor - loyalty_factor + (0.5 if customer['newsletter_subscription'] else 0)
    discount_tiers = ((0.05, 3.5), (0.1, 4.5), (0.15, 4.3))
    for tier_rate, tier_multiplier in discount_tiers:
        if np.isclose(discount, tier_rate, rtol=0.0, atol=1e-9):
            prob *= (1 + discount * tier_multiplier * discount_sensitivity)
            break

    # Add random noise to the probability and clamp it to a valid range
    noise = np.random.normal(0, 0.02)  # Add noise with mean 0 and std dev 0.02
    prob = max(0, min(1, prob + noise))

    return prob

def simulate_purchase(customer, variant_index, product):
    """
    Simulate a single purchase opportunity for one customer.

    The discount for the given variant is looked up, the purchase probability
    is computed for the customer, and one uniform random draw decides whether
    the opportunity converts. On conversion the sale price and cost are each
    perturbed with Gaussian noise and floored at zero.

    Args:
        customer (dict): A dictionary containing customer attributes
        variant_index (int): The index of the variant (discount level)
        product (dict): A dictionary containing product information
            (the 'name', 'price' and 'cost' keys are read)

    Returns:
        dict or None: A dictionary with purchase details if a purchase is made, None otherwise
    """
    discount = discount_rates[variant_index]
    purchase_probability = calculate_purchase_probability(customer, discount)

    # No conversion: bail out before drawing any price/cost noise.
    if not (np.random.random() < purchase_probability):
        return None

    list_price = product['price']
    unit_cost = product['cost']

    # Sale price = discounted list price plus noise (std dev 5% of list price).
    price_jitter = np.random.normal(0, list_price * 0.05)
    sale_price = max(0, list_price * (1 - discount) + price_jitter)

    # Cost = catalogue cost plus noise (std dev 3% of cost).
    cost_jitter = np.random.normal(0, unit_cost * 0.03)
    realised_cost = max(0, unit_cost + cost_jitter)

    purchase_record = {
        'customer_id': customer['customer_id'],
        'variant': variants[variant_index],
        'product': product['name'],
        'price': list_price,
        'discounted_price': sale_price,
        'cost': realised_cost,
        'profit': sale_price - realised_cost,
    }
    return purchase_record

def run_rct_simulation(df, experiment_duration=30):
    """
    Run a Randomized Control Trial (RCT) simulation.

    Each row of ``df`` is assigned uniformly at random to one of the variants,
    then given a Poisson-distributed number of purchase opportunities (mean
    ``experiment_duration / 10``). For each opportunity a random product is
    picked and a purchase is simulated.

    Both NumPy's and the standard library's global random generators are
    re-seeded with 42 on every call, so repeated calls with the same input
    return the same data.

    Args:
        df (pandas.DataFrame): The customer data, one row per customer. Each
            row must provide 'customer_id' plus the attributes needed to
            simulate a purchase.
        experiment_duration (int): The duration of the experiment in days
            (default: 30). It only scales the expected number of purchase
            opportunities; no dates are attached to the output.

    Returns:
        tuple: Contains two DataFrames - transactions and variant assignments.
            Transactions has the columns 'customer_id', 'variant', 'product',
            'price', 'discounted_price', 'cost', 'profit' and 'purchase'
            (always 1). Assignments has 'customer_id' and 'variant'. Both
            keep these columns even when they contain no rows.
    """
    # Fixed schemas so the returned frames have the expected columns even
    # when there are no customers or no simulated purchase converts.
    transaction_columns = [
        'customer_id', 'variant', 'product', 'price',
        'discounted_price', 'cost', 'profit',
    ]
    assignment_columns = ['customer_id', 'variant']

    # Set random seed for reproducibility
    np.random.seed(42)
    random.seed(42)

    results = []
    variant_assignments = []

    for _, customer in df.iterrows():
        # The original code drew a uniform number to choose between two
        # identical assignment branches. The draw has no effect on the
        # assignment but is kept so the seeded random stream, and therefore
        # the generated data, stays exactly the same.
        np.random.random()
        variant_index = np.random.randint(0, len(variants))

        # Record variant assignment for all eligible customers
        variant_assignments.append({
            'customer_id': customer['customer_id'],
            'variant': variants[variant_index]
        })

        # Simulate multiple purchase opportunities with varying frequency
        num_opportunities = np.random.poisson(experiment_duration / 10)
        for _ in range(num_opportunities):
            product = random.choice(electronics_products)
            purchase = simulate_purchase(customer, variant_index, product)
            if purchase:
                results.append(purchase)

    # Create DataFrame from results; every recorded row is a completed purchase
    transactions_df = pd.DataFrame(results, columns=transaction_columns)
    transactions_df['purchase'] = 1

    # Create DataFrame from variant assignments
    variant_assignments_df = pd.DataFrame(variant_assignments, columns=assignment_columns)

    return transactions_df, variant_assignments_df