File size: 5,539 Bytes
5fc7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Catalogue of Electronics products available for simulated purchases.
# Each row below is (name, cost, price); rows are expanded into dicts with
# the keys "name", "cost" and "price", in that order.
electronics_products = [
    {"name": product_name, "cost": unit_cost, "price": list_price}
    for product_name, unit_cost, list_price in (
        ("4K Smart TV", 500, 699),
        ("Wireless Headphones", 100, 139),
        ("Gaming Console", 300, 419),
        ("Digital Camera", 400, 559),
        ("Bluetooth Speaker", 50, 69),
        ("Smartwatch", 150, 209),
        ("Laptop", 600, 839),
        ("Tablet", 200, 279),
        ("Drone", 250, 349),
        ("Home Theater System", 350, 489),
        ("E-reader", 80, 109),
        ("Portable Power Bank", 30, 41),
        ("Wireless Earbuds", 80, 109),
        ("Action Camera", 150, 209),
        ("Smart Home Hub", 70, 97),
        ("Gaming Mouse", 40, 55),
        ("External Hard Drive", 60, 83),
        ("Graphic Tablet", 180, 249),
        ("Noise-Canceling Headphones", 200, 279),
        ("Portable Projector", 300, 419),
    )
]

# RCT arms, written as (variant label, fractional discount rate) pairs and
# unzipped into two parallel lists: index i of `variants` corresponds to
# index i of `discount_rates`.
variants, discount_rates = (
    list(column)
    for column in zip(
        ('Control', 0),
        ('5% discount', 0.05),
        ('10% discount', 0.10),
        ('15% discount', 0.15),
    )
)

# Function to calculate purchase probability with increased feature dependency
def calculate_purchase_probability(customer, discount, base_prob=0.1):
    """Return the simulated probability that ``customer`` makes a purchase.

    The probability starts at ``base_prob``, is shifted by a series of additive
    per-feature adjustments, is then scaled by a discount-dependent uplift, and
    is finally clamped to the range [0, 1].

    Args:
        customer: Any object supporting ``customer[key]`` lookups (for example
            a dict or a pandas Series row) with the keys ``age``,
            ``loyalty_level``, ``total_orders``, ``newsletter_subscription``,
            ``main_browsing_device``, ``average_order_value``, ``gender`` and
            ``preferred_payment_method``.
        discount: Fractional discount rate. Rates of 0.05, 0.10 and 0.15 each
            have their own uplift multiplier; any other value (including 0)
            leaves the probability unscaled.
        base_prob: Starting probability before any adjustment.

    Returns:
        The purchase probability, clamped to lie between 0 and 1.
    """
    prob = base_prob

    # Age factor: positive below age 60, negative above it.
    age_factor = (60 - customer['age']) / 60
    prob += 0.02 * age_factor

    # Loyalty factor: shrinks as loyalty_level grows (1.0 at level 1).
    loyalty_factor = (6 - customer['loyalty_level']) / 5
    prob += 0.02 * loyalty_factor

    # Past behavior factor: more past orders raise the probability, capped at
    # 20 orders.
    order_factor = min(customer['total_orders'] / 20, 1)
    prob += 0.03 * order_factor

    # Newsletter subscription factor
    if customer['newsletter_subscription']:
        prob += 0.03

    # Browsing device factor: Mobile and App users get a boost; any other
    # device leaves the probability unchanged.
    if customer['main_browsing_device'] == 'Mobile':
        prob += 0.02
    elif customer['main_browsing_device'] == 'App':
        prob += 0.03

    # Average order value factor: higher AOV lowers the probability, capped at
    # an AOV of 1000.
    aov_factor = min(customer['average_order_value'] / 1000, 1)
    prob -= 0.02 * aov_factor

    # Gender factor: any value other than 'Female' / 'Male' is neutral.
    if customer['gender'] == 'Female':
        prob += 0.01
    elif customer['gender'] == 'Male':
        prob -= 0.01

    # Preferred payment method factor
    if customer['preferred_payment_method'] == 'Credit Card':
        prob += 0.02  # Credit card users might be more likely to make impulse purchases

    # Discount sensitivity grows with age_factor and with a newsletter
    # subscription, and shrinks with loyalty_factor.
    # NOTE(review): because loyalty_factor is larger for LOW loyalty_level,
    # subtracting it makes customers with a higher loyalty_level MORE
    # discount-sensitive. The original comment said loyal customers are less
    # sensitive -- confirm which direction is intended.
    discount_sensitivity = 1 + age_factor - loyalty_factor + (0.5 if customer['newsletter_subscription'] else 0)

    # Per-rate uplift multipliers. Rates are matched with a small tolerance
    # rather than exact float equality, so a computed rate such as 0.1 + 0.05
    # (which is 0.15000000000000002) still receives the 15% uplift instead of
    # silently being treated like the control arm.
    uplift_by_rate = ((0.05, 3.5), (0.10, 4.5), (0.15, 4.3))
    for rate, multiplier in uplift_by_rate:
        if abs(discount - rate) < 1e-9:
            prob *= (1 + discount * multiplier * discount_sensitivity)
            break

    return min(max(prob, 0), 1)  # Ensure probability is between 0 and 1

# Function to simulate purchases
def simulate_purchase(customer, variant_index, product):
    """Simulate one purchase opportunity for ``customer`` in a given RCT arm.

    Looks up the arm's discount rate, computes the customer's purchase
    probability for that rate and draws one uniform random number from
    NumPy's global generator to decide whether the purchase happens.

    Args:
        customer: Customer record supporting ``customer[key]`` lookups; must
            provide ``customer_id`` plus the keys read by
            ``calculate_purchase_probability``.
        variant_index: Index into the parallel ``variants`` /
            ``discount_rates`` lists.
        product: Dict with the keys ``name``, ``price`` and ``cost``.

    Returns:
        A dict describing the transaction (list price, discounted price, cost
        and resulting profit) when the purchase happens, otherwise ``None``.
    """
    rate = discount_rates[variant_index]
    buy_probability = calculate_purchase_probability(customer, rate)

    # Guard clause: no purchase unless the draw falls below the probability.
    draw = np.random.random()
    if not (draw < buy_probability):
        return None

    sale_price = product['price'] * (1 - rate)
    transaction = {}
    transaction['customer_id'] = customer['customer_id']
    transaction['variant'] = variants[variant_index]
    transaction['product'] = product['name']
    transaction['price'] = product['price']
    transaction['discounted_price'] = sale_price
    transaction['cost'] = product['cost']
    transaction['profit'] = sale_price - product['cost']
    return transaction

def run_rct_simulation(df, experiment_duration=30):
    """Run the discount RCT simulation over every customer row in ``df``.

    Each customer is randomly assigned to one of the arms in ``variants`` and
    then given ``round(experiment_duration / 10)`` purchase opportunities, each
    for a randomly chosen product from ``electronics_products``.

    The global NumPy and ``random`` generators are re-seeded with 42 on every
    call, so repeated calls with the same input give the same output. Note
    that this resets global random state for the caller as well.

    Args:
        df: DataFrame with one row per customer. Rows must provide
            ``customer_id`` and the feature columns read by
            ``calculate_purchase_probability``.
        experiment_duration: Experiment length in days. Because ``round`` uses
            round-half-to-even, a duration of 25 gives 2 opportunities and a
            duration of 35 gives 4.

    Returns:
        A tuple ``(transactions_df, variant_assignments_df)``.
        ``transactions_df`` has one row per simulated purchase with the columns
        ``customer_id``, ``variant``, ``product``, ``price``,
        ``discounted_price``, ``cost``, ``profit`` and ``purchase`` (always 1).
        ``variant_assignments_df`` has one row per customer with the columns
        ``customer_id`` and ``variant``. Both frames keep these columns even
        when they contain no rows.
    """
    # Set random seed for reproducibility
    np.random.seed(42)
    random.seed(42)

    # Fixed schemas so that an empty result (no customers, or no purchases)
    # still yields frames with the expected columns.
    transaction_columns = [
        'customer_id', 'variant', 'product', 'price',
        'discounted_price', 'cost', 'profit',
    ]
    assignment_columns = ['customer_id', 'variant']

    # Number of purchase opportunities per customer (loop-invariant).
    opportunities = round(experiment_duration / 10)

    results = []
    variant_assignments = []

    for _, customer in df.iterrows():
        # Randomly assign variant (uniform over all defined arms)
        variant_index = np.random.randint(0, len(variants))

        # Record variant assignment for all eligible customers
        variant_assignments.append({
            'customer_id': customer['customer_id'],
            'variant': variants[variant_index]
        })

        # Simulate multiple purchase opportunities
        for _opportunity in range(opportunities):
            product = random.choice(electronics_products)
            purchase = simulate_purchase(customer, variant_index, product)
            if purchase:
                results.append(purchase)

    # Create DataFrame from results
    transactions_df = pd.DataFrame(results, columns=transaction_columns)
    transactions_df['purchase'] = 1

    # Create DataFrame from variant assignments
    variant_assignments_df = pd.DataFrame(variant_assignments, columns=assignment_columns)

    return transactions_df, variant_assignments_df