# Source: file view of a Hugging Face Space (Space status: Sleeping).
# File size: 7,657 bytes. Revisions shown in the blame gutter: 5fc7138, 2130e8d.
# The per-line blame hashes and the 1-194 line-number gutter from the web page
# were extraction residue and are not part of the module.
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
# Product catalog used by the simulation. Each row is
# (name, cost, price) and is expanded into a dict with those three keys.
electronics_products = [
    {"name": name, "cost": cost, "price": price}
    for name, cost, price in (
        ("4K Smart TV", 500, 699),
        ("Wireless Headphones", 100, 139),
        ("Gaming Console", 300, 419),
        ("Digital Camera", 400, 559),
        ("Bluetooth Speaker", 50, 69),
        ("Smartwatch", 150, 209),
        ("Laptop", 600, 839),
        ("Tablet", 200, 279),
        ("Drone", 250, 349),
        ("Home Theater System", 350, 489),
        ("E-reader", 80, 109),
        ("Portable Power Bank", 30, 41),
        ("Wireless Earbuds", 80, 109),
        ("Action Camera", 150, 209),
        ("Smart Home Hub", 70, 97),
        ("Gaming Mouse", 40, 55),
        ("External Hard Drive", 60, 83),
        ("Graphic Tablet", 180, 249),
        ("Noise-Canceling Headphones", 200, 279),
        ("Portable Projector", 300, 419),
    )
]

# RCT arms and their discount rates. The two lists are parallel:
# variants[i] is the label for discount_rates[i].
variants = [
    'Control',
    '5% discount',
    '10% discount',
    '15% discount',
]
discount_rates = [
    0,
    0.05,
    0.10,
    0.15,
]
def calculate_purchase_probability(customer, discount, base_prob=0.1):
    """
    Calculate the probability of a customer making a purchase.

    Starting from ``base_prob``, a series of additive adjustments is applied
    from the customer's attributes. The running total is then scaled by a
    discount-dependent uplift, one draw of Gaussian noise is added, and the
    result is clamped to the range [0, 1].

    Args:
        customer (dict): Mapping-like object (anything supporting
            ``customer[key]``) with the keys 'age', 'loyalty_level',
            'total_orders', 'newsletter_subscription',
            'main_browsing_device', 'average_order_value', 'gender' and
            'preferred_payment_method'.
        discount (float): The discount rate applied (e.g., 0.05 for 5%).
            Only rates matching 0.05, 0.10 or 0.15 (within a small
            tolerance) produce an uplift; any other rate, including 0,
            leaves the probability unscaled.
        base_prob (float): The base probability of purchase (default: 0.1)

    Returns:
        float: The calculated probability of purchase, clamped to [0, 1].
        The value is stochastic: it consumes one draw from NumPy's global
        random state.
    """
    prob = base_prob

    # Age: the adjustment shrinks linearly with age and turns negative
    # above 60.
    age_factor = (60 - customer['age']) / 60
    prob += 0.02 * age_factor

    # Loyalty: a LOWER loyalty_level gives a LARGER factor.
    # NOTE(review): the original comment said "more loyal customers less
    # sensitive to discounts", but because this factor is subtracted in
    # discount_sensitivity below, a higher loyalty_level actually increases
    # discount sensitivity. Confirm the intended direction.
    loyalty_factor = (6 - customer['loyalty_level']) / 5
    prob += 0.02 * loyalty_factor

    # Past behavior: more orders raise the probability, capped at 20 orders.
    order_factor = min(customer['total_orders'] / 20, 1)
    prob += 0.03 * order_factor

    # Newsletter subscribers get a flat bonus.
    if customer['newsletter_subscription']:
        prob += 0.03

    # Browsing device: Mobile and App users get a flat bonus; any other
    # device is left unchanged.
    if customer['main_browsing_device'] == 'Mobile':
        prob += 0.02
    elif customer['main_browsing_device'] == 'App':
        prob += 0.03

    # Average order value: a higher AOV lowers the probability, capped at
    # an AOV of 1000.
    aov_factor = min(customer['average_order_value'] / 1000, 1)
    prob -= 0.02 * aov_factor

    # Gender: small symmetric offset; other values are left unchanged.
    if customer['gender'] == 'Female':
        prob += 0.01
    elif customer['gender'] == 'Male':
        prob -= 0.01

    # Preferred payment method: credit card users get a flat bonus.
    if customer['preferred_payment_method'] == 'Credit Card':
        prob += 0.02

    # Discount sensitivity scales the uplift applied for a discount tier.
    # NOTE(review): this can be zero or negative (e.g. age above 60 with
    # loyalty_level 1), in which case a discount does not raise, or even
    # lowers, the probability. Confirm whether that is intended.
    discount_sensitivity = (
        1 + age_factor - loyalty_factor
        + (0.5 if customer['newsletter_subscription'] else 0)
    )

    # Tier table: (discount rate, uplift multiplier). The rate is matched
    # with a tolerance rather than ``==`` so that a computed rate such as
    # 0.05 * 3 (which is 0.15000000000000002) still selects its tier.
    for tier_rate, tier_multiplier in ((0.05, 3.5), (0.1, 4.5), (0.15, 4.3)):
        if abs(discount - tier_rate) < 1e-9:
            prob *= (1 + discount * tier_multiplier * discount_sensitivity)
            break

    # Add Gaussian noise (mean 0, std dev 0.02), then clamp to [0, 1].
    noise = np.random.normal(0, 0.02)
    prob = max(0, min(1, prob + noise))
    return prob
def simulate_purchase(customer, variant_index, product):
    """
    Simulate one purchase opportunity for a customer under a given variant.

    The purchase probability is obtained from
    ``calculate_purchase_probability`` using the variant's discount rate.
    A uniform random draw then decides whether the purchase happens. When
    it does, the sale price and the cost are each perturbed with Gaussian
    noise and floored at zero.

    Args:
        customer (dict): A dictionary containing customer attributes,
            including 'customer_id'
        variant_index (int): Index into ``discount_rates`` / ``variants``
        product (dict): A dictionary with 'name', 'price' and 'cost'

    Returns:
        dict or None: Purchase details ('customer_id', 'variant', 'product',
        'price', 'discounted_price', 'cost', 'profit') if a purchase is
        made, None otherwise
    """
    rate = discount_rates[variant_index]
    purchase_prob = calculate_purchase_probability(customer, rate)

    # Guard clause: no purchase on this opportunity.
    if not (np.random.random() < purchase_prob):
        return None

    list_price = product['price']
    unit_cost = product['cost']

    # Sale price: discounted list price plus noise with a standard
    # deviation of 5% of the list price, never below zero.
    price_jitter = np.random.normal(0, list_price * 0.05)
    sale_price = max(0, list_price * (1 - rate) + price_jitter)

    # Cost: unit cost plus noise with a standard deviation of 3% of the
    # cost, never below zero.
    cost_jitter = np.random.normal(0, unit_cost * 0.03)
    realized_cost = max(0, unit_cost + cost_jitter)

    record = {
        'customer_id': customer['customer_id'],
        'variant': variants[variant_index],
        'product': product['name'],
        'price': list_price,
        'discounted_price': sale_price,
        'cost': realized_cost,
        'profit': sale_price - realized_cost,
    }
    return record
def run_rct_simulation(df, experiment_duration=30):
    """
    Run a Randomized Control Trial (RCT) simulation.

    Each row of ``df`` is assigned uniformly at random to one of the
    variants. The row then receives a Poisson-distributed number of purchase
    opportunities (mean ``experiment_duration / 10``), each on a randomly
    chosen product and simulated with ``simulate_purchase``.

    Both NumPy's and the standard library's global random generators are
    re-seeded with 42 on every call, so repeated calls with the same input
    produce the same output.

    Args:
        df (pandas.DataFrame): The customer data, one row per customer. Each
            row must provide 'customer_id' plus the attributes read by
            ``calculate_purchase_probability``.
        experiment_duration (int): The duration of the experiment in days
            (default: 30). It only affects the expected number of purchase
            opportunities per customer.

    Returns:
        tuple: ``(transactions_df, variant_assignments_df)``.
        ``transactions_df`` has one row per simulated purchase with columns
        'customer_id', 'variant', 'product', 'price', 'discounted_price',
        'cost', 'profit' and a constant 'purchase' column equal to 1.
        ``variant_assignments_df`` has one row per customer with columns
        'customer_id' and 'variant'. Both frames keep these columns even
        when they contain no rows.
    """
    # Set random seed for reproducibility
    np.random.seed(42)
    random.seed(42)

    # Explicit schemas so that an empty result (no customers, or no
    # purchases) still has the columns downstream code expects.
    transaction_columns = [
        'customer_id', 'variant', 'product', 'price',
        'discounted_price', 'cost', 'profit',
    ]
    assignment_columns = ['customer_id', 'variant']

    results = []
    variant_assignments = []

    for _, customer in df.iterrows():
        # The original code branched on this draw, but both branches made
        # the same uniform assignment. The draw is kept and discarded so
        # the random stream, and therefore the seeded output, is unchanged.
        np.random.random()
        variant_index = np.random.randint(0, len(variants))

        # Record variant assignment for every customer, purchase or not
        variant_assignments.append({
            'customer_id': customer['customer_id'],
            'variant': variants[variant_index],
        })

        # Simulate a varying number of purchase opportunities
        num_opportunities = np.random.poisson(experiment_duration / 10)
        for _ in range(num_opportunities):
            product = random.choice(electronics_products)
            purchase = simulate_purchase(customer, variant_index, product)
            if purchase:
                results.append(purchase)

    # Create DataFrame from results; every row is a completed purchase
    transactions_df = pd.DataFrame(results, columns=transaction_columns)
    transactions_df['purchase'] = 1

    # Create DataFrame from variant assignments
    variant_assignments_df = pd.DataFrame(
        variant_assignments, columns=assignment_columns
    )
    return transactions_df, variant_assignments_df
|