Spaces:

neuronslabs
/

comfyCausalAI

Sleeping

App Files Files Community

comfyCausalAI / rct_simulator.py

rknl

updated

2130e8d verified about 1 year ago

raw

history blame contribute delete

7.66 kB

	import pandas as pd
	import numpy as np
	from datetime import datetime, timedelta
	import random

	# Electronics catalogue used by the simulation. Each entry is a dict with
	# the keys "name", "cost" and "price" (in that order), built from the
	# (name, cost, price) rows below.
	electronics_products = [
	    {"name": item_name, "cost": item_cost, "price": item_price}
	    for item_name, item_cost, item_price in (
	        ("4K Smart TV", 500, 699),
	        ("Wireless Headphones", 100, 139),
	        ("Gaming Console", 300, 419),
	        ("Digital Camera", 400, 559),
	        ("Bluetooth Speaker", 50, 69),
	        ("Smartwatch", 150, 209),
	        ("Laptop", 600, 839),
	        ("Tablet", 200, 279),
	        ("Drone", 250, 349),
	        ("Home Theater System", 350, 489),
	        ("E-reader", 80, 109),
	        ("Portable Power Bank", 30, 41),
	        ("Wireless Earbuds", 80, 109),
	        ("Action Camera", 150, 209),
	        ("Smart Home Hub", 70, 97),
	        ("Gaming Mouse", 40, 55),
	        ("External Hard Drive", 60, 83),
	        ("Graphic Tablet", 180, 249),
	        ("Noise-Canceling Headphones", 200, 279),
	        ("Portable Projector", 300, 419),
	    )
	]

	# RCT arms and their discount rates. The two lists are index-aligned:
	# variants[i] is the label of the arm whose discount is discount_rates[i].
	variants = [
	    'Control',
	    '5% discount',
	    '10% discount',
	    '15% discount',
	]
	discount_rates = [
	    0,
	    0.05,
	    0.10,
	    0.15,
	]

	def calculate_purchase_probability(customer, discount, base_prob=0.1):
	    """
	    Calculate the probability of a customer making a purchase.

	    The probability starts at ``base_prob``, receives a series of additive
	    adjustments derived from customer attributes, is then scaled by a
	    discount-dependent multiplier, perturbed with Gaussian noise and finally
	    clamped to the interval [0, 1].

	    Args:
	        customer (dict-like): Customer attributes. The keys read are
	            'age', 'loyalty_level', 'total_orders',
	            'newsletter_subscription', 'main_browsing_device',
	            'average_order_value', 'gender' and 'preferred_payment_method'.
	        discount (float): The discount rate applied (e.g., 0.05 for 5%).
	            Only 0.05, 0.10 and 0.15 change the probability; any other
	            value (including 0) applies no discount multiplier.
	        base_prob (float): The base probability of purchase (default: 0.1)

	    Returns:
	        float: The calculated probability of purchase, within [0, 1].
	    """
	    prob = base_prob

	    # Age: the younger the customer (relative to 60), the larger the bump.
	    age_factor = (60 - customer['age']) / 60
	    prob += 0.02 * age_factor

	    # Loyalty: a lower loyalty_level gives a larger factor and a larger bump.
	    loyalty_factor = (6 - customer['loyalty_level']) / 5
	    prob += 0.02 * loyalty_factor

	    # Past behavior: more historical orders raise the probability, capped
	    # once the customer reaches 20 orders.
	    order_factor = min(customer['total_orders'] / 20, 1)
	    prob += 0.03 * order_factor

	    # Newsletter subscribers get a flat bump.
	    if customer['newsletter_subscription']:
	        prob += 0.03

	    # Browsing device: 'Mobile' and 'App' users get a flat bump.
	    if customer['main_browsing_device'] == 'Mobile':
	        prob += 0.02
	    elif customer['main_browsing_device'] == 'App':
	        prob += 0.03

	    # Average order value: higher AOV lowers the probability, capped at an
	    # AOV of 1000.
	    aov_factor = min(customer['average_order_value'] / 1000, 1)
	    prob -= 0.02 * aov_factor

	    # Gender: small symmetric adjustment; other values are left unchanged.
	    if customer['gender'] == 'Female':
	        prob += 0.01
	    elif customer['gender'] == 'Male':
	        prob -= 0.01

	    # Preferred payment method: credit card users get a flat bump.
	    if customer['preferred_payment_method'] == 'Credit Card':
	        prob += 0.02

	    # How strongly this customer reacts to a discount.
	    # NOTE(review): loyalty_factor is subtracted here, so a higher
	    # loyalty_level produces a *larger* sensitivity. The original comment
	    # said loyal customers are less sensitive - confirm which is intended.
	    discount_sensitivity = (
	        1
	        + age_factor
	        - loyalty_factor
	        + (0.5 if customer['newsletter_subscription'] else 0)
	    )

	    # Scale the probability multiplicatively for the recognised discount
	    # levels. Rates are matched with a small tolerance so that a discount
	    # obtained through float arithmetic (e.g. 0.1 + 0.05) is still
	    # recognised.
	    discount_strengths = ((0.05, 3.5), (0.10, 4.5), (0.15, 4.3))
	    for rate, strength in discount_strengths:
	        if abs(discount - rate) < 1e-9:
	            prob *= 1 + discount * strength * discount_sensitivity
	            break

	    # Add Gaussian noise (mean 0, std dev 0.02), then clamp to [0, 1].
	    noise = np.random.normal(0, 0.02)
	    prob = max(0, min(1, prob + noise))

	    return prob

	def simulate_purchase(customer, variant_index, product):
	    """
	    Simulate a single purchase opportunity for one customer and product.

	    The purchase probability is obtained from
	    ``calculate_purchase_probability`` using the discount rate of the given
	    variant. One uniform random draw decides whether the purchase happens;
	    if it does, the paid price and the cost are each perturbed with
	    Gaussian noise and floored at zero.

	    Args:
	        customer (dict-like): Customer attributes; 'customer_id' is read
	            here and the rest is forwarded to the probability model.
	        variant_index (int): Index into ``discount_rates`` / ``variants``.
	        product (dict): Product information with 'name', 'price', 'cost'.

	    Returns:
	        dict or None: Purchase details ('customer_id', 'variant',
	        'product', 'price', 'discounted_price', 'cost', 'profit') when a
	        purchase is made, None otherwise.
	    """
	    rate = discount_rates[variant_index]
	    purchase_prob = calculate_purchase_probability(customer, rate)

	    # Guard clause: no conversion, nothing to record.
	    if not (np.random.random() < purchase_prob):
	        return None

	    list_price = product['price']
	    base_cost = product['cost']

	    # Price actually paid: discounted list price plus noise whose standard
	    # deviation is 5% of the list price.
	    paid_price = max(
	        0,
	        list_price * (1 - rate) + np.random.normal(0, list_price * 0.05),
	    )

	    # Cost actually incurred: base cost plus noise whose standard deviation
	    # is 3% of the base cost.
	    incurred_cost = max(
	        0,
	        base_cost + np.random.normal(0, base_cost * 0.03),
	    )

	    record = {
	        'customer_id': customer['customer_id'],
	        'variant': variants[variant_index],
	        'product': product['name'],
	        'price': list_price,
	        'discounted_price': paid_price,
	        'cost': incurred_cost,
	        'profit': paid_price - incurred_cost,
	    }
	    return record

	def run_rct_simulation(df, experiment_duration=30, seed=42):
	    """
	    Run a Randomized Control Trial (RCT) simulation.

	    Every customer row in ``df`` is assigned uniformly at random to one of
	    the entries of ``variants``. Each customer then receives a
	    Poisson-distributed number of purchase opportunities (mean
	    ``experiment_duration / 10``); for each opportunity a random product is
	    picked and ``simulate_purchase`` decides whether a transaction occurs.

	    Note: this function reseeds the global ``numpy.random`` and ``random``
	    generators as a side effect.

	    Args:
	        df (pandas.DataFrame): The customer data, one row per customer.
	            Must contain 'customer_id' plus the attributes read by the
	            purchase-probability model.
	        experiment_duration (int): The duration of the experiment in days
	            (default: 30). Only used to scale the number of purchase
	            opportunities per customer.
	        seed (int or None): Seed passed to both random generators for
	            reproducibility (default: 42, the previously hard-coded value).

	    Returns:
	        tuple: ``(transactions_df, variant_assignments_df)``.
	        ``transactions_df`` has one row per purchase with the columns
	        'customer_id', 'variant', 'product', 'price', 'discounted_price',
	        'cost', 'profit' and 'purchase' (always 1).
	        ``variant_assignments_df`` has one row per customer with the
	        columns 'customer_id' and 'variant'. Both frames keep these columns
	        even when they contain no rows.
	    """
	    # Explicit schemas so that empty results still yield usable frames.
	    transaction_columns = [
	        'customer_id',
	        'variant',
	        'product',
	        'price',
	        'discounted_price',
	        'cost',
	        'profit',
	    ]
	    assignment_columns = ['customer_id', 'variant']

	    # Set random seed for reproducibility
	    np.random.seed(seed)
	    random.seed(seed)

	    results = []
	    variant_assignments = []

	    for _, customer in df.iterrows():
	        # The previous code drew a uniform number to pick between a "5%
	        # random assignment" branch and the regular branch, but both
	        # branches were identical. The draw is kept (and discarded) so the
	        # random stream, and therefore the output for a given seed, stays
	        # the same as before.
	        np.random.random()
	        variant_index = np.random.randint(0, len(variants))

	        # Record variant assignment for all eligible customers
	        variant_assignments.append({
	            'customer_id': customer['customer_id'],
	            'variant': variants[variant_index],
	        })

	        # Simulate multiple purchase opportunities with varying frequency
	        num_opportunities = np.random.poisson(experiment_duration / 10)
	        for _opportunity in range(num_opportunities):
	            product = random.choice(electronics_products)
	            purchase = simulate_purchase(customer, variant_index, product)
	            if purchase:
	                results.append(purchase)

	    # Create DataFrame from results; every recorded row is a purchase.
	    transactions_df = pd.DataFrame(results, columns=transaction_columns)
	    transactions_df['purchase'] = 1

	    # Create DataFrame from variant assignments
	    variant_assignments_df = pd.DataFrame(
	        variant_assignments, columns=assignment_columns
	    )

	    return transactions_df, variant_assignments_df