Anomaly / synthetic dataset.py
Ujeshhh's picture
Upload 10 files
d16c0f6 verified
raw
history blame
2.11 kB
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta
# Seed every random source this script draws from so repeated runs pick the
# same customers, merchants, amounts and transaction IDs. Seeding NumPy alone
# leaves the stdlib `random.choice` calls and the Faker generators unseeded.
# NOTE: timestamps are requested relative to 'now', so their absolute values
# still depend on when the script is executed.
RANDOM_SEED = 42
fake = Faker()
Faker.seed(RANDOM_SEED)       # shared seed for Faker providers (dates, UUIDs)
random.seed(RANDOM_SEED)      # stdlib RNG used by random.choice
np.random.seed(RANDOM_SEED)   # NumPy global RNG used for indices, flags, amounts
# Parameters
num_customers = 50
num_transactions = 3000
suspicious_ratio = 0.05  # probability that any single transaction is anomalous

# Transaction categories
regular_merchants = [
    'Pharmacy',
    'Supermarket',
    'Electricity Bill',
    'Water Bill',
    'Medical Checkup',
    'Rent',
    'Insurance',
]
suspicious_merchants = [
    'Gift Card Store',
    'Unknown Transfer',
    'Crypto Exchange',
    'Late Night ATM',
    'Online Casino',
]
transaction_types = ['debit', 'credit', 'atm_withdrawal']

# Generate customers: sequential IDs starting at CUST1000, one age per customer
customer_ids = [f"CUST{number}" for number in range(1000, 1000 + num_customers)]
# Elderly age range; the upper bound of randint is exclusive, so ages are 65-89
ages = np.random.randint(65, 90, size=num_customers)
# Generate transactions
# Each pass picks a customer, a timestamp within the last 180 days, and an
# anomaly flag; the flag then selects which merchant list, amount range and
# transaction-type list the remaining values are drawn from.
data = []
for _ in range(num_transactions):
    pick = np.random.randint(0, num_customers)
    customer_id, age = customer_ids[pick], ages[pick]

    timestamp = fake.date_time_between(start_date='-180d', end_date='now')
    is_anomalous = np.random.rand() < suspicious_ratio

    # Choose the sampling settings first, then draw in a fixed order
    # (merchant, amount, type) so both branches consume the RNGs identically.
    if is_anomalous:
        merchant_pool = suspicious_merchants
        low, high = 200, 5000
        type_pool = ['debit', 'atm_withdrawal']  # anomalies are never credits
    else:
        merchant_pool = regular_merchants
        low, high = 10, 300
        type_pool = transaction_types

    merchant = random.choice(merchant_pool)
    amount = round(np.random.uniform(low, high), 2)
    transaction_type = random.choice(type_pool)

    record = {
        'customer_id': customer_id,
        'age': age,
        'transaction_id': fake.uuid4(),
        'timestamp': timestamp,
        'merchant': merchant,
        'amount': amount,
        'transaction_type': transaction_type,
        'is_anomalous': int(is_anomalous),  # stored as 0/1 label
    }
    data.append(record)
# Build the DataFrame from the collected records and order it by timestamp
df = pd.DataFrame(data).sort_values(by='timestamp')

# Write the result to CSV without the index column, then report completion
df.to_csv('synthetic_elderly_transactions.csv', index=False)
print("✅ Dataset created and saved as 'synthetic_elderly_transactions.csv'")