File size: 2,111 Bytes
d16c0f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta

# Seed every random source this script draws from, not just NumPy, so that
# repeated runs produce comparable data:
#   - NumPy picks the customer, the anomaly flag, and the amount.
#   - The stdlib `random` module picks the merchant and transaction type.
#   - Faker produces the timestamp and the transaction ID.
# NOTE: timestamps are requested relative to 'now', so they still shift with
# the wall clock between runs even with a fixed seed.
SEED = 42

fake = Faker()
Faker.seed(SEED)      # class-level seed for Faker's shared random generator
random.seed(SEED)
np.random.seed(SEED)

# Dataset dimensions
num_customers = 50
num_transactions = 3000
suspicious_ratio = 0.05  # probability that a single transaction is generated as an anomaly (5%)

# Merchant pools: everyday spending versus categories used for anomalous rows
regular_merchants = [
    'Pharmacy',
    'Supermarket',
    'Electricity Bill',
    'Water Bill',
    'Medical Checkup',
    'Rent',
    'Insurance',
]
suspicious_merchants = [
    'Gift Card Store',
    'Unknown Transfer',
    'Crypto Exchange',
    'Late Night ATM',
    'Online Casino',
]

# Transaction types available to regular (non-anomalous) rows
transaction_types = ['debit', 'credit', 'atm_withdrawal']

# One ID per customer, numbered upward from CUST1000
customer_ids = ["CUST" + str(number) for number in range(1000, 1000 + num_customers)]

# One age per customer, aligned by position with customer_ids.
# randint's upper bound is exclusive, so ages fall in 65..89.
ages = np.random.randint(65, 90, size=num_customers)

# Build the transaction records, one dict per row
data = []

# Sampling profile per class: (merchant pool, amount bounds, allowed types)
anomalous_profile = (suspicious_merchants, (200, 5000), ['debit', 'atm_withdrawal'])
regular_profile = (regular_merchants, (10, 300), transaction_types)

for _ in range(num_transactions):
    # Pick the owning customer; the same index selects the matching age
    owner = np.random.randint(0, num_customers)

    # A moment somewhere in the 180 days leading up to now
    occurred_at = fake.date_time_between(start_date='-180d', end_date='now')

    # Decide whether this row is an anomaly, then select its sampling profile
    flagged = np.random.rand() < suspicious_ratio
    merchant_pool, (low, high), type_pool = anomalous_profile if flagged else regular_profile

    # Draw merchant, amount, then type -- the same order for both classes
    merchant = random.choice(merchant_pool)
    amount = round(np.random.uniform(low, high), 2)
    transaction_type = random.choice(type_pool)

    record = {
        'customer_id': customer_ids[owner],
        'age': ages[owner],
        'transaction_id': fake.uuid4(),
        'timestamp': occurred_at,
        'merchant': merchant,
        'amount': amount,
        'transaction_type': transaction_type,
        'is_anomalous': int(flagged),  # stored as 0/1 rather than a boolean
    }
    data.append(record)

# Load the records into a DataFrame ordered by timestamp
df = pd.DataFrame(data).sort_values(by='timestamp')

# Write the result to disk without the index column, then confirm on stdout
df.to_csv('synthetic_elderly_transactions.csv', index=False)
print("✅ Dataset created and saved as 'synthetic_elderly_transactions.csv'")