"""data_handler.py module."""
import numpy as np
import pandas as pd
from typing import Tuple, Dict
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


class FinancialDataHandler:
    def __init__(self, config: Dict):
        """Initialize the data handler with configuration."""
        self.batch_size = config['data']['batch_size']
        self.shuffle_buffer = config['data']['shuffle_buffer']
        self.prefetch_buffer = config['data']['prefetch_buffer']
        self.scaler = StandardScaler()

    def simulate_financial_data(self, num_samples: int = 1000) -> pd.DataFrame:
        """Generate synthetic financial data for testing."""
        np.random.seed(42)
        data = {
            'transaction_amount': np.random.lognormal(mean=4.0, sigma=1.0, size=num_samples),
            'account_balance': np.random.normal(loc=10000, scale=5000, size=num_samples),
            'transaction_frequency': np.random.poisson(lam=5, size=num_samples),
            'credit_score': np.random.normal(loc=700, scale=50, size=num_samples).clip(300, 850),
            'days_since_last_transaction': np.random.exponential(scale=7, size=num_samples)
        }
        return pd.DataFrame(data)

    def preprocess_data(self, data: pd.DataFrame) -> tf.data.Dataset:
        """Preprocess the data and convert it to a TensorFlow dataset."""
        # Standardize the features
        scaled_data = self.scaler.fit_transform(data)
        # Convert to a TensorFlow dataset
        dataset = tf.data.Dataset.from_tensor_slices(scaled_data)
        # Apply dataset transformations
        dataset = dataset.shuffle(self.shuffle_buffer)
        dataset = dataset.batch(self.batch_size)
        dataset = dataset.prefetch(self.prefetch_buffer)
        return dataset

    def get_client_data(self) -> Tuple[tf.data.Dataset, StandardScaler]:
        """Get preprocessed client data and the fitted scaler."""
        # Simulate client data
        raw_data = self.simulate_financial_data()
        # Preprocess data
        dataset = self.preprocess_data(raw_data)
        return dataset, self.scaler
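

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: the config values
    # below are assumed examples; only the key names are taken from __init__.
    config = {
        'data': {
            'batch_size': 32,          # assumed value
            'shuffle_buffer': 1000,    # assumed value
            'prefetch_buffer': tf.data.AUTOTUNE,  # assumed value
        }
    }
    handler = FinancialDataHandler(config)
    dataset, scaler = handler.get_client_data()
    # Each element is one batch of standardized features: shape (batch_size, 5).
    for batch in dataset.take(1):
        print(batch.shape)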