# Spaces: Sleeping  (status text captured with the file; not part of the module)
"""data_handler.py module."""
from typing import Dict, Optional, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


class FinancialDataHandler:
    """Generate synthetic financial records and expose them as a tf.data pipeline.

    The handler owns one ``StandardScaler``. It is fitted again on every call
    to :meth:`preprocess_data`, so the scaler handed back by
    :meth:`get_client_data` corresponds to the most recently processed frame.
    """

    def __init__(self, config: Dict):
        """Initialize the data handler with configuration.

        Args:
            config: Mapping with a ``'data'`` section that provides
                ``'batch_size'``, ``'shuffle_buffer'`` and
                ``'prefetch_buffer'``.

        Raises:
            KeyError: If ``config`` is a dict and the ``'data'`` section or
                one of its three keys is missing.
        """
        data_config = config['data']
        self.batch_size = data_config['batch_size']
        self.shuffle_buffer = data_config['shuffle_buffer']
        self.prefetch_buffer = data_config['prefetch_buffer']
        self.scaler = StandardScaler()

    def simulate_financial_data(
        self, num_samples: int = 1000, seed: Optional[int] = 42
    ) -> pd.DataFrame:
        """Generate synthetic financial data for testing.

        Args:
            num_samples: Number of rows to generate.
            seed: Seed for the random generator. The default (42) reproduces
                the same values on every call; pass ``None`` to get a
                different sample each time.

        Returns:
            A DataFrame with the columns ``transaction_amount``,
            ``account_balance``, ``transaction_frequency``, ``credit_score``
            (clipped to the range 300-850) and ``days_since_last_transaction``.
        """
        # A private RandomState keeps the draws reproducible without reseeding
        # NumPy's process-wide generator, which would silently alter the random
        # state of every other caller. RandomState(seed) yields the same
        # stream as np.random.seed(seed) followed by the np.random.* functions.
        rng = np.random.RandomState(seed)

        # The draw order below determines the generated values; keep it stable.
        data = {
            'transaction_amount': rng.lognormal(mean=4.0, sigma=1.0, size=num_samples),
            'account_balance': rng.normal(loc=10000, scale=5000, size=num_samples),
            'transaction_frequency': rng.poisson(lam=5, size=num_samples),
            'credit_score': rng.normal(loc=700, scale=50, size=num_samples).clip(300, 850),
            'days_since_last_transaction': rng.exponential(scale=7, size=num_samples),
        }
        return pd.DataFrame(data)

    def preprocess_data(self, data: pd.DataFrame) -> tf.data.Dataset:
        """Preprocess the data and convert to TensorFlow dataset.

        Args:
            data: Feature frame to standardize; every column is passed to the
                scaler.

        Returns:
            A dataset over the standardized rows that is shuffled, batched
            and prefetched using the buffer and batch sizes from the config.
        """
        # fit_transform re-fits self.scaler on `data` before scaling it.
        scaled_data = self.scaler.fit_transform(data)

        dataset = tf.data.Dataset.from_tensor_slices(scaled_data)

        # Shuffle individual rows first, then group them into batches.
        dataset = dataset.shuffle(self.shuffle_buffer)
        dataset = dataset.batch(self.batch_size)
        dataset = dataset.prefetch(self.prefetch_buffer)
        return dataset

    def get_client_data(
        self, num_samples: int = 1000, seed: Optional[int] = 42
    ) -> Tuple[tf.data.Dataset, StandardScaler]:
        """Get preprocessed client data and scaler.

        Args:
            num_samples: Number of synthetic rows to simulate.
            seed: Seed forwarded to :meth:`simulate_financial_data`.

        Returns:
            A ``(dataset, scaler)`` pair: the batched dataset built from the
            simulated rows and the scaler that was fitted on them.
        """
        raw_data = self.simulate_financial_data(num_samples=num_samples, seed=seed)
        dataset = self.preprocess_data(raw_data)
        return dataset, self.scaler