# fraud-detection / model_wrapper.py
# 0xnu's picture
# Upload 4 files
# 513edc0 verified
# raw
# history blame
# 7.97 kB
import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
class FraudDetectionModel:
    """Wrapper class for the fraud detection model.

    The wrapper holds three objects loaded with ``joblib``:

    * ``model`` - must provide ``predict_proba(X)``; column 1 of the result
      is used as the fraud probability. ``feature_importances_`` is optional.
    * ``preprocessor`` - must provide ``preprocess(df, fit=False)`` returning
      a 2-tuple whose first item is the model input, and ``feature_names``.
    * ``metadata`` - optional dict-like object describing the training run.
    """

    # Probability at or above which a transaction is flagged as suspicious.
    _SUSPICIOUS_THRESHOLD = 0.5

    # (minimum probability, risk level, recommendation), highest tier first.
    # Kept in one place so single and batch predictions cannot drift apart.
    _RISK_TIERS = (
        (0.8, "High Risk", "Block transaction and investigate immediately"),
        (0.5, "Medium Risk", "Review transaction manually"),
        (0.2, "Low Risk", "Monitor transaction"),
    )

    # Used when the probability is below every tier (or is NaN).
    _FALLBACK_RISK = ("Very Low Risk", "Process normally")

    def __init__(self, model_path=None, preprocessor_path=None, metadata_path=None):
        """Create the wrapper, loading artifacts when both main paths are given.

        Args:
            model_path: Path to the joblib-serialized model, or None.
            preprocessor_path: Path to the joblib-serialized preprocessor, or None.
            metadata_path: Optional path to the joblib-serialized metadata.
        """
        self.model = None
        self.preprocessor = None
        self.metadata = None
        if model_path and preprocessor_path:
            self.load_model(model_path, preprocessor_path, metadata_path)

    def load_model(self, model_path, preprocessor_path, metadata_path=None):
        """Load the trained model, preprocessor, and metadata.

        Args:
            model_path: Path to the joblib-serialized model.
            preprocessor_path: Path to the joblib-serialized preprocessor.
            metadata_path: Optional path to the joblib-serialized metadata.
        """
        # SECURITY NOTE: joblib.load is pickle-based and can execute arbitrary
        # code while deserializing. Only load artifacts from trusted sources.
        self.model = joblib.load(model_path)
        self.preprocessor = joblib.load(preprocessor_path)
        if metadata_path:
            self.metadata = joblib.load(metadata_path)
            # Metadata keys are optional elsewhere in this class (see
            # get_model_info), so a missing key must not abort loading.
            model_name = self.metadata.get('model_name', 'Unknown')
            auc_score = self.metadata.get('auc_score', 'Unknown')
            try:
                auc_text = f"{auc_score:.4f}"
            except (TypeError, ValueError):
                # Non-numeric or missing score: show it as-is.
                auc_text = str(auc_score)
            print(f"Loaded {model_name} model with AUC: {auc_text}")
        else:
            print("Model and preprocessor loaded successfully!")

    @classmethod
    def _classify_risk(cls, probability):
        """Return ``(risk_level, recommendation)`` for a fraud probability."""
        for minimum, risk_level, recommendation in cls._RISK_TIERS:
            if probability >= minimum:
                return risk_level, recommendation
        return cls._FALLBACK_RISK

    def predict_single_transaction(self, transaction_data):
        """Predict fraud probability for a single transaction.

        Args:
            transaction_data (dict): Dictionary containing transaction features.

        Returns:
            dict: On success, ``fraud_probability`` (float), ``risk_level``,
            ``recommendation`` and ``is_suspicious`` (bool). If preprocessing
            or prediction fails, a dict with ``error``, ``fraud_probability``
            set to None, ``risk_level`` "Unknown" and a manual-review
            ``recommendation``.

        Raises:
            ValueError: If the model or preprocessor has not been loaded.
        """
        if self.model is None or self.preprocessor is None:
            raise ValueError("Model not loaded. Please load model first.")

        # Convert to a one-row DataFrame
        df = pd.DataFrame([transaction_data])

        # Add TransactionID if not present (required for preprocessing)
        if 'TransactionID' not in df.columns:
            df['TransactionID'] = 'temp_id'

        try:
            X_processed, _ = self.preprocessor.preprocess(df, fit=False)
            # Convert to a native float up front so every derived value
            # (including is_suspicious) is a plain Python type and the result
            # can be serialized, e.g. with json.dumps.
            fraud_probability = float(self.model.predict_proba(X_processed)[0, 1])
        except Exception as e:
            # Deliberate best-effort: a failed prediction is reported to the
            # caller as a "manual review" result instead of raising.
            return {
                "error": f"Prediction failed: {str(e)}",
                "fraud_probability": None,
                "risk_level": "Unknown",
                "recommendation": "Manual review required"
            }

        risk_level, recommendation = self._classify_risk(fraud_probability)
        return {
            "fraud_probability": fraud_probability,
            "risk_level": risk_level,
            "recommendation": recommendation,
            "is_suspicious": fraud_probability >= self._SUSPICIOUS_THRESHOLD
        }

    def predict_batch(self, transactions_df):
        """Predict fraud probabilities for multiple transactions.

        Args:
            transactions_df (pd.DataFrame): DataFrame containing transaction data.

        Returns:
            pd.DataFrame: Copy of the input with ``fraud_probability``,
            ``is_suspicious`` and ``risk_level`` columns added.

        Raises:
            ValueError: If the model or preprocessor has not been loaded.
        """
        if self.model is None or self.preprocessor is None:
            raise ValueError("Model not loaded. Please load model first.")

        # Preprocess the data and score every row
        X_processed, _ = self.preprocessor.preprocess(transactions_df, fit=False)
        fraud_probabilities = self.model.predict_proba(X_processed)[:, 1]

        # Add predictions to a copy so the caller's DataFrame gets no new columns
        result_df = transactions_df.copy()
        result_df['fraud_probability'] = fraud_probabilities
        result_df['is_suspicious'] = fraud_probabilities >= self._SUSPICIOUS_THRESHOLD
        result_df['risk_level'] = [
            self._classify_risk(probability)[0]
            for probability in fraud_probabilities
        ]
        return result_df

    def get_feature_importance(self, top_n=20):
        """Get feature importance if available.

        Args:
            top_n (int): Number of most important features to return.

        Returns:
            pd.DataFrame with ``feature`` and ``importance`` columns sorted by
            descending importance, or an explanatory string when the model has
            no ``feature_importances_`` attribute.

        Raises:
            ValueError: If the model is not loaded, or if the model exposes
                importances but no preprocessor is loaded to name the features.
        """
        if self.model is None:
            raise ValueError("Model not loaded.")
        if not hasattr(self.model, 'feature_importances_'):
            return "Feature importance not available for this model type."
        if self.preprocessor is None:
            # Feature names come from the preprocessor; fail with a clear
            # message instead of an AttributeError on None.
            raise ValueError("Preprocessor not loaded.")

        importance_df = pd.DataFrame({
            'feature': self.preprocessor.feature_names,
            'importance': self.model.feature_importances_
        }).sort_values('importance', ascending=False).head(top_n)
        return importance_df

    def get_model_info(self):
        """Get information about the loaded model.

        Returns:
            The string "No model loaded." when no model is set; otherwise a
            dict with the model type, feature count, the list of preprocessing
            steps and, when metadata is available, the training-run details.
        """
        if self.model is None:
            return "No model loaded."

        info = {
            "model_type": type(self.model).__name__,
            "feature_count": len(self.preprocessor.feature_names) if self.preprocessor else "Unknown",
            "preprocessing_steps": [
                "Categorical encoding",
                "Feature engineering",
                "Missing value imputation",
                "Feature scaling"
            ]
        }

        # Add metadata information if available
        if self.metadata:
            info.update({
                "model_name": self.metadata.get('model_name', 'Unknown'),
                "auc_score": self.metadata.get('auc_score', 'Unknown'),
                "training_timestamp": self.metadata.get('timestamp', 'Unknown'),
                "model_file": self.metadata.get('model_file', 'Unknown'),
                "preprocessor_file": self.metadata.get('preprocessor_file', 'Unknown')
            })
        return info
# Demo entry point: runs only when this file is executed as a script.
if __name__ == "__main__":
    # Build the wrapper from the three serialized artifacts named below.
    artifact_paths = dict(
        model_path="fraud_detection_model_xgboost_20250727_145448.joblib",
        preprocessor_path="preprocessor_20250727_145448.joblib",
        metadata_path="model_metadata_20250727_145448.joblib",
    )
    fraud_model = FraudDetectionModel(**artifact_paths)

    # Assemble one example transaction group by group; insertion order is
    # kept identical to the flat literal so the printed dict is unchanged.
    sample_transaction = {'TransactionAmt': 150.0}
    sample_transaction.update(
        card1=13553, card2=150.0, card3=150.0,
        card4='discover', card5=142.0, card6='credit',
    )
    sample_transaction.update(addr1=325.0, addr2=87.0, dist1=19.0, dist2=19.0)
    sample_transaction.update(P_emaildomain='gmail.com', R_emaildomain='gmail.com')

    # C1..C14, in order
    c_values = (1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0)
    for position, c_value in enumerate(c_values, start=1):
        sample_transaction[f'C{position}'] = c_value

    # D columns present in the example (D1-D5, D10, D15)
    for d_suffix, d_value in ((1, 0.0), (2, 0.0), (3, 0.0), (4, 0.0), (5, 20.0), (10, 0.0), (15, 0.0)):
        sample_transaction[f'D{d_suffix}'] = d_value

    # M1..M6, in order
    for position, m_value in enumerate(('T', 'T', 'T', 'M0', 'F', 'F'), start=1):
        sample_transaction[f'M{position}'] = m_value

    sample_transaction['TransactionDT'] = 86400

    print("Sample transaction for testing:")
    print(sample_transaction)
    separator = "\n" + "=" * 50
    print(separator)
    print("Model wrapper created successfully!")
    print("To use: load your model files and call predict_single_transaction()")