import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer


class FraudDetectionModel:
    """Wrapper class for the fraud detection model.

    Bundles a trained classifier, the preprocessor that prepares raw
    transactions for it, and an optional metadata mapping, all of which are
    loaded from joblib files.

    The code below relies on the following from the loaded objects:
        * model: ``predict_proba(X)`` whose column 1 is used as the fraud
          probability; optionally ``feature_importances_``.
        * preprocessor: ``preprocess(df, fit=False)`` returning a 2-tuple
          whose first item is passed to the model, and a ``feature_names``
          attribute.
        * metadata: a mapping read with the keys ``model_name``,
          ``auc_score``, ``timestamp``, ``model_file`` and
          ``preprocessor_file``.
    """

    # Risk tiers, checked from the highest threshold down:
    # (minimum probability, risk level, recommendation).
    _RISK_TIERS = (
        (0.8, "High Risk", "Block transaction and investigate immediately"),
        (0.5, "Medium Risk", "Review transaction manually"),
        (0.2, "Low Risk", "Monitor transaction"),
    )
    # Used when the probability is below every threshold above.
    _DEFAULT_TIER = ("Very Low Risk", "Process normally")
    # Probability at or above which a transaction is flagged as suspicious.
    _SUSPICIOUS_THRESHOLD = 0.5

    def __init__(self, model_path=None, preprocessor_path=None, metadata_path=None):
        """Create the wrapper and optionally load the artifacts.

        Loading only happens when BOTH ``model_path`` and
        ``preprocessor_path`` are given; otherwise the wrapper starts empty
        and ``load_model`` must be called before predicting.

        Args:
            model_path (str, optional): Path to the joblib model file.
            preprocessor_path (str, optional): Path to the joblib
                preprocessor file.
            metadata_path (str, optional): Path to the joblib metadata file.
        """
        self.model = None
        self.preprocessor = None
        self.metadata = None

        if model_path and preprocessor_path:
            self.load_model(model_path, preprocessor_path, metadata_path)

    def load_model(self, model_path, preprocessor_path, metadata_path=None):
        """Load the trained model, preprocessor, and metadata.

        Prints a one-line status message once loading has finished.

        Args:
            model_path (str): Path to the joblib model file.
            preprocessor_path (str): Path to the joblib preprocessor file.
            metadata_path (str, optional): Path to the joblib metadata file.
                When omitted, any previously loaded metadata is cleared so
                it cannot describe a different model.
        """
        # SECURITY: joblib.load unpickles its input and can therefore execute
        # arbitrary code. Only load files that come from a trusted source.
        self.model = joblib.load(model_path)
        self.preprocessor = joblib.load(preprocessor_path)

        if not metadata_path:
            # Drop metadata left over from an earlier load_model() call.
            self.metadata = None
            print("Model and preprocessor loaded successfully!")
            return

        self.metadata = joblib.load(metadata_path)

        # A missing or non-numeric entry must not abort a load that has
        # already succeeded; get_model_info() tolerates missing keys too.
        model_name = self.metadata.get('model_name', 'Unknown')
        auc_score = self.metadata.get('auc_score', 'Unknown')
        try:
            auc_text = f"{auc_score:.4f}"
        except (TypeError, ValueError):
            auc_text = str(auc_score)
        print(f"Loaded {model_name} model with AUC: {auc_text}")

    @classmethod
    def _classify_risk(cls, probability):
        """Map a fraud probability to its (risk level, recommendation) pair."""
        for threshold, risk_level, recommendation in cls._RISK_TIERS:
            if probability >= threshold:
                return risk_level, recommendation
        return cls._DEFAULT_TIER

    def predict_single_transaction(self, transaction_data):
        """
        Predict fraud probability for a single transaction

        Args:
            transaction_data (dict): Dictionary containing transaction features

        Returns:
            dict: On success, ``fraud_probability`` (float), ``risk_level``,
                ``recommendation`` and ``is_suspicious`` (bool). If
                preprocessing or prediction raises, a dict with ``error``,
                ``fraud_probability`` set to None, ``risk_level`` "Unknown"
                and a manual-review ``recommendation`` is returned instead;
                that dict has no ``is_suspicious`` key.

        Raises:
            ValueError: If the model or preprocessor has not been loaded.
        """
        if self.model is None or self.preprocessor is None:
            raise ValueError("Model not loaded. Please load model first.")

        # Convert to a one-row DataFrame
        df = pd.DataFrame([transaction_data])

        # Add a placeholder TransactionID if not present (per the original
        # author's note, the preprocessing step requires this column)
        if 'TransactionID' not in df.columns:
            df['TransactionID'] = 'temp_id'

        try:
            X_processed, _ = self.preprocessor.preprocess(df, fit=False)
            # Row 0, column 1 of predict_proba; cast to a plain Python float.
            fraud_probability = float(self.model.predict_proba(X_processed)[0, 1])
        except Exception as e:
            # Deliberate best-effort: report the failure in the result
            # instead of raising, so the caller can fall back to manual review.
            return {
                "error": f"Prediction failed: {str(e)}",
                "fraud_probability": None,
                "risk_level": "Unknown",
                "recommendation": "Manual review required"
            }

        risk_level, recommendation = self._classify_risk(fraud_probability)
        return {
            "fraud_probability": fraud_probability,
            "risk_level": risk_level,
            "recommendation": recommendation,
            # bool() keeps this a plain Python bool (not numpy.bool_), so the
            # result dict stays JSON-serializable.
            "is_suspicious": bool(fraud_probability >= self._SUSPICIOUS_THRESHOLD)
        }

    def predict_batch(self, transactions_df):
        """
        Predict fraud probabilities for multiple transactions

        Unlike predict_single_transaction, errors raised by the preprocessor
        or the model are not caught here and propagate to the caller.

        Args:
            transactions_df (pd.DataFrame): DataFrame containing transaction data

        Returns:
            pd.DataFrame: Copy of ``transactions_df`` with the columns
                ``fraud_probability``, ``is_suspicious`` and ``risk_level``
                added. The input DataFrame is copied before the columns
                are added.

        Raises:
            ValueError: If the model or preprocessor has not been loaded.
        """
        if self.model is None or self.preprocessor is None:
            raise ValueError("Model not loaded. Please load model first.")

        # Preprocess the data
        X_processed, _ = self.preprocessor.preprocess(transactions_df, fit=False)

        # Column 1 of predict_proba is used as the fraud probability
        fraud_probabilities = self.model.predict_proba(X_processed)[:, 1]

        # Add predictions to a copy of the original DataFrame
        result_df = transactions_df.copy()
        result_df['fraud_probability'] = fraud_probabilities
        result_df['is_suspicious'] = fraud_probabilities >= self._SUSPICIOUS_THRESHOLD
        # Same tiers as predict_single_transaction; only the level is kept.
        result_df['risk_level'] = [
            self._classify_risk(prob)[0] for prob in fraud_probabilities
        ]

        return result_df

    def get_feature_importance(self, top_n=20):
        """Get feature importance if available.

        Args:
            top_n (int): Maximum number of rows to return.

        Returns:
            pd.DataFrame | str: The ``top_n`` rows of a DataFrame with the
                columns ``feature`` and ``importance``, sorted by importance
                in descending order; or an explanatory string when the model
                has no ``feature_importances_`` attribute.

        Raises:
            ValueError: If the model is not loaded, or if the model exposes
                importances but no preprocessor is available to name them.
        """
        if self.model is None:
            raise ValueError("Model not loaded.")

        if not hasattr(self.model, 'feature_importances_'):
            return "Feature importance not available for this model type."

        # Feature names come from the preprocessor; fail with a clear message
        # rather than an AttributeError on None.
        if self.preprocessor is None:
            raise ValueError("Preprocessor not loaded.")

        importance_df = pd.DataFrame({
            'feature': self.preprocessor.feature_names,
            'importance': self.model.feature_importances_
        })
        return importance_df.sort_values('importance', ascending=False).head(top_n)

    def get_model_info(self):
        """Get information about the loaded model.

        Returns:
            dict | str: The string "No model loaded." when no model is set.
                Otherwise a dict with ``model_type`` (class name of the
                model), ``feature_count`` (length of the preprocessor's
                ``feature_names``, or "Unknown" without a preprocessor) and
                ``preprocessing_steps``; when metadata is loaded, also
                ``model_name``, ``auc_score``, ``training_timestamp``,
                ``model_file`` and ``preprocessor_file``, each defaulting to
                "Unknown".
        """
        if self.model is None:
            return "No model loaded."

        # Explicit None check: the truthiness of an arbitrary preprocessor
        # object is not a reliable "is it loaded" signal.
        if self.preprocessor is not None:
            feature_count = len(self.preprocessor.feature_names)
        else:
            feature_count = "Unknown"

        info = {
            "model_type": type(self.model).__name__,
            "feature_count": feature_count,
            # Fixed descriptive list; not read from the preprocessor.
            "preprocessing_steps": [
                "Categorical encoding",
                "Feature engineering",
                "Missing value imputation",
                "Feature scaling"
            ]
        }

        # Add metadata information if available
        if self.metadata:
            info.update({
                "model_name": self.metadata.get('model_name', 'Unknown'),
                "auc_score": self.metadata.get('auc_score', 'Unknown'),
                "training_timestamp": self.metadata.get('timestamp', 'Unknown'),
                "model_file": self.metadata.get('model_file', 'Unknown'),
                "preprocessor_file": self.metadata.get('preprocessor_file', 'Unknown')
            })

        return info


# Example usage and testing
if __name__ == "__main__":
    # Initialize model wrapper with specific files. These paths are
    # hard-coded, so joblib.load is called on them when the script runs.
    fraud_model = FraudDetectionModel(
        model_path="fraud_detection_model_xgboost_20250727_145448.joblib",
        preprocessor_path="preprocessor_20250727_145448.joblib",
        metadata_path="model_metadata_20250727_145448.joblib"
    )

    # Example transaction data for testing
    sample_transaction = {
        'TransactionAmt': 150.0,
        'card1': 13553,
        'card2': 150.0,
        'card3': 150.0,
        'card4': 'discover',
        'card5': 142.0,
        'card6': 'credit',
        'addr1': 325.0,
        'addr2': 87.0,
        'dist1': 19.0,
        'dist2': 19.0,
        'P_emaildomain': 'gmail.com',
        'R_emaildomain': 'gmail.com',
        'C1': 1.0,
        'C2': 1.0,
        'C3': 0.0,
        'C4': 0.0,
        'C5': 0.0,
        'C6': 1.0,
        'C7': 0.0,
        'C8': 0.0,
        'C9': 1.0,
        'C10': 0.0,
        'C11': 1.0,
        'C12': 1.0,
        'C13': 1.0,
        'C14': 1.0,
        'D1': 0.0,
        'D2': 0.0,
        'D3': 0.0,
        'D4': 0.0,
        'D5': 20.0,
        'D10': 0.0,
        'D15': 0.0,
        'M1': 'T',
        'M2': 'T',
        'M3': 'T',
        'M4': 'M0',
        'M5': 'F',
        'M6': 'F',
        'TransactionDT': 86400
    }

    print("Sample transaction for testing:")
    print(sample_transaction)
    print("\n" + "="*50)
    print("Model wrapper created successfully!")
    print("To use: load your model files and call predict_single_transaction()")