Spaces:

0xnu
/

fraud-detection

Sleeping

App Files Files Community

fraud-detection / model_wrapper.py

0xnu

Upload 4 files

513edc0 verified about 1 month ago

raw

history blame

7.97 kB

	import joblib
	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import StandardScaler, LabelEncoder
	from sklearn.impute import SimpleImputer

	class FraudDetectionModel:
	    """Wrapper class for the fraud detection model.

	    Holds three objects loaded with ``joblib``:

	    * ``model`` - a classifier exposing ``predict_proba`` (and optionally
	      ``feature_importances_``);
	    * ``preprocessor`` - an object exposing ``preprocess(df, fit=False)``,
	      which returns a tuple whose first element is the feature matrix, and
	      a ``feature_names`` attribute;
	    * ``metadata`` - optional; expected to be dict-like (``.get`` is used).
	    """

	    # Probability at or above which a transaction is flagged as suspicious.
	    _SUSPICIOUS_THRESHOLD = 0.5

	    # (minimum probability, risk level, recommendation), highest band first.
	    _RISK_BANDS = (
	        (0.8, "High Risk", "Block transaction and investigate immediately"),
	        (0.5, "Medium Risk", "Review transaction manually"),
	        (0.2, "Low Risk", "Monitor transaction"),
	    )

	    # Band used when the probability is below every cut-off above.
	    _LOWEST_BAND = ("Very Low Risk", "Process normally")

	    def __init__(self, model_path=None, preprocessor_path=None, metadata_path=None):
	        """Create the wrapper, loading the artifacts when paths are supplied.

	        Loading happens only when BOTH ``model_path`` and
	        ``preprocessor_path`` are given; otherwise the wrapper starts empty
	        and ``load_model`` must be called before predicting.
	        """
	        self.model = None
	        self.preprocessor = None
	        self.metadata = None

	        if model_path and preprocessor_path:
	            self.load_model(model_path, preprocessor_path, metadata_path)

	    def load_model(self, model_path, preprocessor_path, metadata_path=None):
	        """Load the trained model, preprocessor, and metadata.

	        Args:
	            model_path: Path of the joblib file holding the model.
	            preprocessor_path: Path of the joblib file holding the preprocessor.
	            metadata_path: Optional path of the joblib file holding metadata.

	        Raises:
	            Whatever ``joblib.load`` raises for a missing or unreadable file.
	            In that case the wrapper keeps its previous state.
	        """
	        # SECURITY: joblib.load unpickles the file and can execute arbitrary
	        # code; only pass paths to artifacts from a trusted source.
	        # Load everything into locals first so a failure half-way through
	        # does not leave a model paired with a missing/old preprocessor.
	        model = joblib.load(model_path)
	        preprocessor = joblib.load(preprocessor_path)
	        metadata = joblib.load(metadata_path) if metadata_path else None

	        self.model = model
	        self.preprocessor = preprocessor
	        # Reset metadata when none is given so a reload never reports the
	        # previous model's details.
	        self.metadata = metadata

	        if metadata_path:
	            # Use .get (as get_model_info does) so an incomplete metadata
	            # file cannot abort a load that has otherwise succeeded.
	            model_name = metadata.get('model_name', 'Unknown')
	            try:
	                auc_text = f"{metadata.get('auc_score'):.4f}"
	            except (TypeError, ValueError):
	                auc_text = 'Unknown'
	            print(f"Loaded {model_name} model with AUC: {auc_text}")
	        else:
	            print("Model and preprocessor loaded successfully!")

	    @classmethod
	    def _classify_risk(cls, probability):
	        """Return ``(risk_level, recommendation)`` for a fraud probability."""
	        for cutoff, risk_level, recommendation in cls._RISK_BANDS:
	            if probability >= cutoff:
	                return risk_level, recommendation
	        return cls._LOWEST_BAND

	    @staticmethod
	    def _ensure_transaction_id(frame):
	        """Return ``frame`` with a placeholder ``TransactionID`` if it has none.

	        The input is never modified; a copy is made when the column is added.
	        """
	        if 'TransactionID' in frame.columns:
	            return frame
	        frame = frame.copy()
	        frame['TransactionID'] = 'temp_id'
	        return frame

	    def predict_single_transaction(self, transaction_data):
	        """
	        Predict fraud probability for a single transaction

	        Args:
	            transaction_data (dict): Dictionary containing transaction features

	        Returns:
	            dict: On success, ``fraud_probability`` (float), ``risk_level``,
	            ``recommendation`` and ``is_suspicious`` (bool). If preprocessing
	            or prediction raises, a dict with ``error``,
	            ``fraud_probability`` set to ``None``, ``risk_level`` "Unknown"
	            and a manual-review ``recommendation`` (no ``is_suspicious`` key).

	        Raises:
	            ValueError: If the model or preprocessor has not been loaded.
	        """
	        if self.model is None or self.preprocessor is None:
	            raise ValueError("Model not loaded. Please load model first.")

	        # One-row frame; TransactionID is added if absent because the
	        # preprocessing step requires it.
	        df = self._ensure_transaction_id(pd.DataFrame([transaction_data]))

	        try:
	            X_processed, _ = self.preprocessor.preprocess(df, fit=False)

	            # Column 1 of predict_proba is taken as the fraud probability.
	            # Cast to a built-in float so every returned value is a plain
	            # Python object (e.g. JSON-serializable), not a NumPy scalar.
	            fraud_probability = float(self.model.predict_proba(X_processed)[0, 1])
	            risk_level, recommendation = self._classify_risk(fraud_probability)

	            return {
	                "fraud_probability": fraud_probability,
	                "risk_level": risk_level,
	                "recommendation": recommendation,
	                "is_suspicious": bool(fraud_probability >= self._SUSPICIOUS_THRESHOLD)
	            }

	        except Exception as e:
	            # Deliberate best-effort: report the failure to the caller
	            # instead of raising, and steer the transaction to manual review.
	            return {
	                "error": f"Prediction failed: {str(e)}",
	                "fraud_probability": None,
	                "risk_level": "Unknown",
	                "recommendation": "Manual review required"
	            }

	    def predict_batch(self, transactions_df):
	        """
	        Predict fraud probabilities for multiple transactions

	        Args:
	            transactions_df (pd.DataFrame): DataFrame containing transaction data

	        Returns:
	            pd.DataFrame: Copy of the input with ``fraud_probability``,
	            ``is_suspicious`` and ``risk_level`` columns added. An input with
	            no rows yields an empty result with the same added columns.

	        Raises:
	            ValueError: If the model or preprocessor has not been loaded.
	        """
	        if self.model is None or self.preprocessor is None:
	            raise ValueError("Model not loaded. Please load model first.")

	        if len(transactions_df) == 0:
	            # Nothing to score; skip the model, which may reject empty input.
	            fraud_probabilities = np.array([], dtype=float)
	        else:
	            # Same placeholder handling as the single-transaction path; the
	            # placeholder lives on a working copy and is not returned.
	            features_df = self._ensure_transaction_id(transactions_df)
	            X_processed, _ = self.preprocessor.preprocess(features_df, fit=False)
	            fraud_probabilities = self.model.predict_proba(X_processed)[:, 1]

	        # Add predictions to a copy so the caller's DataFrame is untouched.
	        result_df = transactions_df.copy()
	        result_df['fraud_probability'] = fraud_probabilities
	        result_df['is_suspicious'] = fraud_probabilities >= self._SUSPICIOUS_THRESHOLD
	        result_df['risk_level'] = [
	            self._classify_risk(prob)[0] for prob in fraud_probabilities
	        ]

	        return result_df

	    def get_feature_importance(self, top_n=20):
	        """Get feature importance if available.

	        Args:
	            top_n (int): Number of most important features to return.

	        Returns:
	            pd.DataFrame with ``feature`` and ``importance`` columns sorted by
	            descending importance, or an explanatory string when the model
	            has no ``feature_importances_`` attribute.

	        Raises:
	            ValueError: If the model is not loaded, or if the model has
	                importances but no preprocessor is loaded to name them.
	        """
	        if self.model is None:
	            raise ValueError("Model not loaded.")

	        if not hasattr(self.model, 'feature_importances_'):
	            return "Feature importance not available for this model type."

	        if self.preprocessor is None:
	            raise ValueError("Preprocessor not loaded; feature names are unavailable.")

	        importance_df = pd.DataFrame({
	            'feature': self.preprocessor.feature_names,
	            'importance': self.model.feature_importances_
	        })
	        return importance_df.sort_values('importance', ascending=False).head(top_n)

	    def get_model_info(self):
	        """Get information about the loaded model.

	        Returns:
	            The string "No model loaded." when no model is present; otherwise
	            a dict with the model's class name, the feature count (or
	            "Unknown" without a preprocessor), a fixed list of preprocessing
	            step names and, when metadata is loaded, selected metadata fields.
	        """
	        if self.model is None:
	            return "No model loaded."

	        if self.preprocessor is not None:
	            feature_count = len(self.preprocessor.feature_names)
	        else:
	            feature_count = "Unknown"

	        info = {
	            "model_type": type(self.model).__name__,
	            "feature_count": feature_count,
	            "preprocessing_steps": [
	                "Categorical encoding",
	                "Feature engineering",
	                "Missing value imputation",
	                "Feature scaling"
	            ]
	        }

	        # Add metadata information if available
	        if self.metadata:
	            info.update({
	                "model_name": self.metadata.get('model_name', 'Unknown'),
	                "auc_score": self.metadata.get('auc_score', 'Unknown'),
	                "training_timestamp": self.metadata.get('timestamp', 'Unknown'),
	                "model_file": self.metadata.get('model_file', 'Unknown'),
	                "preprocessor_file": self.metadata.get('preprocessor_file', 'Unknown')
	            })

	        return info

	# Demo entry point: executed only when this file is run as a script.
	if __name__ == "__main__":
	    # Build the wrapper; because model and preprocessor paths are both
	    # given, the constructor loads the three joblib files immediately.
	    detector = FraudDetectionModel(
	        metadata_path="model_metadata_20250727_145448.joblib",
	        preprocessor_path="preprocessor_20250727_145448.joblib",
	        model_path="fraud_detection_model_xgboost_20250727_145448.joblib",
	    )

	    # Hand-written example record. It is only printed below, never scored.
	    example_txn = {
	        'TransactionAmt': 150.0,
	        # card1 .. card6
	        'card1': 13553, 'card2': 150.0, 'card3': 150.0,
	        'card4': 'discover', 'card5': 142.0, 'card6': 'credit',
	        # addr*, dist* and e-mail domain fields
	        'addr1': 325.0, 'addr2': 87.0,
	        'dist1': 19.0, 'dist2': 19.0,
	        'P_emaildomain': 'gmail.com', 'R_emaildomain': 'gmail.com',
	        # C1 .. C14
	        'C1': 1.0, 'C2': 1.0, 'C3': 0.0, 'C4': 0.0, 'C5': 0.0,
	        'C6': 1.0, 'C7': 0.0, 'C8': 0.0, 'C9': 1.0, 'C10': 0.0,
	        'C11': 1.0, 'C12': 1.0, 'C13': 1.0, 'C14': 1.0,
	        # D1 .. D5, D10, D15
	        'D1': 0.0, 'D2': 0.0, 'D3': 0.0, 'D4': 0.0, 'D5': 20.0,
	        'D10': 0.0, 'D15': 0.0,
	        # M1 .. M6
	        'M1': 'T', 'M2': 'T', 'M3': 'T',
	        'M4': 'M0', 'M5': 'F', 'M6': 'F',
	        'TransactionDT': 86400,
	    }

	    print("Sample transaction for testing:")
	    print(example_txn)

	    # Separator line followed by the closing usage hints, one per line.
	    closing_lines = (
	        "\n" + "=" * 50,
	        "Model wrapper created successfully!",
	        "To use: load your model files and call predict_single_transaction()",
	    )
	    for text in closing_lines:
	        print(text)