"""Gradio front end for the credit card fraud detection model.

The module loads a trained model at import time, exposes two prediction
callbacks (single transaction and CSV batch) and builds the Gradio UI that
is launched when the file is run as a script.
"""

import os

import gradio as gr
import joblib
import numpy as np
import pandas as pd

from model_wrapper import FraudDetectionModel
# NOTE(review): FraudDataPreprocessor is not referenced directly below; the
# import is kept because the saved preprocessor artifact presumably needs the
# class to be importable when it is deserialized -- confirm before removing.
from preprocessor import FraudDataPreprocessor

# Probability cut-offs shared by the single-transaction and batch views.
_HIGH_RISK_THRESHOLD = 0.8
_MEDIUM_RISK_THRESHOLD = 0.5
_LOW_RISK_THRESHOLD = 0.2

# Initialize the fraud detection model
fraud_model = FraudDetectionModel()

# Load model if files exist
try:
    # Load the specific XGBoost model files from your training
    model_path = "fraud_detection_model_xgboost_20250727_145448.joblib"
    preprocessor_path = "preprocessor_20250727_145448.joblib"
    metadata_path = "model_metadata_20250727_145448.joblib"

    if os.path.exists(model_path) and os.path.exists(preprocessor_path):
        # Metadata is optional: load it only when the file is present.
        if os.path.exists(metadata_path):
            fraud_model.load_model(model_path, preprocessor_path, metadata_path)
        else:
            fraud_model.load_model(model_path, preprocessor_path)
        model_loaded = True
    else:
        model_loaded = False
        print("Model files not found. Please upload the following files:")
        print(f"- {model_path}")
        print(f"- {preprocessor_path}")
        print(f"- {metadata_path}")
except Exception as e:
    # Top-level boundary: the UI must still start so it can report the problem.
    model_loaded = False
    print(f"Error loading model: {e}")


def _optional_float(value):
    """Convert an optional form value to ``float``.

    Only ``None`` and the empty string count as "missing"; a literal ``0`` is
    a real value and is converted like any other number.
    """
    if value is None or value == "":
        return None
    return float(value)


def _format_risk_indicator(probability):
    """Return the emoji-labelled risk band for a fraud probability."""
    if probability >= _HIGH_RISK_THRESHOLD:
        return f"🔴 HIGH RISK ({probability:.1%})"
    if probability >= _MEDIUM_RISK_THRESHOLD:
        return f"🟡 MEDIUM RISK ({probability:.1%})"
    if probability >= _LOW_RISK_THRESHOLD:
        return f"🟠 LOW RISK ({probability:.1%})"
    return f"🟢 VERY LOW RISK ({probability:.1%})"


def predict_fraud_risk(
    transaction_amount, card_type, email_domain, transaction_hour,
    addr1, addr2, card1, card2, dist1,
    c1, c2, c3, c4, c5, c6,
    d1, d2, d3, d4, d5,
    m1, m2, m3, m4, m5, m6
):
    """Predict fraud risk for a transaction.

    The arguments are the values of the "Single Transaction" form widgets.
    ``addr1``, ``addr2``, ``card1``, ``card2`` and ``dist1`` may be left
    blank; the remaining numeric fields are required.

    Returns:
        A 4-tuple of strings ``(risk_indicator, probability, risk_level,
        recommendation)``. On failure the first element carries the error
        message and the other three are empty strings.
    """
    if not model_loaded:
        return "❌ Model not loaded. Please contact administrator.", "", "", ""

    try:
        # Prepare transaction data
        transaction_data = {
            'TransactionAmt': float(transaction_amount),
            'card4': card_type,
            # The form collects one domain; it is used for both email fields.
            'P_emaildomain': email_domain,
            'R_emaildomain': email_domain,
            'addr1': _optional_float(addr1),
            'addr2': _optional_float(addr2),
            'card1': _optional_float(card1),
            'card2': _optional_float(card2),
            # NOTE(review): card3 is filled with the transaction amount; this
            # looks like a placeholder heuristic -- confirm against training data.
            'card3': float(transaction_amount),  # Often similar to transaction amount
            'card5': 142.0,  # Default value
            'card6': 'credit',  # Default value
            'dist1': _optional_float(dist1),
            'dist2': _optional_float(dist1),  # Often similar to dist1
            'C1': float(c1),
            'C2': float(c2),
            'C3': float(c3),
            'C4': float(c4),
            'C5': float(c5),
            'C6': float(c6),
            # Fields not exposed in the form get fixed defaults.
            'C7': 0.0,
            'C8': 0.0,
            'C9': 1.0,
            'C10': 0.0,
            'C11': 1.0,
            'C12': 1.0,
            'C13': 1.0,
            'C14': 1.0,
            'D1': float(d1),
            'D2': float(d2),
            'D3': float(d3),
            'D4': float(d4),
            'D5': float(d5),
            'D10': 0.0,
            'D15': 0.0,
            'M1': m1,
            'M2': m2,
            'M3': m3,
            'M4': m4,
            'M5': m5,
            'M6': m6,
            'TransactionDT': transaction_hour * 3600,  # Convert hour to seconds
        }

        # Make prediction
        result = fraud_model.predict_single_transaction(transaction_data)

        if 'error' in result:
            return f"❌ {result['error']}", "", "", ""

        # Format results
        probability = result['fraud_probability']
        risk_level = result['risk_level']
        recommendation = result['recommendation']

        risk_indicator = _format_risk_indicator(probability)
        return risk_indicator, f"{probability:.4f}", risk_level, recommendation

    except Exception as e:
        # UI boundary: surface the failure in the first output box.
        return f"❌ Error: {str(e)}", "", "", ""


def predict_from_csv(file):
    """Predict fraud risk for multiple transactions from CSV.

    Args:
        file: The value of the upload widget -- either an object exposing a
            ``name`` attribute with the file path, a plain path, or ``None``
            when nothing was uploaded.

    Returns:
        A 2-tuple ``(summary_text, results_path)`` matching the two outputs
        wired to the batch button. ``results_path`` is ``None`` whenever no
        results file was produced.
    """
    # Every exit path returns two values; the click handler has two outputs.
    if not model_loaded:
        return "❌ Model not loaded. Please contact administrator.", None

    if file is None:
        return "❌ Please upload a CSV file.", None

    try:
        # Accept both a plain path and a tempfile-like object with ``.name``.
        csv_path = file if isinstance(file, (str, os.PathLike)) else file.name

        # Read CSV file
        df = pd.read_csv(csv_path)
        if df.empty:
            return "❌ The uploaded CSV contains no transactions.", None

        # Make batch predictions
        results_df = fraud_model.predict_batch(df)

        total_transactions = len(results_df)
        if total_transactions == 0:
            # Avoid a division by zero in the percentage summary below.
            return "❌ The uploaded CSV contains no transactions.", None

        # Save results
        output_path = "fraud_predictions.csv"
        results_df.to_csv(output_path, index=False)

        # Create summary
        probabilities = results_df['fraud_probability']
        high_risk = int((probabilities >= _HIGH_RISK_THRESHOLD).sum())
        medium_risk = int(
            ((probabilities >= _MEDIUM_RISK_THRESHOLD)
             & (probabilities < _HIGH_RISK_THRESHOLD)).sum()
        )
        low_risk = int(
            ((probabilities >= _LOW_RISK_THRESHOLD)
             & (probabilities < _MEDIUM_RISK_THRESHOLD)).sum()
        )
        very_low_risk = int((probabilities < _LOW_RISK_THRESHOLD).sum())

        summary = "\n".join([
            "📊 **Batch Prediction Summary**",
            "",
            f"Total Transactions: {total_transactions}",
            "",
            f"🔴 High Risk: {high_risk} ({high_risk/total_transactions:.1%})",
            f"🟡 Medium Risk: {medium_risk} ({medium_risk/total_transactions:.1%})",
            f"🟠 Low Risk: {low_risk} ({low_risk/total_transactions:.1%})",
            f"🟢 Very Low Risk: {very_low_risk} ({very_low_risk/total_transactions:.1%})",
            "",
            f"Results saved to: {output_path}",
        ])

        return summary, output_path

    except Exception as e:
        # UI boundary: report the failure instead of crashing the handler.
        return f"❌ Error processing CSV: {str(e)}", None


# Create Gradio interface
with gr.Blocks(title="Fraud Detection System", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
    # 🔒 Credit Card Fraud Detection System

    This system uses machine learning to assess the risk of credit card transactions being fraudulent.
    Enter transaction details below to get a risk assessment.

    **Risk Levels:**
    - 🔴 High Risk (≥80%): Block transaction immediately
    - 🟡 Medium Risk (50-79%): Manual review required
    - 🟠 Low Risk (20-49%): Monitor transaction
    - 🟢 Very Low Risk (<20%): Process normally
    """)

    with gr.Tabs():
        # Single Transaction Tab
        with gr.TabItem("Single Transaction"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Transaction Details")
                    transaction_amount = gr.Number(label="Transaction Amount ($)", value=100.0)
                    card_type = gr.Dropdown(
                        choices=["visa", "mastercard", "american express", "discover"],
                        label="Card Type",
                        value="visa"
                    )
                    email_domain = gr.Textbox(label="Email Domain", value="gmail.com")
                    transaction_hour = gr.Slider(0, 23, label="Transaction Hour", value=12)

                    gr.Markdown("### Address & Card Info")
                    addr1 = gr.Number(label="Address 1", value=325.0)
                    addr2 = gr.Number(label="Address 2", value=87.0)
                    card1 = gr.Number(label="Card 1", value=13553)
                    card2 = gr.Number(label="Card 2", value=150.0)
                    dist1 = gr.Number(label="Distance 1", value=19.0)

                with gr.Column():
                    gr.Markdown("### Transaction Counts")
                    c1 = gr.Number(label="C1", value=1.0)
                    c2 = gr.Number(label="C2", value=1.0)
                    c3 = gr.Number(label="C3", value=0.0)
                    c4 = gr.Number(label="C4", value=0.0)
                    c5 = gr.Number(label="C5", value=0.0)
                    c6 = gr.Number(label="C6", value=1.0)

                    gr.Markdown("### Time Deltas")
                    d1 = gr.Number(label="D1", value=0.0)
                    d2 = gr.Number(label="D2", value=0.0)
                    d3 = gr.Number(label="D3", value=0.0)
                    d4 = gr.Number(label="D4", value=0.0)
                    d5 = gr.Number(label="D5", value=20.0)

                    gr.Markdown("### Match Features")
                    m1 = gr.Dropdown(choices=["T", "F"], label="M1", value="T")
                    m2 = gr.Dropdown(choices=["T", "F"], label="M2", value="T")
                    m3 = gr.Dropdown(choices=["T", "F"], label="M3", value="T")
                    m4 = gr.Dropdown(choices=["M0", "M1", "M2"], label="M4", value="M0")
                    m5 = gr.Dropdown(choices=["T", "F"], label="M5", value="F")
                    m6 = gr.Dropdown(choices=["T", "F"], label="M6", value="F")

            predict_btn = gr.Button("🔍 Analyze Transaction", variant="primary", size="lg")

            with gr.Row():
                risk_output = gr.Textbox(label="Risk Assessment", lines=1)
                probability_output = gr.Textbox(label="Fraud Probability", lines=1)

            with gr.Row():
                risk_level_output = gr.Textbox(label="Risk Level", lines=1)
                recommendation_output = gr.Textbox(label="Recommendation", lines=2)

            # Input order must match the parameter order of predict_fraud_risk.
            predict_btn.click(
                predict_fraud_risk,
                inputs=[
                    transaction_amount, card_type, email_domain, transaction_hour,
                    addr1, addr2, card1, card2, dist1,
                    c1, c2, c3, c4, c5, c6,
                    d1, d2, d3, d4, d5,
                    m1, m2, m3, m4, m5, m6
                ],
                outputs=[risk_output, probability_output, risk_level_output, recommendation_output]
            )

        # Batch Processing Tab
        with gr.TabItem("Batch Processing"):
            gr.Markdown("""
            ### Upload CSV File for Batch Processing

            Upload a CSV file containing multiple transactions. The file should include the same columns as used in single transaction prediction.
            """)

            file_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
            batch_btn = gr.Button("🔍 Process Batch", variant="primary")

            batch_output = gr.Textbox(label="Batch Results", lines=10)
            download_file = gr.File(label="Download Results")

            # predict_from_csv always returns (summary, path-or-None).
            batch_btn.click(
                predict_from_csv,
                inputs=[file_upload],
                outputs=[batch_output, download_file]
            )

        # Model Info Tab
        with gr.TabItem("Model Information"):
            # The detailed view needs metadata; otherwise show the generic text.
            if model_loaded and fraud_model.metadata:
                model_info = fraud_model.get_model_info()
                gr.Markdown(f"""
                ### Model Status
                **Status:** ✅ {model_info.get('model_name', 'XGBoost')} Model Loaded
                **AUC Score:** {model_info.get('auc_score', 'N/A')}
                **Training Date:** {model_info.get('training_timestamp', 'N/A')}
                **Features:** {model_info.get('feature_count', 'N/A')}

                ### About This Model
                This fraud detection system uses an **XGBoost classifier** trained on a comprehensive dataset of credit card transactions.
                The model achieved high performance with advanced feature engineering and ensemble learning techniques.

                ### Model Performance
                - **Algorithm**: XGBoost (Extreme Gradient Boosting)
                - **AUC Score**: {model_info.get('auc_score', 'N/A')}
                - **Features Used**: {model_info.get('feature_count', 'N/A')} engineered features
                - **Training Method**: Cross-validation with stratified sampling
                - **Speed**: Real-time predictions (<100ms)

                ### Features Used
                The model processes over 40 features including:
                - **Transaction Details**: Amount, timing, frequency patterns
                - **Card Information**: Type, issuer details, security features
                - **User Behaviour**: Email domains, address patterns, historical counts
                - **Device & Session**: Geographic data, device fingerprinting
                - **Engineered Features**: Ratios, transformations, temporal patterns

                ### XGBoost Advantages
                - **High Accuracy**: Excellent performance on tabular data
                - **Feature Importance**: Clear understanding of decision factors
                - **Robustness**: Handles missing values and outliers well
                - **Scalability**: Efficient training and inference
                """)
            else:
                gr.Markdown(f"""
                ### Model Status
                **Status:** {'✅ Basic Model Loaded' if model_loaded else '❌ Not Loaded'}

                ### About This Model
                This fraud detection system uses advanced machine learning algorithms to assess transaction risk.
                The model was trained on a large dataset of credit card transactions and uses multiple features
                including transaction amount, card details, user behaviour patterns, and timing information.

                ### Features Used
                - Transaction amount and timing
                - Card information (type, numbers)
                - Email domain patterns
                - Address information
                - User behaviour counts
                - Device and session data

                ### Model Performance
                - **Algorithm**: Ensemble methods (Random Forest, XGBoost, LightGBM)
                - **Accuracy**: High precision in detecting fraudulent transactions
                - **Speed**: Real-time predictions
                """)

# Launch the app
if __name__ == "__main__":
    app.launch()