# Spaces: 0xnu/fraud-detection (status: Sleeping)
# File size: 18,872 Bytes

import gradio as gr
import pandas as pd
import numpy as np
import joblib
from model_wrapper import FraudDetectionModel
from preprocessor import FraudDataPreprocessor
from feature_utils import fill_missing_features
import os

# Wrapper object that holds the model once load_model() has been called on it.
fraud_model = FraudDetectionModel()

# Artifact file names from the training run. They are defined once so that the
# existence checks and the "files not found" help text cannot drift apart.
model_path = "fraud_detection_model_xgboost_20250727_145448.joblib"
preprocessor_path = "preprocessor_20250727_145448.joblib"
metadata_path = "model_metadata_20250727_145448.joblib"

# Read by the prediction functions and by the UI. It only becomes True after
# load_model() has returned without raising.
model_loaded = False

# Load model if files exist. Loading is best-effort: any failure is reported
# on stdout and the app still starts, with predictions disabled.
try:
    # Make FraudDataPreprocessor reachable as an attribute of __main__.
    # NOTE(review): presumably the preprocessor artifact was pickled from a
    # script where the class lived in __main__, so unpickling looks it up
    # there - confirm against the training code.
    import sys
    sys.modules['__main__'].FraudDataPreprocessor = FraudDataPreprocessor

    if os.path.exists(model_path) and os.path.exists(preprocessor_path):
        # The metadata file is optional; pass it only when it is present.
        if os.path.exists(metadata_path):
            fraud_model.load_model(model_path, preprocessor_path, metadata_path)
        else:
            fraud_model.load_model(model_path, preprocessor_path)
        model_loaded = True
        print("✅ Model loaded successfully!")
    else:
        print("❌ Model files not found. Please upload the following files:")
        for required_path in (model_path, preprocessor_path, metadata_path):
            print(f"- {required_path}")
except Exception as e:
    # Top-level boundary: keep the app alive and surface the reason.
    model_loaded = False
    print(f"❌ Error loading model: {e}")

def _optional_number(value, cast, default=None):
    """Convert a form value with *cast*, or return *default* if it is blank.

    Only ``None`` and the empty string count as blank, so a legitimate ``0``
    is converted instead of being mistaken for a missing value.
    """
    if value is None or value == "":
        return default
    return cast(value)


def predict_single_transaction(
    transaction_id,
    transaction_dt,
    transaction_amt,
    product_cd,
    card1,
    card2,
    card3,
    card4,
    card5,
    card6,
    addr1,
    addr2,
    p_emaildomain
):
    """Predict fraud risk for a single transaction with exact API fields.

    The arguments are the raw values of the single-transaction form, in the
    same order as the form inputs. Blank ``transaction_id`` and
    ``transaction_dt`` fall back to 123456 and 18403200; blank card and
    address numbers become ``None``. ``transaction_amt`` is required.

    Returns:
        A 4-tuple of strings ``(risk_indicator, probability, risk_level,
        recommendation)``. ``probability`` is formatted with four decimals.
        On any failure the first element carries an error message starting
        with "❌" and the other three are empty strings.
    """

    if not model_loaded:
        return "❌ Model not loaded. Please contact administrator.", "", "", ""

    # Report a missing amount explicitly rather than leaking the float()
    # TypeError text to the user.
    if transaction_amt is None or transaction_amt == "":
        return "❌ Error: Transaction amount is required.", "", "", ""

    try:
        # Prepare transaction data exactly as API expects
        transaction_data = {
            'TransactionID': _optional_number(transaction_id, int, 123456),
            'TransactionDT': _optional_number(transaction_dt, int, 18403200),
            'TransactionAmt': float(transaction_amt),
            'ProductCD': product_cd,
            'card1': _optional_number(card1, int),
            'card2': _optional_number(card2, float),
            'card3': _optional_number(card3, float),
            'card4': card4,
            'card5': _optional_number(card5, float),
            'card6': card6,
            'addr1': _optional_number(addr1, float),
            'addr2': _optional_number(addr2, float),
            'P_emaildomain': p_emaildomain,
            'R_emaildomain': p_emaildomain  # Often same as P_emaildomain
        }

        # Fill missing features with defaults
        complete_data = fill_missing_features(transaction_data)

        # Make prediction
        result = fraud_model.predict_single_transaction(complete_data)

        # The model wrapper signals failure through an 'error' key.
        if 'error' in result:
            return f"❌ {result['error']}", "", "", ""

        # Format results
        probability = result['fraud_probability']
        risk_level = result['risk_level']
        recommendation = result['recommendation']

        # Map the probability onto the four risk bands shown in the UI.
        if probability >= 0.8:
            risk_indicator = f"🔴 HIGH RISK ({probability:.1%})"
        elif probability >= 0.5:
            risk_indicator = f"🟡 MEDIUM RISK ({probability:.1%})"
        elif probability >= 0.2:
            risk_indicator = f"🟠 LOW RISK ({probability:.1%})"
        else:
            risk_indicator = f"🟢 VERY LOW RISK ({probability:.1%})"

        return risk_indicator, f"{probability:.4f}", risk_level, recommendation

    except Exception as e:
        # UI boundary: show the problem in the first output box instead of
        # letting the handler raise.
        return f"❌ Error: {str(e)}", "", "", ""

def _annotate_row(row, fraud_probability, risk_level, recommendation, is_suspicious):
    """Return a copy of *row* extended with the four prediction columns."""
    annotated = row.copy()
    annotated['fraud_probability'] = fraud_probability
    annotated['risk_level'] = risk_level
    annotated['recommendation'] = recommendation
    annotated['is_suspicious'] = is_suspicious
    return annotated


def predict_batch_from_csv(file):
    """Predict fraud risk for multiple transactions from CSV.

    Args:
        file: The uploaded CSV, either an object whose ``.name`` attribute
            holds the file path or the path itself. ``None`` when nothing
            was uploaded.

    Returns:
        A ``(summary, output_path)`` tuple. ``summary`` is the text for the
        summary box. ``output_path`` is the path of the results CSV, or
        ``None`` when the batch could not be processed at all. Rows that
        fail individually are kept in the results with ``risk_level`` set
        to ``'Error'`` and the reason in ``recommendation``.
    """

    if not model_loaded:
        return "❌ Model not loaded. Please contact administrator.", None

    if file is None:
        return "❌ Please upload a CSV file.", None

    try:
        # Accept both a file-like upload object (path in .name) and a plain
        # path string.
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)

        # Validate required columns
        required_cols = ['TransactionAmt']
        missing_cols = [col for col in required_cols if col not in df.columns]

        if missing_cols:
            return f"❌ Missing required columns: {missing_cols}. Please ensure your CSV has at least 'TransactionAmt' column.", None

        # A header-only file would otherwise produce an empty results frame
        # without a 'fraud_probability' column and fail with a bare KeyError.
        if df.empty:
            return "❌ The uploaded CSV contains no transactions.", None

        # Add default TransactionID if not present
        if 'TransactionID' not in df.columns:
            df['TransactionID'] = range(1, len(df) + 1)

        # Process each row and make predictions
        results = []

        for _, row in df.iterrows():
            try:
                # Fill missing features for this row
                complete_data = fill_missing_features(row.to_dict())

                # Make prediction
                result = fraud_model.predict_single_transaction(complete_data)

                if 'error' in result:
                    # The model wrapper reported a failure for this row.
                    row_result = _annotate_row(
                        row,
                        None,
                        'Error',
                        result.get('error', 'Prediction failed'),
                        False,
                    )
                else:
                    row_result = _annotate_row(
                        row,
                        result['fraud_probability'],
                        result['risk_level'],
                        result['recommendation'],
                        result['is_suspicious'],
                    )
            except Exception as e:
                # One bad row must not abort the whole batch.
                row_result = _annotate_row(
                    row, None, 'Error', f'Processing error: {str(e)}', False
                )

            results.append(row_result)

        # Create results DataFrame
        results_df = pd.DataFrame(results)

        # Save results
        # NOTE(review): fixed file name in the working directory - every call
        # overwrites the previous results. Consider a per-request temp file
        # if concurrent use is expected.
        output_path = "fraud_predictions_batch.csv"
        results_df.to_csv(output_path, index=False)

        # Create summary
        valid_predictions = results_df[results_df['fraud_probability'].notna()]
        total_transactions = len(results_df)
        valid_count = len(valid_predictions)

        if valid_count > 0:
            # Same risk bands as the single-transaction view.
            probabilities = valid_predictions['fraud_probability']
            high_risk = int((probabilities >= 0.8).sum())
            medium_risk = int(((probabilities >= 0.5) & (probabilities < 0.8)).sum())
            low_risk = int(((probabilities >= 0.2) & (probabilities < 0.5)).sum())
            very_low_risk = int((probabilities < 0.2).sum())

            summary = f"""
📊 **Batch Prediction Summary**

Total Transactions: {total_transactions}
Successfully Processed: {valid_count}
Errors: {total_transactions - valid_count}

**Risk Distribution:**
🔴 High Risk: {high_risk} ({high_risk/valid_count:.1%})
🟡 Medium Risk: {medium_risk} ({medium_risk/valid_count:.1%})
🟠 Low Risk: {low_risk} ({low_risk/valid_count:.1%})
🟢 Very Low Risk: {very_low_risk} ({very_low_risk/valid_count:.1%})

Results saved to: {output_path}
            """
        else:
            summary = f"""
❌ **Batch Processing Failed**

Total Transactions: {total_transactions}
Successfully Processed: 0
All transactions encountered errors.

Please check your CSV format and try again.
            """

        return summary, output_path

    except Exception as e:
        # UI boundary: unreadable or malformed files end up here.
        return f"❌ Error processing CSV: {str(e)}", None

# Build the Gradio interface. Every component created inside this `with` block
# belongs to `app`, which is launched at the bottom of the file. The UI has
# four tabs: single prediction, CSV batch processing, sample data, model info.
with gr.Blocks(title="Fraud Detection System", theme=gr.themes.Soft()) as app:

    # Page header. The risk bands listed here use the same 0.8 / 0.5 / 0.2
    # cut-offs as predict_single_transaction and the batch summary.
    gr.Markdown("""
    # 🔒 Credit Card Fraud Detection System
    
    This system uses **XGBoost machine learning** to assess the risk of credit card transactions being fraudulent.
    Enter transaction details for single prediction or upload CSV for batch processing.
    
    **Risk Levels:**
    - 🔴 High Risk (≥80%): Block transaction immediately
    - 🟡 Medium Risk (50-79%): Manual review required  
    - 🟠 Low Risk (20-49%): Monitor transaction
    - 🟢 Very Low Risk (<20%): Process normally
    """)

    with gr.Tabs():

        # Single Transaction Tab: a two-column form whose 13 inputs feed
        # predict_single_transaction.
        with gr.TabItem("🔍 Single Transaction"):
            gr.Markdown("""
            ### Single Transaction Fraud Detection
            Enter the transaction details below for instant fraud risk assessment.
            """)

            with gr.Row():
                # Left column: transaction fields and the first three card fields.
                with gr.Column():
                    gr.Markdown("### 📝 Transaction Information")
                    # The defaults for ID and DateTime equal the fallbacks that
                    # predict_single_transaction uses for blank values.
                    transaction_id = gr.Number(label="Transaction ID", value=123456, precision=0)
                    transaction_dt = gr.Number(label="Transaction DateTime (seconds)", value=18403200, precision=0)
                    transaction_amt = gr.Number(label="Transaction Amount ($)", value=150.00)
                    product_cd = gr.Dropdown(
                        choices=["W", "C", "S", "R", "H"],
                        label="Product Code",
                        value="W"
                    )

                    gr.Markdown("### 💳 Card Information")
                    card1 = gr.Number(label="Card 1", value=4532015112830366, precision=0)
                    card2 = gr.Number(label="Card 2", value=404.0)
                    card3 = gr.Number(label="Card 3", value=150.0)

                # Right column: remaining card fields, address and email.
                with gr.Column():
                    gr.Markdown("### 💳 Card Details")
                    card4 = gr.Dropdown(
                        choices=["visa", "mastercard", "american express", "discover"],
                        label="Card Type",
                        value="visa"
                    )
                    card5 = gr.Number(label="Card 5", value=142.0)
                    card6 = gr.Dropdown(
                        choices=["credit", "debit"],
                        label="Card Category",
                        value="credit"
                    )

                    gr.Markdown("### 📍 Address Information")
                    addr1 = gr.Number(label="Address 1", value=315.0)
                    addr2 = gr.Number(label="Address 2", value=87.0)

                    gr.Markdown("### 📧 Email Information")
                    # predict_single_transaction sends this one value as both
                    # P_emaildomain and R_emaildomain.
                    p_emaildomain = gr.Textbox(label="Email Domain", value="gmail.com")

            predict_btn = gr.Button("🔍 Analyze Transaction", variant="primary", size="lg")

            # Four output boxes, one per element of the tuple returned by
            # predict_single_transaction.
            gr.Markdown("### 📊 Prediction Results")
            with gr.Row():
                risk_output = gr.Textbox(label="Risk Assessment", lines=1)
                probability_output = gr.Textbox(label="Fraud Probability", lines=1)

            with gr.Row():
                risk_level_output = gr.Textbox(label="Risk Level", lines=1)
                recommendation_output = gr.Textbox(label="Recommendation", lines=2)

            # The input list must stay in the same order as the parameters of
            # predict_single_transaction, and the outputs in the order of its
            # return tuple.
            predict_btn.click(
                predict_single_transaction,
                inputs=[
                    transaction_id, transaction_dt, transaction_amt, product_cd,
                    card1, card2, card3, card4, card5, card6,
                    addr1, addr2, p_emaildomain
                ],
                outputs=[risk_output, probability_output, risk_level_output, recommendation_output]
            )

        # Batch Processing Tab: upload a CSV, run predict_batch_from_csv and
        # offer the results file for download.
        with gr.TabItem("📁 Batch Processing"):
            gr.Markdown("""
            ### CSV Batch Processing
            
            Upload a CSV file containing multiple transactions for batch fraud detection.
            
            **Required CSV Columns:**
            - `TransactionAmt` (required)
            - `TransactionID` (optional - will be auto-generated)
            - `TransactionDT`, `ProductCD`, `card1-6`, `addr1-2`, `P_emaildomain` (optional - smart defaults used)
            
            **Example CSV Format:**
            ```
            TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,P_emaildomain
            123456,18403200,150.00,W,4532015112830366,404.0,150.0,visa,142.0,credit,315.0,87.0,gmail.com
            123457,18403300,2500.00,C,5555555555554444,555.0,200.0,mastercard,224.0,credit,420.0,95.0,yahoo.com
            ```
            """)

            file_upload = gr.File(
                label="Upload CSV File",
                file_types=[".csv"],
                elem_id="csv-upload"
            )
            batch_btn = gr.Button("🔍 Process Batch", variant="primary", size="lg")

            gr.Markdown("### 📊 Batch Results")
            batch_output = gr.Textbox(label="Processing Summary", lines=12)
            download_file = gr.File(label="Download Results CSV")

            # predict_batch_from_csv returns (summary text, results path or None),
            # matching the two outputs below.
            batch_btn.click(
                predict_batch_from_csv,
                inputs=[file_upload],
                outputs=[batch_output, download_file]
            )

        # Sample Data Tab: static examples only, no event handlers.
        with gr.TabItem("📋 Sample Data"):
            gr.Markdown("""
            ### Sample Transaction Data
            
            Use these examples to test the system or as a template for your CSV files.
            """)

            gr.Markdown("""
            #### Example 1: Low Risk Transaction
            ```json
            {
                "TransactionID": 123456,
                "TransactionDT": 18403200,
                "TransactionAmt": 150.00,
                "ProductCD": "W",
                "card1": 4532015112830366,
                "card2": 404.0,
                "card3": 150.0,
                "card4": "visa",
                "card5": 142.0,
                "card6": "credit",
                "addr1": 315.0,
                "addr2": 87.0,
                "P_emaildomain": "gmail.com"
            }
            ```
            
            #### Example 2: Higher Risk Transaction
            ```json
            {
                "TransactionID": 123457,
                "TransactionDT": 18403300,
                "TransactionAmt": 2500.00,
                "ProductCD": "C",
                "card1": 5555555555554444,
                "card2": 555.0,
                "card3": 200.0,
                "card4": "mastercard",
                "card5": 224.0,
                "card6": "credit",
                "addr1": 420.0,
                "addr2": 95.0,
                "P_emaildomain": "yahoo.com"
            }
            ```
            
            #### CSV Sample File
            You can copy this into a CSV file for batch testing:
            ```
            TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,P_emaildomain
            123456,18403200,150.00,W,4532015112830366,404.0,150.0,visa,142.0,credit,315.0,87.0,gmail.com
            123457,18403300,2500.00,C,5555555555554444,555.0,200.0,mastercard,224.0,credit,420.0,95.0,yahoo.com
            123458,18403400,75.50,W,4111111111111111,300.0,75.0,visa,100.0,debit,200.0,50.0,hotmail.com
            ```
            """)

        # Model Info Tab. The branch is chosen once, while the UI is being
        # built, so the text reflects the load status at startup.
        with gr.TabItem("ℹ️ Model Information"):
            if model_loaded and fraud_model.metadata:
                # Detailed view: values come from the model wrapper, with
                # 'N/A' (or 'XGBoost' for the name) when a key is absent.
                model_info = fraud_model.get_model_info()
                gr.Markdown(f"""
                ### Model Status
                **Status:** ✅ {model_info.get('model_name', 'XGBoost')} Model Loaded  
                **AUC Score:** {model_info.get('auc_score', 'N/A')}  
                **Training Date:** {model_info.get('training_timestamp', 'N/A')}  
                **Features:** {model_info.get('feature_count', 'N/A')}  
                
                ### About This Model
                This fraud detection system uses an **XGBoost classifier** trained on a comprehensive dataset 
                of credit card transactions. The model achieved high performance with advanced feature engineering
                and ensemble learning techniques.
                
                ### API Compatible Interface
                This interface matches the exact field structure expected by the fraud detection API:
                
                **Single Prediction Endpoint:** `/v1/predict`  
                **Batch Prediction Endpoint:** `/v1/predict/batch`
                
                ### Supported Fields
                - **TransactionID**: Unique transaction identifier
                - **TransactionDT**: Transaction datetime (seconds)
                - **TransactionAmt**: Transaction amount in USD
                - **ProductCD**: Product code (W, C, S, R, H)
                - **card1-6**: Card-related features
                - **addr1-2**: Address information
                - **P_emaildomain**: Primary email domain
                
                ### Model Performance
                - **Algorithm**: XGBoost (Extreme Gradient Boosting)
                - **AUC Score**: {model_info.get('auc_score', 'N/A')}
                - **Features Used**: {model_info.get('feature_count', 'N/A')} engineered features
                - **Training Method**: Cross-validation with stratified sampling
                - **Speed**: Real-time predictions (<100ms)
                """)
            else:
                # Fallback view: the model is loaded without metadata, or
                # not loaded at all.
                gr.Markdown(f"""
                ### Model Status
                **Status:** {'✅ Basic Model Loaded' if model_loaded else '❌ Not Loaded'}
                
                ### About This Model
                This fraud detection system uses advanced machine learning algorithms to assess transaction risk.
                The model processes transactions with the same field structure as the API endpoints.
                
                ### Features
                - Single transaction analysis
                - Batch CSV processing
                - Real-time risk assessment
                - API-compatible field structure
                """)

# Launch the app only when the file is run as a script, not when it is imported.
if __name__ == "__main__":
    app.launch()