Spaces:

Nikhillmahesh701
/

Loan_Recovery

Sleeping

App Files Files Community

Nikhillmahesh701 committed on Apr 17

Commit

a47b6e9

verified ·

1 Parent(s): 9f70e0c

Create app.py

Browse files

Files changed (1) hide show

app.py +388 -0

app.py ADDED Viewed

	@@ -0,0 +1,388 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
+import matplotlib.pyplot as plt
+import seaborn as sns
+import os
+import joblib
+from src.models.loan_recovery_model import LoanRecoveryModel
+from src.utils.data_generator import generate_loan_data
+from src.preprocessing.data_processor import LoanDataProcessor
+# Set page configuration
+st.set_page_config(
+    page_title="Smart Loan Recovery System",
+    page_icon="💰",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Define functions
+@st.cache_data
+def load_sample_data():
+    """Load or generate sample data."""
+    data_path = "data/loan_data.csv"
+    if os.path.exists(data_path):
+        return pd.read_csv(data_path)
+    else:
+        data = generate_loan_data(n_samples=1000)
+        os.makedirs("data", exist_ok=True)
+        data.to_csv(data_path, index=False)
+        return data
+@st.cache_resource
+def load_model(model_type="random_forest"):
+    """Load the trained model."""
+    model_path = f"models/loan_recovery_{model_type}.pkl"
+    # Check if model exists, if not train it
+    if not os.path.exists(model_path):
+        st.info(f"Model not found. Training a new {model_type} model...")
+        from src.train_model import train_and_save_model
+        train_and_save_model(model_type=model_type)
+    return LoanRecoveryModel.load_model(model_path)
+def predict_recovery(model, data):
+    """Make predictions using the model."""
+    recovery_probs = model.predict(data)
+    return recovery_probs
+def plot_recovery_distribution(data):
+    """Plot the distribution of recovery status."""
+    fig, ax = plt.subplots(figsize=(10, 6))
+    recovery_counts = data['recovery_status'].value_counts()
+    labels = ['Not Recovered', 'Recovered']
+    ax.bar(labels, recovery_counts.values)
+    ax.set_ylabel('Count')
+    ax.set_title('Distribution of Loan Recovery Status')
+    for i, v in enumerate(recovery_counts.values):
+        ax.text(i, v + 5, str(v), ha='center')
+    # Add percentage labels
+    total = len(data)
+    for i, v in enumerate(recovery_counts.values):
+        percentage = v / total * 100
+        ax.text(i, v/2, f"{percentage:.1f}%", ha='center', color='white', fontweight='bold')
+    return fig
+def plot_feature_importance(model):
+    """Plot feature importance."""
+    return model.plot_feature_importance(top_n=10)
+def plot_recovery_by_feature(data, feature, is_categorical=False):
+    """Plot recovery rate by a specific feature."""
+    fig, ax = plt.subplots(figsize=(10, 6))
+    if is_categorical:
+        # For categorical features
+        recovery_by_feature = data.groupby(feature)['recovery_status'].mean().sort_values()
+        counts = data.groupby(feature).size()
+        # Create a bar plot
+        bars = ax.bar(recovery_by_feature.index, recovery_by_feature.values * 100)
+        ax.set_ylabel('Recovery Rate (%)')
+        ax.set_title(f'Recovery Rate by {feature.replace("_", " ").title()}')
+        ax.set_ylim(0, 100)
+        # Add count labels
+        for i, (idx, count) in enumerate(counts.items()):
+            ax.text(i, 5, f"n={count}", ha='center', color='white', fontweight='bold')
+        # Rotate x-axis labels if needed
+        if len(recovery_by_feature) > 5:
+            plt.xticks(rotation=45, ha='right')
+    else:
+        # For numerical features, create bins
+        if feature in ['age', 'loan_term', 'previous_defaults', 'days_past_due']:
+            # These features have a small range, so we can use them directly
+            data['feature_bin'] = data[feature]
+        else:
+            # Create bins for continuous features
+            data['feature_bin'] = pd.qcut(data[feature], 5, duplicates='drop')
+        # Calculate recovery rate by bin
+        recovery_by_bin = data.groupby('feature_bin')['recovery_status'].mean().sort_index()
+        counts = data.groupby('feature_bin').size()
+        # Create a bar plot
+        bars = ax.bar(range(len(recovery_by_bin)), recovery_by_bin.values * 100)
+        ax.set_ylabel('Recovery Rate (%)')
+        ax.set_title(f'Recovery Rate by {feature.replace("_", " ").title()}')
+        ax.set_ylim(0, 100)
+        # Set x-axis labels
+        if feature in ['age', 'loan_term', 'previous_defaults', 'days_past_due']:
+            ax.set_xticks(range(len(recovery_by_bin)))
+            ax.set_xticklabels(recovery_by_bin.index)
+        else:
+            # Format bin labels
+            bin_labels = []
+            for bin_range in recovery_by_bin.index:
+                if hasattr(bin_range, 'left') and hasattr(bin_range, 'right'):
+                    bin_labels.append(f"{bin_range.left:.1f}-{bin_range.right:.1f}")
+                else:
+                    bin_labels.append(str(bin_range))
+            ax.set_xticks(range(len(recovery_by_bin)))
+            ax.set_xticklabels(bin_labels)
+            plt.xticks(rotation=45, ha='right')
+        # Add count labels
+        for i, count in enumerate(counts.values):
+            ax.text(i, 5, f"n={count}", ha='center', color='white', fontweight='bold')
+        # Add feature name to x-axis
+        ax.set_xlabel(feature.replace("_", " ").title())
+    plt.tight_layout()
+    return fig
+# Main application
+def main():
+    # Header
+    st.title("Smart Loan Recovery System")
+    st.image("https://img.icons8.com/color/96/000000/loan.png", width=100)
+    # Load data and model
+    data = load_sample_data()
+    # Load Random Forest model only
+    model = load_model("random_forest")
+    # Prediction page
+    st.title("Predict Loan Recovery")
+    st.write("""
+    Use this tool to predict the probability of recovering a loan based on customer and loan information.
+    You can either:
+    1. Enter information for a single loan
+    2. Upload a CSV file with multiple loans
+    """)
+    prediction_type = st.radio("Prediction Type", ["Single Loan", "Batch Prediction"])
+    if prediction_type == "Single Loan":
+        st.subheader("Enter Loan Information")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            age = st.number_input("Age", min_value=18, max_value=100, value=35)
+            gender = st.selectbox("Gender", ["Male", "Female"])
+            employment_status = st.selectbox(
+                "Employment Status",
+                ["Employed", "Self-employed", "Unemployed", "Retired"]
+            )
+            annual_income = st.number_input("Annual Income ($)", min_value=0, value=60000)
+        with col2:
+            credit_score = st.slider("Credit Score", 300, 850, 650)
+            loan_amount = st.number_input("Loan Amount ($)", min_value=1000, value=20000)
+            interest_rate = st.slider("Interest Rate (%)", 1.0, 25.0, 8.0, 0.1)
+            loan_term = st.selectbox("Loan Term (months)", [12, 24, 36, 48, 60])
+        with col3:
+            payment_history = st.selectbox(
+                "Payment History",
+                ["Excellent", "Good", "Fair", "Poor", "Very Poor"]
+            )
+            days_past_due = st.number_input("Days Past Due", min_value=0, value=0)
+            previous_defaults = st.number_input("Previous Defaults", min_value=0, max_value=10, value=0)
+        # Calculate derived features
+        monthly_payment = (loan_amount * (interest_rate/100/12) *
+                          (1 + interest_rate/100/12)**(loan_term)) / \
+                          ((1 + interest_rate/100/12)**(loan_term) - 1)
+        debt_to_income = (monthly_payment * 12) / max(1, annual_income)
+        # Display calculated values
+        st.subheader("Calculated Values")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.metric("Monthly Payment", f"${monthly_payment:.2f}")
+        with col2:
+            st.metric("Debt-to-Income Ratio", f"{debt_to_income*100:.2f}%")
+        # Create input dataframe
+        input_data = pd.DataFrame({
+            'age': [age],
+            'gender': [gender],
+            'employment_status': [employment_status],
+            'annual_income': [annual_income],
+            'credit_score': [credit_score],
+            'loan_amount': [loan_amount],
+            'interest_rate': [interest_rate],
+            'loan_term': [loan_term],
+            'payment_history': [payment_history],
+            'days_past_due': [days_past_due],
+            'previous_defaults': [previous_defaults],
+            'monthly_payment': [monthly_payment],
+            'debt_to_income': [debt_to_income]
+        })
+        # Make prediction
+        if st.button("Predict Recovery Probability"):
+            with st.spinner("Calculating recovery probability..."):
+                recovery_prob = predict_recovery(model, input_data)[0]
+                # Display result
+                st.subheader("Prediction Result")
+                # Create gauge chart for probability
+                fig, ax = plt.subplots(figsize=(10, 2))
+                ax.barh([0], [100], color='lightgray', height=0.5)
+                ax.barh([0], [recovery_prob * 100], color='green' if recovery_prob >= 0.5 else 'red', height=0.5)
+                ax.set_xlim(0, 100)
+                ax.set_yticks([])
+                ax.set_xticks([0, 25, 50, 75, 100])
+                ax.set_xticklabels(['0%', '25%', '50%', '75%', '100%'])
+                ax.axvline(50, color='gray', linestyle='--', alpha=0.5)
+                ax.text(recovery_prob * 100, 0, f"{recovery_prob*100:.1f}%",
+                        ha='center', va='center', fontweight='bold', color='black')
+                st.pyplot(fig)
+                # Recommendation
+                st.subheader("Recovery Assessment")
+                if recovery_prob >= 0.8:
+                    st.success("High probability of recovery. Standard collection procedures recommended.")
+                elif recovery_prob >= 0.5:
+                    st.info("Moderate probability of recovery. Consider offering a payment plan.")
+                elif recovery_prob >= 0.3:
+                    st.warning("Low probability of recovery. Consider debt restructuring or settlement offers.")
+                else:
+                    st.error("Very low probability of recovery. Consider debt write-off or third-party collection.")
+                # Risk factors
+                st.subheader("Key Risk Factors")
+                risk_factors = []
+                if credit_score < 600:
+                    risk_factors.append("Low credit score")
+                if days_past_due > 30:
+                    risk_factors.append("Significant payment delay")
+                if previous_defaults > 0:
+                    risk_factors.append("History of defaults")
+                if debt_to_income > 0.4:
+                    risk_factors.append("High debt-to-income ratio")
+                if payment_history in ["Poor", "Very Poor"]:
+                    risk_factors.append("Poor payment history")
+                if risk_factors:
+                    for factor in risk_factors:
+                        st.write(f"• {factor}")
+                else:
+                    st.write("No significant risk factors identified.")
+    else:  # Batch prediction
+        st.subheader("Upload CSV File")
+        st.write("""
+        Upload a CSV file with loan information. The file should contain the following columns:
+        age, gender, employment_status, annual_income, credit_score, loan_amount, interest_rate,
+        loan_term, payment_history, days_past_due, previous_defaults
+        """)
+        # Sample file download
+        sample_data = data.sample(5).drop(['customer_id', 'recovery_status'], axis=1, errors='ignore')
+        @st.cache_data
+        def convert_df_to_csv(df):
+            return df.to_csv(index=False).encode('utf-8')
+        csv = convert_df_to_csv(sample_data)
+        st.download_button(
+            "Download Sample CSV",
+            csv,
+            "sample_loans.csv",
+            "text/csv",
+            key='download-csv'
+        )
+        # File upload
+        uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+        if uploaded_file is not None:
+            # Load and display the data
+            batch_data = pd.read_csv(uploaded_file)
+            st.write("Preview of uploaded data:")
+            st.dataframe(batch_data.head())
+            # Check for required columns
+            required_cols = ['age', 'gender', 'employment_status', 'annual_income',
+                            'credit_score', 'loan_amount', 'interest_rate',
+                            'loan_term', 'payment_history', 'days_past_due',
+                            'previous_defaults']
+            missing_cols = [col for col in required_cols if col not in batch_data.columns]
+            if missing_cols:
+                st.error(f"Missing required columns: {', '.join(missing_cols)}")
+            else:
+                # Calculate derived features if not present
+                if 'monthly_payment' not in batch_data.columns:
+                    batch_data['monthly_payment'] = (
+                        batch_data['loan_amount'] * (batch_data['interest_rate']/100/12) *
+                        (1 + batch_data['interest_rate']/100/12)**(batch_data['loan_term'])
+                    ) / (
+                        (1 + batch_data['interest_rate']/100/12)**(batch_data['loan_term']) - 1
+                    )
+                if 'debt_to_income' not in batch_data.columns:
+                    batch_data['debt_to_income'] = (batch_data['monthly_payment'] * 12) / batch_data['annual_income'].replace(0, 1)
+                # Make predictions
+                if st.button("Run Batch Prediction"):
+                    with st.spinner("Processing batch predictions..."):
+                        # Make predictions
+                        recovery_probs = predict_recovery(model, batch_data)
+                        # Add predictions to the dataframe
+                        batch_data['recovery_probability'] = recovery_probs
+                        batch_data['recovery_prediction'] = (recovery_probs >= 0.5).astype(int)
+                        # Display results
+                        st.subheader("Prediction Results")
+                        st.dataframe(batch_data)
+                        # Summary statistics
+                        st.subheader("Summary")
+                        avg_prob = batch_data['recovery_probability'].mean() * 100
+                        predicted_recoveries = batch_data['recovery_prediction'].sum()
+                        recovery_rate = predicted_recoveries / len(batch_data) * 100
+                        col1, col2 = st.columns(2)
+                        with col1:
+                            st.metric("Average Recovery Probability", f"{avg_prob:.2f}%")
+                        with col2:
+                            st.metric("Predicted Recovery Rate", f"{recovery_rate:.2f}% ({predicted_recoveries}/{len(batch_data)})")
+                        # Distribution of probabilities
+                        st.subheader("Distribution of Recovery Probabilities")
+                        fig, ax = plt.subplots(figsize=(10, 6))
+                        sns.histplot(batch_data['recovery_probability'], bins=20, kde=True, ax=ax)
+                        ax.set_xlabel("Recovery Probability")
+                        ax.set_ylabel("Count")
+                        ax.axvline(0.5, color='red', linestyle='--')
+                        ax.text(0.5, ax.get_ylim()[1]*0.9, "Decision Threshold",
+                                rotation=90, va='top', ha='right', color='red')
+                        st.pyplot(fig)
+                        # Download results
+                        csv = convert_df_to_csv(batch_data)
+                        st.download_button(
+                            "Download Results CSV",
+                            csv,
+                            "loan_recovery_predictions.csv",
+                            "text/csv",
+                            key='download-results'
+                        )
+# Run the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()