File size: 4,465 Bytes
7c5d1d0
7a63bb7
a719e13
2750f6c
 
a178285
2750f6c
de496ae
7c5d1d0
4339fe4
2750f6c
a178285
 
 
 
 
 
 
7a63bb7
4339fe4
 
 
 
 
 
 
 
 
 
2750f6c
4339fe4
2750f6c
96a2bd1
 
 
 
 
 
abb49f7
 
 
 
 
 
 
96a2bd1
6cde6ed
abb49f7
 
 
 
7a63bb7
2ab8b05
 
 
a719e13
 
 
 
2d5fce6
de496ae
a719e13
 
 
de496ae
 
2d5fce6
a719e13
 
 
7a63bb7
4339fe4
373ea98
 
 
 
 
4339fe4
 
 
 
 
 
 
96a2bd1
 
 
 
 
 
 
4339fe4
a178285
96a2bd1
a178285
 
 
 
 
 
 
 
 
 
96a2bd1
abb49f7
 
96a2bd1
 
a178285
 
 
96a2bd1
 
a178285
96a2bd1
 
2750f6c
a178285
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
import numpy as np
import pandas as pd
import joblib
import warnings
from huggingface_hub import hf_hub_download

warnings.filterwarnings("ignore")

# Load ensemble model
def load_model():
    """Download the pickled ensemble model from the Hugging Face Hub and load it.

    Returns:
        The object deserialized by ``joblib.load`` from
        ``final_ensemble_model.pkl``.
    """
    model_path = hf_hub_download(
        repo_id="Zeyadd-Mostaffa/final_ensemble_model",
        filename="final_ensemble_model.pkl",
    )
    # SECURITY: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code. This is only safe while the Hub repository is trusted.
    model = joblib.load(model_path)
    # The status marker was mis-encoded (UTF-8 bytes decoded with a legacy
    # code page); restored to the intended check-mark character.
    print("✅ Ensemble model loaded successfully.")
    return model

# Load scaler
def load_scaler():
    """Download the pickled feature scaler from the Hugging Face Hub and load it.

    Returns:
        The object deserialized by ``joblib.load`` from ``scaler.pkl``.
    """
    scaler_path = hf_hub_download(
        repo_id="Zeyadd-Mostaffa/final_ensemble_model",
        filename="scaler.pkl",
    )
    # SECURITY: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code. This is only safe while the Hub repository is trusted.
    scaler = joblib.load(scaler_path)
    # The status marker was mis-encoded (UTF-8 bytes decoded with a legacy
    # code page); restored to the intended check-mark character.
    print("✅ Scaler loaded successfully.")
    return scaler

# Load both artifacts once, when the module is executed, and keep them as
# module-level globals; predict_employee_status reads `model` and `scaler`
# directly on every call.
model = load_model()
scaler = load_scaler()

# Define prediction function
# Feature columns, in the order the DataFrame handed to the model is laid out.
_EXPECTED_COLUMNS = [
    'satisfaction_level', 'last_evaluation', 'number_project', 'average_monthly_hours',
    'time_spend_company', 'Work_accident', 'promotion_last_5years', 'salary',
    'satisfaction_evaluation', 'work_balance',
    'department_IT', 'department_RandD', 'department_accounting', 'department_hr',
    'department_management', 'department_marketing', 'department_product_mng',
    'department_sales', 'department_support', 'department_technical',
]

# Columns passed through the scaler, in the order the scaler receives them.
# NOTE(review): this order must match the one used when the scaler was fitted.
_SCALED_COLUMNS = [
    'satisfaction_level', 'last_evaluation',
    'average_monthly_hours', 'number_project', 'work_balance',
]


def _build_feature_frame(
    satisfaction_level, last_evaluation, number_project,
    average_monthly_hours, time_spend_company,
    work_accident, promotion_last_5years, salary, department,
):
    """Return a one-row DataFrame holding raw, engineered and one-hot features."""
    row = {
        "satisfaction_level": satisfaction_level,
        "last_evaluation": last_evaluation,
        "number_project": number_project,
        "average_monthly_hours": average_monthly_hours,
        "time_spend_company": time_spend_company,
        "Work_accident": work_accident,
        "promotion_last_5years": promotion_last_5years,
        "salary": salary,
        # Engineered features derived from the raw inputs.
        "satisfaction_evaluation": satisfaction_level * last_evaluation,
        "work_balance": average_monthly_hours / number_project,
    }

    # One-hot encode the department. A value that matches no known column
    # leaves every indicator at 0.
    dept_key = f"department_{department}"
    for col in _EXPECTED_COLUMNS:
        if col.startswith("department_"):
            row[col] = 1 if col == dept_key else 0

    return pd.DataFrame([row], columns=_EXPECTED_COLUMNS)


def predict_employee_status(
    satisfaction_level, last_evaluation, number_project,
    average_monthly_hours, time_spend_company,
    work_accident, promotion_last_5years, salary, department, threshold=0.5
):
    """Predict whether an employee is likely to quit and describe the result.

    Args:
        satisfaction_level: Satisfaction score (the UI asks for 0.0 - 1.0).
        last_evaluation: Last evaluation score (the UI asks for 0.0 - 1.0).
        number_project: Number of projects; must be greater than 0 because it
            is the divisor of the ``work_balance`` feature.
        average_monthly_hours: Average hours worked per month.
        time_spend_company: Years spent at the company.
        work_accident: 0 or 1.
        promotion_last_5years: 0 or 1.
        salary: Encoded salary level (the UI uses 0 = low, 1 = medium, 2 = high).
        department: Department name matching a ``department_*`` column suffix
            (e.g. ``"sales"``); any other value sets no department indicator.
        threshold: Probability at or above which the employee is reported as
            likely to quit.

    Returns:
        A human-readable string: the prediction with its probability, or a
        message starting with "❌" when the inputs are invalid or the
        prediction fails. The function reports problems in the returned
        text instead of raising.
    """
    required = {
        "satisfaction_level": satisfaction_level,
        "last_evaluation": last_evaluation,
        "number_project": number_project,
        "average_monthly_hours": average_monthly_hours,
        "time_spend_company": time_spend_company,
        "work_accident": work_accident,
        "promotion_last_5years": promotion_last_5years,
        "salary": salary,
    }
    # Empty form fields arrive as None; report them instead of failing with a
    # TypeError in the feature arithmetic.
    missing = [name for name, value in required.items() if value is None]
    if missing:
        return f"❌ Invalid input: missing value for {', '.join(missing)}."

    try:
        # work_balance divides by number_project, so zero (or a negative
        # count) cannot produce a meaningful feature.
        if number_project <= 0:
            return "❌ Invalid input: number_project must be greater than 0."

        input_df = _build_feature_frame(
            satisfaction_level, last_evaluation, number_project,
            average_monthly_hours, time_spend_company,
            work_accident, promotion_last_5years, salary, department,
        )

        # Apply scaling to same numerical columns as training
        input_df[_SCALED_COLUMNS] = scaler.transform(input_df[_SCALED_COLUMNS])

        prob = model.predict_proba(input_df)[0][1]
        result = "✅ Employee is likely to quit." if prob >= threshold else "✅ Employee is likely to stay."
        return f"{result} (Probability: {prob:.2%})"
    except Exception as e:
        # UI boundary: surface any failure as text rather than crashing the app.
        return f"❌ Prediction error: {str(e)}"

# Gradio UI
def gradio_interface():
    """Assemble the Gradio form around ``predict_employee_status`` and launch it.

    The input components are declared in the same order as the prediction
    function's parameters, ending with the threshold slider.
    """
    department_choices = [
        'IT', 'RandD', 'accounting', 'hr', 'management',
        'marketing', 'product_mng', 'sales', 'support', 'technical',
    ]

    form_inputs = [
        gr.Number(label="Satisfaction Level (0.0 - 1.0)"),
        gr.Number(label="Last Evaluation (0.0 - 1.0)"),
        gr.Number(label="Number of Projects (1 - 10)"),
        gr.Number(label="Average Monthly Hours (80 - 320)"),
        gr.Number(label="Time Spend at Company (Years)"),
        gr.Radio([0, 1], label="Work Accident (0 = No, 1 = Yes)"),
        gr.Radio([0, 1], label="Promotion in Last 5 Years (0 = No, 1 = Yes)"),
        gr.Radio([0, 1, 2], label="Salary (0 = Low, 1 = Medium, 2 = High)"),
        gr.Dropdown(department_choices, label="Department"),
        gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Prediction Threshold"),
    ]

    # Build the app and start serving it in one step.
    gr.Interface(
        fn=predict_employee_status,
        inputs=form_inputs,
        outputs="text",
        title="Employee Retention Prediction System (Voting Ensemble)",
        description="Predict whether an employee is likely to stay or quit based on their profile. Supports threshold adjustment.",
        theme="dark",
    ).launch()

# Build and launch the UI as soon as this module is executed. There is no
# __main__ guard, so importing the module also starts the app.
gradio_interface()