# NTI_ML_Project / app.py
# Zeyadd-Mostaffa's picture
# Update app.py
# 4339fe4 verified
import gradio as gr
import numpy as np
import pandas as pd
import joblib
import warnings
from huggingface_hub import hf_hub_download
warnings.filterwarnings("ignore")
# Load ensemble model
def load_model():
    """Download the pickled ensemble model from the Hugging Face Hub and load it.

    Returns:
        The object deserialized from ``final_ensemble_model.pkl``.
    """
    model_path = hf_hub_download(
        repo_id="Zeyadd-Mostaffa/final_ensemble_model",
        filename="final_ensemble_model.pkl"
    )
    # SECURITY: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code. This is only acceptable while the repository above is
    # trusted.
    model = joblib.load(model_path)
    # The status marker was previously mis-encoded ("βœ…"); emit the intended
    # check-mark character instead.
    print("✅ Ensemble model loaded successfully.")
    return model
# Load scaler
def load_scaler():
    """Download the pickled feature scaler from the Hugging Face Hub and load it.

    Returns:
        The object deserialized from ``scaler.pkl``.
    """
    scaler_path = hf_hub_download(
        repo_id="Zeyadd-Mostaffa/final_ensemble_model",
        filename="scaler.pkl"
    )
    # SECURITY: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code. This is only acceptable while the repository above is
    # trusted.
    scaler = joblib.load(scaler_path)
    # The status marker was previously mis-encoded ("βœ…"); emit the intended
    # check-mark character instead.
    print("✅ Scaler loaded successfully.")
    return scaler
# Load both artifacts once, when the module is executed; predict_employee_status
# reads these module-level names on every call.
model = load_model()
scaler = load_scaler()
# Define prediction function
def predict_employee_status(
    satisfaction_level, last_evaluation, number_project,
    average_monthly_hours, time_spend_company,
    work_accident, promotion_last_5years, salary, department, threshold=0.5
):
    """Predict whether an employee is likely to quit or stay.

    Builds a single-row feature frame (the raw inputs, two engineered
    features and a one-hot department encoding), scales a subset of the
    numeric columns with the module-level ``scaler`` and compares the
    class-1 probability returned by the module-level ``model`` with
    ``threshold``.

    Args:
        satisfaction_level: Satisfaction score entered by the user.
        last_evaluation: Last evaluation score entered by the user.
        number_project: Number of projects; must be greater than 0 because
            it is the divisor of the ``work_balance`` feature.
        average_monthly_hours: Average hours worked per month.
        time_spend_company: Time spent at the company.
        work_accident: Work-accident indicator.
        promotion_last_5years: Promotion indicator.
        salary: Numeric salary level.
        department: Department name without the ``department_`` prefix. A
            name that matches no known column leaves every department
            indicator at 0.
        threshold: Probability at or above which the employee is reported
            as likely to quit.

    Returns:
        A message string: the prediction with its probability, or a message
        starting with the cross-mark emoji when the input is invalid or the
        scaler/model raises.
    """
    expected_columns = [
        'satisfaction_level', 'last_evaluation', 'number_project', 'average_monthly_hours',
        'time_spend_company', 'Work_accident', 'promotion_last_5years', 'salary',
        'satisfaction_evaluation', 'work_balance',
        'department_IT', 'department_RandD', 'department_accounting', 'department_hr',
        'department_management', 'department_marketing', 'department_product_mng',
        'department_sales', 'department_support', 'department_technical'
    ]

    row = {
        "satisfaction_level": satisfaction_level,
        "last_evaluation": last_evaluation,
        "number_project": number_project,
        "average_monthly_hours": average_monthly_hours,
        "time_spend_company": time_spend_company,
        "Work_accident": work_accident,
        "promotion_last_5years": promotion_last_5years,
        "salary": salary,
    }

    # Reject missing values up front: a None would otherwise raise TypeError
    # in the arithmetic below (presumably what the UI passes for a field left
    # empty -- confirm against the Gradio version in use).
    missing = [name for name, value in row.items() if value is None]
    if missing:
        return f"❌ Invalid input: missing value for {', '.join(missing)}."
    # number_project is a divisor below; zero would raise ZeroDivisionError.
    if number_project <= 0:
        return "❌ Invalid input: number_project must be greater than 0."

    # Engineered features.
    row["satisfaction_evaluation"] = satisfaction_level * last_evaluation
    row["work_balance"] = average_monthly_hours / number_project

    # One-hot encode the department; unknown names leave all indicators at 0.
    dept_key = f"department_{department}"
    row.update(
        (col, int(col == dept_key))
        for col in expected_columns
        if col.startswith("department_")
    )

    # Every expected column is present in `row`; `columns=` fixes their order.
    input_df = pd.DataFrame([row], columns=expected_columns)

    # NOTE(review): this list and its order must match what the scaler was
    # fitted on -- not verifiable from this file.
    numeric_cols = [
        'satisfaction_level', 'last_evaluation',
        'average_monthly_hours', 'number_project', 'work_balance'
    ]

    # UI boundary: report any scaler/model failure as text instead of raising.
    try:
        input_df[numeric_cols] = scaler.transform(input_df[numeric_cols])
        prob = model.predict_proba(input_df)[0][1]
        result = "✅ Employee is likely to quit." if prob >= threshold else "✅ Employee is likely to stay."
        return f"{result} (Probability: {prob:.2%})"
    except Exception as e:
        return f"❌ Prediction error: {str(e)}"
# Gradio UI
def gradio_interface():
    """Assemble the Gradio form for the retention predictor and launch it.

    The input components are listed in the same order as the parameters of
    ``predict_employee_status``; the prediction is shown as plain text.
    """
    department_dropdown = gr.Dropdown(
        ['IT', 'RandD', 'accounting', 'hr', 'management',
         'marketing', 'product_mng', 'sales', 'support', 'technical'],
        label="Department"
    )
    threshold_slider = gr.Slider(
        0.1, 0.9, value=0.5, step=0.05, label="Prediction Threshold"
    )

    form_inputs = [
        # Free-form numeric fields.
        gr.Number(label="Satisfaction Level (0.0 - 1.0)"),
        gr.Number(label="Last Evaluation (0.0 - 1.0)"),
        gr.Number(label="Number of Projects (1 - 10)"),
        gr.Number(label="Average Monthly Hours (80 - 320)"),
        gr.Number(label="Time Spend at Company (Years)"),
        # Categorical fields exposed as numeric codes.
        gr.Radio([0, 1], label="Work Accident (0 = No, 1 = Yes)"),
        gr.Radio([0, 1], label="Promotion in Last 5 Years (0 = No, 1 = Yes)"),
        gr.Radio([0, 1, 2], label="Salary (0 = Low, 1 = Medium, 2 = High)"),
        department_dropdown,
        threshold_slider,
    ]

    app = gr.Interface(
        fn=predict_employee_status,
        inputs=form_inputs,
        outputs="text",
        title="Employee Retention Prediction System (Voting Ensemble)",
        description="Predict whether an employee is likely to stay or quit based on their profile. Supports threshold adjustment.",
        theme="dark"
    )
    app.launch()
# Build and launch the UI as soon as this module is executed.
gradio_interface()