Spaces:

Gangsterbra123
/

ICS5110

Sleeping

File size: 7,105 Bytes

import gradio as gr
import pickle
import pandas as pd
import ast
import numpy as np

# Opt into pandas' future behaviour by turning on the
# 'future.no_silent_downcasting' option for the whole process.
pd.set_option('future.no_silent_downcasting', True)

# Choices offered by the dropdowns (alphabetical, except education which
# keeps its low-to-high ordering)
workclass_options = sorted([
    'State-gov',
    'Self-emp-not-inc',
    'Private',
    'Federal-gov',
    'Local-gov',
    'Self-emp-inc',
    'Without-pay',
])
education_option = [
    'Preschool', '1st-4th', '5th-6th', '7th-8th',
    '9th', '10th', '11th', '12th',
    'HS-grad', 'Some-college', 'Assoc-voc', 'Assoc-acdm',
    'Bachelors', 'Masters', 'Prof-school', 'Doctorate',
]
marital_status_option = sorted([
    'Never-married',
    'Married-civ-spouse',
    'Divorced',
    'Separated',
    'Married-AF-spouse',
    'Widowed',
    'Married-spouse-absent',
])
occupation_option = sorted([
    'Adm-clerical',
    'Exec-managerial',
    'Handlers-cleaners',
    'Prof-specialty',
    'Sales',
    'Farming-fishing',
    'Machine-op-inspct',
    'Other-service',
    'Transport-moving',
    'Tech-support',
    'Craft-repair',
    'Protective-serv',
    'Armed-Forces',
    'Priv-house-serv',
])
relationship_option = sorted([
    'Not-in-family',
    'Husband',
    'Wife',
    'Own-child',
    'Unmarried',
    'Other-relative',
])
race_option = sorted([
    'White',
    'Black',
    'Other',
    'Asian-Pac-Islander',
    'Amer-Indian-Eskimo',
])
sex_option = sorted(['Male', 'Female'])

# [minimum, maximum] bounds used by the sliders
age = [0, 100]
capital_gain = [0, 99999]
capital_loss = [0, 4356]
hours_per_week = [20, 60]

# Education level -> ordinal rank (1 = Preschool ... 16 = Doctorate),
# kept as a literal string and parsed into a dict
education_mapping = (
    "{'Preschool': 1, '1st-4th': 2, '5th-6th': 3, '7th-8th': 4, "
    "'9th': 5, '10th': 6, '11th': 7, '12th': 8, "
    "'HS-grad': 9, 'Some-college': 10, 'Assoc-voc': 11, 'Assoc-acdm': 12, "
    "'Bachelors': 13, 'Masters': 14, 'Prof-school': 15, 'Doctorate': 16}"
)
education_dict = ast.literal_eval(education_mapping)

# Columns present in the dataframe used to train the model, in order:
# the numeric features first, then the indicator columns grouped by feature
columns = (
    ['age', 'education-num', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week']
    + ['workclass_' + value for value in (
        'Local-gov', 'Private', 'Self-emp-inc', 'Self-emp-not-inc',
        'State-gov', 'Without-pay',
    )]
    + ['marital-status_' + value for value in (
        'Married-AF-spouse', 'Married-civ-spouse', 'Married-spouse-absent',
        'Never-married', 'Separated', 'Widowed',
    )]
    + ['occupation_' + value for value in (
        'Armed-Forces', 'Craft-repair', 'Exec-managerial', 'Farming-fishing',
        'Handlers-cleaners', 'Machine-op-inspct', 'Other-service',
        'Priv-house-serv', 'Prof-specialty', 'Protective-serv', 'Sales',
        'Tech-support', 'Transport-moving',
    )]
    + ['relationship_' + value for value in (
        'Not-in-family', 'Other-relative', 'Own-child', 'Unmarried', 'Wife',
    )]
    + ['race_' + value for value in (
        'Asian-Pac-Islander', 'Black', 'Other', 'White',
    )]
)

# Code for SVM
def SVM(workclass, education, marital_status, occupation, relationship, race, sex, age, capital_gain, capital_loss, hours_per_week):
    """Predict the salary class of one person with the pickled SVM model.

    The inputs are arranged into a single-row dataframe laid out like the
    module-level ``columns`` list, transformed the same way on every call
    (log1p on the capital columns, then the pickled scaler on the
    continuous columns) and passed to the pickled model.

    Args:
        workclass, marital_status, occupation, relationship, race: category
            names; each sets the indicator column ``<feature>_<value>`` to 1.
            A value with no matching entry in ``columns`` (for example
            workclass 'Federal-gov') leaves every indicator of that
            feature at 0.
        education: education level name, converted to its rank through
            ``education_dict`` (0 when the name is not in the mapping).
        sex: encoded as 1 for 'Male' and 0 for anything else.
        age, capital_gain, capital_loss, hours_per_week: numeric inputs,
            truncated to integers before being transformed.

    Returns:
        A 2-tuple: the caption "Predicted Salary Class:" and the label,
        '<=50K' when the model predicts 0 and '>50K' otherwise.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling, so both
    # files must come from a trusted source.
    with open('models/best_svm_OvM_Salary_Classification.pkl', 'rb') as f:
        loaded_model = pickle.load(f)

    # Loading the scaler used to transform the continuous columns
    with open('models/z-score_scaler_svm_Salary_Classification.pkl', 'rb') as f:
        scaler = pickle.load(f)

    # Start from an all-zero row so every indicator that is not selected
    # stays 0 and the column order matches ``columns`` exactly.
    row = dict.fromkeys(columns, 0)
    row['age'] = age
    row['education-num'] = education_dict.get(education, 0)
    row['capital-gain'] = capital_gain
    row['capital-loss'] = capital_loss
    row['hours-per-week'] = hours_per_week

    # Encode sex from the argument itself. The previous code read the
    # still-empty 'sex' column of the output frame, so it was always 0.
    row['sex'] = 1 if sex == 'Male' else 0

    # One-hot encode the categorical inputs; values without a matching
    # column are skipped.
    categorical_inputs = {
        'workclass': workclass,
        'marital-status': marital_status,
        'occupation': occupation,
        'relationship': relationship,
        'race': race,
    }
    for feature, value in categorical_inputs.items():
        indicator = f'{feature}_{value}'
        if indicator in row:
            row[indicator] = 1

    # Missing numeric inputs fall back to 0, then everything is cast to int
    formattedDF = pd.DataFrame([row], columns=columns)
    formattedDF = formattedDF.fillna(0).astype(int)

    # log1p is applied to the two capital columns before scaling
    for column in ['capital-gain', 'capital-loss']:
        formattedDF[column] = np.log1p(formattedDF[column])

    # Apply the scaler to the unseen data
    continuous_columns = ['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
    formattedDF[continuous_columns] = scaler.transform(formattedDF[continuous_columns])

    # Make predictions with the loaded model
    prediction = loaded_model.predict(formattedDF)

    salary_result = '<=50K' if prediction[0] == 0 else '>50K'

    # NOTE(review): a 2-tuple is returned although the interface that wraps
    # this function declares a single text output - confirm this is intended.
    return "Predicted Salary Class:", salary_result

# Code for LogisticRegression
def LogisticRegression(input_image):
    """Placeholder handler for the Logistic Regression tab.

    The argument is ignored and a fixed placeholder string is returned.
    """
    placeholder_result = "Task 2 Result"
    return placeholder_result

# Code for RandomForests
def RandomForests(input_image):
    """Placeholder handler for the Random Forests tab.

    The argument is ignored and a fixed placeholder string is returned.
    """
    placeholder_result = "Task 2 Result"
    return placeholder_result

# interface one: the SVM form. Inputs are listed in the same order as the
# parameters of SVM (seven dropdowns followed by four sliders).
_svm_dropdowns = [
    gr.Dropdown(choices=choices, label=label)
    for label, choices in (
        ("Workclass", workclass_options),
        ("Education", education_option),
        ("Marital Status", marital_status_option),
        ("Occupation", occupation_option),
        ("Relationship", relationship_option),
        ("Race", race_option),
        ("Sex", sex_option),
    )
]
_svm_sliders = [
    gr.Slider(minimum=bounds[0], maximum=bounds[1], step=1, label=label)
    for label, bounds in (
        ("Age", age),
        ("Capital Gain", capital_gain),
        ("Capital Loss", capital_loss),
        ("Hours per Week", hours_per_week),
    )
]
iface1 = gr.Interface(
    title="SVM",
    fn=SVM,
    inputs=_svm_dropdowns + _svm_sliders,
    outputs="text",
)

# interface two: Logistic Regression tab (image input, text output)
iface2 = gr.Interface(
    title="Logistic Regression",
    fn=LogisticRegression,
    inputs="image",
    outputs="text",
)

# interface three: Random Forests tab (image input, text output)
iface3 = gr.Interface(
    title="Random Forests",
    fn=RandomForests,
    inputs="image",
    outputs="text",
)

# Group the three interfaces into one tabbed app, pairing each interface
# with its tab title
_tab_pairs = [
    (iface1, "SVM - Jerome Agius"),
    (iface2, "Logistic Regression - Isaac Muscat"),
    (iface3, "Random Forests - Kyle Demicoli"),
]
demo = gr.TabbedInterface(
    [interface for interface, _ in _tab_pairs],
    [tab_title for _, tab_title in _tab_pairs],
)

# Run the interface
demo.launch(share=True)