File size: 7,105 Bytes
2267956
0b7d288
 
 
 
2267956
0b7d288
 
2267956
0b7d288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
import pickle
import pandas as pd
import ast
import numpy as np

# Opt into pandas' future behavior via the 'future.no_silent_downcasting'
# option. This is set once at import time and applies to the whole process.
# NOTE(review): this option name only exists in recent pandas releases —
# confirm the deployed pandas version supports it.
pd.set_option('future.no_silent_downcasting', True)

# Choices offered by each dropdown. Categorical lists are sorted
# alphabetically for display; education keeps its ordinal order.
workclass_options = sorted([
    'State-gov',
    'Self-emp-not-inc',
    'Private',
    'Federal-gov',
    'Local-gov',
    'Self-emp-inc',
    'Without-pay',
])
education_option = [
    'Preschool',
    '1st-4th',
    '5th-6th',
    '7th-8th',
    '9th',
    '10th',
    '11th',
    '12th',
    'HS-grad',
    'Some-college',
    'Assoc-voc',
    'Assoc-acdm',
    'Bachelors',
    'Masters',
    'Prof-school',
    'Doctorate',
]
marital_status_option = sorted([
    'Never-married',
    'Married-civ-spouse',
    'Divorced',
    'Separated',
    'Married-AF-spouse',
    'Widowed',
    'Married-spouse-absent',
])
occupation_option = sorted([
    'Adm-clerical',
    'Exec-managerial',
    'Handlers-cleaners',
    'Prof-specialty',
    'Sales',
    'Farming-fishing',
    'Machine-op-inspct',
    'Other-service',
    'Transport-moving',
    'Tech-support',
    'Craft-repair',
    'Protective-serv',
    'Armed-Forces',
    'Priv-house-serv',
])
relationship_option = sorted([
    'Not-in-family',
    'Husband',
    'Wife',
    'Own-child',
    'Unmarried',
    'Other-relative',
])
race_option = sorted([
    'White',
    'Black',
    'Other',
    'Asian-Pac-Islander',
    'Amer-Indian-Eskimo',
])
sex_option = sorted(['Male', 'Female'])

# [minimum, maximum] bounds used by the numeric sliders
age = [0, 100]
capital_gain = [0, 99999]
capital_loss = [0, 4356]
hours_per_week = [20, 60]

# Education level -> ordinal number, kept as a dict-literal string and parsed
# safely with ast.literal_eval.
education_mapping = (
    "{'Preschool': 1, '1st-4th': 2, '5th-6th': 3, '7th-8th': 4, "
    "'9th': 5, '10th': 6, '11th': 7, '12th': 8, "
    "'HS-grad': 9, 'Some-college': 10, 'Assoc-voc': 11, 'Assoc-acdm': 12, "
    "'Bachelors': 13, 'Masters': 14, 'Prof-school': 15, 'Doctorate': 16}"
)
education_dict = ast.literal_eval(education_mapping)

# Feature columns of the dataframe used to train the model, in order
columns = [
    # numeric / binary features
    'age',
    'education-num',
    'sex',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    # workclass indicator columns
    'workclass_Local-gov',
    'workclass_Private',
    'workclass_Self-emp-inc',
    'workclass_Self-emp-not-inc',
    'workclass_State-gov',
    'workclass_Without-pay',
    # marital-status indicator columns
    'marital-status_Married-AF-spouse',
    'marital-status_Married-civ-spouse',
    'marital-status_Married-spouse-absent',
    'marital-status_Never-married',
    'marital-status_Separated',
    'marital-status_Widowed',
    # occupation indicator columns
    'occupation_Armed-Forces',
    'occupation_Craft-repair',
    'occupation_Exec-managerial',
    'occupation_Farming-fishing',
    'occupation_Handlers-cleaners',
    'occupation_Machine-op-inspct',
    'occupation_Other-service',
    'occupation_Priv-house-serv',
    'occupation_Prof-specialty',
    'occupation_Protective-serv',
    'occupation_Sales',
    'occupation_Tech-support',
    'occupation_Transport-moving',
    # relationship indicator columns
    'relationship_Not-in-family',
    'relationship_Other-relative',
    'relationship_Own-child',
    'relationship_Unmarried',
    'relationship_Wife',
    # race indicator columns
    'race_Asian-Pac-Islander',
    'race_Black',
    'race_Other',
    'race_White',
]

# Code for SVM
def SVM(workclass, education, marital_status, occupation, relationship, race, sex, age, capital_gain, capital_loss, hours_per_week):
    """Predict the salary class for one person with the pickled SVM model.

    The inputs are turned into a single-row dataframe laid out like the
    module-level ``columns`` list, the two capital columns are log-transformed,
    the continuous columns are scaled with the pickled scaler, and the pickled
    model is asked for a prediction.

    Args:
        workclass, marital_status, occupation, relationship, race: Category
            names. Each one switches on the matching ``<feature>_<value>``
            indicator column when that column exists in ``columns``; any other
            value leaves all indicators of that feature at 0.
        education: Education level name, mapped to its ordinal number through
            ``education_dict`` (unknown or missing levels become 0).
        sex: ``'Male'`` is encoded as 1, anything else as 0.
        age, capital_gain, capital_loss, hours_per_week: Numeric values; they
            are truncated to integers before transformation.

    Returns:
        tuple: ``("Predicted Salary Class:", label)`` where ``label`` is
        ``'<=50K'`` when the model predicts 0 and ``'>50K'`` otherwise.

    Raises:
        FileNotFoundError: If either pickle file under ``models/`` is missing.
    """
    # NOTE: pickle.load can execute arbitrary code; these files must come from
    # a trusted source. They are re-read on every call.
    with open('models/best_svm_OvM_Salary_Classification.pkl', 'rb') as f:
        loaded_model = pickle.load(f)

    # Loading the scaler used to transform the continuous columns
    with open('models/z-score_scaler_svm_Salary_Classification.pkl', 'rb') as f:
        scaler = pickle.load(f)

    # Start from an all-zero row so every indicator column defaults to 0.
    row = dict.fromkeys(columns, 0)

    # Continuous features. An education level missing from the mapping yields
    # None here and is replaced by 0 in the fillna step below.
    row['age'] = age
    row['education-num'] = education_dict.get(education)
    row['capital-gain'] = capital_gain
    row['capital-loss'] = capital_loss
    row['hours-per-week'] = hours_per_week

    # Encode sex from the function argument. Reading it back from the feature
    # frame would always give 0, because that column holds no input value.
    row['sex'] = 1 if sex == 'Male' else 0

    # Switch on the indicator column of each selected category. Categories
    # without a column in `columns` are simply left as all zeros.
    selected_categories = (
        ('workclass', workclass),
        ('marital-status', marital_status),
        ('occupation', occupation),
        ('relationship', relationship),
        ('race', race),
    )
    for prefix, value in selected_categories:
        indicator = f'{prefix}_{value}'
        if indicator in row:
            row[indicator] = 1

    # Single-row frame with exactly the training column order.
    formattedDF = pd.DataFrame([row], columns=columns)
    formattedDF = formattedDF.fillna(0)
    formattedDF = formattedDF.astype(int)

    # Assuming 'high_skew_columns' from training is a list of columns with high skewness
    for column in ['capital-gain', 'capital-loss']:
        formattedDF[column] = np.log1p(formattedDF[column])

    # Apply the scaler to the unseen data
    continuous_columns = ['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
    formattedDF[continuous_columns] = scaler.transform(formattedDF[continuous_columns])

    # Make predictions with the loaded model
    prediction = loaded_model.predict(formattedDF)

    salary_result = '<=50K' if prediction[0] == 0 else '>50K'

    # NOTE(review): this returns a 2-tuple while the interface declares a
    # single "text" output; kept as-is so existing callers see the same shape.
    return "Predicted Salary Class:", salary_result

# Code for LogisticRegression
def LogisticRegression(input_image):
    """Placeholder handler for the Logistic Regression tab.

    The argument is accepted but not used; a fixed string is returned.
    """
    # Task 2 logic is not implemented yet
    placeholder_result = "Task 2 Result"
    return placeholder_result

# Code for RandomForests
def RandomForests(input_image):
    """Placeholder handler for the Random Forests tab.

    The argument is accepted but not used; a fixed string is returned.
    """
    # Real logic is not implemented yet; the text below is a stand-in
    placeholder_result = "Task 2 Result"
    return placeholder_result

# interface one: the SVM tab. Components are created in the same order as the
# parameters of SVM(): seven dropdowns followed by four sliders.
iface1 = gr.Interface(
    title="SVM",
    fn=SVM,
    inputs=[
        # Categorical inputs
        *(
            gr.Dropdown(choices=choices, label=label)
            for choices, label in (
                (workclass_options, "Workclass"),
                (education_option, "Education"),
                (marital_status_option, "Marital Status"),
                (occupation_option, "Occupation"),
                (relationship_option, "Relationship"),
                (race_option, "Race"),
                (sex_option, "Sex"),
            )
        ),
        # Numeric inputs, each bounded by its [minimum, maximum] pair
        *(
            gr.Slider(minimum=lowest, maximum=highest, step=1, label=label)
            for (lowest, highest), label in (
                (age, "Age"),
                (capital_gain, "Capital Gain"),
                (capital_loss, "Capital Loss"),
                (hours_per_week, "Hours per Week"),
            )
        ),
    ],
    outputs="text",
)

# interface two: the Logistic Regression tab (image in, text out)
iface2 = gr.Interface(
    title="Logistic Regression",
    fn=LogisticRegression,
    outputs="text",
    inputs="image",
)

# interface three: the Random Forests tab (image in, text out)
iface3 = gr.Interface(
    title="Random Forests",
    fn=RandomForests,
    outputs="text",
    inputs="image",
)

# Group the three interfaces into one app; tab names are given in the same
# order as the interfaces.
demo = gr.TabbedInterface(
    [iface1, iface2, iface3],
    [
        "SVM - Jerome Agius",
        "Logistic Regression - Isaac Muscat",
        "Random Forests - Kyle Demicoli",
    ],
)

# Run the interface (share=True is passed straight through to Gradio's launch)
demo.launch(share=True)