Spaces:

Gangsterbra123
/

ICS5110

Sleeping

App Files Files Community

Gangsterbra123 committed on Dec 7, 2024

Commit

11f76af

verified ·

1 Parent(s): e477fa5

Upload app.py

Browse files

Files changed (1) hide show

app.py +64 -81

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import pickle
 import pandas as pd
 import ast
 import numpy as np
 # Set the option to opt into future behavior
 pd.set_option('future.no_silent_downcasting', True)
@@ -30,7 +31,7 @@ education_mapping = "{'Preschool': 1, '1st-4th': 2, '5th-6th': 3, '7th-8th': 4,
 education_dict = ast.literal_eval(education_mapping)
 # List of the columns present in dataframe used to train the model
-columns = ['age', 'education-num', 'sex', 'capital-gain', 'capital-loss',
         'hours-per-week', 'workclass_Local-gov', 'workclass_Private',
         'workclass_Self-emp-inc', 'workclass_Self-emp-not-inc',
         'workclass_State-gov', 'workclass_Without-pay',
@@ -45,17 +46,36 @@ columns = ['age', 'education-num', 'sex', 'capital-gain', 'capital-loss',
         'occupation_Sales', 'occupation_Tech-support',
         'occupation_Transport-moving', 'relationship_Not-in-family',
         'relationship_Other-relative', 'relationship_Own-child',
-        'relationship_Unmarried', 'relationship_Wife',
-        'race_Asian-Pac-Islander', 'race_Black', 'race_Other', 'race_White']
-# Code for SVM
-def SVM_Salary(workclass, education, marital_status, occupation, relationship, race, sex, age, capital_gain, capital_loss, hours_per_week):
-    with open('../SVM/models/best_svm_OvM_Salary_Classification.pkl', 'rb') as f:
-        loaded_model = pickle.load(f)
-    # Loading the scaler and transform the data
-    with open('../SVM/models/z-score_scaler_svm_salary_classification.pkl', 'rb') as f:
-        scaler = pickle.load(f)
     new_data = {
         'age': age,
@@ -75,7 +95,7 @@ def SVM_Salary(workclass, education, marital_status, occupation, relationship, r
     new_data = new_data.rename(columns={'education': 'education-num'})
     # Create an empty DataFrame with these columns
-    formattedDF = pd.DataFrame(columns=columns)
     # Copying over the continuous columns
     formattedDF['age'] = new_data['age']
@@ -93,7 +113,7 @@ def SVM_Salary(workclass, education, marital_status, occupation, relationship, r
     # Fill remaining columns with 0
     formattedDF.fillna(0, inplace=True)
     formattedDF = formattedDF.astype(int)
-    formattedDF = formattedDF[formattedDF.columns.intersection(columns)]
     # Assuming 'high_skew_columns' from training is a list of columns with high skewness
     for column in  ['capital-gain', 'capital-loss']:
@@ -108,15 +128,32 @@ def SVM_Salary(workclass, education, marital_status, occupation, relationship, r
     salary_result = '<=50K' if prediction[0] == 0 else '>50K'
-    return "Predicted Salary Class:", salary_result
-def SVM_Health(age, sex, bmi, children, smoker, region):
-    with open('models/best_health_svm_OvM_Charges_Classification.pkl', 'rb') as f:
-        loaded_model = pickle.load(f)
-    # Loading the scaler and transform the data
-    with open('models/z-score_scaler_svm_charges_classification.pkl', 'rb') as f:
-        scaler = pickle.load(f)
     #Inverting the dict to map the 'charges' values back to 'charges' labels
     inverse_mapping_charges = {
@@ -139,7 +176,7 @@ def SVM_Health(age, sex, bmi, children, smoker, region):
     new_data = pd.DataFrame([new_data])
     # Create an empty DataFrame with these columns
-    formattedDF = pd.DataFrame(columns=columns)
     # Copying over the continuous columns
     formattedDF['age'] = new_data['age']
@@ -147,14 +184,12 @@ def SVM_Health(age, sex, bmi, children, smoker, region):
     formattedDF['bmi'] = new_data['bmi']
     formattedDF['children'] = new_data['children']
     formattedDF['smoker'] = new_data['smoker'].apply(lambda x: 1 if x == 'Yes' else 0)
-    formattedDF['marital-status_'+new_data['marital-status']] = 1
     formattedDF['region_'+new_data['region']] = 1
     # Fill remaining columns with 0
     formattedDF.fillna(0, inplace=True)
     formattedDF = formattedDF.astype(int)
-    formattedDF = formattedDF[formattedDF.columns.intersection(columns)]
     # Apply the scaler to the unseen data
     continuous_columns = ['age', 'bmi']
@@ -164,32 +199,13 @@ def SVM_Health(age, sex, bmi, children, smoker, region):
     prediction = loaded_model.predict(formattedDF)[0]
     prediction = inverse_mapping_charges[prediction]
-    return "Predicted Charges Class:", prediction
-# Code for LogisticRegression
-def LogisticRegression_Salary(input_image):
-    # Task 2 logic
-    return "Task 2 Result"
-# Code for LogisticRegression
-def LogisticRegression_Health(input_image):
-    # Task 2 logic
-    return "Task 2 Result"
-# Code for
-def RandomForests_Salary(input_image):
-    # Task 2 logic
-    return "Task 2 Result"
-# Code for
-def RandomForests_Health(input_image):
-    # Task 2 logic
-    return "Task 2 Result"
 # interface one
 iface1 = gr.Interface(
-    fn=SVM_Salary,
     inputs=[
         gr.Dropdown(choices=workclass_options, label="Workclass"),
         gr.Dropdown(choices=education_option, label="Education"),
         gr.Dropdown(choices=marital_status_option, label="Marital Status"),
@@ -208,8 +224,9 @@ iface1 = gr.Interface(
 # interface two
 iface2 = gr.Interface(
-    fn=SVM_Health,
     inputs=[
         gr.Slider(minimum=age[0], maximum=age[1], step=1, label="Age"),
         gr.Dropdown(choices=sex_option, label="Sex"),
         gr.Slider(minimum=bmi[0], maximum=bmi[1], step=0.1, label="BMI"),
@@ -221,41 +238,7 @@ iface2 = gr.Interface(
     title="SVM - Health"
 )
-# interface three
-iface3 = gr.Interface(
-    fn=LogisticRegression_Salary,
-    inputs="image",
-    outputs="text",
-    title="Logistic Regression"
-)
-# interface four
-iface4 = gr.Interface(
-    fn=LogisticRegression_Health,
-    inputs="image",
-    outputs="text",
-    title="Logistic Regression"
-)
-# interface five
-iface5 = gr.Interface(
-    fn=RandomForests_Salary,
-    inputs="image",
-    outputs="text",
-    title="Random Forests"
-)
-# interface six
-iface6 = gr.Interface(
-    fn=RandomForests_Health,
-    inputs="image",
-    outputs="text",
-    title="Random Forests"
-)
-demo = gr.TabbedInterface([iface1, iface2, iface3, iface4, iface5, iface6], ["SVM - Jerome Agius", "SVM - Jerome Agius",
-                                                                            "Logistic Regression - Isaac Muscat", "Logistic Regression - Isaac Muscat",
-                                                                            "Random Forests - Kyle Demicoli", "Random Forests - Kyle Demicoli"])
 # Run the interface
 demo.launch(share=True)

 import pandas as pd
 import ast
 import numpy as np
+import os
 # Set the option to opt into future behavior
 pd.set_option('future.no_silent_downcasting', True)
 education_dict = ast.literal_eval(education_mapping)
 # List of the columns present in dataframe used to train the model
+salary_columns = ['age', 'education-num', 'sex', 'capital-gain', 'capital-loss',
         'hours-per-week', 'workclass_Local-gov', 'workclass_Private',
         'workclass_Self-emp-inc', 'workclass_Self-emp-not-inc',
         'workclass_State-gov', 'workclass_Without-pay',
         'occupation_Sales', 'occupation_Tech-support',
         'occupation_Transport-moving', 'relationship_Not-in-family',
         'relationship_Other-relative', 'relationship_Own-child',
+        'relationship_Unmarried', 'relationship_Wife', 'race_Asian-Pac-Islander',
+        'race_Black', 'race_Other', 'race_White']
+health_columns = ['age', 'sex', 'bmi', 'children', 'smoker', 'region_northwest', 'region_southeast', 'region_southwest']
+# Code for SVM
+def Salary(model, workclass, education, marital_status, occupation, relationship, race, sex, age, capital_gain, capital_loss, hours_per_week):
+    # Set the working directory to the script's directory
+    os.chdir(os.path.dirname(os.path.abspath(__file__)))
+    if model == 0:
+        model_used = "SVM"
+        with open('models/best_svm_OvM_Salary_Classification.pkl', 'rb') as f:
+            loaded_model = pickle.load(f)
+        # Loading the scaler and transform the data
+        with open('models/z-score_scaler_svm_salary_classification.pkl', 'rb') as f:
+            scaler = pickle.load(f)
+    elif model == 1:
+        model_used = "Logistic Regression"
+        with open('models/best_lr_Salary_Classification.pkl', 'rb') as f:
+            loaded_model = pickle.load(f)
+        # Loading the scaler and transform the data
+        with open('models/z-score_scaler_lr_salary_classification.pkl', 'rb') as f:
+            scaler = pickle.load(f)
+    elif model == 2:
+        model_used = "Random Forest"
+        # Add Random Forest model
     new_data = {
         'age': age,
     new_data = new_data.rename(columns={'education': 'education-num'})
     # Create an empty DataFrame with these columns
+    formattedDF = pd.DataFrame(columns=salary_columns)
     # Copying over the continuous columns
     formattedDF['age'] = new_data['age']
     # Fill remaining columns with 0
     formattedDF.fillna(0, inplace=True)
     formattedDF = formattedDF.astype(int)
+    formattedDF = formattedDF[formattedDF.columns.intersection(salary_columns)]
     # Assuming 'high_skew_columns' from training is a list of columns with high skewness
     for column in  ['capital-gain', 'capital-loss']:
     salary_result = '<=50K' if prediction[0] == 0 else '>50K'
+    return f"Predicted using {model_used} Salary Class: {salary_result}"
+def Health(model, age, sex, bmi, children, smoker, region):
+    # Set the working directory to the script's directory
+    os.chdir(os.path.dirname(os.path.abspath(__file__)))
+    if model == 0:
+        model_used = "SVM"
+        with open('models/best_health_svm_OvM_Charges_Classification.pkl', 'rb') as f:
+            loaded_model = pickle.load(f)
+        # Loading the scaler and transform the data
+        with open('models/z-score_scaler_svm_charges_classification.pkl', 'rb') as f:
+            scaler = pickle.load(f)
+    elif model == 1:
+        model_used = "Logistic Regression"
+        with open('models/best_health_lr_Charges_Classification.pkl', 'rb') as f:
+            loaded_model = pickle.load(f)
+        # Loading the scaler and transform the data
+        with open('models/z-score_scaler_lr_charges_classification.pkl', 'rb') as f:
+            scaler = pickle.load(f)
+    elif model == 2:
+        model_used = "Random Forest"
+        # Add Random Forest model
     #Inverting the dict to map the 'charges' values back to 'charges' labels
     inverse_mapping_charges = {
     new_data = pd.DataFrame([new_data])
     # Create an empty DataFrame with these columns
+    formattedDF = pd.DataFrame(columns=health_columns)
     # Copying over the continuous columns
     formattedDF['age'] = new_data['age']
     formattedDF['bmi'] = new_data['bmi']
     formattedDF['children'] = new_data['children']
     formattedDF['smoker'] = new_data['smoker'].apply(lambda x: 1 if x == 'Yes' else 0)
     formattedDF['region_'+new_data['region']] = 1
     # Fill remaining columns with 0
     formattedDF.fillna(0, inplace=True)
     formattedDF = formattedDF.astype(int)
+    formattedDF = formattedDF[formattedDF.columns.intersection(health_columns)]
     # Apply the scaler to the unseen data
     continuous_columns = ['age', 'bmi']
     prediction = loaded_model.predict(formattedDF)[0]
     prediction = inverse_mapping_charges[prediction]
+    return f"Predicted using {model_used} Charges Class: {prediction}"
 # interface one
 iface1 = gr.Interface(
+    fn=Salary,
     inputs=[
+        gr.Dropdown(choices=[("SVM - Jerome Agius", 0), ("Logistic Regression - Isaac Muscat", 1), ("Random Forest - Kyle Demicoli", 2)], label="Model", value=0),
         gr.Dropdown(choices=workclass_options, label="Workclass"),
         gr.Dropdown(choices=education_option, label="Education"),
         gr.Dropdown(choices=marital_status_option, label="Marital Status"),
 # interface two
 iface2 = gr.Interface(
+    fn=Health,
     inputs=[
+        gr.Dropdown(choices=[("SVM - Jerome Agius", 0), ("Logistic Regression - Isaac Muscat", 1), ("Random Forest - Kyle Demicoli", 2)], label="Model", value=0),
         gr.Slider(minimum=age[0], maximum=age[1], step=1, label="Age"),
         gr.Dropdown(choices=sex_option, label="Sex"),
         gr.Slider(minimum=bmi[0], maximum=bmi[1], step=0.1, label="BMI"),
     title="SVM - Health"
 )
+demo = gr.TabbedInterface([iface1, iface2], ["Salary Prediction", "Health Charges Prediction"])
 # Run the interface
 demo.launch(share=True)