Spaces:

matsammut
/

ICS5110-Applied_ML

Sleeping

App Files Files Community

ICS5110-Applied_ML / app.py

matsammut

Update app.py

3c78fe7 verified 7 months ago

raw

history blame

3.92 kB

	import gradio as gr
	import joblib
	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
	from sklearn.impute import KNNImputer

	# Load your saved model once at import time; predict() reads this module-level object.
	# The path is relative, so it is resolved against the process's working directory.
	# NOTE(review): joblib artifacts are pickle-based -- only load files from a trusted source.
	model = joblib.load("ann_model.joblib")

	# Define the prediction function
	def predict(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
	    """Classify one person as earning more or at most 50K using the loaded model.

	    The twelve arguments are the raw form values, passed to the model as a
	    single sample in exactly this order.

	    Returns:
	        "Income >50K" when the model's output for the sample equals 1,
	        otherwise "Income <=50K".

	    NOTE(review): the values are forwarded unencoded and unscaled, so this
	    assumes ``model`` follows the scikit-learn ``predict`` convention and
	    does its own preprocessing (e.g. it is a Pipeline) -- confirm against
	    how ann_model.joblib was trained.
	    """
	    features = [age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country]

	    # Wrap the sample in an outer list: predict() expects a 2-D
	    # (n_samples, n_features) input, not a flat list of feature values.
	    raw_prediction = model.predict([features])

	    # predict() returns one entry per sample; flatten and take the only one
	    # so the comparison below is on a scalar rather than an array.
	    prediction = np.ravel(raw_prediction)[0]

	    # The previous hard-coded ``prediction = 1`` override was removed; it
	    # made every request report ">50K" regardless of the model output.
	    return "Income >50K" if prediction == 1 else "Income <=50K"

	def cleaning_features(data):
	    """Scale and encode an adult-income DataFrame for the model.

	    Steps, in order: replace the placeholders '?' and 99999 with NaN,
	    standardize the numeric columns, label-encode 'gender', one-hot encode
	    'race', 'marital-status' and 'relationship', and turn 'native-country'
	    into a 0/1 flag (1 for 'United-States').

	    Args:
	        data: DataFrame containing at least the columns 'age',
	            'educational-num', 'hours-per-week', 'gender', 'race',
	            'marital-status', 'relationship' and 'native-country'.
	            It is not modified; a transformed copy is returned.

	    Returns:
	        A tuple ``(data, encoder, scaler)``: the transformed DataFrame, the
	        OneHotEncoder fitted on all three categorical columns, and the
	        StandardScaler fitted on the numeric columns.

	    NOTE(review): the transformers are fitted on whatever frame is passed
	    in; for inference they should normally be fitted on training data and
	    only applied here -- confirm the intended usage.
	    """
	    le = LabelEncoder()
	    scaler = StandardScaler()
	    encoder = OneHotEncoder(sparse_output=False)
	    numeric_cols = ['age', 'educational-num', 'hours-per-week']
	    columns_to_encode = ['race', 'marital-status', 'relationship']

	    # Non-inplace replace returns a new frame, so the column assignments
	    # below never touch the caller's DataFrame.
	    data = data.replace({'?': np.nan, 99999: np.nan})

	    # 1. Scale numerical features
	    data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

	    # 2. Label encode gender
	    data['gender'] = le.fit_transform(data['gender'])

	    # 3. One-hot encode all categorical columns in a single fit, so the
	    #    returned encoder knows every column instead of only the last one
	    #    it was re-fitted on. Column names and order match the per-column
	    #    approach: <column>_<category>, grouped by column.
	    encoded = encoder.fit_transform(data[columns_to_encode])
	    encoded_cols = encoder.get_feature_names_out(columns_to_encode)
	    encoded_df = pd.DataFrame(encoded, columns=encoded_cols, index=data.index)
	    # Combine the encoded data with the remaining original columns
	    data = pd.concat([data.drop(columns=columns_to_encode), encoded_df], axis=1)

	    # 4. Binarize native country exactly once (NaN compares unequal -> 0)
	    data['native-country'] = (data['native-country'] == 'United-States').astype(int)

	    print(data.head(10))

	    return data, encoder, scaler

	# Create the Gradio interface.
	# Gradio passes component values to `fn` positionally, so the order of
	# `inputs` below must match predict()'s parameter order:
	# age, workclass, education, marital_status, occupation, relationship,
	# race, gender, capital_gain, capital_loss, hours_per_week, native_country.
	interface = gr.Interface(
	    fn=predict,
	    inputs=[
	        gr.Slider(18, 90, step=1, label="Age"),
	        gr.Dropdown(
	            ["Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
	             "Local-gov", "State-gov", "Without-pay", "Never-worked"],
	            label="Workclass"
	        ),
	        gr.Dropdown(
	            ["Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
	             "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters",
	             "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
	            label="Education"
	        ),
	        gr.Dropdown(
	            ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
	             "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
	            label="Marital Status"
	        ),
	        gr.Dropdown(
	            ["Tech-support", "Craft-repair", "Other-service", "Sales",
	             "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
	             "Machine-op-inspct", "Adm-clerical", "Farming-fishing",
	             "Transport-moving", "Priv-house-serv", "Protective-serv",
	             "Armed-Forces"],
	            label="Occupation"
	        ),
	        gr.Dropdown(
	            ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
	            label="Relationship"
	        ),
	        gr.Dropdown(
	            ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
	            label="Race"
	        ),
	        gr.Dropdown(
	            ["Male", "Female"],
	            label="Gender"
	        ),
	        # These three sliders were previously listed as Hours / Gain / Loss,
	        # which fed hours into capital_gain, gain into capital_loss and
	        # loss into hours_per_week.
	        gr.Slider(0, 100000, step=100, label="Capital Gain"),
	        gr.Slider(0, 5000, step=50, label="Capital Loss"),
	        gr.Slider(1, 90, step=1, label="Hours Per Week"),
	        gr.Dropdown(
	            ["United-States", "Other"],
	            label="Native Country"
	        ),
	    ],
	    outputs="text",
	    title="Adult Income Predictor"
	)

	# Launch the app.
	# This call sits at module top level with no __main__ guard, so it runs
	# whenever the file is executed or imported.
	interface.launch()