Spaces:

JohnG112358
/

DALI-ML-Challenge

Sleeping

App Files Files Community

John Guerrerio committed on Apr 24, 2024

Commit

e81cf7c

1 Parent(s): aefbc5b

first try at deployment

Browse files

Files changed (6) hide show

Log_Reg.pkl +0 -0
SVM.pkl +0 -0
Superstore.csv +0 -0
XGB.model +0 -0
app.py +174 -0
requirements.txt +8 -0

Log_Reg.pkl ADDED Viewed

Binary file (1.14 kB). View file

SVM.pkl ADDED Viewed

Binary file (1.15 kB). View file

Superstore.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

XGB.model ADDED Viewed

Binary file (125 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,174 @@

+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn.linear_model import SGDClassifier
+from sklearn.metrics import classification_report, confusion_matrix, make_scorer, f1_score
+import shap
+import xgboost as xgb
+import gradio as gr
+import matplotlib.pyplot as plt
+import joblib
+SVM = joblib.load('SVM.pkl')
+Log_Reg = joblib.load('Log_Reg.pkl')
+XGB = xgb.XGBClassifier()
+XGB.load_model('XGB.model')
+df = pd.read_csv('Superstore.csv')
+df.dropna(subset=["Region", "Category", "Sub-Category", "Quantity", "Discount"], inplace=True)
+MEDIAN = 8.662 # from the exploratory analysis file
+RANDOM_STATE = 42 # random seed to ensure results are reproducible
+region=np.unique(df['Region'], return_inverse=True)[1]
+category=np.unique(df['Category'], return_inverse=True)[1]
+subCategory=np.unique(df['Sub-Category'], return_inverse=True)[1]
+# turn quantity, discount, and profit columns into vectors of numbers
+quantity = df["Quantity"].to_numpy()
+discount = df["Discount"].to_numpy()
+profit = df["Profit"].to_numpy()
+vectorizedDataset = np.empty((len(region), 5))
+labels = np.empty(len(region))
+# generate feature vectors
+for i in range(0, len(region)):
+  data = np.zeros((1, 5))
+  data[0][0] = region[i]
+  data[0][1] = category[i]
+  data[0][2] = subCategory[i]
+  data[0][3] = quantity[i]
+  data[0][4] = discount[i]
+  vectorizedDataset[i] = data
+  if (profit[i] > MEDIAN):
+    labels[i] = 1
+  else:
+    labels[i] = 0
+train, test, trainLabels, testLabels = train_test_split(vectorizedDataset, labels, test_size=0.3, random_state=RANDOM_STATE)
+region_label = {'Central': 0, 'East': 1, 'South': 2, 'West': 3}
+category_label = {'Furniture': 0, 'Office Supplies': 1, 'Technology': 2}
+sub_category_label = {'Accessories': 0, 'Appliances': 1, 'Art': 2, 'Binders': 3, 'Bookcases': 4,
+       'Chairs': 5, 'Copiers': 6, 'Envelopes': 7, 'Fasteners': 8, 'Furnishings': 9,
+       'Labels': 10, 'Machines': 11, 'Paper': 12, 'Phones': 13, 'Storage': 14, 'Supplies': 15,
+       'Tables': 16}
+profit_label = {0: 'Below Median Profit', 1: 'Above Median Profit'}
+feature_names = ["Region", "Category", "Sub-Category", "Quantity", "Discount"]
+def sanitize_inputs(Region, Category, Sub_Category, Quantity, Discount):
+  try:
+    Region = region_label[Region]
+    Category = category_label[Category]
+    Sub_Category = sub_category_label[Sub_Category]
+  except KeyError:
+    return ["Please provide region, category, and sub category from the pre-defined Superstore dataset classes", None]
+  if Quantity < 1 or Discount < 0:
+    return ["Quantity and Discount must be positive", None]
+  if not isinstance(Quantity, int):
+    return ["Quantity must be an integer", None]
+  if Discount > 1:
+     return ["Discount cannot be greater than one", None]
+  return [Region, Category, Sub_Category]
+def XGB_predict(Region, Category, Sub_Category, Quantity, Discount):
+  sanitized = sanitize_inputs(Region, Category, Sub_Category, Quantity, Discount)
+  if len(sanitized)==2:
+    return sanitized
+  input = np.array([[sanitized[0], sanitized[1], sanitized[2], Quantity, Discount]])
+  predicted_class = XGB.predict(input)
+  explainer = shap.Explainer(XGB, test)
+  shap_values = explainer(input)
+  shap_values.feature_names = ["Region", "Category", "Sub-Category", "Quantity", "Discount"]
+  plot = shap.plots.bar(shap_values, show=False)
+  plt.savefig('shap_plot_XGB.png')
+  return [profit_label[predicted_class[0]], 'shap_plot_XGB.png']
+def SVM_predict(Region, Category, Sub_Category, Quantity, Discount):
+  sanitized = sanitize_inputs(Region, Category, Sub_Category, Quantity, Discount)
+  if len(sanitized)==2:
+    return sanitized
+  input = np.array([[sanitized[0], sanitized[1], sanitized[2], Quantity, Discount]])
+  predicted_class = SVM.predict(input)
+  explainer = shap.Explainer(SVM, test)
+  shap_values = explainer(input)
+  shap_values.feature_names = ["Region", "Category", "Sub-Category", "Quantity", "Discount"]
+  plot = shap.plots.bar(shap_values, show=False)
+  plt.savefig('shap_plot_SVM.png')
+  return [profit_label[predicted_class[0]], 'shap_plot_SVM.png']
+def Log_reg_predict(Region, Category, Sub_Category, Quantity, Discount):
+  sanitized = sanitize_inputs(Region, Category, Sub_Category, Quantity, Discount)
+  if len(sanitized)==2:
+    return sanitized
+  input = np.array([[sanitized[0], sanitized[1], sanitized[2], Quantity, Discount]])
+  predicted_class = Log_Reg.predict(input)
+  explainer = shap.Explainer(Log_Reg, test)
+  shap_values = explainer(input)
+  shap_values.feature_names = ["Region", "Category", "Sub-Category", "Quantity", "Discount"]
+  plot = shap.plots.bar(shap_values, show=False)
+  plt.savefig('shap_plot_LogReg.png')
+  return [profit_label[predicted_class[0]], 'shap_plot_LogReg.png']
+LogReg_tab = gr.Interface(
+    fn=Log_reg_predict,
+    inputs=["text", "text", "text", "number", "number"],
+    outputs=[
+        gr.Label(label="Model Prediction"),
+        gr.Image(label="Shapley Values"),
+    ],
+    title="Logistic Regression Profit Prediction",
+    description="Create your own purchases and see if the Logistic Regression model predicts they will make above or below the median profit\n\nValid regions: ['Central', 'East', 'South', 'West']\n\nValid product categories: ['Furniture', 'Office Supplies', 'Technology']\n\nValid product sub-categories: ['Accessories', 'Appliances', 'Art', 'Binders', 'Bookcases', 'Chairs', 'Copiers', 'Envelopes', 'Fasteners', 'Furnishings', 'Labels', 'Machines', 'Paper', 'Phones', 'Storage', 'Supplies', 'Tables']",
+)
+SVM_tab = gr.Interface(
+    fn=SVM_predict,
+    inputs=["text", "text", "text", "number", "number"],
+    outputs=[
+        gr.Label(label="Model Prediction"),
+        gr.Image(label="Shapley Values"),
+    ],
+    title="SVM Profit Prediction",
+    description="Create your own purchases and see if the SVM model predicts they will make above or below the median profit\n\nValid regions: ['Central', 'East', 'South', 'West']\n\nValid product categories: ['Furniture', 'Office Supplies', 'Technology']\n\nValid product sub-categories: ['Accessories', 'Appliances', 'Art', 'Binders', 'Bookcases', 'Chairs', 'Copiers', 'Envelopes', 'Fasteners', 'Furnishings', 'Labels', 'Machines', 'Paper', 'Phones', 'Storage', 'Supplies', 'Tables']",
+)
+XGB_tab = gr.Interface(
+    fn=XGB_predict,
+    inputs=["text", "text", "text", "number", "number"],
+    outputs=[
+        gr.Label(label="Model Prediction"),
+        gr.Image(label="Shapley Values"),
+    ],
+    title="XGB Profit Prediction",
+    description="Create your own purchases and see if the XGB model predicts they will make above or below the median profit\n\nValid regions: ['Central', 'East', 'South', 'West']\n\nValid product categories: ['Furniture', 'Office Supplies', 'Technology']\n\nValid product sub-categories: ['Accessories', 'Appliances', 'Art', 'Binders', 'Bookcases', 'Chairs', 'Copiers', 'Envelopes', 'Fasteners', 'Furnishings', 'Labels', 'Machines', 'Paper', 'Phones', 'Storage', 'Supplies', 'Tables']",
+)
+demo = gr.TabbedInterface([LogReg_tab, SVM_tab, XGB_tab], tab_names=["Logistic Regression", "SVM", "XGB"], theme=gr.themes.Soft())
+demo.launch(debug=True, share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+pandas==2.0.3
+numpy==1.25.2
+scikit-learn==1.2.2
+shap==0.45.0
+shapely==2.0.4
+xgboost==2.0.3
+matplotlib==3.7.1
+joblib==1.4.0