Upload 6 files
Browse files- explainer.bz2 +3 -0
- minmax_scaler_inputs.pickle +3 -0
- minmax_scaler_targets.pickle +3 -0
- model_coatings.h5 +3 -0
- one_hot_scaler.pickle +3 -0
- utils.py +90 -0
explainer.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f19ed00f940465f3fbbba2c257fd80902121bb273ae0423925edea9ac6fc244f
|
| 3 |
+
size 34712
|
minmax_scaler_inputs.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd6b1f249a8231605f1900d2ba952f6c0b427d2d1c17c66753512f5b9213ae78
|
| 3 |
+
size 961
|
minmax_scaler_targets.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb6e6028b752d7f5549b77ec1399aa5a6e1ca1e62bf617789b369d5212df0871
|
| 3 |
+
size 731
|
model_coatings.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac892df7538832b277de18057230882d261b57453cebeb5178a02839b20f3dcb
|
| 3 |
+
size 58280
|
one_hot_scaler.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85887e5d20533a688fcf3a6f16dc3c19ff7e3a996fdb4d00771db4ca6875e357
|
| 3 |
+
size 619
|
utils.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Utils functions for preprocessing"""
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
|
| 4 |
+
import pickle
|
| 5 |
+
import tensorflow as tf
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def aggregate_transform_df(original_df, transformed_df, transformed_cols):
    """Swap raw columns for their transformed counterparts.

    Drops ``transformed_cols`` from ``original_df`` and merges the columns of
    ``transformed_df`` back in by index, so row alignment is preserved.

    Args:
        original_df: Source DataFrame still containing the raw columns.
        transformed_df: DataFrame holding the transformed columns, indexed
            like ``original_df``.
        transformed_cols: Column names in ``original_df`` that were
            transformed and must be dropped.

    Returns:
        A new DataFrame with the raw columns replaced by the transformed ones.
    """
    # Fix: removed leftover debug print(...) of the three frame shapes —
    # this is a library helper and should not write to stdout.
    df_final = original_df.drop(columns=transformed_cols)
    # Index-based merge keeps rows aligned even after column replacement.
    return df_final.merge(transformed_df, left_index=True, right_index=True)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def encode_categorical(df, categorical_cols, method="OneHot", encoder=None, fit=True):
    """One-hot encode the categorical columns of a DataFrame.

    Args:
        df: Input DataFrame.
        categorical_cols: Names of the categorical columns to encode.
        method: Encoding method; only ``"OneHot"`` is supported.
        encoder: A fitted encoder to reuse when ``fit`` is False.
        fit: When True, fit a fresh ``OneHotEncoder`` on ``df`` and return it
            alongside the encoded frame; when False, apply ``encoder`` as-is.

    Returns:
        ``(df_final, encoder)`` when ``fit`` is True, otherwise ``df_final``.

    Raises:
        ValueError: If ``method`` is not ``"OneHot"`` (previously any value
            silently ran one-hot encoding anyway), or if ``fit`` is False and
            no ``encoder`` was provided (previously an opaque AttributeError).
    """
    if method != "OneHot":
        raise ValueError(f"Unsupported encoding method: {method!r}")
    print(f"Running {method} encoding")
    if fit:
        encoder = OneHotEncoder()
        encoder.fit(df[categorical_cols])
    elif encoder is None:
        raise ValueError("encoder must be provided when fit=False")
    # OneHotEncoder returns a sparse matrix by default; densify for pandas.
    array_transformed = encoder.transform(df[categorical_cols]).toarray()
    df_encoded = pd.DataFrame(
        array_transformed,
        columns=encoder.get_feature_names_out(),
        index=df.index,
    )
    df_final = aggregate_transform_df(df, df_encoded, categorical_cols)
    if fit:
        return df_final, encoder
    return df_final
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def scale_numerical(df, numerical_cols, method="MinMax", scaler=None, fit=True):
    """Scale the numerical columns of a DataFrame.

    Args:
        df: Input DataFrame.
        numerical_cols: Names of the numerical columns to scale.
        method: Scaling method; only ``"MinMax"`` is supported.
        scaler: A fitted scaler to reuse when ``fit`` is False.
        fit: When True, fit a fresh ``MinMaxScaler`` on ``df`` and return it
            alongside the scaled frame; when False, apply ``scaler`` as-is.

    Returns:
        ``(df_final, scaler)`` when ``fit`` is True, otherwise ``df_final``.

    Raises:
        ValueError: If ``method`` is not ``"MinMax"`` (previously any value
            silently ran min-max scaling anyway), or if ``fit`` is False and
            no ``scaler`` was provided (previously an opaque AttributeError).
    """
    if method != "MinMax":
        raise ValueError(f"Unsupported scaling method: {method!r}")
    print(f"Running {method} scaling")
    if fit:
        scaler = MinMaxScaler()
        scaler.fit(df[numerical_cols])
    elif scaler is None:
        raise ValueError("scaler must be provided when fit=False")
    array_transformed = scaler.transform(df[numerical_cols])
    df_transformed = pd.DataFrame(
        array_transformed,
        columns=numerical_cols,
        index=df.index,
    )
    df_final = aggregate_transform_df(df, df_transformed, numerical_cols)
    if fit:
        return df_final, scaler
    return df_final
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def fill_nans(df, cols, method="mean"):
    """Return a copy of ``df`` with NaNs in ``cols`` filled.

    Args:
        df: Input DataFrame (not modified).
        cols: Names of the columns to fill.
        method: ``"mean"`` fills with the column mean; ``"mode"`` fills with
            the column's most frequent value.

    Returns:
        A copy of ``df`` with NaNs in the selected columns filled.

    Raises:
        ValueError: If ``method`` is not "mean" or "mode" (previously an
            unknown method silently returned the unfilled copy).
    """
    if method not in ("mean", "mode"):
        raise ValueError(f"Unsupported fill method: {method!r}")
    df_filled = df.copy()
    print(f"Fill nans in {cols} with the {method} method")
    for col in cols:
        if method == "mean":
            fill_value = df[col].mean()
        else:
            # BUG FIX: ``fillna(df[col].mode())`` passed a Series, which
            # fillna aligns by *index* instead of broadcasting — row i got
            # the i-th mode (or stayed NaN). Use the first modal value.
            modes = df[col].mode()
            if modes.empty:  # all-NaN column has no mode; leave it as-is
                continue
            fill_value = modes.iloc[0]
        df_filled[col] = df_filled[col].fillna(fill_value)
    return df_filled
|
| 69 |
+
|
| 70 |
+
def encode_and_predict(model_path, data, one_hot_scaler, minmax_scaler_inputs, minmax_scaler_targets, categorical_columns, numerical_columns, target_columns, explainer=None):
    """Preprocess ``data`` with the pre-fitted transformers and run the model.

    Applies the fitted one-hot encoder and input min-max scaler, then
    delegates model loading / prediction / SHAP handling to :func:`predict`
    (previously that logic was duplicated verbatim here).

    Args:
        model_path: Path to the saved Keras model (``.h5``).
        data: Raw input DataFrame.
        one_hot_scaler: Fitted ``OneHotEncoder`` for ``categorical_columns``.
        minmax_scaler_inputs: Fitted ``MinMaxScaler`` for ``numerical_columns``.
        minmax_scaler_targets: Fitted target scaler. Unused in this function;
            kept for interface compatibility — presumably callers use it to
            inverse-transform predictions (TODO confirm).
        categorical_columns: Names of the categorical feature columns.
        numerical_columns: Names of the numerical feature columns.
        target_columns: Names of the target columns. Unused in this function;
            kept for interface compatibility.
        explainer: Optional SHAP explainer; when truthy, SHAP values for the
            last 10 rows are returned as well.

    Returns:
        ``model.predict(data)``, or with ``explainer`` the tuple
        ``(predictions, feature_columns, shap_values)``.
    """
    data = encode_categorical(data, categorical_columns, encoder=one_hot_scaler, fit=False)
    data = scale_numerical(data, numerical_columns, scaler=minmax_scaler_inputs, fit=False)
    # Reuse predict() so the load/SHAP logic lives in exactly one place.
    return predict(model_path, data, explainer)
|
| 78 |
+
|
| 79 |
+
def predict(model_path, data, explainer=None):
    """Load the Keras model at ``model_path`` and predict on ``data``.

    Args:
        model_path: Path to the saved Keras model (``.h5``).
        data: Preprocessed feature DataFrame.
        explainer: Optional SHAP explainer; when truthy, SHAP values for the
            last 10 rows of ``data`` are returned as well.

    Returns:
        ``model.predict(data)``, or with ``explainer`` the tuple
        ``(predictions, feature_columns, shap_values)``.
    """
    model = tf.keras.models.load_model(model_path)
    predictions = model.predict(data)
    if not explainer:
        return predictions
    # NOTE(review): SHAP values are computed on the last 10 rows only,
    # presumably to bound runtime — confirm with callers.
    return predictions, data.columns, explainer.shap_values(data[-10:])
|
| 85 |
+
|
| 86 |
+
def unpickle_file(path):
    """Deserialize and return the object stored in the pickle file at ``path``.

    NOTE(review): unpickling executes arbitrary code during load — only call
    this on trusted, locally-produced files.
    """
    with open(path, "rb") as handle:
        return pickle.Unpickler(handle).load()
|