snajmark committed on
Commit
f66f57c
·
1 Parent(s): 4c965c2

Upload 6 files

Browse files
explainer.bz2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f19ed00f940465f3fbbba2c257fd80902121bb273ae0423925edea9ac6fc244f
3
+ size 34712
minmax_scaler_inputs.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd6b1f249a8231605f1900d2ba952f6c0b427d2d1c17c66753512f5b9213ae78
3
+ size 961
minmax_scaler_targets.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb6e6028b752d7f5549b77ec1399aa5a6e1ca1e62bf617789b369d5212df0871
3
+ size 731
model_coatings.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac892df7538832b277de18057230882d261b57453cebeb5178a02839b20f3dcb
3
+ size 58280
one_hot_scaler.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85887e5d20533a688fcf3a6f16dc3c19ff7e3a996fdb4d00771db4ca6875e357
3
+ size 619
utils.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Utils functions for preprocessing"""
2
+ import pandas as pd
3
+ from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
4
+ import pickle
5
+ import tensorflow as tf
6
+
7
+
8
def aggregate_transform_df(original_df, transformed_df, transformed_cols):
    """
    Helper function to aggregate the transformed columns with the original dataset.

    The columns listed in ``transformed_cols`` are dropped from ``original_df``
    and the columns of ``transformed_df`` are appended, matching rows by index.
    The shapes of both inputs and of the result are printed.

    Args:
        original_df: DataFrame holding the untransformed data.
        transformed_df: DataFrame holding the transformed columns.
        transformed_cols: Labels of the columns of ``original_df`` to drop.

    Returns:
        A new DataFrame made of the remaining original columns followed by
        the columns of ``transformed_df``.
    """
    print(original_df.shape)
    print(transformed_df.shape)
    remaining = original_df.drop(columns=transformed_cols)
    same_index = remaining.index.equals(transformed_df.index)
    if same_index and not remaining.index.is_unique:
        # An index-on-index merge pairs every row with every row sharing the
        # same label, so duplicated labels would multiply the rows. Both
        # frames are aligned row by row here, so glue them positionally and
        # restore the original index afterwards.
        df_final = pd.concat(
            [remaining.reset_index(drop=True), transformed_df.reset_index(drop=True)],
            axis=1,
        )
        df_final.index = remaining.index
    else:
        df_final = remaining.merge(transformed_df, left_index=True, right_index=True)
    print(df_final.shape)
    return df_final
18
+
19
+
20
def encode_categorical(df, categorical_cols, method="OneHot", encoder=None, fit=True):
    """
    Returns the dataframe where the categorical columns have been replaced
    according to the method selected

    Right now only OneHot is supported; ``method`` is only used in the
    printed message.

    Args:
        df: DataFrame containing the columns to encode.
        categorical_cols: Labels of the columns to encode.
        method: Name of the encoding method (informational only).
        encoder: Already fitted encoder, used when ``fit`` is False. It is
            ignored when ``fit`` is True, where a new ``OneHotEncoder`` is
            created and fitted on ``df[categorical_cols]``.
        fit: Whether to fit a new encoder on ``df``.

    Returns:
        ``(df_final, encoder)`` when ``fit`` is True, otherwise ``df_final``.

    Raises:
        ValueError: If ``fit`` is False and no encoder is supplied.
    """
    print(f"Running {method} encoding")
    if fit:
        encoder = OneHotEncoder()
        encoder.fit(df[categorical_cols])
    elif encoder is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("An already fitted encoder is required when fit=False")
    array_transformed = encoder.transform(df[categorical_cols])
    # A supplied encoder may be configured for dense output, in which case
    # there is no sparse matrix to convert.
    if hasattr(array_transformed, "toarray"):
        array_transformed = array_transformed.toarray()
    df_encoded = pd.DataFrame(
        array_transformed,
        columns=encoder.get_feature_names_out(),
        index=df.index,
    )
    df_final = aggregate_transform_df(df, df_encoded, categorical_cols)
    if fit:
        return df_final, encoder
    return df_final
38
+
39
+
40
def scale_numerical(df, numerical_cols, method="MinMax", scaler=None, fit=True):
    """
    Returns the dataframe where the numerical columns have been scaled
    according to the method selected

    Right now only MinMax is supported; ``method`` is only used in the
    printed message.

    Args:
        df: DataFrame containing the columns to scale.
        numerical_cols: Labels of the columns to scale.
        method: Name of the scaling method (informational only).
        scaler: Already fitted scaler, used when ``fit`` is False. It is
            ignored when ``fit`` is True, where a new ``MinMaxScaler`` is
            created and fitted on ``df[numerical_cols]``.
        fit: Whether to fit a new scaler on ``df``.

    Returns:
        ``(df_final, scaler)`` when ``fit`` is True, otherwise ``df_final``.

    Raises:
        ValueError: If ``fit`` is False and no scaler is supplied.
    """
    print(f"Running {method} scaling")
    if fit:
        scaler = MinMaxScaler()
        scaler.fit(df[numerical_cols])
    elif scaler is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("An already fitted scaler is required when fit=False")
    array_transformed = scaler.transform(df[numerical_cols])
    df_transformed = pd.DataFrame(
        array_transformed,
        columns=numerical_cols,
        index=df.index,
    )
    df_final = aggregate_transform_df(df, df_transformed, numerical_cols)
    if fit:
        return df_final, scaler
    return df_final
58
+
59
+
60
def fill_nans(df, cols, method="mean"):
    """
    Return a copy of ``df`` where the missing values of ``cols`` are filled.

    Args:
        df: Input DataFrame; it is not modified.
        cols: Labels of the columns whose missing values must be filled.
        method: ``"mean"`` to fill with the column mean, ``"mode"`` to fill
            with the column's most frequent value (the first one returned by
            ``Series.mode`` when several values are tied).

    Returns:
        A new DataFrame with the requested columns filled. With the mode
        method, a column that has no non-missing value is left unchanged.

    Raises:
        ValueError: If ``method`` is neither ``"mean"`` nor ``"mode"``.
    """
    if method not in ("mean", "mode"):
        raise ValueError(
            f"Unsupported fill method: {method!r} (expected 'mean' or 'mode')"
        )
    df_filled = df.copy()
    print(f"Fill nans in {cols} with the {method} method")
    for col in cols:
        if method == "mean":
            fill_value = df[col].mean()
        else:
            # Series.mode() returns a Series; passing it to fillna would align
            # on index labels and leave most gaps unfilled, so take a scalar.
            modes = df[col].mode()
            if modes.empty:
                continue
            fill_value = modes.iloc[0]
        df_filled[col] = df_filled[col].fillna(fill_value)
    return df_filled
69
+
70
def encode_and_predict(model_path, data, one_hot_scaler, minmax_scaler_inputs, minmax_scaler_targets, categorical_columns, numerical_columns, target_columns, explainer=None):
    """
    Preprocess ``data`` with already fitted transformers and run the model on it.

    The model is loaded from ``model_path`` on every call. The categorical
    columns are encoded with ``one_hot_scaler`` and the numerical columns are
    scaled with ``minmax_scaler_inputs`` before prediction.

    NOTE(review): ``minmax_scaler_targets`` and ``target_columns`` are accepted
    but not used in this function; the predictions are returned exactly as the
    model produces them.

    Returns:
        The model predictions when ``explainer`` is falsy. Otherwise a tuple
        ``(predictions, columns, shap_values)`` where ``columns`` are the
        columns of the preprocessed data and ``shap_values`` is computed by
        the explainer on the slice ``[-10:]`` of the preprocessed data.
    """
    model = tf.keras.models.load_model(model_path)
    encoded = encode_categorical(data, categorical_columns, encoder=one_hot_scaler, fit=False)
    features = scale_numerical(encoded, numerical_columns, scaler=minmax_scaler_inputs, fit=False)
    if not explainer:
        return model.predict(features)
    predictions = model.predict(features)
    feature_names = features.columns
    shap_values = explainer.shap_values(features[-10:])
    return predictions, feature_names, shap_values
78
+
79
def predict(model_path, data, explainer=None):
    """
    Load the model stored at ``model_path`` and run it on ``data``.

    ``data`` is passed to the model unchanged; no encoding or scaling is
    applied here.

    Returns:
        The model predictions when ``explainer`` is falsy. Otherwise a tuple
        ``(predictions, columns, shap_values)`` where ``columns`` is
        ``data.columns`` and ``shap_values`` is computed by the explainer on
        the slice ``data[-10:]``.
    """
    model = tf.keras.models.load_model(model_path)
    if not explainer:
        return model.predict(data)
    predictions = model.predict(data)
    column_labels = data.columns
    shap_values = explainer.shap_values(data[-10:])
    return predictions, column_labels, shap_values
85
+
86
def unpickle_file(path):
    """
    Read the file at ``path`` in binary mode and return the object pickled in it.
    """
    # NOTE: unpickling can execute arbitrary code; only use with trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)