Upload 6 files
Browse files- explainer.bz2 +3 -0
- minmax_scaler_inputs.pickle +3 -0
- minmax_scaler_targets.pickle +3 -0
- model_coatings.h5 +3 -0
- one_hot_scaler.pickle +3 -0
- utils.py +90 -0
explainer.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f19ed00f940465f3fbbba2c257fd80902121bb273ae0423925edea9ac6fc244f
|
| 3 |
+
size 34712
|
minmax_scaler_inputs.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd6b1f249a8231605f1900d2ba952f6c0b427d2d1c17c66753512f5b9213ae78
|
| 3 |
+
size 961
|
minmax_scaler_targets.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb6e6028b752d7f5549b77ec1399aa5a6e1ca1e62bf617789b369d5212df0871
|
| 3 |
+
size 731
|
model_coatings.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac892df7538832b277de18057230882d261b57453cebeb5178a02839b20f3dcb
|
| 3 |
+
size 58280
|
one_hot_scaler.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85887e5d20533a688fcf3a6f16dc3c19ff7e3a996fdb4d00771db4ca6875e357
|
| 3 |
+
size 619
|
utils.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Utils functions for preprocessing"""
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
|
| 4 |
+
import pickle
|
| 5 |
+
import tensorflow as tf
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def aggregate_transform_df(original_df, transformed_df, transformed_cols):
    """Swap raw columns for their transformed counterparts.

    Drops ``transformed_cols`` from ``original_df`` and merges the columns of
    ``transformed_df`` back in by index, so row alignment is preserved.

    Args:
        original_df: Source DataFrame still containing the raw columns.
        transformed_df: DataFrame holding the transformed columns, indexed
            like ``original_df``.
        transformed_cols: Column names in ``original_df`` that were
            transformed and must be dropped.

    Returns:
        A new DataFrame with the raw columns replaced by the transformed ones.
    """
    # Fix: removed leftover debug print(...) of the three frame shapes —
    # this is a library helper and should not write to stdout.
    df_final = original_df.drop(columns=transformed_cols)
    # Index-based merge keeps rows aligned even after column replacement.
    return df_final.merge(transformed_df, left_index=True, right_index=True)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def encode_categorical(df, categorical_cols, method="OneHot", encoder=None, fit=True):
    """One-hot encode the categorical columns of a DataFrame.

    Args:
        df: Input DataFrame.
        categorical_cols: Names of the categorical columns to encode.
        method: Encoding method; only ``"OneHot"`` is supported.
        encoder: A fitted encoder to reuse when ``fit`` is False.
        fit: When True, fit a fresh ``OneHotEncoder`` on ``df`` and return it
            alongside the encoded frame; when False, apply ``encoder`` as-is.

    Returns:
        ``(df_final, encoder)`` when ``fit`` is True, otherwise ``df_final``.

    Raises:
        ValueError: If ``method`` is not ``"OneHot"`` (previously any value
            silently ran one-hot encoding anyway), or if ``fit`` is False and
            no ``encoder`` was provided (previously an opaque AttributeError).
    """
    if method != "OneHot":
        raise ValueError(f"Unsupported encoding method: {method!r}")
    print(f"Running {method} encoding")
    if fit:
        encoder = OneHotEncoder()
        encoder.fit(df[categorical_cols])
    elif encoder is None:
        raise ValueError("encoder must be provided when fit=False")
    # OneHotEncoder returns a sparse matrix by default; densify for pandas.
    array_transformed = encoder.transform(df[categorical_cols]).toarray()
    df_encoded = pd.DataFrame(
        array_transformed,
        columns=encoder.get_feature_names_out(),
        index=df.index,
    )
    df_final = aggregate_transform_df(df, df_encoded, categorical_cols)
    if fit:
        return df_final, encoder
    return df_final
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def scale_numerical(df, numerical_cols, method="MinMax", scaler=None, fit=True):
    """Scale the numerical columns of a DataFrame.

    Args:
        df: Input DataFrame.
        numerical_cols: Names of the numerical columns to scale.
        method: Scaling method; only ``"MinMax"`` is supported.
        scaler: A fitted scaler to reuse when ``fit`` is False.
        fit: When True, fit a fresh ``MinMaxScaler`` on ``df`` and return it
            alongside the scaled frame; when False, apply ``scaler`` as-is.

    Returns:
        ``(df_final, scaler)`` when ``fit`` is True, otherwise ``df_final``.

    Raises:
        ValueError: If ``method`` is not ``"MinMax"`` (previously any value
            silently ran min-max scaling anyway), or if ``fit`` is False and
            no ``scaler`` was provided (previously an opaque AttributeError).
    """
    if method != "MinMax":
        raise ValueError(f"Unsupported scaling method: {method!r}")
    print(f"Running {method} scaling")
    if fit:
        scaler = MinMaxScaler()
        scaler.fit(df[numerical_cols])
    elif scaler is None:
        raise ValueError("scaler must be provided when fit=False")
    array_transformed = scaler.transform(df[numerical_cols])
    df_transformed = pd.DataFrame(
        array_transformed,
        columns=numerical_cols,
        index=df.index,
    )
    df_final = aggregate_transform_df(df, df_transformed, numerical_cols)
    if fit:
        return df_final, scaler
    return df_final
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def fill_nans(df, cols, method="mean"):
    """Return a copy of ``df`` with NaNs in ``cols`` filled.

    Args:
        df: Input DataFrame (not modified).
        cols: Names of the columns to fill.
        method: ``"mean"`` fills with the column mean; ``"mode"`` fills with
            the column's most frequent value.

    Returns:
        A copy of ``df`` with NaNs in the selected columns filled.

    Raises:
        ValueError: If ``method`` is not "mean" or "mode" (previously an
            unknown method silently returned the unfilled copy).
    """
    if method not in ("mean", "mode"):
        raise ValueError(f"Unsupported fill method: {method!r}")
    df_filled = df.copy()
    print(f"Fill nans in {cols} with the {method} method")
    for col in cols:
        if method == "mean":
            fill_value = df[col].mean()
        else:
            # BUG FIX: ``fillna(df[col].mode())`` passed a Series, which
            # fillna aligns by *index* instead of broadcasting — row i got
            # the i-th mode (or stayed NaN). Use the first modal value.
            modes = df[col].mode()
            if modes.empty:  # all-NaN column has no mode; leave it as-is
                continue
            fill_value = modes.iloc[0]
        df_filled[col] = df_filled[col].fillna(fill_value)
    return df_filled
|
| 69 |
+
|
| 70 |
+
def encode_and_predict(model_path, data, one_hot_scaler, minmax_scaler_inputs, minmax_scaler_targets, categorical_columns, numerical_columns, target_columns, explainer=None):
    """Preprocess ``data`` with the pre-fitted transformers and run the model.

    Applies the fitted one-hot encoder and input min-max scaler, then
    delegates model loading / prediction / SHAP handling to :func:`predict`
    (previously that logic was duplicated verbatim here).

    Args:
        model_path: Path to the saved Keras model (``.h5``).
        data: Raw input DataFrame.
        one_hot_scaler: Fitted ``OneHotEncoder`` for ``categorical_columns``.
        minmax_scaler_inputs: Fitted ``MinMaxScaler`` for ``numerical_columns``.
        minmax_scaler_targets: Fitted target scaler. Unused in this function;
            kept for interface compatibility — presumably callers use it to
            inverse-transform predictions (TODO confirm).
        categorical_columns: Names of the categorical feature columns.
        numerical_columns: Names of the numerical feature columns.
        target_columns: Names of the target columns. Unused in this function;
            kept for interface compatibility.
        explainer: Optional SHAP explainer; when truthy, SHAP values for the
            last 10 rows are returned as well.

    Returns:
        ``model.predict(data)``, or with ``explainer`` the tuple
        ``(predictions, feature_columns, shap_values)``.
    """
    data = encode_categorical(data, categorical_columns, encoder=one_hot_scaler, fit=False)
    data = scale_numerical(data, numerical_columns, scaler=minmax_scaler_inputs, fit=False)
    # Reuse predict() so the load/SHAP logic lives in exactly one place.
    return predict(model_path, data, explainer)
|
| 78 |
+
|
| 79 |
+
def predict(model_path, data, explainer=None):
    """Load the Keras model at ``model_path`` and predict on ``data``.

    Args:
        model_path: Path to the saved Keras model (``.h5``).
        data: Preprocessed feature DataFrame.
        explainer: Optional SHAP explainer; when truthy, SHAP values for the
            last 10 rows of ``data`` are returned as well.

    Returns:
        ``model.predict(data)``, or with ``explainer`` the tuple
        ``(predictions, feature_columns, shap_values)``.
    """
    model = tf.keras.models.load_model(model_path)
    predictions = model.predict(data)
    if not explainer:
        return predictions
    # NOTE(review): SHAP values are computed on the last 10 rows only,
    # presumably to bound runtime — confirm with callers.
    return predictions, data.columns, explainer.shap_values(data[-10:])
|
| 85 |
+
|
| 86 |
+
def unpickle_file(path):
    """Deserialize and return the object stored in the pickle file at ``path``.

    NOTE(review): unpickling executes arbitrary code during load — only call
    this on trusted, locally-produced files.
    """
    with open(path, "rb") as handle:
        return pickle.Unpickler(handle).load()
|