import pandas as pd
import pymatgen as mg
from pymatgen.core.structure import Composition
import numpy as np
import tensorflow as tf
import shap
import joblib
import matplotlib.pyplot as plt

# Path of the serialized SHAP explainer that interpret() loads with joblib.
explainer_filename = "explainer.bz2"

# Feature labels handed to shap.summary_plot() by interpret().
feature_names = [
    'PROPERTY: Calculated Density (g/cm$^3$)',
    'PROPERTY: Calculated Young modulus (GPa)',
    'Microstructure B2',
    'Microstructure B2+BCC',
    'Microstructure B2+L12',
    'Microstructure B2+Laves+Sec.',
    'Microstructure B2+Sec.',
    'Microstructure BCC',
    'Microstructure BCC+B2',
    'Microstructure BCC+B2+FCC',
    'Microstructure BCC+B2+FCC+Sec.',
    'Microstructure BCC+B2+L12',
    'Microstructure BCC+B2+Laves',
    'Microstructure BCC+B2+Sec.',
    'Microstructure BCC+BCC',
    'Microstructure BCC+BCC+HCP',
    'Microstructure BCC+BCC+Laves',
    'Microstructure BCC+BCC+Laves(C14)',
    'Microstructure BCC+BCC+Laves(C15)',
    'Microstructure BCC+FCC',
    'Microstructure BCC+HCP',
    'Microstructure BCC+Laves',
    'Microstructure BCC+Laves(C14)',
    'Microstructure BCC+Laves(C15)',
    'Microstructure BCC+Laves+Sec.',
    'Microstructure BCC+Sec.',
    'Microstructure FCC',
    'Microstructure FCC+B2',
    'Microstructure FCC+B2+Sec.',
    'Microstructure FCC+BCC',
    'Microstructure FCC+BCC+B2',
    'Microstructure FCC+BCC+B2+Sec.',
    'Microstructure FCC+BCC+BCC',
    'Microstructure FCC+BCC+Sec.',
    'Microstructure FCC+FCC',
    'Microstructure FCC+HCP',
    'Microstructure FCC+HCP+Sec.',
    'Microstructure FCC+L12',
    'Microstructure FCC+L12+B2',
    'Microstructure FCC+L12+Sec.',
    'Microstructure FCC+Laves',
    'Microstructure FCC+Laves(C14)',
    'Microstructure FCC+Laves+Sec.',
    'Microstructure FCC+Sec.',
    'Microstructure L12+B2',
    'Microstructure Laves(C14)+Sec.',
    'Microstructure OTHER',
    'Preprocessing method ANNEAL',
    'Preprocessing method CAST',
    'Preprocessing method OTHER',
    'Preprocessing method POWDER',
    'Preprocessing method WROUGHT',
    'BCC/FCC/other BCC',
    'BCC/FCC/other FCC',
    'BCC/FCC/other OTHER',
    'Single/Multiphase ',
    'Single/Multiphase M',
    'Single/Multiphase S',
]

# Young's modulus values used instead of the pymatgen element data for the
# listed element symbols (presumably GPa, matching the feature label above).
_YOUNGS_MODULUS_OVERRIDES = {
    'C': 1050,  # use diamond form for carbon
    'B': 362,   # use minimum value for Boron Carbide
    'Mo': 329,
    'Co': 209,
}


def return_feature_names():
    """Return the module-level list of feature labels (not a copy)."""
    return feature_names


def normalize_and_alphabetize_formula(formula):
    """Normalize a composition label so equal compositions compare equal.

    Atomic fractions are scaled so the largest one equals 1, rounded to three
    decimals, and the result is rendered as an alphabetically ordered formula.
    Used to enable matching / groupby on compositions.

    Args:
        formula: Chemical formula string; a falsy value yields ``None``.

    Returns:
        The normalized alphabetical formula, or ``None`` when ``formula`` is
        falsy or cannot be processed (in which case it is printed as INVALID).
    """
    if not formula:
        return None
    try:
        comp = Composition(formula)
        weights = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        largest = max(weights)
        normalized_weights = [round(w / largest, 3) for w in weights]
        normalized_comp = "".join(
            str(element) + str(weight)
            for element, weight in zip(comp.elements, normalized_weights)
        )
        return Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Best effort by design: report the offending label and carry on, but
        # do not swallow KeyboardInterrupt / SystemExit as a bare except would.
        print("INVALID: ", formula)
        return None


def calculate_density(formula):
    """Calculate density based on the Rule of Mixtures (ROM).

    The result is the atomic-fraction-weighted atomic mass divided by the
    atomic-fraction-weighted molar volume of the constituent elements.

    Args:
        formula: Chemical formula accepted by ``Composition``.

    Returns:
        The density rounded to one decimal place (presumably g/cm^3, given
        pymatgen's units for atomic mass and molar volume).
    """
    comp = Composition(formula)
    weights = np.array([comp.get_atomic_fraction(e) for e in comp.elements])
    vols = np.array([e.molar_volume for e in comp.elements])
    atomic_masses = np.array([e.atomic_mass for e in comp.elements])
    val = np.sum(weights * atomic_masses) / np.sum(weights * vols)
    return round(val, 1)


def calculate_youngs_modulus(formula):
    """Calculate Young's modulus based on the Rule of Mixtures (ROM).

    Each element's modulus is weighted by atomic fraction times molar volume.
    Elements listed in ``_YOUNGS_MODULUS_OVERRIDES`` use the hard-coded value;
    all others use ``Element.youngs_modulus``.

    Args:
        formula: Chemical formula accepted by ``Composition``.

    Returns:
        The weighted modulus rounded to the nearest integer, or the empty
        string ``''`` when any element has no modulus value (the formula and
        the collected values are printed in that case).
    """
    comp = Composition(formula)
    weights = np.array([comp.get_atomic_fraction(e) for e in comp.elements])
    vols = np.array([e.molar_volume for e in comp.elements])

    moduli = []
    for e in comp.elements:
        symbol = str(e)
        if symbol in _YOUNGS_MODULUS_OVERRIDES:
            moduli.append(_YOUNGS_MODULUS_OVERRIDES[symbol])
        else:
            moduli.append(e.youngs_modulus)

    ym_vals = np.array(moduli)
    # Test the Python list with an identity check rather than relying on
    # elementwise comparison of an ndarray against None.
    if any(m is None for m in moduli):
        print(formula, ym_vals)
        return ''

    val = np.sum(weights * vols * ym_vals) / np.sum(weights * vols)
    return int(round(val, 0))


def interpret(input):
    """Draw a SHAP summary plot for ``input`` and return the figure.

    The current matplotlib figure is cleared, the explainer stored at
    ``explainer_filename`` is loaded, and the first entry of its SHAP values
    is plotted with ``feature_names`` as labels.

    Args:
        input: Samples passed to the explainer's ``shap_values`` and to
            ``shap.summary_plot``. The name shadows the builtin but is kept
            because it is part of the public signature.

    Returns:
        A tuple ``(figure, None)`` where ``figure`` is the current matplotlib
        figure after plotting.
    """
    plt.clf()
    ex = joblib.load(filename=explainer_filename)
    shap_values = ex.shap_values(input)
    shap.summary_plot(shap_values[0], input, feature_names=feature_names)
    fig = plt.gcf()
    return fig, None


def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    """One-hot encode ``X`` using the "Num classes microstructure" count."""
    return tf.keras.utils.to_categorical(
        X, num_classes_one_hot["Num classes microstructure"])


def to_categorical_num_classes_processing(X, num_classes_one_hot):
    """One-hot encode ``X`` using the "Num classes preprocessing" count."""
    return tf.keras.utils.to_categorical(
        X, num_classes_one_hot["Num classes preprocessing"])


def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    """One-hot encode ``X`` using the "Num classes bcc/fcc/other" count."""
    return tf.keras.utils.to_categorical(
        X, num_classes_one_hot["Num classes bcc/fcc/other"])


def to_categorical_single_multiphase(X, num_classes_one_hot):
    """One-hot encode ``X`` using the "Num classes single/multiphase" count."""
    return tf.keras.utils.to_categorical(
        X, num_classes_one_hot["Num classes single/multiphase"])


def return_num_classes_one_hot(df):
    """Count the distinct values of each categorical property column.

    Args:
        df: DataFrame containing the columns 'PROPERTY: Microstructure',
            'PROPERTY: Processing method', 'PROPERTY: Single/Multiphase' and
            'PROPERTY: BCC/FCC/other'.

    Returns:
        A dict with the keys "Num classes microstructure",
        "Num classes preprocessing", "Num classes single/multiphase" and
        "Num classes bcc/fcc/other", each mapped to the number of unique
        values found in the corresponding column.
    """
    num_classes_microstructure = len(np.unique(np.asarray(df['PROPERTY: Microstructure'])))
    num_classes_processing = len(np.unique(np.asarray(df['PROPERTY: Processing method'])))
    num_classes_single_multiphase = len(np.unique(np.asarray(df['PROPERTY: Single/Multiphase'])))
    num_classes_bcc_fcc_other = len(np.unique(np.asarray(df['PROPERTY: BCC/FCC/other'])))
    return {"Num classes microstructure": num_classes_microstructure,
            "Num classes preprocessing": num_classes_processing,
            "Num classes single/multiphase": num_classes_single_multiphase,
            "Num classes bcc/fcc/other": num_classes_bcc_fcc_other}


def turn_into_one_hot(X, mapping_dict):
    """Replace the categorical property columns of ``X`` with one-hot columns.

    Each of the four 'PROPERTY: ...' categorical columns is one-hot encoded
    with a fixed number of classes (45 / 5 / 3 / 3). Class ``i`` of a column
    becomes a new column named ``<prefix> + label``, where ``label`` is the
    ``i``-th key of ``mapping_dict[<column>]``.

    Args:
        X: DataFrame holding the four categorical columns; their values are
            assumed to be integer class codes (required by ``to_categorical``).
            The frame is not modified.
        mapping_dict: Maps each categorical column name to a dict whose keys,
            in iteration order, are the labels of class 0, 1, 2, ...

    Returns:
        A new DataFrame with the one-hot columns added and the four source
        columns removed.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    one_hot = X.copy()
    num_classes_one_hot = {'Num classes microstructure': 45,
                           'Num classes preprocessing': 5,
                           'Num classes single/multiphase': 3,
                           'Num classes bcc/fcc/other': 3}

    # (source column, prefix of the generated columns, encoder); the order
    # determines the order in which the generated columns are appended.
    encodings = (
        ("PROPERTY: Microstructure", "Microstructure ",
         to_categorical_num_classes_microstructure),
        ("PROPERTY: Processing method", "Preprocessing method ",
         to_categorical_num_classes_processing),
        ("PROPERTY: BCC/FCC/other", "BCC/FCC/other ",
         to_categorical_bcc_fcc_other),
        ("PROPERTY: Single/Multiphase", "Single/Multiphase ",
         to_categorical_single_multiphase),
    )

    for source_column, prefix, encoder in encodings:
        encoded = X[source_column].apply(
            encoder, num_classes_one_hot=num_classes_one_hot)
        # Expand each per-row vector into one column per class index.
        flattened = encoded.apply(pd.Series)
        labels = list(mapping_dict[source_column].keys())
        for class_index in flattened.columns:
            one_hot[prefix + str(labels[int(class_index)])] = flattened[class_index]

    # The '... One Hot' names are helper columns this function historically
    # removed from its output; errors="ignore" drops them only if present.
    return one_hot.drop(
        columns=["PROPERTY: Microstructure", "Microstructure One Hot",
                 "BCC/FCC/other One Hot", "Single/Multiphase One Hot",
                 "Processing Method One Hot", "PROPERTY: Processing method",
                 "PROPERTY: BCC/FCC/other", "PROPERTY: Single/Multiphase"],
        errors="ignore")