"""Helpers for composition-derived features, one-hot encoding and SHAP plots."""

import pandas as pd
import pymatgen as mg
from pymatgen.core.structure import Composition
import numpy as np
import tensorflow as tf
import shap
import joblib
import matplotlib.pyplot as plt

# Explainer path
explainer_filename = "explainer.bz2"

# Column labels handed to the SHAP summary plot in ``interpret``.
# NOTE(review): presumably this must match the column order of the model
# input -- confirm against the training pipeline.
feature_names = [
    'PROPERTY: Calculated Density (g/cm$^3$)',
    'PROPERTY: Calculated Young modulus (GPa)',
    'PROPERTY: Metal Al',
    'PROPERTY: Metal Co',
    'PROPERTY: Metal Fe',
    'PROPERTY: Metal Ni',
    'PROPERTY: Metal Si',
    'PROPERTY: Metal Cr',
    'PROPERTY: Metal Nb',
    'PROPERTY: Metal Ti',
    'PROPERTY: Metal Mn',
    'PROPERTY: Metal V',
    'PROPERTY: Metal Mo',
    'PROPERTY: Metal Cu',
    'PROPERTY: Metal Ta',
    'PROPERTY: Metal Zr',
    'PROPERTY: Metal Hf',
    'PROPERTY: Metal W',
    'PROPERTY: Metal Zn',
    'PROPERTY: Metal Sn',
    'PROPERTY: Metal Re',
    'PROPERTY: Metal C',
    'PROPERTY: Metal Pd',
    'PROPERTY: Metal Sc',
    'PROPERTY: Metal Y',
    'Preprocessing method ANNEAL',
    'Preprocessing method CAST',
    'Preprocessing method OTHER',
    'Preprocessing method POWDER',
    'Preprocessing method WROUGHT',
    'BCC/FCC/other BCC',
    'BCC/FCC/other FCC',
    'BCC/FCC/other OTHER',
    'Single/Multiphase ',
    'Single/Multiphase M',
    'Single/Multiphase S',
]

# Young's modulus values (GPa) used instead of ``Element.youngs_modulus``.
# NOTE(review): the Mo and Co entries carried no explanation in the original
# code; presumably the library value is missing for them -- confirm.
_YOUNGS_MODULUS_OVERRIDES = {
    'C': 1050,  # use diamond form for carbon
    'B': 362,   # use minimum value for Boron Carbide
    'Mo': 329,
    'Co': 209,
}


def return_feature_names():
    """Return the module-level ``feature_names`` list (the same object, not a copy)."""
    return feature_names


def normalize_and_alphabetize_formula(formula):
    """Normalize a composition label so equal compositions compare equal.

    Used to enable matching / groupby on compositions.

    Each element's atomic fraction is divided by the largest fraction and
    rounded to 3 decimals; the result is rendered as an alphabetical formula.

    Args:
        formula: Composition string understood by ``Composition``.

    Returns:
        The normalized alphabetical formula, or ``None`` when ``formula`` is
        falsy or cannot be parsed (a message is printed in the latter case).
    """
    if not formula:
        return None
    try:
        comp = Composition(formula)
        weights = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        largest = max(weights)
        normalized_weights = [round(w / largest, 3) for w in weights]
        normalized_comp = "".join(
            str(ele) + str(weight)
            for ele, weight in zip(comp.elements, normalized_weights)
        )
        return Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Best effort: any parsing/normalization failure is reported and
        # mapped to None, but KeyboardInterrupt/SystemExit still propagate.
        print("INVALID: ", formula)
        return None


def calculate_density(formula):
    """Calculate density based on the Rule of Mixtures (ROM).

    Computes ``sum(x_i * atomic_mass_i) / sum(x_i * molar_volume_i)`` over the
    elements of the composition, where ``x_i`` is the atomic fraction.

    Args:
        formula: Composition string understood by ``Composition``.

    Returns:
        The density rounded to 1 decimal place.
    """
    comp = Composition(formula)
    # Explicit array so the element-wise products below do not rely on
    # list-times-ndarray operator fallback.
    weights = np.array([comp.get_atomic_fraction(e) for e in comp.elements])
    vols = np.array([e.molar_volume for e in comp.elements])
    atomic_masses = np.array([e.atomic_mass for e in comp.elements])
    val = np.sum(weights * atomic_masses) / np.sum(weights * vols)
    return round(val, 1)


def calculate_youngs_modulus(formula):
    """Calculate Young's modulus based on the Rule of Mixtures (ROM).

    The per-element moduli are averaged with weights
    ``atomic_fraction * molar_volume``. Elements listed in
    ``_YOUNGS_MODULUS_OVERRIDES`` use the hard-coded value; all others use
    ``Element.youngs_modulus``.

    Args:
        formula: Composition string understood by ``Composition``.

    Returns:
        The modulus rounded to the nearest integer, or the empty string ``''``
        when any element has no modulus value (the formula and the collected
        values are printed in that case).
    """
    comp = Composition(formula)
    weights = np.array([comp.get_atomic_fraction(e) for e in comp.elements])
    vols = np.array([e.molar_volume for e in comp.elements])

    moduli = [
        _YOUNGS_MODULUS_OVERRIDES[str(e)]
        if str(e) in _YOUNGS_MODULUS_OVERRIDES
        else e.youngs_modulus
        for e in comp.elements
    ]
    ym_vals = np.array(moduli)

    # Check the plain list by identity; ``None in ndarray`` depends on
    # element-wise ``== None`` comparison semantics.
    if any(value is None for value in moduli):
        print(formula, ym_vals)
        return ''

    val = np.sum(weights * vols * ym_vals) / np.sum(weights * vols)
    return int(round(val, 0))


def interpret(input):
    """Draw a SHAP summary plot for ``input`` and return the figure.

    The parameter name shadows the ``input`` builtin; it is kept so existing
    keyword callers continue to work.

    Args:
        input: Samples passed to the explainer's ``shap_values`` and to
            ``shap.summary_plot``.

    Returns:
        A ``(figure, None)`` tuple, where ``figure`` is the current matplotlib
        figure after plotting.
    """
    plt.clf()
    # NOTE: joblib.load unpickles the file; only load explainer files from a
    # trusted source.
    ex = joblib.load(filename=explainer_filename)
    shap_values = ex.shap_values(input)
    # Only the first entry of the explainer output is plotted.
    shap.summary_plot(shap_values[0], input, feature_names=feature_names)
    fig = plt.gcf()
    return fig, None


def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    """One-hot encode ``X`` using the "Num classes microstructure" class count."""
    return tf.keras.utils.to_categorical(X, num_classes_one_hot["Num classes microstructure"])


def to_categorical_num_classes_processing(X, num_classes_one_hot):
    """One-hot encode ``X`` using the "Num classes preprocessing" class count."""
    return tf.keras.utils.to_categorical(X, num_classes_one_hot["Num classes preprocessing"])


def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    """One-hot encode ``X`` using the "Num classes bcc/fcc/other" class count."""
    return tf.keras.utils.to_categorical(X, num_classes_one_hot["Num classes bcc/fcc/other"])


def to_categorical_single_multiphase(X, num_classes_one_hot):
    """One-hot encode ``X`` using the "Num classes single/multiphase" class count."""
    return tf.keras.utils.to_categorical(X, num_classes_one_hot["Num classes single/multiphase"])


def return_num_classes_one_hot(df):
    """Count the distinct values of each categorical property column in ``df``.

    Args:
        df: DataFrame containing the 'PROPERTY: Microstructure',
            'PROPERTY: Processing method', 'PROPERTY: Single/Multiphase' and
            'PROPERTY: BCC/FCC/other' columns.

    Returns:
        A dict mapping the "Num classes ..." keys used by the
        ``to_categorical_*`` helpers to the number of unique values found.
    """
    num_classes_microstructure = len(np.unique(np.asarray(df['PROPERTY: Microstructure'])))
    num_classes_processing = len(np.unique(np.asarray(df['PROPERTY: Processing method'])))
    num_classes_single_multiphase = len(np.unique(np.asarray(df['PROPERTY: Single/Multiphase'])))
    num_classes_bcc_fcc_other = len(np.unique(np.asarray(df['PROPERTY: BCC/FCC/other'])))
    return {
        "Num classes microstructure": num_classes_microstructure,
        "Num classes preprocessing": num_classes_processing,
        "Num classes single/multiphase": num_classes_single_multiphase,
        "Num classes bcc/fcc/other": num_classes_bcc_fcc_other,
    }


def turn_into_one_hot(X, mapping_dict):
    """Expand the categorical property columns of ``X`` into one-hot columns.

    For each of 'PROPERTY: Processing method', 'PROPERTY: BCC/FCC/other' and
    'PROPERTY: Single/Multiphase', every value is one-hot encoded with a fixed
    class count, one column per class is appended (named
    ``"<prefix> <label>"``), and the source column is dropped.

    The input frame is copied first, so the caller's DataFrame is left
    untouched.

    Args:
        X: DataFrame holding the three categorical columns.
            NOTE(review): values are presumably integer class codes, as
            required by ``to_categorical`` -- confirm against the caller.
        mapping_dict: Dict keyed by the source column names; each value is a
            dict whose keys are the class labels. The i-th key is used as the
            label of class index i, so key order must match the class codes.

    Returns:
        A new DataFrame with the one-hot columns appended and the three
        source columns removed.
    """
    # Class counts are fixed here rather than derived from the data.
    num_classes_one_hot = {
        'Num classes microstructure': 45,
        'Num classes preprocessing': 5,
        'Num classes single/multiphase': 3,
        'Num classes bcc/fcc/other': 3,
    }

    # (source column, output column prefix, per-value encoder)
    # NOTE: microstructure one-hot encoding is intentionally not applied.
    encodings = (
        ("PROPERTY: Processing method", "Preprocessing method ",
         to_categorical_num_classes_processing),
        ("PROPERTY: BCC/FCC/other", "BCC/FCC/other ",
         to_categorical_bcc_fcc_other),
        ("PROPERTY: Single/Multiphase", "Single/Multiphase ",
         to_categorical_single_multiphase),
    )

    one_hot = X.copy()
    for source_column, prefix, encoder in encodings:
        # Each cell becomes a 1-D one-hot vector; expanding with pd.Series
        # turns those vectors into integer-labelled columns 0..n-1.
        encoded = X[source_column].apply(encoder, num_classes_one_hot=num_classes_one_hot)
        flattened = encoded.apply(pd.Series)
        labels = list(mapping_dict[source_column].keys())
        for column in flattened.columns:
            one_hot[prefix + str(labels[int(column)])] = flattened[column]

    return one_hot.drop(columns=[source_column for source_column, _, _ in encodings])