# snajmark's picture
# Update utils.py
# 1ba1cd6
import pandas as pd
import pymatgen as mg
from pymatgen.core.structure import Composition
import numpy as np
import tensorflow as tf
import shap
import joblib
import matplotlib.pyplot as plt
# File name of the serialized explainer that interpret() loads with joblib.
explainer_filename = "explainer.bz2"

# Display names handed to shap.summary_plot in interpret().
# NOTE(review): the order presumably has to match the column order of the
# model input matrix -- confirm against the training pipeline before editing.
feature_names = (
    # Calculated physical properties.
    [
        'PROPERTY: Calculated Density (g/cm$^3$)',
        'PROPERTY: Calculated Young modulus (GPa)',
    ]
    # One column per element.
    + ['PROPERTY: Metal ' + symbol for symbol in (
        'Al', 'Co', 'Fe', 'Ni', 'Si', 'Cr', 'Nb', 'Ti', 'Mn', 'V', 'Mo', 'Cu',
        'Ta', 'Zr', 'Hf', 'W', 'Zn', 'Sn', 'Re', 'C', 'Pd', 'Sc', 'Y',
    )]
    # One-hot columns for the categorical inputs.
    + ['Preprocessing method ' + method for method in (
        'ANNEAL', 'CAST', 'OTHER', 'POWDER', 'WROUGHT',
    )]
    + ['BCC/FCC/other ' + lattice for lattice in ('BCC', 'FCC', 'OTHER')]
    # The empty label (trailing space in the resulting name) is intentional.
    + ['Single/Multiphase ' + phase for phase in ('', 'M', 'S')]
)
def return_feature_names():
    """Return the module-level ``feature_names`` list.

    The list object itself is returned, not a copy, so mutating the
    result also mutates the module-level list.
    """
    return feature_names
def normalize_and_alphabetize_formula(formula):
    """Normalize a composition label to enable matching / groupby on compositions.

    The formula is parsed with pymatgen's ``Composition``; every atomic
    fraction is divided by the largest fraction and rounded to three
    decimals, and the rebuilt composition is returned as its
    ``alphabetical_formula``.

    Args:
        formula: Composition string. Falsy values (``None``, ``""``) are
            treated as "no formula".

    Returns:
        The normalized, alphabetized formula string, or ``None`` when
        ``formula`` is falsy or could not be processed.
    """
    if not formula:
        return None
    try:
        comp = Composition(formula)
        weights = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        largest = max(weights)
        normalized_weights = [round(w / largest, 3) for w in weights]
        normalized_comp = "".join(
            str(x) + str(y) for x, y in zip(comp.elements, normalized_weights)
        )
        return Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Best-effort: report the bad label and carry on.  ``Exception``
        # (rather than a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed here.
        print("INVALID: ", formula)
        return None
def calculate_density(formula):
    """Calculate density based on the Rule of Mixtures (ROM).

    The formula is parsed with pymatgen's ``Composition``.  The result is
    the atomic-fraction-weighted sum of the elements' atomic masses
    divided by the atomic-fraction-weighted sum of their molar volumes,
    rounded to one decimal place.
    """
    composition = Composition(formula)
    fractions = [composition.get_atomic_fraction(el) for el in composition.elements]
    molar_volumes = np.array([el.molar_volume for el in composition.elements])
    masses = np.array([el.atomic_mass for el in composition.elements])

    weighted_mass = np.sum(fractions * masses)
    weighted_volume = np.sum(fractions * molar_volumes)
    return round(weighted_mass / weighted_volume, 1)
def calculate_youngs_modulus(formula):
    """Calculate Young's modulus based on the Rule of Mixtures (ROM).

    Each element's modulus is weighted by atomic fraction times molar
    volume.  A few elements use fixed values instead of pymatgen's
    ``youngs_modulus`` attribute.

    Returns:
        The weighted modulus rounded to an ``int``, or ``''`` (after
        printing the formula and the collected values) when any element
        has no modulus available.
    """
    fixed_moduli = {
        'C': 1050,  # use diamond form for carbon
        'B': 362,   # use minimum value for Boron Carbide
        'Mo': 329,
        'Co': 209,
    }

    composition = Composition(formula)
    fractions = np.array(
        [composition.get_atomic_fraction(el) for el in composition.elements]
    )
    molar_volumes = np.array([el.molar_volume for el in composition.elements])

    moduli = [
        fixed_moduli[str(el)] if str(el) in fixed_moduli else el.youngs_modulus
        for el in composition.elements
    ]
    ym_vals = np.array(moduli)

    if any(modulus is None for modulus in moduli):
        print(formula, ym_vals)
        return ''

    volume_weights = fractions * molar_volumes
    weighted_mean = np.sum(volume_weights * ym_vals) / np.sum(volume_weights)
    return int(round(weighted_mean, 0))
def interpret(input):
    """Draw a SHAP summary plot for ``input`` and return the figure.

    Clears the current matplotlib figure, loads the explainer from
    ``explainer_filename`` with joblib, computes SHAP values for
    ``input`` and plots the first entry of the result using
    ``feature_names`` as labels.

    Returns:
        A ``(figure, None)`` tuple, where ``figure`` is the current
        matplotlib figure after plotting.
    """
    plt.clf()
    explainer = joblib.load(filename=explainer_filename)
    values = explainer.shap_values(input)
    shap.summary_plot(values[0], input, feature_names=feature_names)
    return plt.gcf(), None
def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    """One-hot encode ``X`` with the class count stored under "Num classes microstructure"."""
    class_count = num_classes_one_hot["Num classes microstructure"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_num_classes_processing(X, num_classes_one_hot):
    """One-hot encode ``X`` with the class count stored under "Num classes preprocessing"."""
    class_count = num_classes_one_hot["Num classes preprocessing"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    """One-hot encode ``X`` with the class count stored under "Num classes bcc/fcc/other"."""
    class_count = num_classes_one_hot["Num classes bcc/fcc/other"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_single_multiphase(X, num_classes_one_hot):
    """One-hot encode ``X`` with the class count stored under "Num classes single/multiphase"."""
    class_count = num_classes_one_hot["Num classes single/multiphase"]
    return tf.keras.utils.to_categorical(X, class_count)
def return_num_classes_one_hot(df):
    """Count the distinct values of each categorical column of ``df``.

    Returns:
        A dict mapping each "Num classes ..." key to the number of unique
        values (as counted by ``np.unique``) in the matching
        "PROPERTY: ..." column.
    """
    column_for_key = {
        "Num classes microstructure": 'PROPERTY: Microstructure',
        "Num classes preprocessing": 'PROPERTY: Processing method',
        "Num classes single/multiphase": 'PROPERTY: Single/Multiphase',
        "Num classes bcc/fcc/other": 'PROPERTY: BCC/FCC/other',
    }
    return {
        key: len(np.unique(np.asarray(df[column])))
        for key, column in column_for_key.items()
    }
def turn_into_one_hot(X, mapping_dict):
    """Replace the categorical columns of ``X`` with named one-hot columns.

    The columns "PROPERTY: Processing method", "PROPERTY: BCC/FCC/other"
    and "PROPERTY: Single/Multiphase" are each one-hot encoded with the
    matching ``to_categorical_*`` helper.  Every resulting indicator
    column is named ``<prefix> <label>``, where ``<label>`` is the key at
    that position in ``mapping_dict[<source column>]``.  The three source
    columns are dropped from the result.

    Args:
        X: DataFrame holding the three categorical columns.  It is copied
            first, so the caller's frame is not modified.
        mapping_dict: Maps each source column name to a mapping whose key
            order gives the label for each one-hot position.
            NOTE(review): presumably label -> integer code; confirm
            against the caller that builds it.

    Returns:
        A new DataFrame with the one-hot columns appended and the source
        columns removed.
    """
    # Fixed class counts read by the encoder helpers.  Microstructure is
    # not encoded by this function; its entry is not read by the three
    # encoders used below.
    num_classes_one_hot = {'Num classes microstructure': 45, 'Num classes preprocessing': 5,
                           'Num classes single/multiphase': 3, 'Num classes bcc/fcc/other': 3}

    # (source column, prefix of the generated column names, encoder)
    encodings = (
        ("PROPERTY: Processing method", "Preprocessing method ",
         to_categorical_num_classes_processing),
        ("PROPERTY: BCC/FCC/other", "BCC/FCC/other ",
         to_categorical_bcc_fcc_other),
        ("PROPERTY: Single/Multiphase", "Single/Multiphase ",
         to_categorical_single_multiphase),
    )

    # Work on a copy: assigning columns to ``X`` directly would also add
    # them to the caller's DataFrame.
    one_hot = X.copy()

    for source_column, prefix, encoder in encodings:
        # Each cell becomes a one-hot vector; pd.Series spreads every
        # vector over integer-labelled columns.
        encoded = X[source_column].apply(encoder, num_classes_one_hot=num_classes_one_hot)
        flattened = encoded.apply(pd.Series)
        labels = list(mapping_dict[source_column].keys())
        for column in flattened.columns:
            one_hot[prefix + str(labels[int(column)])] = flattened[column]

    return one_hot.drop(columns=[source_column for source_column, _, _ in encodings])