# allknowingroger's picture
# Duplicate from sandl/private_inverse_design_alloy
# a8aa40c
import pandas as pd
import pymatgen as mg
from pymatgen.core.structure import Composition
import numpy as np
import tensorflow as tf
import shap
import joblib
import matplotlib.pyplot as plt
# Path of the serialized SHAP explainer loaded by interpret().
explainer_filename = "explainer.bz2"

# Column labels passed to the SHAP summary plot, in order.
# Every string is kept byte-for-byte, including the trailing space in
# 'Single/Multiphase ' (presumably an empty category label -- confirm
# against the mapping used by turn_into_one_hot).
feature_names = [
    # Calculated properties.
    'PROPERTY: Calculated Density (g/cm$^3$)',
    'PROPERTY: Calculated Young modulus (GPa)',
    # One column per metal.
    *('PROPERTY: Metal ' + symbol for symbol in (
        'Al', 'Co', 'Fe', 'Ni', 'Si', 'Cr', 'Nb', 'Ti', 'Mn', 'V',
        'Mo', 'Cu', 'Ta', 'Zr', 'Hf', 'W', 'Zn', 'Sn', 'Re', 'C',
        'Pd', 'Sc', 'Y',
    )),
    # Processing-method columns.
    'Preprocessing method ANNEAL',
    'Preprocessing method CAST',
    'Preprocessing method OTHER',
    'Preprocessing method POWDER',
    'Preprocessing method WROUGHT',
    # BCC/FCC/other columns.
    'BCC/FCC/other BCC',
    'BCC/FCC/other FCC',
    'BCC/FCC/other OTHER',
    # Single/Multiphase columns.
    'Single/Multiphase ',
    'Single/Multiphase M',
    'Single/Multiphase S',
]
def return_feature_names():
    """Return the module-level ``feature_names`` list.

    The list object itself is returned, not a copy, so any mutation by
    the caller is visible to every other user of ``feature_names``.
    """
    names = feature_names
    return names
def normalize_and_alphabetize_formula(formula):
    """Normalize a composition label so equal compositions compare equal.

    Used to enable matching / groupby on compositions. Each element's
    atomic fraction is divided by the largest fraction and rounded to
    three decimals; the result is re-parsed and returned as its
    ``alphabetical_formula``.

    Args:
        formula: Anything accepted by ``Composition``. Falsy values
            (``None``, ``""``) are treated as "no formula".

    Returns:
        The normalized alphabetical formula, or ``None`` when ``formula``
        is falsy or cannot be processed.
    """
    if not formula:
        return None
    try:
        comp = Composition(formula)
        weights = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        # Hoisted so the maximum is computed once, not once per element.
        largest = max(weights)
        normalized_weights = [round(w / largest, 3) for w in weights]
        normalized_comp = "".join(
            str(x) + str(y) for x, y in zip(comp.elements, normalized_weights)
        )
        return Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Deliberately best-effort: an unparsable formula is reported and
        # mapped to None. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate.
        print("INVALID: ", formula)
        return None
def calculate_density(formula):
    """Calculate density based on the Rule of Mixtures (ROM).

    The result is the fraction-weighted sum of atomic masses divided by
    the fraction-weighted sum of molar volumes, rounded to one decimal.
    Units follow whatever pymatgen reports for ``atomic_mass`` and
    ``molar_volume`` (presumably g/cm^3 overall -- confirm).
    """
    composition = Composition(formula)
    fractions = np.array(
        [composition.get_atomic_fraction(el) for el in composition.elements]
    )
    molar_volumes = np.array([el.molar_volume for el in composition.elements])
    masses = np.array([el.atomic_mass for el in composition.elements])

    weighted_mass = np.sum(fractions * masses)
    weighted_volume = np.sum(fractions * molar_volumes)
    density = weighted_mass / weighted_volume
    return round(density, 1)
def calculate_youngs_modulus(formula):
    """Calculate Young's modulus based on the Rule of Mixtures (ROM).

    Each element's modulus is weighted by (atomic fraction * molar volume).
    A few elements use fixed values instead of the pymatgen
    ``youngs_modulus`` attribute.

    Returns:
        The weighted modulus rounded to an ``int``, or ``''`` (after
        printing the formula and the collected values) when any element
        has no modulus available.
    """
    # Fixed moduli that take precedence over the pymatgen value.
    fixed_moduli = {
        'C': 1050,  # use diamond form for carbon
        'B': 362,   # use minimum value for Boron Carbide
        'Mo': 329,
        'Co': 209,
    }

    composition = Composition(formula)
    fractions = np.array(
        [composition.get_atomic_fraction(el) for el in composition.elements]
    )
    molar_volumes = np.array([el.molar_volume for el in composition.elements])

    # Conditional expression (not dict.get) so youngs_modulus is only read
    # for elements without a fixed value.
    moduli = np.array([
        fixed_moduli[str(el)] if str(el) in fixed_moduli else el.youngs_modulus
        for el in composition.elements
    ])

    if None in moduli:
        print(formula, moduli)
        return ''

    numerator = np.sum(fractions * molar_volumes * moduli)
    denominator = np.sum(fractions * molar_volumes)
    return int(round(numerator / denominator, 0))
def interpret(input):
    """Draw a SHAP summary plot for ``input`` and return the figure.

    The current matplotlib figure is cleared, the explainer stored at
    ``explainer_filename`` is loaded, and the first entry of its
    ``shap_values`` output is plotted with ``feature_names`` as labels.

    Args:
        input: Samples handed to ``shap_values`` and to the plot. The
            name shadows the builtin but is kept, because it is part of
            the function's interface.

    Returns:
        A ``(figure, None)`` tuple, where ``figure`` is the current
        matplotlib figure after plotting.
    """
    plt.clf()
    # NOTE: joblib.load unpickles the file; only load explainers from a
    # trusted location.
    explainer = joblib.load(filename=explainer_filename)
    shap_values = explainer.shap_values(input)
    # show=False stops SHAP from calling plt.show() itself. On interactive
    # backends that call would display the figure first, so the plt.gcf()
    # below could return a new blank figure.
    shap.summary_plot(shap_values[0], input, feature_names=feature_names,
                      show=False)
    return plt.gcf(), None
def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    """One-hot encode ``X`` using the microstructure class count.

    The class count is read from
    ``num_classes_one_hot["Num classes microstructure"]`` and passed to
    ``tf.keras.utils.to_categorical``.
    """
    class_count = num_classes_one_hot["Num classes microstructure"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_num_classes_processing(X, num_classes_one_hot):
    """One-hot encode ``X`` using the processing-method class count.

    The class count is read from
    ``num_classes_one_hot["Num classes preprocessing"]`` and passed to
    ``tf.keras.utils.to_categorical``.
    """
    class_count = num_classes_one_hot["Num classes preprocessing"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    """One-hot encode ``X`` using the BCC/FCC/other class count.

    The class count is read from
    ``num_classes_one_hot["Num classes bcc/fcc/other"]`` and passed to
    ``tf.keras.utils.to_categorical``.
    """
    class_count = num_classes_one_hot["Num classes bcc/fcc/other"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_single_multiphase(X, num_classes_one_hot):
    """One-hot encode ``X`` using the single/multiphase class count.

    The class count is read from
    ``num_classes_one_hot["Num classes single/multiphase"]`` and passed to
    ``tf.keras.utils.to_categorical``.
    """
    class_count = num_classes_one_hot["Num classes single/multiphase"]
    return tf.keras.utils.to_categorical(X, class_count)
def return_num_classes_one_hot(df):
    """Count the distinct values in each categorical column of ``df``.

    Args:
        df: Frame containing the four ``PROPERTY: ...`` columns below.

    Returns:
        A dict mapping each ``"Num classes ..."`` key to the number of
        unique values (per ``np.unique``) in the corresponding column.
    """
    # (result key, source column); the order fixes both the column access
    # order and the key order of the returned dict.
    key_to_column = (
        ("Num classes microstructure", 'PROPERTY: Microstructure'),
        ("Num classes preprocessing", 'PROPERTY: Processing method'),
        ("Num classes single/multiphase", 'PROPERTY: Single/Multiphase'),
        ("Num classes bcc/fcc/other", 'PROPERTY: BCC/FCC/other'),
    )
    return {
        key: len(np.unique(np.asarray(df[column])))
        for key, column in key_to_column
    }
def turn_into_one_hot(X, mapping_dict):
    """Replace the categorical property columns of ``X`` with one-hot columns.

    For each of ``PROPERTY: Processing method``, ``PROPERTY: BCC/FCC/other``
    and ``PROPERTY: Single/Multiphase``, every value is encoded with the
    matching ``to_categorical_*`` helper and spread over one column per
    class. Each new column is named ``<prefix> <label>``, where ``label``
    is the key at that class index in ``mapping_dict[<source column>]``.
    Microstructure is not encoded.

    Args:
        X: DataFrame holding the three categorical columns (presumably as
            integer class indices -- confirm against the caller). It is
            not modified; earlier versions added columns to it in place.
        mapping_dict: Maps each source column name to a dict whose key
            order gives the label for each class index.

    Returns:
        A new DataFrame with the one-hot columns appended and the three
        source columns removed.
    """
    # Fixed class counts expected by the to_categorical_* helpers. The
    # microstructure entry is unused here but kept for parity with
    # return_num_classes_one_hot.
    num_classes_one_hot = {'Num classes microstructure': 45, 'Num classes preprocessing': 5,
                           'Num classes single/multiphase': 3, 'Num classes bcc/fcc/other': 3}

    # (source column, per-value encoder, prefix of the generated columns)
    encodings = (
        ("PROPERTY: Processing method",
         to_categorical_num_classes_processing, "Preprocessing method "),
        ("PROPERTY: BCC/FCC/other",
         to_categorical_bcc_fcc_other, "BCC/FCC/other "),
        ("PROPERTY: Single/Multiphase",
         to_categorical_single_multiphase, "Single/Multiphase "),
    )

    # Work on a copy so the caller's frame is left untouched.
    one_hot = X.copy()
    for source_column, encoder, prefix in encodings:
        # Label list built once per column instead of once per class.
        labels = list(mapping_dict[source_column].keys())
        encoded = X[source_column].apply(encoder,
                                         num_classes_one_hot=num_classes_one_hot)
        # Expand each per-row vector into one column per class index.
        flattened = encoded.apply(pd.Series)
        for class_index in flattened.columns:
            one_hot[prefix + str(labels[int(class_index)])] = flattened[class_index]

    # The intermediate "... One Hot" columns are never added to the frame,
    # so only the source columns need removing.
    return one_hot.drop(columns=[source for source, _, _ in encodings])