Spaces:
Runtime error
Runtime error
File size: 7,422 Bytes
a8aa40c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import pandas as pd
import pymatgen as mg
from pymatgen.core.structure import Composition
import numpy as np
import tensorflow as tf
import shap
import joblib
import matplotlib.pyplot as plt
# Path of the serialized SHAP explainer; interpret() loads it with joblib.
explainer_filename = "explainer.bz2"
# Display names passed to shap.summary_plot() in interpret().
# NOTE(review): presumably the order must match the column order of the model
# input handed to interpret() - confirm against the training pipeline.
# The 'Single/Multiphase ' entry (trailing space) looks like a category whose
# label is the empty string - verify against the mapping dict used upstream.
feature_names = ['PROPERTY: Calculated Density (g/cm$^3$)',
'PROPERTY: Calculated Young modulus (GPa)', 'PROPERTY: Metal Al',
'PROPERTY: Metal Co', 'PROPERTY: Metal Fe', 'PROPERTY: Metal Ni',
'PROPERTY: Metal Si', 'PROPERTY: Metal Cr', 'PROPERTY: Metal Nb',
'PROPERTY: Metal Ti', 'PROPERTY: Metal Mn', 'PROPERTY: Metal V',
'PROPERTY: Metal Mo', 'PROPERTY: Metal Cu', 'PROPERTY: Metal Ta',
'PROPERTY: Metal Zr', 'PROPERTY: Metal Hf', 'PROPERTY: Metal W',
'PROPERTY: Metal Zn', 'PROPERTY: Metal Sn', 'PROPERTY: Metal Re',
'PROPERTY: Metal C', 'PROPERTY: Metal Pd', 'PROPERTY: Metal Sc',
'PROPERTY: Metal Y', 'Preprocessing method ANNEAL',
'Preprocessing method CAST', 'Preprocessing method OTHER',
'Preprocessing method POWDER', 'Preprocessing method WROUGHT',
'BCC/FCC/other BCC', 'BCC/FCC/other FCC', 'BCC/FCC/other OTHER',
'Single/Multiphase ', 'Single/Multiphase M', 'Single/Multiphase S']
def return_feature_names():
    '''Returns the module-level list of feature display names.

    The list object itself is returned (not a copy), so mutating the result
    mutates the shared ``feature_names`` list.
    '''
    return feature_names
def normalize_and_alphabetize_formula(formula):
    '''Normalizes composition labels. Used to enable matching / groupby on compositions.

    Each element's atomic fraction is divided by the largest fraction in the
    composition and rounded to 3 decimals; the formula is then rebuilt and
    returned in alphabetical element order.

    Args:
        formula: Composition string understood by pymatgen's ``Composition``.

    Returns:
        The normalized alphabetical formula, or ``None`` when ``formula`` is
        empty/``None`` or cannot be processed (an "INVALID" line is printed
        in the latter case).
    '''
    if not formula:
        return None

    try:
        comp = Composition(formula)
        weights = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        largest = max(weights)  # loop-invariant, so compute it once
        normalized_weights = [round(w / largest, 3) for w in weights]
        normalized_comp = "".join(
            [str(x) + str(y) for x, y in zip(comp.elements, normalized_weights)]
        )
        return Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Deliberately broad: any parsing/normalization failure marks the
        # formula as invalid. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        print("INVALID: ", formula)
        return None
def calculate_density(formula):
    '''Calculates density based on Rule of Mixtures (ROM).

    Computes sum(fraction * atomic_mass) / sum(fraction * molar_volume) over
    the elements of the composition and rounds the result to 1 decimal.
    NOTE(review): presumably g/cm^3, given pymatgen's element units - confirm.

    Args:
        formula: Composition string understood by pymatgen's ``Composition``.

    Returns:
        The rule-of-mixtures density rounded to one decimal place.
    '''
    composition = Composition(formula)
    elements = composition.elements

    fractions = np.array([composition.get_atomic_fraction(el) for el in elements])
    molar_volumes = np.array([el.molar_volume for el in elements])
    masses = np.array([el.atomic_mass for el in elements])

    total_mass = np.sum(fractions * masses)
    total_volume = np.sum(fractions * molar_volumes)
    return round(total_mass / total_volume, 1)
def calculate_youngs_modulus(formula):
    '''Calculates Young Modulus based on Rule of Mixtures (ROM).

    The per-element moduli are averaged with weights equal to
    atomic fraction * molar volume. A few elements use fixed values instead
    of the pymatgen element data (see ``overrides`` below).

    Args:
        formula: Composition string understood by pymatgen's ``Composition``.

    Returns:
        The weighted-average modulus rounded to an ``int``. If any element
        has no modulus available, the formula and the collected values are
        printed and the empty string ``''`` is returned instead.
    '''
    # Fixed moduli used in place of ``Element.youngs_modulus``
    # (presumably GPa, matching the feature label - confirm).
    overrides = {
        'C': 1050,  # use diamond form for carbon
        'B': 362,   # use minimum value for Boron Carbide
        'Mo': 329,
        'Co': 209,
    }

    comp = Composition(formula)
    elements = comp.elements
    weights = np.array([comp.get_atomic_fraction(e) for e in elements])
    vols = np.array([e.molar_volume for e in elements])

    # Only consult the element data when there is no override, so the
    # attribute lookup stays as lazy as in the original if/elif chain.
    moduli = [
        overrides[str(e)] if str(e) in overrides else e.youngs_modulus
        for e in elements
    ]
    ym_vals = np.array(moduli)

    # Test the plain list with ``is None`` rather than ``None in ndarray``,
    # which depends on elementwise ``== None`` over an object array.
    if any(m is None for m in moduli):
        print(formula, ym_vals)
        return ''

    weighted_volumes = weights * vols
    val = np.sum(weighted_volumes * ym_vals) / np.sum(weighted_volumes)
    return int(round(val, 0))
def interpret(input):
    '''Draws a SHAP summary plot for ``input`` and returns the figure.

    Clears the current matplotlib figure, loads the explainer from
    ``explainer_filename``, computes SHAP values for ``input`` and plots the
    first entry of the result with the module-level ``feature_names``.

    Args:
        input: Data passed unchanged to ``shap_values`` and ``summary_plot``.

    Returns:
        A ``(figure, None)`` tuple, where ``figure`` is the current
        matplotlib figure after plotting.
    '''
    plt.clf()
    # NOTE(review): joblib.load unpickles the file - only load explainer
    # files from a trusted source.
    explainer = joblib.load(filename=explainer_filename)
    all_shap_values = explainer.shap_values(input)
    shap.summary_plot(all_shap_values[0], input, feature_names=feature_names)
    return plt.gcf(), None
def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    '''One-hot encodes ``X`` using the microstructure class count.

    The number of classes is read from
    ``num_classes_one_hot["Num classes microstructure"]`` and the encoding is
    delegated to ``tf.keras.utils.to_categorical``.
    '''
    class_count = num_classes_one_hot["Num classes microstructure"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_num_classes_processing(X, num_classes_one_hot):
    '''One-hot encodes ``X`` using the preprocessing-method class count.

    The number of classes is read from
    ``num_classes_one_hot["Num classes preprocessing"]`` and the encoding is
    delegated to ``tf.keras.utils.to_categorical``.
    '''
    class_count = num_classes_one_hot["Num classes preprocessing"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    '''One-hot encodes ``X`` using the BCC/FCC/other class count.

    The number of classes is read from
    ``num_classes_one_hot["Num classes bcc/fcc/other"]`` and the encoding is
    delegated to ``tf.keras.utils.to_categorical``.
    '''
    class_count = num_classes_one_hot["Num classes bcc/fcc/other"]
    return tf.keras.utils.to_categorical(X, class_count)
def to_categorical_single_multiphase(X, num_classes_one_hot):
    '''One-hot encodes ``X`` using the single/multiphase class count.

    The number of classes is read from
    ``num_classes_one_hot["Num classes single/multiphase"]`` and the encoding
    is delegated to ``tf.keras.utils.to_categorical``.
    '''
    class_count = num_classes_one_hot["Num classes single/multiphase"]
    return tf.keras.utils.to_categorical(X, class_count)
def return_num_classes_one_hot(df):
    '''Counts the distinct values of each categorical property column.

    Args:
        df: Table indexable by column name that contains the four
            ``PROPERTY: ...`` columns listed below.

    Returns:
        A dict mapping each ``"Num classes ..."`` key to the number of unique
        values (as counted by ``np.unique``) in the corresponding column.

    Raises:
        KeyError: If one of the expected columns is missing from ``df``.
    '''
    # Result key -> source column. One table replaces four copy-pasted
    # counting expressions; insertion order fixes the order of the result.
    column_for_key = {
        "Num classes microstructure": 'PROPERTY: Microstructure',
        "Num classes preprocessing": 'PROPERTY: Processing method',
        "Num classes single/multiphase": 'PROPERTY: Single/Multiphase',
        "Num classes bcc/fcc/other": 'PROPERTY: BCC/FCC/other',
    }
    return {
        key: len(np.unique(np.asarray(df[column])))
        for key, column in column_for_key.items()
    }
def turn_into_one_hot(X, mapping_dict):
    '''Replaces the categorical property columns of ``X`` with one-hot columns.

    The processing-method, BCC/FCC/other and single/multiphase columns are
    each encoded with the matching ``to_categorical_*`` helper, expanded into
    one column per class, and named ``<prefix><label>``. The label of class
    ``i`` is the ``i``-th key of ``mapping_dict[<source column>]``. The three
    source columns are removed from the result. The microstructure column is
    intentionally not encoded.

    Args:
        X: DataFrame containing the three categorical source columns.
            NOTE(review): values are presumably integer class codes - confirm.
        mapping_dict: Maps each source column name to a mapping whose key
            order defines the class labels.

    Returns:
        A new DataFrame with the source columns replaced by one-hot columns.
        ``X`` itself is left unmodified.
    '''
    # Class counts are fixed here rather than derived from the data
    # (presumably to match the trained model's input width - TODO confirm).
    num_classes_one_hot = {'Num classes microstructure': 45, 'Num classes preprocessing': 5,
                           'Num classes single/multiphase': 3, 'Num classes bcc/fcc/other': 3}

    # (source column, output column prefix, encoder)
    encodings = (
        ("PROPERTY: Processing method", "Preprocessing method ",
         to_categorical_num_classes_processing),
        ("PROPERTY: BCC/FCC/other", "BCC/FCC/other ",
         to_categorical_bcc_fcc_other),
        ("PROPERTY: Single/Multiphase", "Single/Multiphase ",
         to_categorical_single_multiphase),
    )

    # Work on a copy: the previous ``one_hot = X`` alias added helper and
    # output columns to the caller's frame as a side effect.
    one_hot = X.copy()

    for source_column, prefix, encoder in encodings:
        # Each cell becomes a one-hot vector ...
        encoded = X[source_column].apply(encoder, num_classes_one_hot=num_classes_one_hot)
        # ... which is then spread over one column per class index.
        flattened = encoded.apply(pd.Series)
        # Hoisted out of the loop: class index -> human-readable label.
        labels = list(mapping_dict[source_column])
        for column in flattened.columns:
            one_hot[prefix + str(labels[int(column)])] = flattened[column]

    # The encoded vectors are never stored in the frame, so only the original
    # categorical columns need to be removed.
    return one_hot.drop(columns=[source_column for source_column, _, _ in encodings])