Spaces:
Runtime error
Runtime error
import pandas as pd | |
import pymatgen as mg | |
from pymatgen.core.structure import Composition | |
import numpy as np | |
import tensorflow as tf | |
import shap | |
import joblib | |
import matplotlib.pyplot as plt | |
# File name of the serialized explainer that interpret() loads with joblib.
explainer_filename = "explainer.bz2"

# Feature labels handed to the SHAP summary plot.
# NOTE(review): presumably in the same order as the model's input columns -- confirm.
feature_names = [
    # Calculated numeric properties
    'PROPERTY: Calculated Density (g/cm$^3$)',
    'PROPERTY: Calculated Young modulus (GPa)',
    # Per-element columns
    'PROPERTY: Metal Al',
    'PROPERTY: Metal Co',
    'PROPERTY: Metal Fe',
    'PROPERTY: Metal Ni',
    'PROPERTY: Metal Si',
    'PROPERTY: Metal Cr',
    'PROPERTY: Metal Nb',
    'PROPERTY: Metal Ti',
    'PROPERTY: Metal Mn',
    'PROPERTY: Metal V',
    'PROPERTY: Metal Mo',
    'PROPERTY: Metal Cu',
    'PROPERTY: Metal Ta',
    'PROPERTY: Metal Zr',
    'PROPERTY: Metal Hf',
    'PROPERTY: Metal W',
    'PROPERTY: Metal Zn',
    'PROPERTY: Metal Sn',
    'PROPERTY: Metal Re',
    'PROPERTY: Metal C',
    'PROPERTY: Metal Pd',
    'PROPERTY: Metal Sc',
    'PROPERTY: Metal Y',
    # One-hot columns: processing method
    'Preprocessing method ANNEAL',
    'Preprocessing method CAST',
    'Preprocessing method OTHER',
    'Preprocessing method POWDER',
    'Preprocessing method WROUGHT',
    # One-hot columns: crystal structure
    'BCC/FCC/other BCC',
    'BCC/FCC/other FCC',
    'BCC/FCC/other OTHER',
    # One-hot columns: phase count (the first label is intentionally an
    # empty category name, hence the trailing space)
    'Single/Multiphase ',
    'Single/Multiphase M',
    'Single/Multiphase S',
]
def return_feature_names() -> list:
    """Return the module-level ``feature_names`` list.

    The list object itself is returned, not a copy.
    """
    return feature_names
def normalize_and_alphabetize_formula(formula):
    """Normalize a composition label so it can be matched / grouped on.

    Every element's atomic fraction is divided by the largest fraction and
    rounded to three decimals; the rescaled formula is parsed again and its
    alphabetical formula is returned.

    Args:
        formula: Chemical formula string. Falsy values (``None``, ``""``)
            are treated as "no formula".

    Returns:
        The normalized alphabetical formula, or ``None`` when ``formula`` is
        falsy or cannot be processed (in which case ``INVALID:`` and the
        offending formula are printed).
    """
    if not formula:
        return None

    try:
        comp = Composition(formula)
        elements = comp.elements
        fractions = [comp.get_atomic_fraction(element) for element in elements]
        largest = max(fractions)
        scaled = [round(fraction / largest, 3) for fraction in fractions]
        normalized_comp = "".join(
            str(element) + str(amount) for element, amount in zip(elements, scaled)
        )
        return Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Deliberately best-effort: an unparsable formula is reported and
        # skipped. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("INVALID: ", formula)
        return None
def calculate_density(formula):
    """Calculate density based on the Rule of Mixtures (ROM).

    The result is ``sum(fraction * atomic_mass) / sum(fraction * molar_volume)``
    over the elements of ``formula``, rounded to one decimal place.

    Args:
        formula: Chemical formula accepted by ``Composition``.

    Returns:
        The rounded density estimate.
    """
    comp = Composition(formula)
    elements = comp.elements

    fractions = np.array([comp.get_atomic_fraction(element) for element in elements])
    molar_volumes = np.array([element.molar_volume for element in elements])
    masses = np.array([element.atomic_mass for element in elements])

    total_mass = np.sum(fractions * masses)
    total_volume = np.sum(fractions * molar_volumes)
    return round(total_mass / total_volume, 1)
def calculate_youngs_modulus(formula):
    """Calculate Young's modulus based on the Rule of Mixtures (ROM).

    The result is the volume-weighted mean
    ``sum(fraction * molar_volume * modulus) / sum(fraction * molar_volume)``
    over the elements of ``formula``.

    Args:
        formula: Chemical formula accepted by ``Composition``.

    Returns:
        The modulus rounded to the nearest integer, or the empty string ``''``
        when any element has no Young's modulus available (the formula and
        the collected values are printed in that case).
    """
    # Fixed values used instead of the element's own ``youngs_modulus``.
    modulus_overrides = {
        'C': 1050,  # use diamond form for carbon
        'B': 362,   # use minimum value for Boron Carbide
        'Mo': 329,
        'Co': 209,
    }

    comp = Composition(formula)
    elements = comp.elements
    weights = np.array([comp.get_atomic_fraction(element) for element in elements])
    vols = np.array([element.molar_volume for element in elements])

    moduli = []
    for element in elements:
        symbol = str(element)
        if symbol in modulus_overrides:
            moduli.append(modulus_overrides[symbol])
        else:
            moduli.append(element.youngs_modulus)

    # Detect missing data with an identity check on the plain list; testing
    # ``None in ndarray`` depends on NumPy's element-wise ``== None``.
    has_missing = any(value is None for value in moduli)
    ym_vals = np.array(moduli)
    if has_missing:
        print(formula, ym_vals)
        return ''

    weighted_volumes = weights * vols
    val = np.sum(weighted_volumes * ym_vals) / np.sum(weighted_volumes)
    return int(round(val, 0))
def interpret(input):
    """Draw a SHAP summary plot for ``input`` and return the figure.

    Args:
        input: Feature data passed both to the explainer's ``shap_values``
            and to ``shap.summary_plot``. The parameter name shadows the
            ``input`` builtin; it is kept so keyword callers keep working.

    Returns:
        A ``(figure, None)`` tuple where ``figure`` is the current matplotlib
        figure holding the summary plot.
    """
    plt.clf()

    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load explainer files from a trusted source.
    explainer = joblib.load(filename=explainer_filename)
    shap_values = explainer.shap_values(input)

    # show=False stops summary_plot from calling plt.show(); with an
    # interactive backend that call can display and close the figure, so
    # plt.gcf() below would hand back a fresh, empty one.
    shap.summary_plot(shap_values[0], input, feature_names=feature_names, show=False)
    return plt.gcf(), None
def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    """One-hot encode ``X`` with the microstructure class count.

    The class count is read from ``num_classes_one_hot`` under the key
    ``"Num classes microstructure"``.
    """
    class_count = num_classes_one_hot["Num classes microstructure"]
    return tf.keras.utils.to_categorical(X, num_classes=class_count)
def to_categorical_num_classes_processing(X, num_classes_one_hot):
    """One-hot encode ``X`` with the processing-method class count.

    The class count is read from ``num_classes_one_hot`` under the key
    ``"Num classes preprocessing"``.
    """
    class_count = num_classes_one_hot["Num classes preprocessing"]
    return tf.keras.utils.to_categorical(X, num_classes=class_count)
def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    """One-hot encode ``X`` with the BCC/FCC/other class count.

    The class count is read from ``num_classes_one_hot`` under the key
    ``"Num classes bcc/fcc/other"``.
    """
    class_count = num_classes_one_hot["Num classes bcc/fcc/other"]
    return tf.keras.utils.to_categorical(X, num_classes=class_count)
def to_categorical_single_multiphase(X, num_classes_one_hot):
    """One-hot encode ``X`` with the single/multiphase class count.

    The class count is read from ``num_classes_one_hot`` under the key
    ``"Num classes single/multiphase"``.
    """
    class_count = num_classes_one_hot["Num classes single/multiphase"]
    return tf.keras.utils.to_categorical(X, num_classes=class_count)
def return_num_classes_one_hot(df):
    """Count the distinct values in each categorical column of ``df``.

    Args:
        df: Table containing the columns ``'PROPERTY: Microstructure'``,
            ``'PROPERTY: Processing method'``, ``'PROPERTY: Single/Multiphase'``
            and ``'PROPERTY: BCC/FCC/other'``.

    Returns:
        A dict mapping each ``"Num classes ..."`` key to the number of unique
        values (per ``np.unique``) found in the corresponding column.
    """
    # (result key, source column), in the order the counts are computed.
    key_and_column = (
        ("Num classes microstructure", 'PROPERTY: Microstructure'),
        ("Num classes preprocessing", 'PROPERTY: Processing method'),
        ("Num classes single/multiphase", 'PROPERTY: Single/Multiphase'),
        ("Num classes bcc/fcc/other", 'PROPERTY: BCC/FCC/other'),
    )
    return {
        key: len(np.unique(np.asarray(df[column])))
        for key, column in key_and_column
    }
def turn_into_one_hot(X, mapping_dict):
    """Replace the categorical columns of ``X`` with named one-hot columns.

    The columns ``"PROPERTY: Processing method"``, ``"PROPERTY: BCC/FCC/other"``
    and ``"PROPERTY: Single/Multiphase"`` are one-hot encoded. Each resulting
    column is named ``<prefix> + <label>``, where the label for position ``i``
    is the ``i``-th key of ``mapping_dict[<source column>]``. The three source
    columns are then dropped. ``"PROPERTY: Microstructure"`` is neither
    encoded nor dropped here.

    Args:
        X: DataFrame holding the three categorical columns as integer codes.
        mapping_dict: Maps each source column name to a mapping whose key
            order gives the label of each class index.

    Returns:
        A new DataFrame with the one-hot columns appended (processing method,
        then BCC/FCC/other, then single/multiphase). ``X`` itself is left
        unmodified.
    """
    # Fixed class counts consumed by the to_categorical_* helpers. The
    # microstructure entry is not used by any helper called below.
    num_classes_one_hot = {'Num classes microstructure': 45, 'Num classes preprocessing': 5,
                           'Num classes single/multiphase': 3, 'Num classes bcc/fcc/other': 3}

    # (source column, prefix of the generated columns, encoder)
    encodings = (
        ("PROPERTY: Processing method", "Preprocessing method ",
         to_categorical_num_classes_processing),
        ("PROPERTY: BCC/FCC/other", "BCC/FCC/other ",
         to_categorical_bcc_fcc_other),
        ("PROPERTY: Single/Multiphase", "Single/Multiphase ",
         to_categorical_single_multiphase),
    )

    # Work on a copy so the caller's DataFrame does not silently gain columns.
    one_hot = X.copy()

    for source_column, prefix, encoder in encodings:
        labels = list(mapping_dict[source_column].keys())
        # Each cell becomes a one-hot vector; apply(pd.Series) spreads that
        # vector over one column per class index.
        encoded = X[source_column].apply(encoder, num_classes_one_hot=num_classes_one_hot)
        flattened = encoded.apply(pd.Series)
        for position in flattened.columns:
            one_hot[prefix + str(labels[int(position)])] = flattened[position]

    return one_hot.drop(columns=[source_column for source_column, _, _ in encodings])