bndl committed on
Commit
480eccd
·
1 Parent(s): 51bf77d

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +174 -0
utils.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import pymatgen as mg
3
+ from pymatgen.core.structure import Composition
4
+ import numpy as np
5
+ import tensorflow as tf
6
+ import shap
7
+ import joblib
8
+ import matplotlib.pyplot as plt
9
+
10
# Path of the explainer file that `interpret` loads with joblib.
explainer_filename = "explainer.bz2"

# Labels handed to shap.summary_plot as `feature_names`: two calculated
# properties followed by the one-hot columns of each categorical property.
# NOTE(review): the order presumably has to match the column order of the
# model input built by `turn_into_one_hot` -- confirm before reordering.
feature_names = [
    'PROPERTY: Calculated Density (g/cm$^3$)',
    'PROPERTY: Calculated Young modulus (GPa)',
    *('Microstructure ' + phase for phase in (
        'B2', 'B2+BCC', 'B2+L12', 'B2+Laves+Sec.', 'B2+Sec.',
        'BCC', 'BCC+B2', 'BCC+B2+FCC', 'BCC+B2+FCC+Sec.', 'BCC+B2+L12',
        'BCC+B2+Laves', 'BCC+B2+Sec.', 'BCC+BCC', 'BCC+BCC+HCP',
        'BCC+BCC+Laves', 'BCC+BCC+Laves(C14)', 'BCC+BCC+Laves(C15)',
        'BCC+FCC', 'BCC+HCP', 'BCC+Laves', 'BCC+Laves(C14)',
        'BCC+Laves(C15)', 'BCC+Laves+Sec.', 'BCC+Sec.',
        'FCC', 'FCC+B2', 'FCC+B2+Sec.', 'FCC+BCC', 'FCC+BCC+B2',
        'FCC+BCC+B2+Sec.', 'FCC+BCC+BCC', 'FCC+BCC+Sec.', 'FCC+FCC',
        'FCC+HCP', 'FCC+HCP+Sec.', 'FCC+L12', 'FCC+L12+B2',
        'FCC+L12+Sec.', 'FCC+Laves', 'FCC+Laves(C14)', 'FCC+Laves+Sec.',
        'FCC+Sec.',
        'L12+B2', 'Laves(C14)+Sec.', 'OTHER',
    )),
    *('Preprocessing method ' + method for method in (
        'ANNEAL', 'CAST', 'OTHER', 'POWDER', 'WROUGHT',
    )),
    *('BCC/FCC/other ' + lattice for lattice in ('BCC', 'FCC', 'OTHER')),
    # The first label intentionally ends with a space (empty category name).
    *('Single/Multiphase ' + phase_count for phase_count in ('', 'M', 'S')),
]
42
+
43
def normalize_and_alphabetize_formula(formula):
    '''Normalizes composition labels. Used to enable matching / groupby on compositions.

    The atomic fractions of the parsed composition are divided by the largest
    fraction (so the most abundant element gets 1.0), rounded to 3 decimals,
    and the rebuilt composition is returned as its alphabetical formula.

    Args:
        formula: Composition string understood by pymatgen's ``Composition``.

    Returns:
        The normalized alphabetical formula, or ``None`` when ``formula`` is
        empty/``None`` or cannot be processed (in which case
        ``INVALID:  <formula>`` is printed).
    '''
    if not formula:
        return None

    try:
        comp = Composition(formula)
        fractions = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        largest = max(fractions)
        scaled = [round(fraction / largest, 3) for fraction in fractions]
        normalized_comp = "".join(
            str(ele) + str(amount) for ele, amount in zip(comp.elements, scaled)
        )
        return Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Best effort: report the offending label and carry on. Unlike a bare
        # `except:`, this lets KeyboardInterrupt / SystemExit propagate.
        print("INVALID: ", formula)
        return None
59
+
60
def calculate_density(formula):
    '''Calculates density based on Rule of Mixtures (ROM).

    The result is the atomic-fraction-weighted sum of the elements' atomic
    masses divided by the atomic-fraction-weighted sum of their molar volumes,
    rounded to one decimal.
    NOTE(review): presumably g/cm^3, given pymatgen's units -- confirm.

    Args:
        formula: Composition string understood by pymatgen's ``Composition``.

    Returns:
        The ROM density rounded to 1 decimal place.
    '''
    composition = Composition(formula)

    fractions = [composition.get_atomic_fraction(el) for el in composition.elements]
    molar_volumes = np.array([el.molar_volume for el in composition.elements])
    masses = np.array([el.atomic_mass for el in composition.elements])

    # `fractions` is a plain list; multiplying by an ndarray broadcasts
    # element-wise.
    weighted_mass = np.sum(fractions * masses)
    weighted_volume = np.sum(fractions * molar_volumes)

    return round(weighted_mass / weighted_volume, 1)
72
+
73
def calculate_youngs_modulus(formula):
    '''Calculates Young Modulus based on Rule of Mixtures (ROM).

    Each element's modulus is weighted by (atomic fraction * molar volume).
    A few elements use hard-coded moduli instead of pymatgen's
    ``youngs_modulus`` attribute.

    Args:
        formula: Composition string understood by pymatgen's ``Composition``.

    Returns:
        The ROM Young's modulus rounded to the nearest ``int``, or ``''`` when
        any element has no modulus available (the formula and the collected
        values are printed in that case).
    '''
    # Moduli that take precedence over pymatgen's element data.
    overrides = {
        'C': 1050,  # use diamond form for carbon
        'B': 362,   # use minimum value for Boron Carbide
        'Mo': 329,
        'Co': 209,
    }

    comp = Composition(formula)

    weights = np.array([comp.get_atomic_fraction(e) for e in comp.elements])
    vols = np.array([e.molar_volume for e in comp.elements])

    # Only touch `e.youngs_modulus` when there is no override for the element.
    ym_vals = [
        overrides[str(e)] if str(e) in overrides else e.youngs_modulus
        for e in comp.elements
    ]

    # Check for missing data on the plain list: `None in ndarray` depends on
    # NumPy's element-wise comparison semantics.
    if any(value is None for value in ym_vals):
        print(formula, np.array(ym_vals))
        return ''

    ym_vals = np.array(ym_vals)

    val = np.sum(weights*vols*ym_vals) / np.sum(weights*vols)

    return int(round(val, 0))
103
+
104
def interpret(input):
    '''Draw SHAP summary plots for `input` and return the resulting figure.

    The explainer is loaded from `explainer_filename` on every call. The first
    two entries of its SHAP values are plotted side by side.
    NOTE(review): entries 0 and 1 presumably correspond to the model's two
    outputs -- confirm against the explainer.

    Args:
        input: Samples passed to the explainer's ``shap_values`` and to
            ``shap.summary_plot``.

    Returns:
        The current matplotlib figure holding both summary plots.
    '''
    plt.clf()
    explainer = joblib.load(filename=explainer_filename)
    shap_values = explainer.shap_values(input)

    summary_options = dict(show=False, feature_names=feature_names, plot_size=(15, 15))

    plt.figure(figsize=(15, 15))
    for output_index in (0, 1):
        # Left panel for entry 0, right panel for entry 1.
        plt.subplot(1, 2, output_index + 1)
        shap.summary_plot(shap_values[output_index], input, **summary_options)
    plt.tight_layout()
    plt.subplots_adjust(wspace=2.0)

    return plt.gcf()
119
+
120
def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    '''One-hot encode X with the class count stored under "Num classes microstructure".'''
    class_count = num_classes_one_hot["Num classes microstructure"]
    return tf.keras.utils.to_categorical(X, class_count)
122
+
123
def to_categorical_num_classes_processing(X, num_classes_one_hot):
    '''One-hot encode X with the class count stored under "Num classes preprocessing".'''
    class_count = num_classes_one_hot["Num classes preprocessing"]
    return tf.keras.utils.to_categorical(X, class_count)
125
+
126
def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    '''One-hot encode X with the class count stored under "Num classes bcc/fcc/other".'''
    class_count = num_classes_one_hot["Num classes bcc/fcc/other"]
    return tf.keras.utils.to_categorical(X, class_count)
128
+
129
def to_categorical_single_multiphase(X, num_classes_one_hot):
    '''One-hot encode X with the class count stored under "Num classes single/multiphase".'''
    class_count = num_classes_one_hot["Num classes single/multiphase"]
    return tf.keras.utils.to_categorical(X, class_count)
131
+
132
def return_num_classes_one_hot(df):
    '''Count the distinct values of each categorical property column in `df`.

    Args:
        df: DataFrame containing the columns 'PROPERTY: Microstructure',
            'PROPERTY: Processing method', 'PROPERTY: Single/Multiphase' and
            'PROPERTY: BCC/FCC/other'.

    Returns:
        Dict mapping each "Num classes ..." key to the number of unique values
        found in the corresponding column.
    '''
    # (result key, source column), in the order the result dict is built.
    key_to_column = (
        ("Num classes microstructure", 'PROPERTY: Microstructure'),
        ("Num classes preprocessing", 'PROPERTY: Processing method'),
        ("Num classes single/multiphase", 'PROPERTY: Single/Multiphase'),
        ("Num classes bcc/fcc/other", 'PROPERTY: BCC/FCC/other'),
    )
    return {
        key: len(np.unique(np.asarray(df[column])))
        for key, column in key_to_column
    }
141
+
142
def turn_into_one_hot(X, mapping_dict):
    '''Replace the four categorical property columns of `X` by one-hot columns.

    Each of 'PROPERTY: Microstructure', 'PROPERTY: Processing method',
    'PROPERTY: BCC/FCC/other' and 'PROPERTY: Single/Multiphase' is encoded
    with its `to_categorical_*` helper and expanded into one column per class.
    Column i is named "<prefix><label i>", where the labels are the keys of
    `mapping_dict[<source column>]` in iteration order.

    Args:
        X: DataFrame holding the four property columns.
            NOTE(review): values are presumably integer class indices, as
            required by `to_categorical` -- confirm against the caller.
        mapping_dict: Dict keyed by the four source column names; each value
            is a mapping whose keys are the category labels.

    Returns:
        A new DataFrame with the one-hot columns appended (microstructure,
        preprocessing method, BCC/FCC/other, single/multiphase, in that order)
        and the four source columns removed. `X` itself is left unmodified.
    '''
    # Fixed class counts. NOTE(review): presumably the counts the model was
    # trained with -- confirm before changing.
    num_classes_one_hot = {'Num classes microstructure': 45, 'Num classes preprocessing': 5,
                           'Num classes single/multiphase': 3, 'Num classes bcc/fcc/other': 3}

    # (source column, prefix of the generated columns, encoder)
    encoders = (
        ("PROPERTY: Microstructure", "Microstructure ",
         to_categorical_num_classes_microstructure),
        ("PROPERTY: Processing method", "Preprocessing method ",
         to_categorical_num_classes_processing),
        ("PROPERTY: BCC/FCC/other", "BCC/FCC/other ",
         to_categorical_bcc_fcc_other),
        ("PROPERTY: Single/Multiphase", "Single/Multiphase ",
         to_categorical_single_multiphase),
    )

    # Work on a copy so the caller's frame does not pick up extra columns.
    one_hot = X.copy()

    for source_column, prefix, encode in encoders:
        labels = list(mapping_dict[source_column].keys())
        encoded = X[source_column].apply(encode, num_classes_one_hot=num_classes_one_hot)
        # Expand each per-row one-hot vector into one column per class.
        flattened = encoded.apply(pd.Series)
        for position in flattened.columns:
            one_hot[prefix + str(labels[int(position)])] = flattened[position]

    return one_hot.drop(columns=[source_column for source_column, _, _ in encoders])