bndl commited on
Commit
5de9b04
·
1 Parent(s): fb66bc7

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +204 -0
utils.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import pymatgen as mg
3
+ from pymatgen.core.structure import Composition
4
+ import numpy as np
5
+ import tensorflow as tf
6
+ import shap
7
+ import joblib
8
+ import matplotlib.pyplot as plt
9
+
10
# Location of the serialized SHAP explainer that interpret() loads with joblib.
explainer_filename = "models/explainer_old.bz2"

# Feature labels handed to shap.summary_plot() in interpret(), one per line.
feature_names = [
    # Raw property columns.
    'PROPERTY: BCC/FCC/other',
    'PROPERTY: Calculated Density (g/cm$^3$)',
    'PROPERTY: Calculated Young modulus (GPa)',
    'PROPERTY: Processing method',
    'PROPERTY: Microstructure',
    'PROPERTY: Single/Multiphase',
    # Packed one-hot columns.
    'Microstructure One Hot',
    'Processing Method One Hot',
    'BCC/FCC/other One Hot',
    'Single/Multiphase One Hot',
    # Microstructure categories.
    'Microstructure B2',
    'Microstructure B2+BCC',
    'Microstructure B2+L12',
    'Microstructure B2+Laves+Sec.',
    'Microstructure B2+Sec.',
    'Microstructure BCC',
    'Microstructure BCC+B2',
    'Microstructure BCC+B2+FCC',
    'Microstructure BCC+B2+FCC+Sec.',
    'Microstructure BCC+B2+L12',
    'Microstructure BCC+B2+Laves',
    'Microstructure BCC+B2+Sec.',
    'Microstructure BCC+BCC',
    'Microstructure BCC+BCC+HCP',
    'Microstructure BCC+BCC+Laves',
    'Microstructure BCC+BCC+Laves(C14)',
    'Microstructure BCC+BCC+Laves(C15)',
    'Microstructure BCC+FCC',
    'Microstructure BCC+HCP',
    'Microstructure BCC+Laves',
    'Microstructure BCC+Laves(C14)',
    'Microstructure BCC+Laves(C15)',
    'Microstructure BCC+Laves+Sec.',
    'Microstructure BCC+Sec.',
    'Microstructure FCC',
    'Microstructure FCC+B2',
    'Microstructure FCC+B2+Sec.',
    'Microstructure FCC+BCC',
    'Microstructure FCC+BCC+B2',
    'Microstructure FCC+BCC+B2+Sec.',
    'Microstructure FCC+BCC+BCC',
    'Microstructure FCC+BCC+Sec.',
    'Microstructure FCC+FCC',
    'Microstructure FCC+HCP',
    'Microstructure FCC+HCP+Sec.',
    'Microstructure FCC+L12',
    'Microstructure FCC+L12+B2',
    'Microstructure FCC+L12+Sec.',
    'Microstructure FCC+Laves',
    'Microstructure FCC+Laves(C14)',
    'Microstructure FCC+Laves+Sec.',
    'Microstructure FCC+Sec.',
    'Microstructure L12+B2',
    'Microstructure Laves(C14)+Sec.',
    'Microstructure OTHER',
    # Processing-method categories.
    'Preprocessing method ANNEAL',
    'Preprocessing method CAST',
    'Preprocessing method OTHER',
    'Preprocessing method POWDER',
    'Preprocessing method WROUGHT',
    # Crystal-structure categories.
    'BCC/FCC/other BCC',
    'BCC/FCC/other FCC',
    'BCC/FCC/other OTHER',
    # Phase-count categories (the first label has an empty category name).
    'Single/Multiphase ',
    'Single/Multiphase M',
    'Single/Multiphase S',
]
46
+
47
def normalize_and_alphabetize_formula(formula):
    '''Normalizes composition labels. Used to enable matching / groupby on compositions.

    Every element's atomic fraction is divided by the largest fraction in the
    composition and rounded to 3 decimals. The formula is then rebuilt from
    those scaled amounts and returned in pymatgen's alphabetical form.

    Args:
        formula: Composition string accepted by pymatgen's ``Composition``.

    Returns:
        The normalized alphabetical formula, or ``None`` when ``formula`` is
        falsy or cannot be processed (an "INVALID" line is printed in the
        latter case).
    '''
    # Guard clause: nothing to normalize for None / empty input.
    if not formula:
        return None

    try:
        comp = Composition(formula)
        fractions = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        largest = max(fractions)
        scaled = [round(fraction / largest, 3) for fraction in fractions]
        rebuilt = "".join(
            str(ele) + str(amount) for ele, amount in zip(comp.elements, scaled)
        )
        return Composition(rebuilt).alphabetical_formula
    except Exception:
        # Best-effort by design: report the offending label and carry on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate instead of being reported as an invalid formula.
        print("INVALID: ", formula)
        return None
63
+
64
def calculate_density(formula):
    '''Calculates density based on Rule of Mixtures (ROM).

    The result is the sum of fraction-weighted atomic masses divided by the
    sum of fraction-weighted molar volumes, rounded to one decimal place.

    Args:
        formula: Composition string accepted by pymatgen's ``Composition``.

    Returns:
        The ROM density estimate rounded to 1 decimal.
    '''
    composition = Composition(formula)

    fractions = [composition.get_atomic_fraction(element) for element in composition.elements]
    molar_volumes = np.array([element.molar_volume for element in composition.elements])
    masses = np.array([element.atomic_mass for element in composition.elements])

    weighted_mass = np.sum(fractions * masses)
    weighted_volume = np.sum(fractions * molar_volumes)

    return round(weighted_mass / weighted_volume, 1)
76
+
77
# Hard-coded Young's modulus values, keyed by element symbol, that are used
# instead of the value pymatgen reports for the element.
_YOUNGS_MODULUS_OVERRIDES = {
    'C': 1050,  # use diamond form for carbon
    'B': 362,   # use minimum value for Boron Carbide
    'Mo': 329,
    'Co': 209,
}


def calculate_youngs_modulus(formula):
    '''Calculates Young Modulus based on Rule of Mixtures (ROM).

    Each element's modulus is weighted by (atomic fraction * molar volume);
    the weighted sum is divided by the sum of those weights.

    Args:
        formula: Composition string accepted by pymatgen's ``Composition``.

    Returns:
        The ROM estimate rounded to the nearest integer, or the empty string
        ``''`` when any element has no Young's modulus value available (the
        formula and the collected values are printed in that case).
    '''
    comp = Composition(formula)

    weights = np.array([comp.get_atomic_fraction(e) for e in comp.elements])
    vols = np.array([e.molar_volume for e in comp.elements])

    ym_list = []
    for e in comp.elements:
        symbol = str(e)
        if symbol in _YOUNGS_MODULUS_OVERRIDES:
            ym_list.append(_YOUNGS_MODULUS_OVERRIDES[symbol])
        else:
            # Only consult pymatgen when there is no override for the element.
            ym_list.append(e.youngs_modulus)

    ym_vals = np.array(ym_list)

    # Check the plain list with an identity test rather than `None in ndarray`,
    # which depends on NumPy's elementwise comparison against None.
    if any(value is None for value in ym_list):
        print(formula, ym_vals)
        return ''

    val = np.sum(weights * vols * ym_vals) / np.sum(weights * vols)

    return int(round(val, 0))
107
+
108
def interpret(input):
    '''Draws a SHAP summary plot for `input` and returns the figure.

    The current matplotlib figure is cleared, the explainer stored at
    `explainer_filename` is loaded with joblib, and the first entry of its
    SHAP values is plotted with the module-level `feature_names` as labels.

    Args:
        input: Data passed both to the explainer's ``shap_values`` and to
            ``shap.summary_plot``. The name shadows the builtin but is kept
            so that keyword callers continue to work.

    Returns:
        A ``(figure, None)`` tuple, where ``figure`` is the current
        matplotlib figure after plotting.
    '''
    plt.clf()
    explainer = joblib.load(filename=explainer_filename)
    all_shap_values = explainer.shap_values(input)
    shap.summary_plot(all_shap_values[0], input, feature_names=feature_names)
    return plt.gcf(), None
115
+
116
def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    '''One-hot encodes X with the "Num classes microstructure" class count.'''
    class_count = num_classes_one_hot["Num classes microstructure"]
    return tf.keras.utils.to_categorical(X, class_count)
118
+
119
def to_categorical_num_classes_processing(X, num_classes_one_hot):
    '''One-hot encodes X with the "Num classes preprocessing" class count.'''
    class_count = num_classes_one_hot["Num classes preprocessing"]
    return tf.keras.utils.to_categorical(X, class_count)
121
+
122
def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    '''One-hot encodes X with the "Num classes bcc/fcc/other" class count.'''
    class_count = num_classes_one_hot["Num classes bcc/fcc/other"]
    return tf.keras.utils.to_categorical(X, class_count)
124
+
125
def to_categorical_single_multiphase(X, num_classes_one_hot):
    '''One-hot encodes X with the "Num classes single/multiphase" class count.'''
    class_count = num_classes_one_hot["Num classes single/multiphase"]
    return tf.keras.utils.to_categorical(X, class_count)
127
+
128
def return_num_classes_one_hot(df):
    '''Counts the distinct values in each categorical property column of `df`.

    Args:
        df: Frame containing the 'PROPERTY: Microstructure',
            'PROPERTY: Processing method', 'PROPERTY: Single/Multiphase' and
            'PROPERTY: BCC/FCC/other' columns.

    Returns:
        Dict mapping each "Num classes ..." key to the number of unique
        values found in the corresponding column.
    '''
    # Result key -> source column, listed in the order the result is built.
    column_for_key = {
        "Num classes microstructure": 'PROPERTY: Microstructure',
        "Num classes preprocessing": 'PROPERTY: Processing method',
        "Num classes single/multiphase": 'PROPERTY: Single/Multiphase',
        "Num classes bcc/fcc/other": 'PROPERTY: BCC/FCC/other',
    }
    return {
        key: len(np.unique(np.asarray(df[column])))
        for key, column in column_for_key.items()
    }
137
+
138
+ # def turn_into_one_hot(X, mapping_dict):
139
+ # one_hot = X
140
+ # num_classes_one_hot = {'Num classes microstructure': 45, 'Num classes preprocessing': 5,
141
+ # 'Num classes single/multiphase': 3, 'Num classes bcc/fcc/other': 3}
142
+ # one_hot["Microstructure One Hot"] = X["PROPERTY: Microstructure"].apply(to_categorical_num_classes_microstructure, num_classes_one_hot=num_classes_one_hot)
143
+ # one_hot["Processing Method One Hot"] = X["PROPERTY: Processing method"].apply(to_categorical_num_classes_processing,
144
+ # num_classes_one_hot=num_classes_one_hot)
145
+ # one_hot["BCC/FCC/other One Hot"] = X["PROPERTY: BCC/FCC/other"].apply(to_categorical_bcc_fcc_other,
146
+ # num_classes_one_hot=num_classes_one_hot)
147
+ # one_hot["Single/Multiphase One Hot"] = X["PROPERTY: Single/Multiphase"].apply(to_categorical_single_multiphase,
148
+ # num_classes_one_hot=num_classes_one_hot)
149
+
150
+ # flatten_microstructure = one_hot["Microstructure One Hot"].apply(pd.Series)
151
+ # flatten_processing = one_hot["Processing Method One Hot"].apply(pd.Series)
152
+ # flatten_bcc_fcc_other = one_hot["BCC/FCC/other One Hot"].apply(pd.Series)
153
+ # flatten_single_multiphase = one_hot["Single/Multiphase One Hot"].apply(pd.Series)
154
+
155
+ # one_hot.drop(columns=["Microstructure One Hot", "Processing Method One Hot", "BCC/FCC/other One Hot",
156
+ # "Single/Multiphase One Hot"])
157
+
158
+ # for column in flatten_microstructure.columns:
159
+ # one_hot["Microstructure " + str(
160
+ # list(mapping_dict["PROPERTY: Microstructure"].keys())[int(column)])] = flatten_microstructure[int(column)]
161
+ # for column in flatten_processing.columns:
162
+ # one_hot["Preprocessing method " + str(list(mapping_dict["PROPERTY: Processing method"].keys())[int(column)])] = flatten_processing[column]
163
+ # for column in flatten_bcc_fcc_other.columns:
164
+ # one_hot["BCC/FCC/other " + str(list(mapping_dict["PROPERTY: BCC/FCC/other"].keys())[int(column)])] = flatten_bcc_fcc_other[column]
165
+ # for column in flatten_single_multiphase.columns:
166
+ # one_hot["Single/Multiphase " + str(list(mapping_dict["PROPERTY: Single/Multiphase"].keys())[int(column)])] = flatten_single_multiphase[column]
167
+
168
+ # one_hot = one_hot.drop(columns=["PROPERTY: Microstructure", "Microstructure One Hot", "BCC/FCC/other One Hot", "Single/Multiphase One Hot",
169
+ # "Processing Method One Hot", "PROPERTY: Processing method", "PROPERTY: BCC/FCC/other", "PROPERTY: Single/Multiphase"])
170
+ # return one_hot
171
+
172
+
173
def turn_into_one_hot(X, mapping_dict):
    '''Replaces the four categorical property columns of X with one-hot columns.

    Each value of a categorical column is encoded by the matching
    ``to_categorical_*`` helper, the encoded vectors are expanded into one
    column per position, and each new column is named
    ``<prefix><category label>``, where the label is the key found at that
    position in ``mapping_dict[<source column>]``.

    Unlike the earlier implementation, this works on a copy: the caller's
    frame ``X`` is left unmodified.

    Args:
        X: DataFrame containing the 'PROPERTY: Microstructure',
            'PROPERTY: Processing method', 'PROPERTY: BCC/FCC/other' and
            'PROPERTY: Single/Multiphase' columns.
        mapping_dict: Maps each of those column names to a mapping whose keys,
            in iteration order, are the category labels.
            NOTE(review): each mapping presumably needs at least as many keys
            as the fixed class count below, otherwise indexing the labels
            raises IndexError — confirm against the caller.

    Returns:
        A new DataFrame without the four source columns (and without the
        intermediate "... One Hot" columns), followed by the expanded
        one-hot columns.
    '''
    # Fixed encoding widths handed to the to_categorical_* helpers.
    num_classes_one_hot = {'Num classes microstructure': 30, 'Num classes preprocessing': 5,
                           'Num classes single/multiphase': 3, 'Num classes bcc/fcc/other': 3}

    # (source column, packed column name, output column prefix, encoder)
    encodings = (
        ("PROPERTY: Microstructure", "Microstructure One Hot",
         "Microstructure ", to_categorical_num_classes_microstructure),
        ("PROPERTY: Processing method", "Processing Method One Hot",
         "Preprocessing method ", to_categorical_num_classes_processing),
        ("PROPERTY: BCC/FCC/other", "BCC/FCC/other One Hot",
         "BCC/FCC/other ", to_categorical_bcc_fcc_other),
        ("PROPERTY: Single/Multiphase", "Single/Multiphase One Hot",
         "Single/Multiphase ", to_categorical_single_multiphase),
    )

    # Copy so the caller's DataFrame is not modified as a side effect.
    one_hot = X.copy()
    obsolete_columns = []

    for source_column, packed_column, prefix, encoder in encodings:
        packed = X[source_column].apply(encoder, num_classes_one_hot=num_classes_one_hot)
        # One column per position of the encoded vector.
        expanded = packed.apply(pd.Series)

        # Build the label list once per property rather than once per column.
        labels = list(mapping_dict[source_column].keys())
        for column in expanded.columns:
            one_hot[prefix + str(labels[int(column)])] = expanded[column]

        obsolete_columns += [source_column, packed_column]

    # The packed columns are never added to the copy; errors="ignore" still
    # removes them if the incoming frame happened to carry them already.
    return one_hot.drop(columns=obsolete_columns, errors="ignore")