File size: 7,422 Bytes
5de9b04
 
 
 
 
 
 
 
 
 
1282d04
5de9b04
52b28f3
1ba1cd6
 
 
 
 
 
 
 
 
5de9b04
 
 
52b28f3
5de9b04
c318130
 
 
5de9b04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97c782e
5de9b04
1ba1cd6
5de9b04
 
 
 
 
 
 
1ba1cd6
5de9b04
 
 
 
1ba1cd6
 
 
5de9b04
1ba1cd6
 
 
5de9b04
 
 
 
 
 
 
1ba1cd6
 
 
5de9b04
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import pandas as pd
import pymatgen as mg
from pymatgen.core.structure import Composition
import numpy as np
import tensorflow as tf
import shap
import joblib
import matplotlib.pyplot as plt

# Path of the serialized SHAP explainer that interpret() loads with joblib.
explainer_filename = "explainer.bz2"

# Display labels handed to shap.summary_plot() in interpret().
# NOTE(review): the order presumably has to match the column order of the
# array passed to interpret() -- confirm against the caller before reordering.
feature_names = ['PROPERTY: Calculated Density (g/cm$^3$)',
       'PROPERTY: Calculated Young modulus (GPa)', 'PROPERTY: Metal Al',
       'PROPERTY: Metal Co', 'PROPERTY: Metal Fe', 'PROPERTY: Metal Ni',
       'PROPERTY: Metal Si', 'PROPERTY: Metal Cr', 'PROPERTY: Metal Nb',
       'PROPERTY: Metal Ti', 'PROPERTY: Metal Mn', 'PROPERTY: Metal V',
       'PROPERTY: Metal Mo', 'PROPERTY: Metal Cu', 'PROPERTY: Metal Ta',
       'PROPERTY: Metal Zr', 'PROPERTY: Metal Hf', 'PROPERTY: Metal W',
       'PROPERTY: Metal Zn', 'PROPERTY: Metal Sn', 'PROPERTY: Metal Re',
       'PROPERTY: Metal C', 'PROPERTY: Metal Pd', 'PROPERTY: Metal Sc',
       'PROPERTY: Metal Y', 'Preprocessing method ANNEAL',
       'Preprocessing method CAST', 'Preprocessing method OTHER',
       'Preprocessing method POWDER', 'Preprocessing method WROUGHT',
       'BCC/FCC/other BCC', 'BCC/FCC/other FCC', 'BCC/FCC/other OTHER',
       'Single/Multiphase ', 'Single/Multiphase M', 'Single/Multiphase S']

def return_feature_names():
    '''Returns the module-level list of feature labels (the list object itself, not a copy).'''
    return feature_names
    
def normalize_and_alphabetize_formula(formula):
    '''Normalizes composition labels. Used to enable matching / groupby on compositions.

    The atomic fractions of `formula` are divided by the largest fraction (so the
    most abundant element gets amount 1), rounded to 3 decimals, and the resulting
    composition is returned as its alphabetical formula string.

    Returns None when `formula` is falsy, or when it cannot be processed (in that
    case "INVALID: " and the formula are printed).
    '''
    if not formula:
        return None

    try:
        comp = Composition(formula)
        weights = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        largest = max(weights)
        normalized_comp = "".join(
            str(ele) + str(round(w / largest, 3))
            for ele, w in zip(comp.elements, weights)
        )
        return Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Best-effort by design: an unparsable label is reported and mapped to None.
        # `Exception` (rather than a bare `except:`) keeps that behavior while
        # letting KeyboardInterrupt / SystemExit propagate.
        print("INVALID: ", formula)
        return None

def calculate_density(formula):
    '''Calculates density based on Rule of Mixtures (ROM).

    Returns the atomic-fraction-weighted sum of atomic masses divided by the
    atomic-fraction-weighted sum of molar volumes, rounded to one decimal.
    '''
    composition = Composition(formula)
    elements = composition.elements

    fractions = np.array([composition.get_atomic_fraction(el) for el in elements])
    molar_volumes = np.array([el.molar_volume for el in elements])
    masses = np.array([el.atomic_mass for el in elements])

    weighted_mass = np.sum(fractions * masses)
    weighted_volume = np.sum(fractions * molar_volumes)

    return round(weighted_mass / weighted_volume, 1)

def calculate_youngs_modulus(formula):
    '''Calculates Young Modulus based on Rule of Mixtures (ROM).

    Every element's modulus is weighted by (atomic fraction * molar volume).
    Returns the weighted mean rounded to an int, or '' (after printing the
    formula and the collected moduli) if any element has no modulus value.
    '''
    # Fixed values that take precedence over the element's own youngs_modulus.
    overrides = {
        'C': 1050,  # use diamond form for carbon
        'B': 362,   # use minimum value for Boron Carbide
        'Mo': 329,
        'Co': 209,
    }

    composition = Composition(formula)
    elements = composition.elements

    fractions = np.array([composition.get_atomic_fraction(el) for el in elements])
    molar_volumes = np.array([el.molar_volume for el in elements])

    collected = []
    for el in elements:
        symbol = str(el)
        # Only touch el.youngs_modulus when there is no override for the symbol.
        collected.append(overrides[symbol] if symbol in overrides else el.youngs_modulus)
    moduli = np.array(collected)

    if any(value is None for value in collected):
        print(formula, moduli)
        return ''

    volume_weights = fractions * molar_volumes
    weighted_mean = np.sum(volume_weights * moduli) / np.sum(volume_weights)

    return int(round(weighted_mean, 0))

def interpret(input):
    '''Draws a SHAP summary plot for `input` and returns it.

    The explainer is loaded from `explainer_filename` on every call, and the
    plot is labelled with the module-level `feature_names`.

    Returns a `(figure, None)` tuple, where `figure` is the current matplotlib figure.
    '''
    # Clear the current figure before drawing.
    plt.clf()

    # NOTE(review): joblib.load unpickles the file; the explainer artifact must
    # come from a trusted source.
    explainer = joblib.load(filename=explainer_filename)
    per_output_values = explainer.shap_values(input)

    # Only the first entry of the explainer's output is plotted.
    shap.summary_plot(per_output_values[0], input, feature_names=feature_names)

    return plt.gcf(), None

def to_categorical_num_classes_microstructure(X, num_classes_one_hot):
    '''One-hot encodes X using the class count stored under "Num classes microstructure".'''
    class_count = num_classes_one_hot["Num classes microstructure"]
    return tf.keras.utils.to_categorical(X, class_count)

def to_categorical_num_classes_processing(X, num_classes_one_hot):
    '''One-hot encodes X using the class count stored under "Num classes preprocessing".'''
    class_count = num_classes_one_hot["Num classes preprocessing"]
    return tf.keras.utils.to_categorical(X, class_count)

def to_categorical_bcc_fcc_other(X, num_classes_one_hot):
    '''One-hot encodes X using the class count stored under "Num classes bcc/fcc/other".'''
    class_count = num_classes_one_hot["Num classes bcc/fcc/other"]
    return tf.keras.utils.to_categorical(X, class_count)

def to_categorical_single_multiphase(X, num_classes_one_hot):
    '''One-hot encodes X using the class count stored under "Num classes single/multiphase".'''
    class_count = num_classes_one_hot["Num classes single/multiphase"]
    return tf.keras.utils.to_categorical(X, class_count)

def return_num_classes_one_hot(df):
    '''Counts the distinct values in each categorical property column of df.

    Returns a dict that maps each "Num classes ..." label to the number of
    unique values found in the corresponding 'PROPERTY: ...' column.
    '''
    # Result label -> source column, in the order the result dict is built.
    column_for_label = {
        "Num classes microstructure": 'PROPERTY: Microstructure',
        "Num classes preprocessing": 'PROPERTY: Processing method',
        "Num classes single/multiphase": 'PROPERTY: Single/Multiphase',
        "Num classes bcc/fcc/other": 'PROPERTY: BCC/FCC/other',
    }
    return {
        label: len(np.unique(np.asarray(df[column])))
        for label, column in column_for_label.items()
    }

def turn_into_one_hot(X, mapping_dict, num_classes_one_hot=None):
    '''Replaces the categorical property columns of X with one-hot columns.

    The columns "PROPERTY: Processing method", "PROPERTY: BCC/FCC/other" and
    "PROPERTY: Single/Multiphase" are each encoded value-by-value with the
    matching to_categorical_* helper, expanded into one column per class, and
    then dropped. New columns are named "<prefix> <label>", where <label> is
    the key of mapping_dict[<source column>] at the class position.

    Args:
        X: DataFrame holding the three property columns (presumably integer
            class codes -- confirm against the caller).
        mapping_dict: dict mapping each property column name to a dict whose
            keys are the category labels. NOTE(review): the key order is
            assumed to line up with the class codes.
        num_classes_one_hot: optional dict with the "Num classes ..." entries
            read by the to_categorical_* helpers. Defaults to the fixed counts
            this function has always used.

    Returns:
        A new DataFrame. X itself is left unmodified.
    '''
    if num_classes_one_hot is None:
        # Historical fixed class counts; the microstructure entry is not used here.
        num_classes_one_hot = {'Num classes microstructure': 45, 'Num classes preprocessing': 5,
                               'Num classes single/multiphase': 3, 'Num classes bcc/fcc/other': 3}

    # (source column, prefix of the generated columns, per-value encoder)
    encodings = (
        ("PROPERTY: Processing method", "Preprocessing method ", to_categorical_num_classes_processing),
        ("PROPERTY: BCC/FCC/other", "BCC/FCC/other ", to_categorical_bcc_fcc_other),
        ("PROPERTY: Single/Multiphase", "Single/Multiphase ", to_categorical_single_multiphase),
    )

    # Work on a copy: assigning into X directly would leave the caller's frame
    # with the extra columns as a side effect.
    one_hot = X.copy()

    for source_column, prefix, encoder in encodings:
        # One array per row, then one DataFrame column per class position.
        encoded = X[source_column].apply(encoder, num_classes_one_hot=num_classes_one_hot)
        flattened = encoded.apply(pd.Series)

        # Build the label list once per property instead of once per column.
        labels = list(mapping_dict[source_column])
        for position in flattened.columns:
            one_hot[prefix + str(labels[int(position)])] = flattened[position]

    return one_hot.drop(columns=[source_column for source_column, _, _ in encodings])