bndl committed on
Commit
51a84f1
·
1 Parent(s): edf1058

Upload alloy_data_preprocessing.py

Browse files
Files changed (1) hide show
  1. alloy_data_preprocessing.py +177 -0
alloy_data_preprocessing.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pymatgen as mg
2
+ import pandas as pd
3
+ import numpy as np
4
+ from pymatgen.core.structure import Composition
5
+
6
+
7
def calculate_density(comp):
    """Calculate the density based on the Rule of Mixtures (ROM).

    The estimate is the fraction-weighted sum of atomic masses divided by
    the fraction-weighted sum of molar volumes.

    Args:
        comp: Composition-like object exposing ``elements`` and
            ``get_atomic_fraction(element)``. Every element must provide
            ``molar_volume`` and ``atomic_mass``.

    Returns:
        The mass/volume ratio rounded to one decimal place. Units follow
        from the element data (presumably g/cm^3 with pymatgen -- confirm).
    """
    elements = comp.elements
    fractions = [comp.get_atomic_fraction(el) for el in elements]
    molar_volumes = np.array([el.molar_volume for el in elements])
    masses = np.array([el.atomic_mass for el in elements])

    weighted_mass = np.sum(fractions * masses)
    weighted_volume = np.sum(fractions * molar_volumes)
    return round(weighted_mass / weighted_volume, 1)
19
+
20
+
21
def calculate_young_modulus(comp):
    """Calculate the Young's modulus based on the Rule of Mixtures (ROM).

    Each element's modulus is weighted by ``atomic fraction * molar volume``
    and the weighted sum is normalised by the total weight.

    Args:
        comp: Composition-like object exposing ``elements`` and
            ``get_atomic_fraction(element)``. Every element must provide
            ``molar_volume`` and, unless it is overridden below,
            ``youngs_modulus``.

    Returns:
        int: The estimate rounded to the nearest integer (same units as the
        element data; presumably GPa with pymatgen -- confirm). ``0`` is
        returned when the estimate is NaN (e.g. zero total volume).
        str: The empty string ``""`` when at least one element has no
        modulus value (``None``); the composition and the collected values
        are printed first.
    """
    # Hard-coded moduli that take precedence over the element's own data.
    overrides = {
        "C": 1050,  # use diamond form for carbon
        "B": 362,  # use minimum value for Boron Carbide
        "Mo": 329,
        "Co": 209,
    }

    elements = comp.elements
    weights = np.array([comp.get_atomic_fraction(e) for e in elements])
    vols = np.array([e.molar_volume for e in elements])
    ym_list = [
        overrides[str(e)] if str(e) in overrides else e.youngs_modulus
        for e in elements
    ]
    ym_vals = np.array(ym_list)

    # Check the plain list: membership tests on a numpy array go through
    # element-wise comparison and are easy to get wrong.
    if any(value is None for value in ym_list):
        print(comp, ym_vals)
        return ""

    # 0/0 (no elements or zero total volume) is handled below, so silence
    # the floating-point warning it would otherwise emit.
    with np.errstate(invalid="ignore", divide="ignore"):
        val = np.sum(weights * vols * ym_vals) / np.sum(weights * vols)

    # `val is np.nan` never matches a computed NaN; np.isnan tests the value.
    if np.isnan(val):
        val = 0

    return int(round(val, 0))
53
+
54
+
55
def calculate_electronegativity(comp):
    """Return the average electronegativity of a composition.

    Args:
        comp: Object exposing an ``average_electroneg`` attribute.

    Returns:
        The value of ``comp.average_electroneg``, unchanged.
    """
    average = comp.average_electroneg
    return average
57
+
58
+
59
def create_composition(comp_df):
    """Build a composition object for every row of a weight-percent table.

    Each row is read as ``{element symbol: weight percent}``. Iron is
    treated as the balance element: its amount is set to 100 minus the sum
    of all other columns. An "Fe" value already present in the row is
    ignored and overwritten, so that it is not counted as an alloying
    addition (which would drive the iron balance to zero).

    Args:
        comp_df: pd.DataFrame whose columns are element symbols and whose
            values are weight percentages.

    Returns:
        The same DataFrame object, modified in place with an additional
        "composition" column holding one ``Composition`` per row.
    """
    compositions = []
    for row in comp_df.to_dict("records"):
        # Sum only the non-iron elements; Fe is whatever remains up to 100.
        alloying_total = np.sum(
            [amount for symbol, amount in row.items() if symbol != "Fe"]
        )
        row["Fe"] = 100 - alloying_total
        compositions.append(Composition.from_weight_dict(row))

    comp_df["composition"] = compositions
    return comp_df
72
+
73
+
74
def calculate_electronegativity(comp):
    """Return the average electronegativity of a composition.

    NOTE: this repeats an identical definition that appears earlier in the
    module; being the later one, it is the binding in effect after import.

    Args:
        comp: Object exposing an ``average_electroneg`` attribute.

    Returns:
        The value of ``comp.average_electroneg``, unchanged.
    """
    average = comp.average_electroneg
    return average
76
+
77
+
78
def calculate_valence_electron_concentration(comp):
    """Calculate the valence electron concentration (VEC) of a composition.

    Uses the formula from
    https://www.sciencedirect.com/science/article/pii/S0927025622000015#s0100

        VEC = Sum(j=1 to N) C(j) * VEC(j)

    where N is the number of alloying elements, C(j) is the atomic fraction
    of element j (as returned by ``comp.get_atomic_fraction``) and VEC(j) is
    its valence electron number.

    Args:
        comp: Composition-like object exposing ``elements`` and
            ``get_atomic_fraction(element)``. Elements other than Cr and Mo
            must provide ``valence``, whose second item is used as the
            electron count.

    Returns:
        The fraction-weighted sum of the per-element electron counts.
    """
    # Fixed electron counts used instead of the element's ``valence`` data.
    fixed_counts = {
        "Cr": 6,
        "Mo": 4,  # For Mo valence electron can vary from 2 to 6
    }

    elements = comp.elements
    fractions = np.array([comp.get_atomic_fraction(el) for el in elements])
    electron_counts = np.array(
        [
            fixed_counts[str(el)] if str(el) in fixed_counts else el.valence[1]
            for el in elements
        ]
    )

    return np.sum(fractions * electron_counts)
101
+
102
+
103
def calculate_configuration_entropy(comp):
    """Calculate the (unscaled) configurational entropy term of a composition.

    Based on the formula from
    https://www.sciencedirect.com/science/article/pii/S0927025622000015#s0100

        S_conf = -R * Sum(j=1 to N) C(j) * ln(C(j))

    where N is the number of alloying elements, C(j) is the atomic fraction
    of element j and R is the gas constant.

    Note that this function returns only ``Sum C(j) * ln(C(j))``: both the
    gas constant and the leading minus sign are omitted, so the result is
    non-positive. The sign is kept as is so existing consumers of this
    feature see the same values.

    Elements with a zero atomic fraction contribute nothing (the limit of
    ``c * ln(c)`` as c -> 0 is 0) and are skipped; evaluating them directly
    would yield ``0 * -inf = nan`` and turn the whole sum into NaN.

    Args:
        comp: Composition-like object exposing ``elements`` and
            ``get_atomic_fraction(element)``.

    Returns:
        The sum of ``C(j) * ln(C(j))`` over the elements with a non-zero
        fraction (``0.0`` when there are none).
    """
    weights = np.array(
        [comp.get_atomic_fraction(e) for e in comp.elements], dtype=float
    )
    present = weights[weights != 0]

    return np.sum(present * np.log(present))
116
+
117
+
118
def add_physics_features(df):
    """Add composition-derived physical features as extra columns.

    The alloying columns (%C, %Co, %Cr, %V, %Mo, %W) are read as weight
    percentages, turned into one composition per row (iron making up the
    balance) and used to compute the columns "density", "young_modulus",
    "electronegativity", "valence_electron_concentration" and
    "configuration_entropy".

    Args:
        df: pd.DataFrame containing at least the six "%<element>" columns,
            or a 2-D array with a ``shape`` attribute. An array is wrapped
            in a DataFrame using one of two hard-coded column layouts,
            chosen by its number of columns (fewer than 10, or otherwise).

    Returns:
        The DataFrame with the feature columns added. A DataFrame passed in
        is modified in place and returned; an array input yields a new
        DataFrame.
    """
    mapping = {"%C": "C", "%Co": "Co", "%Cr": "Cr", "%V": "V", "%Mo": "Mo", "%W": "W"}
    if not isinstance(df, pd.DataFrame):
        # Fix for the case where the input df is not a dataframe but an array
        print(df.shape)
        if df.shape[1] < 10:
            cols = ["%C", "%Co", "%Cr", "%V", "%Mo", "%W", "Temperature_C"]
        else:
            cols = [
                "%C",
                "%Co",
                "%Cr",
                "%V",
                "%Mo",
                "%W",
                "M6C",
                "M23C6",
                "FCCA1#2",
                "M2C",
                "MC - SHP",
                "MC ETA",
                "%C matrice",
                "%Co matrice",
                "%Cr matrice",
                "%V matrice",
                "%Mo matrice",
                "%W matrice",
                "Temperature_C",
            ]
        df = pd.DataFrame(df, columns=cols)
        print(df.shape)

    # A non-inplace rename returns a fresh frame, so the helper below never
    # writes into a slice of ``df``.
    elem_df = df[list(mapping)].rename(columns=mapping)

    # No "Fe" column is added here: create_composition fills in the iron
    # balance itself from the sum of the columns it receives, and a
    # pre-filled "Fe" column would be counted in that sum.
    compositions = create_composition(elem_df)["composition"]

    # Plain loops instead of np.vectorize: they also work for an empty frame
    # and do not guess the output dtype from the first row.
    df["density"] = [calculate_density(c) for c in compositions]
    df["young_modulus"] = [calculate_young_modulus(c) for c in compositions]
    df["electronegativity"] = [calculate_electronegativity(c) for c in compositions]
    df["valence_electron_concentration"] = [
        calculate_valence_electron_concentration(c) for c in compositions
    ]
    df["configuration_entropy"] = [
        calculate_configuration_entropy(c) for c in compositions
    ]
    return df
167
+
168
+
169
if __name__ == "__main__":
    # Manual smoke test: build the composition for a single Al/Ni row (iron
    # is added as the balance by create_composition) and print its density.
    df = pd.DataFrame([[7, 38]], columns=["Al", "Ni"])  # Debug density issue on gradio demo

    df = create_composition(df)
    val = calculate_density(df["composition"].iloc[0])
    print(val)