# demo_active_learning / alloy_data_preprocessing.py
# bndl's picture
# Upload alloy_data_preprocessing.py
# 51a84f1
# raw
# history blame
# 5.62 kB
import pymatgen as mg
import pandas as pd
import numpy as np
from pymatgen.core.structure import Composition
def calculate_density(comp):
    """Calculates density based on Rule of Mixtures (ROM).

    The value is the atomic-fraction-weighted sum of the elements' atomic
    masses divided by the atomic-fraction-weighted sum of their molar
    volumes, rounded to one decimal place.

    Args:
        comp: Composition-like object exposing ``elements`` and
            ``get_atomic_fraction(element)``; every element must provide
            ``molar_volume`` and ``atomic_mass``.

    Returns:
        The mass/volume ratio rounded to one decimal.
    """
    elements = comp.elements
    fractions = [comp.get_atomic_fraction(el) for el in elements]
    molar_volumes = np.array([el.molar_volume for el in elements])
    masses = np.array([el.atomic_mass for el in elements])
    # list * ndarray broadcasts element-wise, giving one weighted term per element.
    weighted_mass = np.sum(fractions * masses)
    weighted_volume = np.sum(fractions * molar_volumes)
    return round(weighted_mass / weighted_volume, 1)
def calculate_young_modulus(comp):
    """Calculates Young Modulus based on Rule of Mixtures (ROM).

    The result is the volume-weighted mean of the elemental moduli, where an
    element's volume share is its atomic fraction times its molar volume.

    Args:
        comp: Composition-like object exposing ``elements`` and
            ``get_atomic_fraction(element)``; every element must provide
            ``molar_volume`` and ``youngs_modulus`` and stringify to its
            symbol.

    Returns:
        The mixture modulus rounded to the nearest ``int``; ``0`` when the
        weighted mean is NaN (e.g. an empty composition); or ``""`` when at
        least one element has no Young's modulus (``None``), in which case
        the composition and the collected values are printed.
    """
    # Hand-picked values used instead of the element's own ``youngs_modulus``.
    overrides = {
        "C": 1050,  # use diamond form for carbon
        "B": 362,  # use minimum value for Boron Carbide
        "Mo": 329,
        "Co": 209,
    }
    weights = np.array([comp.get_atomic_fraction(e) for e in comp.elements])
    vols = np.array([e.molar_volume for e in comp.elements])
    ym_vals = [
        overrides[str(e)] if str(e) in overrides else e.youngs_modulus
        for e in comp.elements
    ]
    # Check for missing data on the plain list: membership tests on an
    # ndarray go through element-wise comparison and are easy to get wrong.
    if any(v is None for v in ym_vals):
        print(comp, np.array(ym_vals))
        return ""
    ym_vals = np.array(ym_vals, dtype=float)
    vol_shares = weights * vols
    # A zero total volume yields 0/0; silence the warning and handle the NaN below.
    with np.errstate(divide="ignore", invalid="ignore"):
        val = np.sum(vol_shares * ym_vals) / np.sum(vol_shares)
    # ``val is np.nan`` never matches a computed NaN; np.isnan tests the value.
    if np.isnan(val):
        val = 0
    return int(round(val, 0))
def calculate_electronegativity(comp):
    """Return the ``average_electroneg`` attribute of ``comp`` unchanged."""
    avg_electroneg = comp.average_electroneg
    return avg_electroneg
def create_composition(comp_df):
    """Build one composition per row, filling each row up to 100 with iron.

    Args:
        comp_df: pd.DataFrame whose column names are element symbols; each
            row is passed to ``Composition.from_weight_dict``. An existing
            "Fe" column is ignored and recomputed as the balance.

    Returns:
        The same DataFrame object, modified in place with an added
        "composition" column holding the object built for each row.
    """
    res = []
    for comp_dict in comp_df.to_dict("records"):
        # Sum the alloying elements only. Including an already-present "Fe"
        # entry would make the total 100 and overwrite the iron balance with 0.
        elem_fill = sum(
            amount for elem, amount in comp_dict.items() if elem != "Fe"
        )
        comp_dict["Fe"] = 100 - elem_fill
        res.append(Composition.from_weight_dict(comp_dict))
    comp_df["composition"] = res
    return comp_df
def calculate_electronegativity(comp):
    """Look up the average electronegativity reported by ``comp``.

    NOTE: this repeats an identical definition earlier in the module; being
    the later one, it is the definition the module name ends up bound to.

    Args:
        comp: Object exposing an ``average_electroneg`` attribute.

    Returns:
        The value of ``comp.average_electroneg``.
    """
    return getattr(comp, "average_electroneg")
def calculate_valence_electron_concentration(comp):
    """
    Valence electron concentration (VEC) of a composition.

    Using the formula from https://www.sciencedirect.com/science/article/pii/S0927025622000015#s0100
    VEC = Sum(j=1 to N)C(j)VEC(j)
    where N is the number of alloying elements and VEC(j) is the valence
    electron number of element j. C(j) is taken here as the atomic fraction
    returned by ``comp.get_atomic_fraction``.

    VEC(j) is read from the second entry of the element's ``valence``
    attribute, except for Cr and Mo, which use fixed values and never touch
    ``valence``.
    """
    fixed_counts = {
        "Cr": 6,
        # For Mo valence electron can vary from 2 to 6
        "Mo": 4,
    }
    elements = comp.elements
    fractions = np.array([comp.get_atomic_fraction(el) for el in elements])
    electron_counts = np.array(
        [
            fixed_counts[str(el)] if str(el) in fixed_counts else el.valence[1]
            for el in elements
        ]
    )
    return np.sum(fractions * electron_counts)
def calculate_configuration_entropy(comp):
    """
    Using the formula from https://www.sciencedirect.com/science/article/pii/S0927025622000015#s0100
    S_conf = -R*Sum(j=1 to N)C(j)ln(C(j))
    where N is the number of alloying elements, C(j) is the atomic fraction
    of element j and R is the gas constant.

    NOTE: the value returned is Sum(j)C(j)ln(C(j)) as is. Both the gas
    constant and the leading minus sign are omitted, so the result is <= 0
    for fractions in (0, 1].

    Elements with a zero fraction contribute 0 (the limit of x*ln(x) as x
    goes to 0) instead of turning the whole sum into NaN.
    """
    weights = np.array(
        [comp.get_atomic_fraction(e) for e in comp.elements], dtype=float
    )
    # Drop zero fractions before taking the log: 0 * log(0) evaluates to NaN.
    present = weights[weights > 0]
    ent = np.sum(present * np.log(present))
    return ent
def add_physics_features(df):
    """
    Adds composition-derived physics features as additional columns.

    Args:
        df: pd.DataFrame containing at least the columns "%C", "%Co", "%Cr",
            "%V", "%Mo" and "%W", or a 2-D array-like with a ``shape``
            attribute. An array with fewer than 10 columns is given the
            7-column layout below, any other array the 19-column layout.

    Returns:
        The DataFrame with the added columns "density", "young_modulus",
        "electronegativity", "valence_electron_concentration" and
        "configuration_entropy". A DataFrame input is modified in place and
        returned; an array input is wrapped in a new DataFrame first.
    """
    mapping = {"%C": "C", "%Co": "Co", "%Cr": "Cr", "%V": "V", "%Mo": "Mo", "%W": "W"}
    if not isinstance(df, pd.DataFrame):
        # Fix for the case where the input df is not a dataframe but an array
        print(df.shape)
        if df.shape[1] < 10:
            cols = ["%C", "%Co", "%Cr", "%V", "%Mo", "%W", "Temperature_C"]
        else:
            cols = [
                "%C",
                "%Co",
                "%Cr",
                "%V",
                "%Mo",
                "%W",
                "M6C",
                "M23C6",
                "FCCA1#2",
                "M2C",
                "MC - SHP",
                "MC ETA",
                "%C matrice",
                "%Co matrice",
                "%Cr matrice",
                "%V matrice",
                "%Mo matrice",
                "%W matrice",
                "Temperature_C",
            ]
        df = pd.DataFrame(df, columns=cols)
        print(df.shape)
    # rename() returns a new frame, so nothing is written into a slice of df.
    # No "Fe" column is added here: create_composition computes the iron
    # balance itself, and a pre-filled "Fe" would be counted in its sum.
    elem_df = df[list(mapping)].rename(columns=mapping)
    compositions = create_composition(elem_df)["composition"]
    # Plain loops instead of np.vectorize: they accept empty input and the
    # mixed int / "" results calculate_young_modulus can produce.
    df["density"] = [calculate_density(c) for c in compositions]
    df["young_modulus"] = [calculate_young_modulus(c) for c in compositions]
    df["electronegativity"] = [calculate_electronegativity(c) for c in compositions]
    df["valence_electron_concentration"] = [
        calculate_valence_electron_concentration(c) for c in compositions
    ]
    df["configuration_entropy"] = [
        calculate_configuration_entropy(c) for c in compositions
    ]
    return df
if __name__ == "__main__":
    # Debug density issue on gradio demo: Al and Ni amounts only, the
    # remainder up to 100 is filled with Fe by create_composition.
    df = pd.DataFrame([[7, 38]], columns=["Al", "Ni"])
    df = create_composition(df)
    val = calculate_density(df["composition"].iloc[0])
    print(val)