Spaces:

sandl
/

demo_active_learning

Sleeping

App Files Files Community

demo_active_learning / alloy_data_preprocessing.py

bndl

Upload alloy_data_preprocessing.py

51a84f1 over 1 year ago

raw

history blame

5.62 kB

	import pymatgen as mg
	import pandas as pd
	import numpy as np
	from pymatgen.core.structure import Composition


	def calculate_density(comp):
	    """Calculate the density of a composition with the Rule of Mixtures (ROM).

	    The result is the atomic-fraction-weighted sum of the atomic masses
	    divided by the atomic-fraction-weighted sum of the molar volumes.

	    Args:
	        comp: Composition-like object exposing ``elements`` and
	            ``get_atomic_fraction(element)``. Each element must provide
	            ``atomic_mass`` and ``molar_volume``.

	    Returns:
	        The ROM density rounded to one decimal place.
	        NOTE(review): presumably g/cm^3 when pymatgen elements are used
	        (atomic mass in amu, molar volume in cm^3) -- confirm.
	    """
	    elements = comp.elements

	    # Keep the fractions in an ndarray, like the other feature functions, so
	    # the element-wise products below do not rely on list * ndarray coercion.
	    weights = np.array([comp.get_atomic_fraction(e) for e in elements])
	    vols = np.array([e.molar_volume for e in elements])
	    atomic_masses = np.array([e.atomic_mass for e in elements])

	    val = np.sum(weights * atomic_masses) / np.sum(weights * vols)

	    return round(val, 1)


	def calculate_young_modulus(comp):
	    """Calculate the Young's modulus of a composition with the Rule of Mixtures (ROM).

	    Each element's modulus is weighted by its atomic fraction times its molar
	    volume. A few elements use hard-coded values instead of the element's own
	    ``youngs_modulus`` attribute (see ``overrides`` below).

	    Args:
	        comp: Composition-like object exposing ``elements`` and
	            ``get_atomic_fraction(element)``. Each element must provide
	            ``molar_volume`` and, unless overridden, ``youngs_modulus``.

	    Returns:
	        The ROM modulus rounded to the nearest integer; ``0`` when the
	        computed value is NaN; the empty string ``""`` (after printing the
	        composition and the collected values) when any element has no
	        Young's modulus value.
	    """
	    # Hard-coded moduli that replace the element attribute.
	    overrides = {
	        "C": 1050,  # use diamond form for carbon
	        "B": 362,  # use minimum value for Boron Carbide
	        "Mo": 329,
	        "Co": 209,
	    }

	    elements = comp.elements
	    weights = np.array([comp.get_atomic_fraction(e) for e in elements])
	    vols = np.array([e.molar_volume for e in elements])

	    ym_vals = []
	    for e in elements:
	        symbol = str(e)
	        if symbol in overrides:
	            ym_vals.append(overrides[symbol])
	        else:
	            ym_vals.append(e.youngs_modulus)
	    ym_vals = np.array(ym_vals)

	    # Explicit identity scan: `None in ndarray` goes through an element-wise
	    # `== None` comparison, which is fragile and obscures the intent.
	    if any(v is None for v in ym_vals):
	        print(comp, ym_vals)
	        return ""

	    val = np.sum(weights * vols * ym_vals) / np.sum(weights * vols)
	    # A computed NaN is never the `np.nan` object itself, so an identity test
	    # (`val is np.nan`) would miss it and int() below would raise ValueError.
	    if np.isnan(val):
	        val = 0

	    return int(round(val, 0))


	def calculate_electronegativity(comp):
	    """Return the ``average_electroneg`` attribute of the given composition."""
	    average = comp.average_electroneg
	    return average


	def create_composition(comp_df):
	    """Build one ``Composition`` per row and store it in a ``composition`` column.

	    Every column of ``comp_df`` is treated as an element symbol holding a
	    weight percentage. Iron is the balance element: its amount is set to
	    ``100`` minus the sum of all other elements in the row.

	    Args:
	        comp_df: pd.DataFrame whose columns are element symbols and whose
	            values are weight percentages. An existing ``Fe`` column is
	            allowed; it is recomputed from the other elements.

	    Returns:
	        The same DataFrame, modified in place, with an added
	        ``composition`` column.
	    """
	    compositions = []
	    for record in comp_df.to_dict("records"):
	        # Exclude a pre-filled "Fe" from the total: counting it would make the
	        # row sum to 100 and overwrite the iron balance with 0.
	        alloying_total = np.sum(
	            [amount for element, amount in record.items() if element != "Fe"]
	        )
	        record["Fe"] = 100 - alloying_total

	        compositions.append(Composition.from_weight_dict(record))

	    comp_df["composition"] = compositions
	    return comp_df


	def calculate_electronegativity(comp):
	    """Return the average electronegativity stored on ``comp``.

	    NOTE(review): this repeats the identical definition found earlier in this
	    module; being the later one, it is the definition bound at import time.
	    """
	    electroneg = comp.average_electroneg
	    return electroneg


	def calculate_valence_electron_concentration(comp):
	    """
	    Compute the valence electron concentration (VEC) of a composition.

	    Using the formula from https://www.sciencedirect.com/science/article/pii/S0927025622000015#s0100
	    VEC = Sum(j=1 to N) C(j) * VEC(j)
	    where N is the number of alloying elements, and C(j) and VEC(j) are the
	    atomic fraction and the valence electron number of element j.

	    Cr and Mo use fixed electron counts; every other element contributes the
	    second entry of its ``valence`` attribute.
	    """
	    fixed_counts = {
	        "Cr": 6,
	        # For Mo valence electron can vary from 2 to 6
	        "Mo": 4,
	    }

	    def electron_count(element):
	        symbol = str(element)
	        if symbol in fixed_counts:
	            return fixed_counts[symbol]
	        return element.valence[1]

	    fractions = np.array([comp.get_atomic_fraction(e) for e in comp.elements])
	    counts = np.array([electron_count(e) for e in comp.elements])

	    return np.sum(fractions * counts)


	def calculate_configuration_entropy(comp):
	    """
	    Compute the configurational-entropy term of a composition.

	    Based on the formula from https://www.sciencedirect.com/science/article/pii/S0927025622000015#s0100
	    S_conf = -R * Sum(j=1 to N) C(j) * ln(C(j))
	    where N is the number of alloying elements, C(j) is the atomic fraction of
	    element j and R is the gas constant.

	    This function returns only ``Sum C(j) * ln(C(j))``: the gas constant is
	    omitted and the leading minus sign is NOT applied, so the result is <= 0.
	    NOTE(review): the sign is kept as-is for backward compatibility with
	    existing consumers of this feature -- confirm whether it is intended.

	    Args:
	        comp: Composition-like object exposing ``elements`` and
	            ``get_atomic_fraction(element)``.

	    Returns:
	        The sum of ``C * ln(C)`` over the elements with a non-zero fraction.
	    """
	    weights = np.array([comp.get_atomic_fraction(e) for e in comp.elements])

	    # 0 * log(0) evaluates to nan and would poison the whole sum; the limit of
	    # c * ln(c) as c -> 0 is 0, so zero fractions simply contribute nothing.
	    present = weights[weights != 0]

	    return np.sum(present * np.log(present))


	def add_physics_features(df):
	    """
	    Add composition-derived physics features as additional columns.

	    Args:
	        df: pd.DataFrame holding at least the weight-percentage columns
	            ``%C``, ``%Co``, ``%Cr``, ``%V``, ``%Mo`` and ``%W``, or a 2-D
	            array-like with a ``shape`` attribute. An array with fewer than
	            10 columns is read as the six element columns followed by
	            ``Temperature_C``; otherwise it is read with the 19-column
	            layout listed below.

	    Returns:
	        The DataFrame (the input itself when a DataFrame was passed) with the
	        added columns ``density``, ``young_modulus``, ``electronegativity``,
	        ``valence_electron_concentration`` and ``configuration_entropy``.
	    """
	    mapping = {"%C": "C", "%Co": "Co", "%Cr": "Cr", "%V": "V", "%Mo": "Mo", "%W": "W"}

	    # isinstance (rather than an exact type comparison) also accepts
	    # DataFrame subclasses without re-wrapping them.
	    if not isinstance(df, pd.DataFrame):
	        # Fix for the case where the input df is not a dataframe but an array
	        element_cols = ["%C", "%Co", "%Cr", "%V", "%Mo", "%W"]
	        if df.shape[1] < 10:
	            cols = element_cols + ["Temperature_C"]
	        else:
	            cols = element_cols + [
	                "M6C",
	                "M23C6",
	                "FCCA1#2",
	                "M2C",
	                "MC - SHP",
	                "MC ETA",
	                "%C matrice",
	                "%Co matrice",
	                "%Cr matrice",
	                "%V matrice",
	                "%Mo matrice",
	                "%W matrice",
	                "Temperature_C",
	            ]
	        df = pd.DataFrame(df, columns=cols)

	    # rename() without inplace returns a new frame, so the element table can be
	    # extended freely without writing into a slice of df.
	    elem_df = df[list(mapping)].rename(columns=mapping)

	    # create_composition derives the Fe balance itself, so only the alloying
	    # elements are passed to it.
	    df_w_compo = create_composition(elem_df)
	    compositions = df_w_compo["composition"]

	    feature_functions = {
	        "density": calculate_density,
	        "young_modulus": calculate_young_modulus,
	        "electronegativity": calculate_electronegativity,
	        "valence_electron_concentration": calculate_valence_electron_concentration,
	        "configuration_entropy": calculate_configuration_entropy,
	    }
	    for column, func in feature_functions.items():
	        df[column] = np.vectorize(func)(compositions)

	    return df


	if __name__ == "__main__":
	    # Debug density issue on gradio demo: Al/Ni weight percentages, with the
	    # Fe balance filled in by create_composition.
	    df = pd.DataFrame([[7, 38]], columns=["Al", "Ni"])

	    df = create_composition(df)
	    val = calculate_density(df["composition"].iloc[0])
	    print(val)