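"""Inference utilities for pretrained PolymerGNN models.

Loads saved PolymerGNN_Tg and PolymerGNN_IV weights and predicts Tg and IV
values for new resin compositions supplied as a pandas DataFrame.
"""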
import os
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
import torch
import pandas as pd
from polymerlearn.utils import GraphDataset, get_Tg_add, get_IV_add
from polymerlearn.models.gnn import PolymerGNN_Tg, PolymerGNN_IV
from polymerlearn.utils import make_like_batch
import numpy as np
# def convert_to_graphdataset(df, acid_positions = (0, 12), glycol_positions = (13, 25)):
# """
# Takes a dataframe with the input data and converts it to a graph dataset for the prediction
# For the Tg the model takes the proportions of acid and glycols and the log of the Mw (PS) (no other parameters)
# """
# targets = ["Tg", "IV"]
#     # The preprocessing expects the target columns to be present, so add placeholder zeros (TODO: clarify why)
# for col in targets:
# df[col] = 0
#     df_duplicated = pd.concat([df, df])  # Duplicate rows because a test split must be specified (TODO: change upstream)
# add_duplicated = get_Tg_add(df_duplicated)
# print(f"Inference to be done on data of size {df.shape}")
# graph_data = GraphDataset(
# data = df_duplicated,
# structure_dir = './Structures/AG/xyz',
# Y_target=targets,
# test_size = 0.5,
# add_features = add_duplicated,
# ac = acid_positions,
# gc = glycol_positions
# )
# test_data, Ytest, add_test = graph_data.get_test()
# print(f"After data preprocessing, inference on {len(test_data)}")
# n_predictions = len(Ytest)
# return test_data, add_test, n_predictions
def predict(df, model_path="../polymerlearn/data_models/", acid_positions=(0, 12), glycol_positions=(13, 25)):
    targets = ["Tg", "IV"]
    # The preprocessing expects the target columns to be present, so add placeholder zeros (TODO: clarify why)
    for col in targets:
        df[col] = 0
    # Duplicate the rows because GraphDataset requires a test split to be specified (TODO: change this upstream)
    df_duplicated = pd.concat([df, df])
    add_features = {"Tg": get_Tg_add(df_duplicated), "IV": get_IV_add(df_duplicated)}
    print(f"Inference to be done on data of shape {df.shape}")
    print("Tg", add_features["Tg"].shape)
    print("IV", add_features["IV"].shape)

    pred_all = []
    for target in targets:
        graph_data = GraphDataset(
            data=df_duplicated,
            structure_dir='./Structures/AG/xyz',
            Y_target=targets,
            test_size=0.5,
            add_features=add_features[target],
            ac=acid_positions,
            gc=glycol_positions
        )
        test_data, Ytest, add_test = graph_data.get_test()
        print(f"After data preprocessing, inference on {len(test_data)} samples")
        n_predictions = len(Ytest)

        if target == "Tg":
            model = PolymerGNN_Tg(
                input_feat=6,                                  # Number of input features on each node; don't change this
                hidden_channels=32,                            # Number of intermediate dimensions in the model; can be tuned
                num_additional=add_features[target].shape[1]   # Number of additional resin properties; matches get_Tg_add
            )
        else:
            model = PolymerGNN_IV(
                input_feat=6,                                  # Number of input features on each node; don't change this
                hidden_channels=32,                            # Number of intermediate dimensions in the model; can be tuned
                num_additional=add_features[target].shape[1]   # Number of additional resin properties; matches get_IV_add
            )

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        state = torch.load(os.path.join(model_path, f"{target}_model.pth"), map_location=device)
        model.load_state_dict(state)
        model.eval()

        predictions = []
        print(f"Running inference on {n_predictions} samples")
        with torch.no_grad():
            for i in range(n_predictions):
                batch_like_tup = make_like_batch(test_data[i])
                value = np.round(model(*batch_like_tup, add_test[i]).item(), 1)
                predictions.append(value)
        pred_all.append(predictions)
    return pred_all
# def predict(df, acid_positions = (0, 12), glycol_positions = (13, 25), model_path="../polymerlearn/data_models/tg_model_test.pth"):
# test_data, add_test, n_predictions = convert_to_graphdataset(df, acid_positions=acid_positions, glycol_positions=glycol_positions)
# predictions = predict_from_graph(test_data, add_test, n_predictions, model_path=model_path)
#     return predictions
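

# Minimal usage sketch (not part of the original script). It assumes a CSV named
# "new_resins.csv" (hypothetical) whose columns follow the same layout as the
# training spreadsheet: acid fractions in columns 0-12, glycol fractions in
# columns 13-25, plus whatever additional columns get_Tg_add / get_IV_add read.
# Adjust the path and column positions to your data before running.
if __name__ == "__main__":
    input_df = pd.read_csv("new_resins.csv")  # hypothetical input file
    tg_preds, iv_preds = predict(input_df)    # predict() returns [Tg predictions, IV predictions]
    for i, (tg, iv) in enumerate(zip(tg_preds, iv_preds)):
        print(f"Sample {i}: predicted Tg = {tg}, predicted IV = {iv}")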