import logging

import numpy as np
from torch import nn
from rdkit.Chem import Descriptors, AllChem, MolFromSmiles

from deepscreen.models.components.mlp import LazyMLP

# Module-level logger used by ``featurizer`` to report molecules it could not
# process (the original code referenced ``log`` without ever defining it).
log = logging.getLogger(__name__)

# LazyMLP instance configured with a single output channel, three hidden
# layers of width 100 / 1000 / 1000, Tanh activation and dropout disabled.
DeepSCAMs = LazyMLP(
    out_channels=1,
    hidden_channels=[100, 1000, 1000],
    activation=nn.Tanh,
    dropout=0.0
)


def featurizer(smiles, radius=2, n_bits=1024):
    """Build a Morgan-fingerprint + RDKit-descriptor feature row for a SMILES.

    The feature row is the ``n_bits`` Morgan fingerprint bits (as floats)
    followed by every descriptor in ``Descriptors._descList`` except the one
    at index 2. Descriptor values above the float32 maximum are clamped to
    that maximum; all others are cast to ``np.float32``.

    Args:
        smiles: SMILES string of the molecule to featurize.
        radius: Morgan fingerprint radius.
        n_bits: Number of bits in the Morgan fingerprint.

    Returns:
        A list containing a single feature row (itself a list) of length
        ``n_bits + number_of_descriptors``. If the molecule cannot be parsed
        or featurized, a warning is logged and the row is all zeros with the
        same length, so failed molecules keep the same layout as valid ones.
    """
    # Every RDKit descriptor except the one at index 2 of ``_descList``.
    descr = Descriptors._descList[0:2] + Descriptors._descList[3:]
    calc = [x[1] for x in descr]
    float32_max = np.finfo(np.float32).max

    try:
        mol = MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles returns None (rather than raising) on bad input.
            raise ValueError(f'Could not parse SMILES: {smiles}')

        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=n_bits)
        fp_expl = [float(bit) for bit in fp.ToBitString()]

        ds_n = []
        for d in calc:
            v = d(mol)
            # Clamp values that would overflow float32.
            ds_n.append(float32_max if v > float32_max else np.float32(v))

        features = [fp_expl + ds_n]
    except Exception:
        # Deliberate best-effort: any parsing/descriptor failure degrades to
        # a zero row instead of aborting the whole featurization run.
        log.warning(f'RDKit could not find process SMILES: {smiles}; converted to all 0 features')
        features = [[0.0] * (n_bits + len(calc))]

    return features