DeepSEQreen_NAR_fb

Sleeping

App Files Files Community

DeepSEQreen_NAR_fb / deepscreen /models /predictors /coa_dti_pro.py

libokj

Upload 358 files

05ca42f almost 2 years ago

raw

history blame

14.9 kB

	import math
	from collections import defaultdict
	from typing import Literal

	import numpy as np
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from rdkit import Chem
	from scipy.sparse import coo_matrix
	from torch_geometric.data import Data
	from torch_geometric.nn.pool.topk_pool import TopKPooling
	from torch_geometric.nn.glob import global_mean_pool as gap, global_max_pool as gmp
	from torch_geometric.utils import add_self_loops, remove_self_loops
	from torch_geometric.nn.conv.message_passing import MessagePassing


	class CoaDTIPro(nn.Module):
	    """Compound-protein interaction classifier built on co-attention.

	    ``GNN`` encodes the compound graph, a protein language model (run under
	    ``torch.no_grad``) encodes the sequence, ``CoAttention`` lets the two
	    streams attend to each other, and a small linear stack maps the
	    concatenated, mean-pooled vectors to two output logits.

	    Args:
	        esm_model_and_alphabet: ``(model, alphabet)`` pair. The alphabet must
	            offer ``get_batch_converter()`` and the model must accept
	            ``repr_layers`` (presumably an ESM checkpoint -- confirm).
	        n_fingerprint: fingerprint vocabulary size handed to ``GNN``.
	        dim: width of the co-attention stream. NOTE(review): ``self.lin`` and
	            ``GNN`` both emit 512 features, so ``dim`` apparently has to be
	            512 -- confirm against the configs.
	        n_word: vocabulary size of ``embed_word`` (not used by ``forward``).
	        layer_output: stored only; not read inside this class.
	        layer_coa: number of co-attention layers.
	        nhead: attention heads per layer.
	        dropout: dropout probability inside the attention layers.
	        co_attention: wiring that ``CoAttention`` should build.
	        gcn_pooling: forwarded to ``GNN`` as its ``pooling`` flag.
	    """

	    def __init__(self,
	                 esm_model_and_alphabet, n_fingerprint, dim, n_word, layer_output, layer_coa, nhead=8,
	                 dropout=0.1,
	                 co_attention: Literal['stack', 'encoder', 'inter'] = 'inter', gcn_pooling=False, ):
	        super().__init__()
	        # NOTE: submodules are created in the original order and under the
	        # original attribute names so seeded initialisation and existing
	        # checkpoints (state_dict keys) stay valid.
	        self.co_attention = co_attention
	        self.layer_output = layer_output
	        self.layer_coa = layer_coa
	        self.embed_word = nn.Embedding(n_word, dim)
	        self.gnn = GNN(n_fingerprint, gcn_pooling)
	        self.esm_model, self.alphabet = esm_model_and_alphabet
	        self.batch_converter = self.alphabet.get_batch_converter()

	        self.W_attention = nn.Linear(dim, dim)

	        self.W_out = nn.Sequential(
	            nn.Linear(2 * dim, dim),
	            nn.Linear(dim, 128),
	            nn.Linear(128, 64)
	        )

	        self.coa_layers = CoAttention(dim, nhead, dropout, layer_coa, co_attention)
	        self.lin = nn.Linear(768, 512)  # bert1024 esm768
	        self.W_interaction = nn.Linear(64, 2)

	    def attention_cnn(self, x, xs, layer):
	        """Attention of ``x`` over ``xs`` after ``layer`` convolution steps.

	        This method is not called by ``forward``. It needs a ``W_cnn`` layer
	        stack that ``__init__`` never creates, so it can only run with
	        ``layer == 0`` unless a subclass or caller provides ``W_cnn``.

	        Raises:
	            AttributeError: if ``layer > 0`` and no ``W_cnn`` is defined.
	        """
	        if layer > 0 and not hasattr(self, 'W_cnn'):
	            raise AttributeError(
	                "CoaDTIPro.attention_cnn needs a 'W_cnn' layer stack, but this model does not define one"
	            )

	        xs = torch.unsqueeze(torch.unsqueeze(xs, 0), 0)
	        for i in range(layer):
	            xs = torch.relu(self.W_cnn[i](xs))
	        xs = torch.squeeze(torch.squeeze(xs, 0), 0)

	        h = torch.relu(self.W_attention(x))
	        hs = torch.relu(self.W_attention(xs))
	        weights = torch.tanh(F.linear(h, hs))
	        ys = torch.t(weights) * hs

	        return torch.unsqueeze(torch.mean(ys, 0), 0)

	    def forward(self, inputs, proteins):
	        """Return the two interaction logits for one compound-protein pair.

	        Args:
	            inputs: graph batch consumed by ``self.gnn``.
	            proteins: iterable holding exactly one protein sequence.

	        Returns:
	            Tensor produced by ``W_interaction`` (last dimension 2).

	        Raises:
	            ValueError: if ``proteins`` does not hold exactly one sequence.
	        """
	        compound_vector = self.gnn(inputs)
	        # All GNN output rows become one sequence with batch size 1.
	        compound_vector = torch.unsqueeze(compound_vector, 0)

	        _, _, proteins = self.batch_converter([(None, protein) for protein in proteins])
	        if proteins.size(0) != 1:
	            # The compound stream always has batch size 1, so any other
	            # protein batch would be mis-reshaped inside the attention layers
	            # and finally rejected by torch.cat below. Fail early and clearly.
	            raise ValueError(
	                f'CoaDTIPro.forward supports exactly one protein per call, got {proteins.size(0)}'
	            )
	        with torch.no_grad():
	            results = self.esm_model(proteins.to(compound_vector.device), repr_layers=[6])
	        token_representations = results["representations"][6]

	        # Skip the first token position (presumably the BOS token -- confirm).
	        protein_vector = token_representations[:, 1:, :]
	        protein_vector = self.lin(torch.squeeze(protein_vector, 1))

	        protein_vector, compound_vector = self.coa_layers(protein_vector, compound_vector)

	        protein_vector = protein_vector.mean(dim=1)
	        compound_vector = compound_vector.mean(dim=1)
	        # Concatenate the two pooled vectors and output the interaction.
	        cat_vector = torch.cat((compound_vector, protein_vector), 1)
	        cat_vector = torch.tanh(self.W_out(cat_vector))
	        interaction = self.W_interaction(cat_vector)
	        return interaction


	class CoAttention(nn.Module):
	    """Selects and runs one of the three co-attention wirings.

	    Args:
	        dim: feature width of both streams.
	        nhead: attention heads per layer.
	        dropout: dropout probability inside the attention layers.
	        layer_coa: number of layers to build.
	        co_attention: ``'encoder'``, ``'stack'`` or ``'inter'``.

	    Raises:
	        ValueError: if ``co_attention`` is not one of the supported names.
	    """

	    def __init__(self, dim, nhead, dropout, layer_coa, co_attention):
	        super().__init__()
	        self.co_attention = co_attention
	        if co_attention == 'encoder':
	            self.coa_layers = EncoderCrossAtt(dim, nhead, dropout, layer_coa)
	        elif co_attention == 'stack':
	            self.coa_layers = nn.ModuleList([StackCrossAtt(dim, nhead, dropout) for _ in range(layer_coa)])
	        elif co_attention == 'inter':
	            self.coa_layers = nn.ModuleList([InterCrossAtt(dim, nhead, dropout) for _ in range(layer_coa)])
	        else:
	            # Without this branch `coa_layers` would simply be missing and the
	            # mistake would only surface as an AttributeError in forward().
	            raise ValueError(
	                f"Unsupported co_attention {co_attention!r}; expected 'stack', 'encoder' or 'inter'"
	            )

	    def forward(self, protein_vector, compound_vector):
	        """Return the updated ``(protein_vector, compound_vector)`` pair."""
	        if self.co_attention == 'encoder':
	            # The encoder variant is a single module that loops internally.
	            return self.coa_layers(protein_vector, compound_vector)

	        # 'stack' / 'inter': feed both streams through the layers in order.
	        for layer in self.coa_layers:
	            protein_vector, compound_vector = layer(protein_vector, compound_vector)
	        return protein_vector, compound_vector


	class EncoderCrossAtt(nn.Module):
	    """Encoder/decoder style co-attention.

	    The compound stream is first refined by a stack of self-attention
	    layers; the protein stream then alternates self-attention with
	    cross-attention onto the fully encoded compound stream.
	    """

	    def __init__(self, dim, nhead, dropout, layers):
	        super().__init__()
	        # self.encoder_layers = nn.ModuleList([SEA(dim, dropout) for _ in range(layers)])
	        self.encoder_layers = nn.ModuleList([SA(dim, nhead, dropout) for _ in range(layers)])
	        self.decoder_sa = nn.ModuleList([SA(dim, nhead, dropout) for _ in range(layers)])
	        self.decoder_coa = nn.ModuleList([DPA(dim, nhead, dropout) for _ in range(layers)])
	        self.layer_coa = layers

	    def forward(self, protein_vector, compound_vector):
	        """Return ``(protein_vector, compound_vector)`` after all layers."""
	        depth = self.layer_coa

	        # Encoder: compound self-attention, every layer before any decoding.
	        step = 0
	        while step < depth:
	            compound_vector = self.encoder_layers[step](compound_vector, None)
	            step += 1

	        # Decoder: protein self-attention, then cross-attention to the compound.
	        step = 0
	        while step < depth:
	            attended = self.decoder_sa[step](protein_vector, None)
	            protein_vector = self.decoder_coa[step](attended, compound_vector, None)
	            step += 1

	        return protein_vector, compound_vector


	class InterCrossAtt(nn.Module):
	    """Symmetric co-attention layer.

	    Each stream runs its own self-attention, then each attends to the
	    other, so both outputs carry cross-modal information.
	    """

	    def __init__(self, dim, nhead, dropout):
	        super().__init__()
	        self.sca = SA(dim, nhead, dropout)      # compound self-attention
	        self.spa = SA(dim, nhead, dropout)      # protein self-attention
	        self.coa_pc = DPA(dim, nhead, dropout)  # compound queries protein
	        self.coa_cp = DPA(dim, nhead, dropout)  # protein queries compound

	    def forward(self, protein_vector, compound_vector):
	        """Return ``(protein_covector, compound_covector)``."""
	        compound_self = self.sca(compound_vector, None)
	        protein_self = self.spa(protein_vector, None)

	        # Both cross-attentions read the self-attended streams, never each
	        # other's cross-attended output.
	        compound_cross = self.coa_pc(compound_self, protein_self, None)
	        protein_cross = self.coa_cp(protein_self, compound_self, None)
	        return protein_cross, compound_cross


	class StackCrossAtt(nn.Module):
	    """One-directional co-attention layer.

	    Both streams run self-attention, but only the protein stream is then
	    updated by cross-attention onto the compound stream.
	    """

	    def __init__(self, dim, nhead, dropout):
	        super().__init__()
	        self.sca = SA(dim, nhead, dropout)      # compound self-attention
	        self.spa = SA(dim, nhead, dropout)      # protein self-attention
	        self.coa_cp = DPA(dim, nhead, dropout)  # protein queries compound

	    def forward(self, protein_vector, compound_vector):
	        """Return ``(protein_covector, compound_vector)``."""
	        compound_self = self.sca(compound_vector, None)
	        protein_self = self.spa(protein_vector, None)
	        protein_cross = self.coa_cp(protein_self, compound_self, None)
	        return protein_cross, compound_self


	class MHAtt(nn.Module):
	    """Multi-head scaled dot-product attention.

	    Args:
	        hid_dim: feature width of the inputs and of the output.
	        n_heads: number of attention heads; must divide ``hid_dim``.
	        dropout: dropout probability applied to the attention map.

	    Raises:
	        ValueError: if ``n_heads`` is not positive or does not divide
	            ``hid_dim``. Otherwise the per-head reshape in ``forward`` would
	            fail or silently mix features across positions.
	    """

	    def __init__(self, hid_dim, n_heads, dropout):
	        super().__init__()
	        if n_heads < 1 or hid_dim % n_heads != 0:
	            raise ValueError(
	                f'hid_dim ({hid_dim}) must be a positive multiple of n_heads ({n_heads})'
	            )

	        self.linear_v = nn.Linear(hid_dim, hid_dim)
	        self.linear_k = nn.Linear(hid_dim, hid_dim)
	        self.linear_q = nn.Linear(hid_dim, hid_dim)
	        self.linear_merge = nn.Linear(hid_dim, hid_dim)
	        self.hid_dim = hid_dim
	        self.nhead = n_heads

	        self.dropout = nn.Dropout(dropout)
	        self.hidden_size_head = hid_dim // n_heads

	    def _split_heads(self, projected, n_batches):
	        """Reshape ``(batch, seq, hid_dim)`` into ``(batch, heads, seq, head_dim)``."""
	        return projected.view(n_batches, -1, self.nhead, self.hidden_size_head).transpose(1, 2)

	    def forward(self, v, k, q, mask):
	        """Attend from ``q`` over ``k``/``v``.

	        Args:
	            v, k: value/key tensors, ``(batch, key_len, hid_dim)``.
	            q: query tensor, ``(batch, query_len, hid_dim)``.
	            mask: ``None`` or a boolean tensor broadcastable to
	                ``(batch, heads, query_len, key_len)``; ``True`` entries are
	                excluded from attention.

	        Returns:
	            Tensor of shape ``(batch, query_len, hid_dim)``.
	        """
	        # The batch size is taken from the query; keys/values are reshaped to it.
	        n_batches = q.size(0)
	        v = self._split_heads(self.linear_v(v), n_batches)
	        k = self._split_heads(self.linear_k(k), n_batches)
	        q = self._split_heads(self.linear_q(q), n_batches)

	        atted = self.att(v, k, q, mask)
	        # Merge the heads back into one feature axis.
	        atted = atted.transpose(1, 2).contiguous().view(n_batches, -1, self.hid_dim)

	        return self.linear_merge(atted)

	    def att(self, value, key, query, mask):
	        """Scaled dot-product attention on already head-split tensors."""
	        d_k = query.size(-1)

	        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)

	        if mask is not None:
	            # A large negative score turns into ~0 weight after softmax.
	            scores = scores.masked_fill(mask, -1e9)

	        att_map = F.softmax(scores, dim=-1)
	        att_map = self.dropout(att_map)

	        return torch.matmul(att_map, value)


	class DPA(nn.Module):
	    """Cross-attention block: ``x`` queries ``y``, then residual + LayerNorm."""

	    def __init__(self, hid_dim, n_heads, dropout):
	        super().__init__()

	        self.mhatt1 = MHAtt(hid_dim, n_heads, dropout)
	        self.dropout1 = nn.Dropout(dropout)
	        self.norm1 = nn.LayerNorm(hid_dim)

	    def forward(self, x, y, y_mask=None):
	        """Return ``x`` updated with information attended from ``y``.

	        ``y`` provides both keys and values, ``x`` provides the queries and
	        ``y_mask`` is handed to the attention module unchanged.
	        """
	        attended = self.mhatt1(y, y, x, y_mask)
	        residual = x + self.dropout1(attended)
	        return self.norm1(residual)


	class SA(nn.Module):
	    """Self-attention block: attention over ``x`` itself, then residual + LayerNorm."""

	    def __init__(self, hid_dim, n_heads, dropout):
	        super().__init__()

	        self.mhatt1 = MHAtt(hid_dim, n_heads, dropout)
	        self.dropout1 = nn.Dropout(dropout)
	        self.norm1 = nn.LayerNorm(hid_dim)

	    def forward(self, x, mask=None):
	        """Return ``x`` after one self-attention update.

	        ``x`` serves as values, keys and queries; ``mask`` is handed to the
	        attention module unchanged.
	        """
	        attended = self.mhatt1(x, x, x, mask)
	        residual = x + self.dropout1(attended)
	        return self.norm1(residual)


	class SAGEConv(MessagePassing):
	    """GraphSAGE-like convolution using max aggregation.

	    Neighbour features are projected and passed through ReLU, max-pooled
	    per target node, concatenated with the node's own features and
	    projected back to ``in_channels``.
	    """

	    def __init__(self, in_channels, out_channels):
	        super().__init__(aggr='max')  # element-wise max over incoming messages
	        self.lin = nn.Linear(in_channels, out_channels)
	        self.act = nn.ReLU()
	        self.update_lin = nn.Linear(in_channels + out_channels, in_channels, bias=False)
	        self.update_act = nn.ReLU()

	    def forward(self, x, edge_index):
	        """Run one round of message passing.

	        Args:
	            x: node features, ``[N, in_channels]``.
	            edge_index: edge list, ``[2, E]``.
	        """
	        num_nodes = x.size(0)
	        # Normalise self loops: drop any existing ones, then add exactly one
	        # per node so every node also receives its own message.
	        edge_index, _ = remove_self_loops(edge_index)
	        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)

	        return self.propagate(edge_index, size=(num_nodes, num_nodes), x=x)

	    def message(self, x_j):
	        """Transform source-node features ``x_j`` (``[E, in_channels]``)."""
	        return self.act(self.lin(x_j))

	    def update(self, aggr_out, x):
	        """Combine aggregated messages (``[N, out_channels]``) with ``x``."""
	        combined = torch.cat([aggr_out, x], dim=1)
	        return self.update_act(self.update_lin(combined))


	class GNN(nn.Module):
	    """Compound graph encoder producing 512-dimensional features.

	    With ``pooling`` disabled the output has one row per node; with
	    ``pooling`` enabled it has one row per graph, built from three
	    TopK-pooled max/mean readouts that are summed.

	    NOTE(review): ``conv3``, ``bn1``, ``bn2`` and ``act2`` are constructed
	    but never used in ``forward``. In the pooling branch ``pool3`` directly
	    follows ``pool2`` without a convolution in between -- confirm whether
	    that is intended.
	    """

	    def __init__(self, n_fingerprint, pooling, embed_dim=128):
	        super().__init__()
	        self.pooling = pooling
	        self.embed_fingerprint = nn.Embedding(num_embeddings=n_fingerprint, embedding_dim=embed_dim)
	        self.conv1 = SAGEConv(embed_dim, 128)
	        self.pool1 = TopKPooling(128, ratio=0.8)
	        self.conv2 = SAGEConv(128, 128)
	        self.pool2 = TopKPooling(128, ratio=0.8)
	        self.conv3 = SAGEConv(128, 128)
	        self.pool3 = TopKPooling(128, ratio=0.8)
	        self.linp1 = nn.Linear(256, 128)
	        self.linp2 = nn.Linear(128, 512)

	        self.lin = nn.Linear(128, 512)
	        self.bn1 = nn.BatchNorm1d(128)
	        self.bn2 = nn.BatchNorm1d(64)
	        self.act1 = nn.ReLU()
	        self.act2 = nn.ReLU()

	    @staticmethod
	    def _readout(x, batch):
	        """Concatenate the per-graph max and mean of the node features."""
	        return torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

	    def forward(self, data):
	        """Encode ``data`` (needs ``x``, ``edge_index`` and ``batch``)."""
	        edge_index, batch = data.edge_index, data.batch
	        # Embedding lookup yields [N, 1, embed_dim]; drop the middle axis.
	        x = self.embed_fingerprint(data.x).squeeze(1)
	        x = F.relu(self.conv1(x, edge_index))

	        if not self.pooling:
	            # Node-level output: one more convolution, then project to 512.
	            x = F.relu(self.conv2(x, edge_index))
	            return self.lin(x)

	        # Graph-level output: pool, read out, and repeat.
	        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)
	        readout1 = self._readout(x, batch)

	        x = F.relu(self.conv2(x, edge_index))

	        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)
	        readout2 = self._readout(x, batch)

	        x, edge_index, _, batch, _, _ = self.pool3(x, edge_index, None, batch)
	        readout3 = self._readout(x, batch)

	        summed = readout1 + readout2 + readout3
	        return self.linp2(self.act1(self.linp1(summed)))


	# Process-wide vocabularies used by the featurizer functions below.
	# Each is a defaultdict whose factory returns the dict's current length, so
	# the first lookup of an unseen key assigns it the next free integer id.
	# Ids therefore depend on the order in which keys are first looked up.
	# NOTE(review): the trailing numbers look like vocabulary sizes observed on
	# two datasets -- confirm before relying on them.
	atom_dict = defaultdict(lambda: len(atom_dict)) # 51 bindingdb: 26
	bond_dict = defaultdict(lambda: len(bond_dict)) # 4 bindingdb: 4
	fingerprint_dict = defaultdict(lambda: len(fingerprint_dict)) # 6341 bindingdb: 20366
	edge_dict = defaultdict(lambda: len(edge_dict)) # 17536 bindingdb: 77916
	word_dict = defaultdict(lambda: len(word_dict)) # 22 bindingdb: 21


	def drug_featurizer(smiles, radius=2):
	    """Convert a SMILES string into a graph ``Data`` object.

	    Args:
	        smiles: SMILES string of the compound.
	        radius: number of fingerprint refinement rounds, passed to
	            ``extract_fingerprints``.

	    Returns:
	        ``Data`` whose ``x`` holds one fingerprint id per atom (explicit
	        hydrogens included) with shape ``[n, 1]``, and whose ``edge_index``
	        lists the non-zero entries of the adjacency matrix.

	    Raises:
	        ValueError: if RDKit cannot parse ``smiles``.
	    """
	    mol = Chem.MolFromSmiles(smiles)
	    if mol is None:
	        # RDKit signals a parse failure by returning None; report the input
	        # instead of letting AddHs fail with an opaque argument error.
	        raise ValueError(f'Could not parse SMILES string: {smiles!r}')
	    mol = Chem.AddHs(mol)

	    atoms = create_atoms(mol)
	    i_jbond_dict = create_ijbonddict(mol)
	    fingerprints = extract_fingerprints(atoms, i_jbond_dict, radius)

	    # COO format exposes the row/col indices of every bonded atom pair.
	    adjacency = coo_matrix(Chem.GetAdjacencyMatrix(mol))
	    edge_index = np.array([adjacency.row, adjacency.col])

	    return Data(x=torch.LongTensor(fingerprints).unsqueeze(1), edge_index=torch.LongTensor(edge_index))


	def create_atoms(mol):
	    """Return an array with one ``atom_dict`` id per atom of ``mol``.

	    An atom is keyed by its element symbol, or by the tuple
	    ``(symbol, 'aromatic')`` when RDKit reports it as aromatic, so aromatic
	    and non-aromatic atoms of the same element get different ids.
	    """
	    aromatic_indices = {atom.GetIdx() for atom in mol.GetAromaticAtoms()}

	    labels = []
	    for atom in mol.GetAtoms():
	        symbol = atom.GetSymbol()
	        if atom.GetIdx() in aromatic_indices:
	            labels.append((symbol, 'aromatic'))
	        else:
	            labels.append(symbol)

	    # Look the labels up in atom order so new ids are assigned in that order.
	    return np.array([atom_dict[label] for label in labels])


	def create_ijbonddict(mol):
	    """Build the neighbour table of ``mol``.

	    Returns a ``defaultdict`` that maps an atom index to a list of
	    ``(neighbour_index, bond_id)`` tuples, where ``bond_id`` is the
	    ``bond_dict`` id of the bond type's string form. Every bond is
	    recorded in both directions.
	    """
	    neighbours = defaultdict(list)
	    for bond in mol.GetBonds():
	        begin = bond.GetBeginAtomIdx()
	        end = bond.GetEndAtomIdx()
	        bond_id = bond_dict[str(bond.GetBondType())]
	        for source, target in ((begin, end), (end, begin)):
	            neighbours[source].append((target, bond_id))
	    return neighbours


	def extract_fingerprints(atoms, i_jbond_dict, radius=2):
	    """Extract r-radius subgraph ids (fingerprints) in the Weisfeiler-Lehman manner.

	    Args:
	        atoms: sequence of atom ids, indexed by atom index.
	        i_jbond_dict: mapping from atom index to a list of
	            ``(neighbour_index, bond_id)`` tuples. Atoms without bonds may
	            be missing from it.
	        radius: number of refinement rounds.

	    Returns:
	        Array with one fingerprint id per atom, in atom-index order.

	    The result is aligned with ``atoms`` even when some atoms have no bonds
	    (e.g. counter-ions) or when the mapping's key order differs from the
	    atom order. Ids come from the module-level ``fingerprint_dict`` and
	    ``edge_dict``, which grow as new patterns are seen.
	    """
	    fingerprints = None

	    if (len(atoms) == 1) or (radius == 0):
	        fingerprints = [fingerprint_dict[a] for a in atoms]

	    else:
	        n_atoms = len(atoms)
	        nodes = atoms
	        i_jedge_dict = i_jbond_dict

	        for _ in range(radius):

	            # Update each node id from its neighbouring nodes and edges.
	            # Walk atoms by index (not by dict order) so fingerprints[i]
	            # always belongs to atom i; `nodes` is indexed by atom index
	            # in the next round. `.get` avoids inserting keys into a
	            # caller-owned defaultdict.
	            fingerprints = []
	            for i in range(n_atoms):
	                neighbors = [(nodes[j], edge) for j, edge in i_jedge_dict.get(i, ())]
	                fingerprint = (nodes[i], tuple(sorted(neighbors)))
	                fingerprints.append(fingerprint_dict[fingerprint])
	            nodes = fingerprints

	            # Also update each edge id from the (new) ids of its two ends.
	            updated_edges = {}
	            for i in range(n_atoms):
	                for j, edge in i_jedge_dict.get(i, ()):
	                    both_side = tuple(sorted((nodes[i], nodes[j])))
	                    updated_edges.setdefault(i, []).append((j, edge_dict[(both_side, edge)]))
	            i_jedge_dict = updated_edges

	    return np.array(fingerprints)