demo_plinder_smina

Runtime error

App Files Files Community

demo_plinder_smina / inference_app.py

simonduerr

Update inference_app.py

dd8b8f6 verified 10 months ago

raw

history blame

6.59 kB


	import time

	import gradio as gr

	from gradio_molecule3d import Molecule3D

	import sys
	import os
	import os
	import numpy as np
	from rdkit import Chem
	from rdkit.Chem import AllChem
	from rdkit.Chem import Draw
	from rdkit.Chem.Draw import IPythonConsole
	from rdkit.Chem import DataStructs
	from rdkit.Chem import RDConfig
	from rdkit.Chem import rdBase
	import pickle

	from Bio.PDB import *
	import requests
	import subprocess

	import mdtraj as md
	from enspara import geometry
	from sklearn.cluster import DBSCAN
	import pandas as pd

	# Startup shell commands, run once at import time and in this order:
	# print the working directory, list its contents, then create
	# ligand.sdf (or refresh its timestamp if it already exists).
	for _startup_command in ("pwd", "ls", "touch ligand.sdf"):
	    os.system(_startup_command)

	def run_smina(
	    ligand_path,
	    protein_path,
	    out_path,
	    pocket_center,
	    pocket_size,
	    num_poses=1,
	    exhaustiveness=1,
	    smina_path="./smina.static",
	):
	    """
	    Perform docking with Smina.

	    Parameters
	    ----------
	    ligand_path: str or pathlib.Path
	        Path to ligand PDBQT file that should be docked.
	    protein_path: str or pathlib.Path
	        Path to protein PDBQT file that should be docked to.
	    out_path: str or pathlib.Path
	        Path to which docking poses should be saved, SDF or PDB format.
	    pocket_center: iterable of float or int
	        Coordinates defining the center of the binding site (indexed 0..2).
	    pocket_size: iterable of float or int
	        Lengths of edges defining the binding site (indexed 0..2).
	    num_poses: int
	        Maximum number of poses to generate.
	    exhaustiveness: int
	        Accuracy of docking calculations.
	    smina_path: str or pathlib.Path
	        Executable to invoke. Defaults to the ``./smina.static`` binary,
	        resolved relative to the current working directory.

	    Returns
	    -------
	    output_text: str
	        The output of the Smina calculation (captured standard output).

	    Raises
	    ------
	    subprocess.CalledProcessError
	        If the executable exits with a non-zero status.
	    FileNotFoundError
	        If the executable cannot be found.
	    """
	    command = [
	        str(smina_path),
	        "--ligand",
	        str(ligand_path),
	        "--receptor",
	        str(protein_path),
	        "--out",
	        str(out_path),
	    ]
	    # Emit --center_x/y/z followed by --size_x/y/z, each with its value.
	    for option, values in (("center", pocket_center), ("size", pocket_size)):
	        for index, axis in enumerate("xyz"):
	            command += [f"--{option}_{axis}", str(values[index])]
	    command += [
	        "--num_modes",
	        str(num_poses),
	        "--exhaustiveness",
	        str(exhaustiveness),
	    ]

	    # universal_newlines=True makes check_output return text, not bytes.
	    return subprocess.check_output(command, universal_newlines=True)

	def predict(input_sequence, input_ligand, input_protein):
	    """
	    Dock a ligand given as SMILES into the densest detected pocket of a protein.

	    The ligand is embedded in 3D and written to SDF, both molecules are
	    converted to PDBQT with ``obabel``, pocket cells are clustered with
	    DBSCAN, and Smina docks into a 10 x 10 x 10 box centred on the largest
	    cluster. The pose is renamed to residue ``LIG`` and merged with the
	    uploaded protein.

	    Parameters
	    ----------
	    input_sequence: str
	        Protein sequence from the UI. Not used by this function.
	    input_ligand: str
	        SMILES string of the ligand.
	    input_protein: file-like object or str
	        Uploaded protein structure: either an object exposing its path as
	        ``.name`` or the path itself.

	    Returns
	    -------
	    output_path: str
	        Always ``"/usr/src/app/output.pdb"``, the merged protein/pose file.
	    run_time: float
	        Wall-clock duration of the call in seconds.

	    Raises
	    ------
	    gr.Error
	        If no protein was uploaded, the SMILES cannot be parsed or embedded,
	        or no pocket cluster is found.
	    subprocess.CalledProcessError
	        If one of the external tools exits with a non-zero status.
	    """
	    start_time = time.time()

	    if input_protein is None:
	        raise gr.Error("Please upload a protein structure file.")
	    # Accept both an upload object carrying the path in `.name` and a plain path.
	    protein_path = str(getattr(input_protein, "name", input_protein))

	    # --- Ligand preparation -------------------------------------------------
	    smiles = (input_ligand or "").strip()
	    ligand = Chem.MolFromSmiles(smiles) if smiles else None
	    if ligand is None:
	        raise gr.Error("Could not parse the ligand SMILES string.")

	    ligand_h = Chem.AddHs(ligand)
	    # EmbedMolecule reports failure with a negative conformer id.
	    if AllChem.EmbedMolecule(ligand_h) < 0:
	        raise gr.Error("Could not generate 3D coordinates for the ligand.")
	    AllChem.MMFFOptimizeMolecule(ligand_h)

	    # Close the writer explicitly so the SDF is fully on disk before obabel
	    # reads it.
	    writer = Chem.SDWriter("/usr/src/app/ligand.sdf")
	    try:
	        writer.write(ligand_h)
	    finally:
	        writer.close()

	    # Argument lists (no shell) keep the user-controlled upload path from
	    # being interpreted as shell syntax; check=True surfaces tool failures.
	    subprocess.run(
	        ["obabel", protein_path, "-xr", "-O", "/usr/src/app/receptor.pdbqt"],
	        check=True,
	    )
	    subprocess.run(
	        [
	            "obabel",
	            "-isdf",
	            "/usr/src/app/ligand.sdf",
	            "-O",
	            "/usr/src/app/ligand.pdbqt",
	        ],
	        check=True,
	    )

	    # --- Pocket detection ---------------------------------------------------
	    pdb = md.load(protein_path)
	    # run ligsite
	    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
	    if len(pockets_xyz) == 0:
	        raise gr.Error("No binding pocket could be detected in the protein.")

	    labels = DBSCAN(eps=0.15, min_samples=5).fit_predict(pockets_xyz)

	    # Cluster sizes, ignoring DBSCAN's noise label (-1).
	    unique_labels, counts = np.unique(labels, return_counts=True)
	    is_cluster = unique_labels != -1
	    if not is_cluster.any():
	        raise gr.Error("No binding pocket could be detected in the protein.")

	    # The cluster with the most points is taken as the binding pocket.
	    densest_cluster_label = unique_labels[is_cluster][np.argmax(counts[is_cluster])]
	    densest_cluster_points = pockets_xyz[labels == densest_cluster_label]

	    # Write the pocket cells as pseudo-atoms, one per residue, to a PDB file.
	    n_points = densest_cluster_points.shape[0]
	    top_df = pd.DataFrame()
	    top_df['serial'] = list(range(n_points))
	    top_df['name'] = 'PK'
	    top_df['element'] = 'H'
	    top_df['resSeq'] = list(range(n_points))
	    top_df['resName'] = 'PCK'
	    top_df['chainID'] = 0

	    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
	    pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
	    pocket_trj.save('/usr/src/app/pockets_dense.pdb')

	    # NOTE(review): the centre is computed from the written PDB rather than
	    # from densest_cluster_points directly -- presumably to get the
	    # coordinates in the units of the PDB file that Smina expects; confirm
	    # before simplifying.
	    parser = PDBParser()
	    struc = parser.get_structure("X", "/usr/src/app/pockets_dense.pdb")
	    coords = [atom.coord for atom in struc.get_atoms()]
	    pocket_center = np.mean(coords, axis=0)

	    # --- Docking ------------------------------------------------------------
	    run_smina(
	        "/usr/src/app/ligand.pdbqt",
	        "/usr/src/app/receptor.pdbqt",
	        "/usr/src/app/docking_pose.pdb",
	        pocket_center,
	        [10, 10, 10],
	    )

	    # pdb_rplresname prints the modified structure to stdout, so capture it
	    # in a file; otherwise the UNL -> LIG rename is lost.
	    with open("/usr/src/app/docking_pose_lig.pdb", "w") as renamed_pose:
	        subprocess.run(
	            ["pdb_rplresname", "-UNL:LIG", "/usr/src/app/docking_pose.pdb"],
	            stdout=renamed_pose,
	            check=True,
	        )

	    # Merge the uploaded protein with the renamed pose. Nothing in this
	    # function produces a receptor.pdb file, so the upload itself is used.
	    with open("/usr/src/app/output.pdb", "w") as merged:
	        subprocess.run(
	            ["pdb_merge", protein_path, "/usr/src/app/docking_pose_lig.pdb"],
	            stdout=merged,
	            check=True,
	        )

	    run_time = time.time() - start_time
	    return "/usr/src/app/output.pdb", run_time

	# Gradio UI: three inputs in one row, a run button, one example, a 3D viewer
	# for the docked complex and a textbox showing the runtime.
	with gr.Blocks() as app:
	    gr.Markdown("# Template for inference")
	    gr.Markdown("Title, description, and other information about the model")

	    with gr.Row():
	        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
	        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
	        input_protein = gr.File(label="Input protein monomer")

	    # define any options here

	    # for automated inference the default options are used
	    # slider_option = gr.Slider(0,10, label="Slider Option")
	    # checkbox_option = gr.Checkbox(label="Checkbox Option")
	    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

	    btn = gr.Button("Run Inference")

	    # One example row: sequence, ligand SMILES, protein file.
	    example_inputs = [
	        "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
	        "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
	        "test_out.pdb",
	    ]
	    gr.Examples(
	        [example_inputs],
	        [input_sequence, input_ligand, input_protein],
	    )

	    # Viewer representations: the whole model as a cartoon, plus stick
	    # entries for residues named UNK and LIG.
	    reps = [{"model": 0, "style": "cartoon", "color": "whiteCarbon"}]
	    reps += [
	        {"model": 0, "resname": residue_name, "style": "stick", "color": "greenCarbon"}
	        for residue_name in ("UNK", "LIG")
	    ]

	    out = Molecule3D(reps=reps)
	    run_time = gr.Textbox(label="Runtime")

	    btn.click(
	        predict,
	        inputs=[input_sequence, input_ligand, input_protein],
	        outputs=[out, run_time],
	    )

	app.launch()