Spaces:
Runtime error
Runtime error
File size: 7,141 Bytes
bebad14 dffaf30 bebad14 e37d702 bebad14 e37d702 89a82df e37d702 28cb117 e37d702 f1fe378 e37d702 4c35d83 e37d702 28cb117 ff1a3bf bebad14 e37d702 18dabba e37d702 86237ae e37d702 86237ae dd8b8f6 ff1a3bf e37d702 2932859 e37d702 ff1a3bf e37d702 86237ae e37d702 86237ae e37d702 ff1a3bf c924062 86237ae c2f27bf e37d702 ff1a3bf bebad14 b845900 bebad14 f624b87 bebad14 18dabba bebad14 18dabba bebad14 f354223 bebad14 f354223 bebad14 28cb117 4c35d83 bebad14 44470f9 28cb117 5122f94 28cb117 ff1a3bf 28cb117 130e4f7 28cb117 5122f94 28cb117 4853a01 c2f27bf 28cb117 4853a01 28cb117 bebad14 ff1a3bf dffaf30 bebad14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
import time
import gradio as gr
from gradio_molecule3d import Molecule3D
import sys
import os
import os
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
import pickle
from Bio.PDB import *
from Bio import PDB
import requests
import subprocess
import mdtraj as md
from enspara import geometry
from sklearn.cluster import DBSCAN
import pandas as pd
def run_smina(
    ligand_path, protein_path, out_path, pocket_center, pocket_size, num_poses=1, exhaustiveness=1
):
    """
    Dock a ligand into a receptor by invoking the Smina executable.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Location of the ligand PDBQT file to dock.
    protein_path: str or pathlib.Path
        Location of the receptor PDBQT file to dock into.
    out_path: str or pathlib.Path
        Destination for the generated poses (SDF or PDB format).
    pocket_center: iterable of float or int
        x, y, z coordinates of the search-box center.
    pocket_size: iterable of float or int
        x, y, z edge lengths of the search box.
    num_poses: int
        Upper bound on the number of poses written.
    exhaustiveness: int
        Search effort passed to Smina.

    Returns
    -------
    output_text: str
        Everything Smina printed to standard output.
    """
    command = [
        "./smina.static",
        "--ligand", str(ligand_path),
        "--receptor", str(protein_path),
        "--out", str(out_path),
    ]

    # Search box: one flag per axis, first for the center, then for the size.
    for flag_prefix, triple in (("--center_", pocket_center), ("--size_", pocket_size)):
        for index, axis in enumerate("xyz"):
            command.extend((flag_prefix + axis, str(triple[index])))

    command.extend(("--num_modes", str(num_poses)))
    command.extend(("--exhaustiveness", str(exhaustiveness)))

    # universal_newlines=True makes check_output hand back text instead of bytes.
    output_text = subprocess.check_output(command, universal_newlines=True)

    # Short pause kept from the original implementation before returning.
    time.sleep(0.5)
    return output_text
def _run_obabel(arguments):
    """Run ``obabel`` with the given argument list and raise gr.Error if it fails."""
    try:
        # List form (no shell) so paths with spaces or shell metacharacters are safe.
        completed = subprocess.run(["obabel", *arguments])
    except OSError as err:
        raise gr.Error("could not run obabel") from err
    if completed.returncode != 0:
        raise gr.Error("obabel conversion failed")


def predict(input_sequence, input_ligand, input_protein, exhaustiveness):
    """
    Main prediction function that calls ligsite and smina

    Parameters
    ----------
    input_sequence: str
        monomer sequence (part of the interface; not used by this function)
    input_ligand: str
        ligand as SMILES string
    input_protein: gradio.File
        Gradio file object to monomer protein structure as PDB
    exhaustiveness: int
        SMINA parameter; non-integer values are rounded to the nearest
        integer (minimum 1) before being passed to smina

    Returns
    -------
    output_structures: list
        [path to the input protein, path to the docked ligand SDF]
    run_time: float
        run time of the program in seconds

    Raises
    ------
    gr.Error
        If no protein is supplied, the SMILES cannot be parsed or embedded
        in 3D, an obabel conversion fails, or no pocket cluster is found.
    """
    start_time = time.time()

    if input_protein is None:
        raise gr.Error("need pdb input")
    # Accept both an object exposing ``.name`` (what this function has always
    # relied on) and a plain path string.
    protein_path = str(getattr(input_protein, "name", input_protein))

    work_dir = "/usr/src/app"
    ligand_sdf = f"{work_dir}/ligand.sdf"
    ligand_pdbqt = f"{work_dir}/ligand.pdbqt"
    receptor_pdbqt = f"{work_dir}/receptor.pdbqt"
    pocket_pdb = f"{work_dir}/pockets_dense.pdb"
    docking_pose = f"{work_dir}/docking_pose.sdf"

    # --- Ligand preparation: SMILES -> 3D conformer -> SDF ---
    # MolFromSmiles returns None (instead of raising) for unparseable input.
    mol = Chem.MolFromSmiles(input_ligand) if input_ligand else None
    if mol is None:
        raise gr.Error("could not parse ligand SMILES")
    mol_h = Chem.AddHs(mol)
    # EmbedMolecule signals failure with a negative conformer id.
    if AllChem.EmbedMolecule(mol_h) < 0:
        raise gr.Error("could not generate 3D coordinates for ligand")
    AllChem.MMFFOptimizeMolecule(mol_h)

    # Close the writer explicitly so the file is fully flushed before obabel reads it.
    writer = Chem.SDWriter(ligand_sdf)
    try:
        writer.write(mol_h)
    finally:
        writer.close()

    # --- Format conversion for smina ---
    _run_obabel([protein_path, "-xr", "-O", receptor_pdbqt])
    _run_obabel(["-isdf", ligand_sdf, "-O", ligand_pdbqt])

    # --- Find pocket ---
    pdb = md.load(protein_path)
    # run ligsite
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
    if len(pockets_xyz) == 0:
        raise gr.Error("no binding pocket found")

    # NOTE(review): eps is presumably in the same length unit as pockets_xyz
    # (nanometres if it follows the mdtraj convention) - confirm.
    eps_value = 0.15
    min_samples_value = 5
    dbscan = DBSCAN(eps=eps_value, min_samples=min_samples_value)
    labels = dbscan.fit_predict(pockets_xyz)

    # Find the unique clusters and their sizes, excluding noise points (label -1)
    unique_labels, counts = np.unique(labels, return_counts=True)
    is_cluster = unique_labels != -1
    valid_clusters = unique_labels[is_cluster]
    valid_counts = counts[is_cluster]
    # argmax on an empty array raises, so report the "only noise" case explicitly.
    if valid_counts.size == 0:
        raise gr.Error("no binding pocket found")

    # Keep the cluster with the most points
    densest_cluster_label = valid_clusters[np.argmax(valid_counts)]
    densest_cluster_points = pockets_xyz[labels == densest_cluster_label]

    # write cluster to PDB: one pseudo-atom per pocket point
    n_points = densest_cluster_points.shape[0]
    top_df = pd.DataFrame()
    top_df['serial'] = list(range(n_points))
    top_df['name'] = 'PK'
    top_df['element'] = 'H'
    top_df['resSeq'] = list(range(n_points))
    top_df['resName'] = 'PCK'
    top_df['chainID'] = 0
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
    pocket_trj.save(pocket_pdb)

    # The center is computed from the re-read PDB file rather than directly
    # from densest_cluster_points.
    # NOTE(review): this round trip may be what converts the coordinates into
    # the units smina expects - confirm before replacing it with a direct mean.
    parser = PDBParser()
    struc = parser.get_structure("X", pocket_pdb)
    coords = [atom.coord for atom in struc.get_atoms()]
    pocket_center = np.mean(coords, axis=0)

    # run smina; the UI slider may deliver a float, but smina's
    # --exhaustiveness is documented here as an int, so round it.
    run_smina(
        ligand_pdbqt,
        receptor_pdbqt,
        docking_pose,
        pocket_center,
        [10, 10, 10],
        exhaustiveness=max(1, int(round(exhaustiveness))),
    )

    run_time = time.time() - start_time
    return [protein_path, docking_pose], run_time
# Gradio UI: collects sequence / ligand / protein inputs, runs `predict`,
# and shows the resulting structures plus the run time.
with gr.Blocks() as app:
    gr.Markdown("# LigSite + Smina")
    gr.Markdown("Example model using LigSite and DBScan to find a binding pocket in the protein and then SMINA to dock the ligand in the found pocket.")
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here
    # for automated inference the default options are used
    # step=1 keeps the slider on whole numbers; `predict` documents
    # exhaustiveness as an int and forwards it to smina's --exhaustiveness.
    exhaustiveness = gr.Slider(1, 10, value=1, step=1, label="Exhaustiveness")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")
    btn = gr.Button("Run Inference")

    # One pre-filled example row: sequence, ligand SMILES, protein file.
    gr.Examples(
        [
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "input_test.pdb",
            ],
        ],
        [input_sequence, input_ligand, input_protein],
    )

    # Display styles for the 3D viewer.
    # NOTE(review): model 0 is presumably the protein and model 1 the docked
    # ligand, matching the [protein, sdf] order returned by `predict` - confirm.
    reps = [
        {
            "model": 0,
            "style": "cartoon",
            "color": "whiteCarbon",
        },
        {
            "model": 0,
            "resname": "UNK",
            "style": "stick",
            "color": "greenCarbon",
        },
        {
            "model": 0,
            "resname": "LIG",
            "style": "stick",
            "color": "greenCarbon",
        },
        {
            "model": 1,
            "style": "stick",
            "color": "greenCarbon",
        },
    ]
    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(
        predict,
        inputs=[input_sequence, input_ligand, input_protein, exhaustiveness],
        outputs=[out, run_time],
    )

app.launch()
|