Spaces:
Runtime error
Runtime error
File size: 7,530 Bytes
bebad14 dffaf30 bebad14 e37d702 bebad14 e37d702 28cb117 e37d702 f1fe378 e37d702 28cb117 4853a01 bebad14 e37d702 18dabba e37d702 86237ae e37d702 86237ae dd8b8f6 e37d702 2932859 e37d702 86237ae e37d702 86237ae e37d702 c924062 86237ae e37d702 0f43758 16bde42 ff98dc4 16bde42 ff98dc4 bebad14 86237ae bebad14 f624b87 bebad14 18dabba bebad14 18dabba bebad14 f354223 bebad14 f354223 bebad14 28cb117 bebad14 44470f9 28cb117 5122f94 28cb117 f354223 28cb117 130e4f7 28cb117 5122f94 28cb117 4853a01 28cb117 4853a01 28cb117 bebad14 4853a01 dffaf30 bebad14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 |
import time
import gradio as gr
from gradio_molecule3d import Molecule3D
import sys
import os
import os
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
import pickle
from Bio.PDB import *
import requests
import subprocess
import mdtraj as md
from enspara import geometry
from sklearn.cluster import DBSCAN
import pandas as pd
# Startup diagnostics: print the working directory and list its contents, then
# make sure a ligand.sdf file exists in the working directory.
for startup_command in ("pwd", "ls", "touch ligand.sdf"):
    os.system(startup_command)
def run_smina(
    ligand_path, protein_path, out_path, pocket_center, pocket_size, num_poses=1, exhaustiveness=1
):
    """
    Dock a ligand into a protein by invoking the bundled Smina binary.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Ligand file (PDBQT) to dock.
    protein_path: str or pathlib.Path
        Receptor file (PDBQT) to dock into.
    out_path: str or pathlib.Path
        Destination for the docking poses (SDF or PDB format).
    pocket_center: iterable of float or int
        x, y, z coordinates of the search-box center.
    pocket_size: iterable of float or int
        x, y, z edge lengths of the search box.
    num_poses: int
        Upper bound on the number of poses to generate.
    exhaustiveness: int
        Search effort passed to Smina's ``--exhaustiveness`` option.

    Returns
    -------
    output_text: str
        Everything Smina printed to standard output.

    Raises
    ------
    subprocess.CalledProcessError
        If Smina exits with a non-zero status.
    """
    center_flags = ("--center_x", "--center_y", "--center_z")
    size_flags = ("--size_x", "--size_y", "--size_z")

    command = ["./smina.static"]
    command += ["--ligand", str(ligand_path)]
    command += ["--receptor", str(protein_path)]
    command += ["--out", str(out_path)]
    # Search box: all three center coordinates first, then the three sizes.
    for axis, flag in enumerate(center_flags):
        command += [flag, str(pocket_center[axis])]
    for axis, flag in enumerate(size_flags):
        command += [flag, str(pocket_size[axis])]
    command += ["--num_modes", str(num_poses)]
    command += ["--exhaustiveness", str(exhaustiveness)]

    # universal_newlines=True makes check_output return str rather than bytes.
    return subprocess.check_output(command, universal_newlines=True)
# Single-character chain identifiers tried, in order, when a docked ligand
# chain clashes with a chain that is already present in the protein model.
_CHAIN_ID_POOL = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz"


def _write_ligand_sdf(smiles, sdf_path):
    """Build a 3D, MMFF-optimised conformer for *smiles* and write it to *sdf_path*."""
    smiles = (smiles or "").strip()
    mol = Chem.MolFromSmiles(smiles) if smiles else None
    if mol is None:
        # MolFromSmiles returns None (it does not raise) for unparsable input.
        raise gr.Error("could not parse ligand SMILES")
    mol = Chem.AddHs(mol)
    if AllChem.EmbedMolecule(mol) == -1:
        # -1 signals that no conformer was generated; optimising would fail.
        raise gr.Error("could not generate 3D coordinates for ligand")
    AllChem.MMFFOptimizeMolecule(mol)
    writer = Chem.SDWriter(sdf_path)
    try:
        writer.write(mol)
    finally:
        # Close explicitly so the file is complete before obabel reads it.
        writer.close()


def _locate_pocket_center(protein_path, pocket_pdb_path):
    """Return the center of the largest DBSCAN cluster of LigSite pocket cells.

    The cluster is also written to *pocket_pdb_path* as pseudo-atoms.
    """
    from Bio import PDB  # local import: the file-level star import does not bind ``PDB``

    pdb = md.load(protein_path)
    # run ligsite
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
    if len(pockets_xyz) == 0:
        raise gr.Error("no binding pocket found in protein")

    eps_value = 0.15
    min_samples_value = 5
    labels = DBSCAN(eps=eps_value, min_samples=min_samples_value).fit_predict(pockets_xyz)

    # Cluster sizes, ignoring the noise label (-1).
    unique_labels, counts = np.unique(labels, return_counts=True)
    is_cluster = unique_labels != -1
    if not is_cluster.any():
        raise gr.Error("no binding pocket found in protein")
    densest_label = unique_labels[is_cluster][np.argmax(counts[is_cluster])]
    densest_cluster_points = pockets_xyz[labels == densest_label]

    # One pseudo-atom per pocket cell so the cluster can be saved as a PDB file.
    n_points = densest_cluster_points.shape[0]
    top_df = pd.DataFrame(
        {
            "serial": list(range(n_points)),
            "name": "PK",
            "element": "H",
            "resSeq": list(range(n_points)),
            "resName": "PCK",
            "chainID": 0,
        }
    )
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
    pocket_trj.save(pocket_pdb_path)

    # NOTE(review): the center is deliberately taken from the coordinates read
    # back from the PDB file, not from densest_cluster_points directly. This
    # round-trip appears to be what converts mdtraj's internal units to the
    # units written in PDB files (which smina reads) - confirm before
    # simplifying it away.
    struc = PDB.PDBParser(QUIET=True).get_structure("X", pocket_pdb_path)
    coords = [atom.coord for atom in struc.get_atoms()]
    return np.mean(coords, axis=0)


def _write_complex(protein_path, ligand_path, output_path):
    """Write protein and docked ligand together into one PDB file."""
    from Bio import PDB  # local import: the file-level star import does not bind ``PDB``

    parser = PDB.PDBParser(QUIET=True)
    structure1 = parser.get_structure("Structure1", protein_path)
    structure2 = parser.get_structure("Structure2", ligand_path)

    combined_structure = PDB.Structure.Structure("Combined_Structure")
    protein_models = list(structure1)
    if not protein_models:
        raise gr.Error("protein file contains no coordinates")
    for model in protein_models:
        combined_structure.add(model)

    # Both parsed structures number their first model 0 and Structure.add()
    # rejects a second child with an existing id, so the ligand cannot be
    # added as a further model. Its chains are merged into the first protein
    # model instead, which also matches the "model": 0 selections in the UI.
    target_model = protein_models[0]
    for ligand_model in structure2:
        for chain in list(ligand_model):
            if target_model.has_id(chain.id):
                free_id = next(
                    (c for c in _CHAIN_ID_POOL if not target_model.has_id(c)), None
                )
                if free_id is None:
                    raise gr.Error("no free chain identifier left for the ligand")
                chain.detach_parent()
                chain.id = free_id
            target_model.add(chain)

    io = PDB.PDBIO()
    io.set_structure(combined_structure)
    io.save(output_path)


def predict(input_sequence, input_ligand, input_protein):
    """
    Dock a ligand into the largest predicted binding pocket of a protein.

    The ligand is built from SMILES with RDKit, ligand and receptor are
    converted to PDBQT with obabel, the pocket is located with LigSite +
    DBSCAN, the ligand is docked with Smina, and the protein and the docked
    pose are written to a single PDB file.

    Parameters
    ----------
    input_sequence: str
        Protein sequence from the UI. Not used by this model.
    input_ligand: str
        Ligand as a SMILES string.
    input_protein: file object or str
        Uploaded protein PDB file; either an object exposing ``.name`` or a
        plain path.

    Returns
    -------
    output_path: str
        Path of the PDB file holding protein and docked ligand.
    run_time: float
        Wall-clock duration of the call in seconds.

    Raises
    ------
    gr.Error
        If no protein was supplied, the SMILES cannot be parsed or embedded,
        or no pocket is found.
    """
    start_time = time.time()
    if input_protein is None:
        raise gr.Error("need pdb input")
    # Accept both a file wrapper with ``.name`` and a bare path string.
    protein_path = getattr(input_protein, "name", input_protein)

    ligand_sdf = "/usr/src/app/ligand.sdf"
    ligand_pdbqt = "/usr/src/app/ligand.pdbqt"
    receptor_pdbqt = "/usr/src/app/receptor.pdbqt"
    pocket_pdb = "/usr/src/app/pockets_dense.pdb"
    docking_pose = "/usr/src/app/docking_pose.pdb"
    docking_pose_renamed = "/usr/src/app/docking_pose_renamed.pdb"
    output_path = "/usr/src/app/output.pdb"

    _write_ligand_sdf(input_ligand, ligand_sdf)

    # Argument lists (no shell) so that spaces or shell metacharacters in the
    # uploaded file's path are passed to the tool verbatim.
    subprocess.run(["obabel", protein_path, "-xr", "-O", receptor_pdbqt])
    subprocess.run(["obabel", "-isdf", ligand_sdf, "-O", ligand_pdbqt])

    # Find pocket
    pocket_center = _locate_pocket_center(protein_path, pocket_pdb)

    run_smina(
        ligand_pdbqt,
        receptor_pdbqt,
        docking_pose,
        pocket_center,
        [10, 10, 10],
    )

    # Rename the ligand residue UNL -> LIG; the tool writes to stdout.
    with open(docking_pose_renamed, "w") as renamed_file:
        subprocess.run(
            ["pdb_rplresname", "-UNL:LIG", docking_pose], stdout=renamed_file
        )

    _write_complex(protein_path, docking_pose_renamed, output_path)

    run_time = time.time() - start_time
    return output_path, run_time
# Gradio front end: three inputs, a run button, one bundled example, a 3D
# viewer for the docked complex and a text box reporting the runtime.
with gr.Blocks() as app:
    gr.Markdown("# LigSite + Smina")
    gr.Markdown("Example model using LigSite and DBScan to find a binding pocket in the protein and then SMINA to dock the ligand in the found pocket.")

    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # Optional controls would be declared here; automated inference relies on
    # the defaults, e.g.
    # slider_option = gr.Slider(0,10, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    gr.Examples(
        examples=[
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "test_out.pdb",
            ],
        ],
        inputs=[input_sequence, input_ligand, input_protein],
    )

    # Viewer styling: the whole of model 0 as a cartoon, plus sticks for
    # residues named UNK or LIG.
    reps = [{"model": 0, "style": "cartoon", "color": "whiteCarbon"}] + [
        {"model": 0, "resname": ligand_resname, "style": "stick", "color": "greenCarbon"}
        for ligand_resname in ("UNK", "LIG")
    ]

    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(
        fn=predict,
        inputs=[input_sequence, input_ligand, input_protein],
        outputs=[out, run_time],
    )

app.launch()
|