Spaces:
Runtime error
Runtime error
File size: 6,591 Bytes
bebad14 dffaf30 bebad14 e37d702 bebad14 e37d702 28cb117 e37d702 f1fe378 e37d702 28cb117 4853a01 bebad14 e37d702 86237ae e37d702 86237ae dd8b8f6 e37d702 2932859 e37d702 86237ae e37d702 86237ae e37d702 c924062 86237ae e37d702 86237ae bebad14 86237ae bebad14 f624b87 bebad14 f354223 bebad14 f354223 bebad14 28cb117 bebad14 44470f9 28cb117 5122f94 28cb117 f354223 28cb117 130e4f7 28cb117 5122f94 28cb117 4853a01 28cb117 4853a01 28cb117 bebad14 4853a01 dffaf30 bebad14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
import time
import gradio as gr
from gradio_molecule3d import Molecule3D
import sys
import os
import os
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
import pickle
from Bio.PDB import *
import requests
import subprocess
import mdtraj as md
from enspara import geometry
from sklearn.cluster import DBSCAN
import pandas as pd
# Start-up shell commands: print the working directory, list its contents,
# and make sure a ligand.sdf file exists there before any request arrives.
for _startup_command in ("pwd", "ls", "touch ligand.sdf"):
    os.system(_startup_command)
def run_smina(
    ligand_path, protein_path, out_path, pocket_center, pocket_size, num_poses=1, exhaustiveness=1
):
    """
    Run a Smina docking job and return what the program printed.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        PDBQT file of the ligand to dock.
    protein_path: str or pathlib.Path
        PDBQT file of the receptor to dock into.
    out_path: str or pathlib.Path
        Destination for the generated poses (SDF or PDB format).
    pocket_center: iterable of float or int
        x, y, z coordinates of the centre of the search box.
    pocket_size: iterable of float or int
        x, y, z edge lengths of the search box.
    num_poses: int
        Upper bound on the number of poses to write.
    exhaustiveness: int
        Search effort passed to Smina.

    Returns
    -------
    output_text: str
        Standard output of the Smina process.

    Raises
    ------
    subprocess.CalledProcessError
        If Smina exits with a non-zero status.
    """
    # Flag/value pairs in the exact order they appear on the command line.
    options = [
        ("--ligand", ligand_path),
        ("--receptor", protein_path),
        ("--out", out_path),
    ]
    options += [(f"--center_{axis}", pocket_center[i]) for i, axis in enumerate("xyz")]
    options += [(f"--size_{axis}", pocket_size[i]) for i, axis in enumerate("xyz")]
    options += [
        ("--num_modes", num_poses),
        ("--exhaustiveness", exhaustiveness),
    ]

    # The binary is looked up relative to the current working directory.
    command = ["./smina.static"]
    for flag, value in options:
        command.extend((flag, str(value)))

    # universal_newlines=True makes check_output return text instead of bytes.
    return subprocess.check_output(command, universal_newlines=True)
def predict(input_sequence, input_ligand, input_protein):
    """
    Dock a ligand given as SMILES into the densest detected pocket of a protein.

    The ligand is embedded in 3D with RDKit, both molecules are converted to
    PDBQT with Open Babel, pocket cells are detected on the protein and
    clustered with DBSCAN, and Smina docks the ligand into a 10 x 10 x 10 box
    centred on the largest cluster. The docked pose is renamed to residue
    ``LIG`` and merged with the protein into one PDB file.

    Parameters
    ----------
    input_sequence: str
        Protein sequence from the UI. Not used by this function.
    input_ligand: str
        Ligand as a SMILES string.
    input_protein: str, os.PathLike or object with a ``name`` attribute
        The protein structure file, either as a path or as an uploaded-file
        object whose ``name`` attribute is the path.

    Returns
    -------
    output_path: str
        Path of the merged protein + ligand PDB file.
    run_time: float
        Wall-clock duration of the call in seconds.

    Raises
    ------
    ValueError
        If the protein file is missing, the SMILES cannot be parsed or
        embedded, or no pocket cluster is found.
    subprocess.CalledProcessError
        If one of the external tools exits with a non-zero status.
    """
    start_time = time.time()

    if input_protein is None:
        raise ValueError("A protein structure file is required")
    if isinstance(input_protein, (str, os.PathLike)):
        # pathlib.Path.name is only the basename, so paths are taken as they are.
        protein_path = os.fspath(input_protein)
    else:
        protein_path = input_protein.name

    work_dir = "/usr/src/app"
    ligand_sdf = f"{work_dir}/ligand.sdf"
    ligand_pdbqt = f"{work_dir}/ligand.pdbqt"
    receptor_pdbqt = f"{work_dir}/receptor.pdbqt"
    pocket_pdb = f"{work_dir}/pockets_dense.pdb"
    docking_pose = f"{work_dir}/docking_pose.pdb"
    output_pdb = f"{work_dir}/output.pdb"

    # --- Ligand preparation -------------------------------------------------
    if not input_ligand or not input_ligand.strip():
        raise ValueError("A ligand SMILES string is required")
    ligand = Chem.MolFromSmiles(input_ligand)
    if ligand is None:
        raise ValueError(f"Could not parse ligand SMILES: {input_ligand!r}")
    ligand = Chem.AddHs(ligand)
    # EmbedMolecule reports failure by returning -1 instead of raising.
    if AllChem.EmbedMolecule(ligand) == -1:
        raise ValueError("Could not generate 3D coordinates for the ligand")
    AllChem.MMFFOptimizeMolecule(ligand)
    # Close the writer explicitly so the SDF is fully on disk before obabel
    # reads it.
    writer = Chem.SDWriter(ligand_sdf)
    try:
        writer.write(ligand)
    finally:
        writer.close()

    # Argument lists (no shell) keep odd characters in the uploaded file path
    # from being interpreted by a shell.
    subprocess.run(["obabel", protein_path, "-xr", "-O", receptor_pdbqt], check=True)
    subprocess.run(["obabel", "-isdf", ligand_sdf, "-O", ligand_pdbqt], check=True)

    # --- Pocket detection ---------------------------------------------------
    pdb = md.load(protein_path)
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
    labels = DBSCAN(eps=0.15, min_samples=5).fit_predict(pockets_xyz)

    # Cluster sizes, ignoring DBSCAN's noise label (-1).
    unique_labels, counts = np.unique(labels, return_counts=True)
    is_cluster = unique_labels != -1
    if not is_cluster.any():
        raise ValueError("No binding pocket could be detected in the protein structure")
    densest_cluster_label = unique_labels[is_cluster][np.argmax(counts[is_cluster])]
    densest_cluster_points = pockets_xyz[labels == densest_cluster_label]

    # Write the pocket cells as pseudo-atoms and read them back to get the
    # box centre.
    # NOTE(review): the cells are presumably in mdtraj's nanometres and the
    # PDB round trip presumably yields the Angstrom values Smina expects;
    # confirm before replacing this with a direct mean of the points.
    n_points = densest_cluster_points.shape[0]
    top_df = pd.DataFrame(
        {
            "serial": list(range(n_points)),
            "name": "PK",
            "element": "H",
            "resSeq": list(range(n_points)),
            "resName": "PCK",
            "chainID": 0,
        }
    )
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
    pocket_trj.save(pocket_pdb)

    struc = PDBParser().get_structure("X", pocket_pdb)
    pocket_center = np.mean([atom.coord for atom in struc.get_atoms()], axis=0)

    # --- Docking ------------------------------------------------------------
    run_smina(
        ligand_pdbqt,
        receptor_pdbqt,
        docking_pose,
        pocket_center,
        [10, 10, 10],
    )

    # --- Post-processing ----------------------------------------------------
    # pdb_rplresname prints the renamed structure to stdout; capture it and
    # write it back so the pose really carries the LIG residue name.
    renamed_pose = subprocess.run(
        ["pdb_rplresname", "-UNL:LIG", docking_pose],
        check=True,
        capture_output=True,
        text=True,
    ).stdout
    with open(docking_pose, "w") as pose_file:
        pose_file.write(renamed_pose)

    # Merge the uploaded structure with the pose. Nothing in this function
    # writes a receptor.pdb (only receptor.pdbqt), so the uploaded file is
    # used as the protein part of the complex.
    # NOTE(review): assumes the uploaded structure is in PDB format - confirm.
    with open(output_pdb, "w") as merged_file:
        subprocess.run(["pdb_merge", protein_path, docking_pose], check=True, stdout=merged_file)

    run_time = time.time() - start_time
    return output_pdb, run_time
# Gradio front end: three inputs in one row, a run button, an examples
# table, a 3D viewer for the docked complex and a runtime read-out.
with gr.Blocks() as app:
    gr.Markdown("# Template for inference")
    gr.Markdown("Title, description, and other information about the model")

    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # Optional controls (e.g. gr.Slider, gr.Checkbox, gr.Dropdown) would be
    # declared here; automated inference runs with the default options.

    btn = gr.Button("Run Inference")

    gr.Examples(
        examples=[
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "test_out.pdb",
            ],
        ],
        inputs=[input_sequence, input_ligand, input_protein],
    )

    # Viewer styling: the whole model as a cartoon, plus green sticks for
    # residues named UNK or LIG.
    reps = [{"model": 0, "style": "cartoon", "color": "whiteCarbon"}] + [
        {"model": 0, "resname": resname, "style": "stick", "color": "greenCarbon"}
        for resname in ("UNK", "LIG")
    ]

    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(
        predict,
        inputs=[input_sequence, input_ligand, input_protein],
        outputs=[out, run_time],
    )

app.launch()
|