File size: 7,141 Bytes
bebad14
 
 
dffaf30
 
bebad14
 
e37d702
 
 
 
 
 
 
 
 
 
 
 
bebad14
e37d702
89a82df
e37d702
 
28cb117
e37d702
 
 
 
 
f1fe378
e37d702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c35d83
e37d702
28cb117
ff1a3bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bebad14
e37d702
18dabba
 
e37d702
 
 
 
 
 
86237ae
e37d702
86237ae
dd8b8f6
ff1a3bf
e37d702
2932859
e37d702
 
 
 
 
 
 
 
 
 
 
 
 
 
ff1a3bf
e37d702
 
 
 
 
 
 
 
 
86237ae
e37d702
86237ae
e37d702
 
ff1a3bf
c924062
86237ae
 
c2f27bf
e37d702
 
ff1a3bf
 
bebad14
 
b845900
bebad14
f624b87
bebad14
18dabba
bebad14
18dabba
bebad14
f354223
bebad14
f354223
 
bebad14
 
28cb117
4c35d83
bebad14
 
 
44470f9
28cb117
 
 
 
5122f94
28cb117
ff1a3bf
28cb117
 
130e4f7
28cb117
 
 
 
 
 
 
 
5122f94
 
28cb117
 
4853a01
 
 
 
 
 
c2f27bf
 
 
 
 
28cb117
4853a01
28cb117
 
 
bebad14
 
ff1a3bf
dffaf30
bebad14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233

import time

import gradio as gr

from gradio_molecule3d import Molecule3D

import sys
import os
import os
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
import pickle

from Bio.PDB import *
from Bio import PDB
import requests
import subprocess

import mdtraj as md
from enspara import geometry
from sklearn.cluster import DBSCAN
import pandas as pd


def run_smina(
    ligand_path,
    protein_path,
    out_path,
    pocket_center,
    pocket_size,
    num_poses=1,
    exhaustiveness=1,
    *,
    smina_path="./smina.static",
    post_run_delay=0.5,
):
    """
    Perform docking with Smina.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Path to ligand PDBQT file that should be docked.
    protein_path: str or pathlib.Path
        Path to protein PDBQT file that should be docked to.
    out_path: str or pathlib.Path
        Path to which docking poses should be saved, SDF or PDB format.
    pocket_center: iterable of float or int
        Coordinates (x, y, z) defining the center of the binding site.
    pocket_size: iterable of float or int
        Lengths of the (x, y, z) edges defining the binding site.
    num_poses: int
        Maximum number of poses to generate.
    exhaustiveness: int
        Accuracy of docking calculations.
    smina_path: str or pathlib.Path
        Executable to invoke. Defaults to the ``./smina.static`` binary
        in the current working directory.
    post_run_delay: float
        Seconds to sleep after the executable has returned. Pass 0 to skip.

    Returns
    -------
    output_text: str
        The output of the Smina calculation.

    Raises
    ------
    ValueError
        If ``pocket_center`` or ``pocket_size`` does not hold exactly three
        values.
    subprocess.CalledProcessError
        If the executable exits with a non-zero status.
    """
    center = tuple(pocket_center)
    size = tuple(pocket_size)
    # Fail before launching anything: a short box would otherwise surface as a
    # bare IndexError and a long one would silently drop the extra values.
    if len(center) != 3 or len(size) != 3:
        raise ValueError(
            "pocket_center and pocket_size must each contain exactly three "
            f"values, got {len(center)} and {len(size)}"
        )

    command = [
        str(smina_path),
        "--ligand",
        str(ligand_path),
        "--receptor",
        str(protein_path),
        "--out",
        str(out_path),
    ]
    for axis, value in zip("xyz", center):
        command += [f"--center_{axis}", str(value)]
    for axis, value in zip("xyz", size):
        command += [f"--size_{axis}", str(value)]
    command += [
        "--num_modes",
        str(num_poses),
        "--exhaustiveness",
        str(exhaustiveness),
    ]

    output_text = subprocess.check_output(
        command,
        universal_newlines=True,  # needed to capture output as text
    )
    # NOTE(review): check_output already waits for the process to exit; the
    # pause is kept (by default) only to preserve the previous timing.
    if post_run_delay > 0:
        time.sleep(post_run_delay)
    return output_text

def _build_ligand_sdf(smiles, sdf_path):
    """Parse *smiles*, embed and MMFF-optimize it in 3D, and write it to *sdf_path*."""
    if not smiles or not smiles.strip():
        raise gr.Error("need ligand SMILES input")
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise gr.Error("could not parse ligand SMILES")

    mol_h = Chem.AddHs(mol)
    if AllChem.EmbedMolecule(mol_h) == -1:
        # -1 means no conformer was generated; optimizing would then fail.
        raise gr.Error("could not generate 3D coordinates for the ligand")
    AllChem.MMFFOptimizeMolecule(mol_h)

    writer = Chem.SDWriter(sdf_path)
    try:
        writer.write(mol_h)
    finally:
        # Close explicitly so the file is complete before obabel reads it.
        writer.close()


def _run_obabel(arguments):
    """Run ``obabel`` with *arguments* (no shell) and raise gr.Error on failure."""
    try:
        subprocess.run(["obabel", *arguments], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error("obabel conversion failed") from err


def _densest_pocket_points(protein_path):
    """Return the pocket cells belonging to the largest DBSCAN cluster."""
    pdb = md.load(protein_path)
    # run ligsite
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
    if len(pockets_xyz) == 0:
        raise gr.Error("no pocket found in the protein")

    eps_value = 0.15
    min_samples_value = 5
    labels = DBSCAN(eps=eps_value, min_samples=min_samples_value).fit_predict(
        pockets_xyz
    )
    # Find the unique clusters and their sizes, excluding noise points (-1).
    unique_labels, counts = np.unique(labels, return_counts=True)
    is_cluster = unique_labels != -1
    if not is_cluster.any():
        # Every point was labelled as noise; np.argmax on an empty array
        # would raise an unhelpful ValueError.
        raise gr.Error("no pocket found in the protein")

    # Pick the cluster with the most points.
    densest_label = unique_labels[is_cluster][np.argmax(counts[is_cluster])]
    return pockets_xyz[labels == densest_label]


def _pocket_center_via_pdb(points, pdb_path):
    """Write *points* as dummy atoms to *pdb_path* and return their mean coordinate."""
    n_points = points.shape[0]
    top_df = pd.DataFrame(
        {
            "serial": list(range(n_points)),
            "name": "PK",
            "element": "H",
            "resSeq": list(range(n_points)),
            "resName": "PCK",
            "chainID": 0,
        }
    )
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    md.Trajectory(xyz=points, topology=pocket_top).save(pdb_path)

    # NOTE(review): the center is deliberately computed from the re-read PDB
    # rather than from `points`. mdtraj presumably stores nm and writes
    # Angstrom, so this round trip likely doubles as a unit conversion --
    # confirm before simplifying.
    struc = PDBParser().get_structure("X", pdb_path)
    coords = [atom.coord for atom in struc.get_atoms()]
    return np.mean(coords, axis=0)


def predict(input_sequence, input_ligand, input_protein, exhaustiveness):
    """
    Main prediction function that calls ligsite and smina

    Parameters
    ----------
    input_sequence: str
        monomer sequence (not used by this function; kept for the interface)
    input_ligand: str
        ligand as SMILES string
    input_protein: gradio.File
        Gradio file object to monomer protein structure as PDB; its ``name``
        attribute is used as the path on disk
    exhaustiveness: int or float
        SMINA parameter; rounded to an integer of at least 1

    Returns
    -------
    output_structures: list
        [path to the input protein, path to the docked ligand SDF]
    run_time: float
        run time of the program in seconds

    Raises
    ------
    gr.Error
        If the protein file is missing, the SMILES cannot be turned into a 3D
        ligand, a file conversion fails, or no pocket cluster is found.
    """
    start_time = time.time()

    if input_protein is None:
        raise gr.Error("need pdb input")

    ligand_sdf = "/usr/src/app/ligand.sdf"
    ligand_pdbqt = "/usr/src/app/ligand.pdbqt"
    receptor_pdbqt = "/usr/src/app/receptor.pdbqt"
    pocket_pdb = "/usr/src/app/pockets_dense.pdb"
    docking_pose = "/usr/src/app/docking_pose.sdf"

    _build_ligand_sdf(input_ligand, ligand_sdf)

    # Argument lists instead of a shell string: the uploaded file's path may
    # contain spaces or shell metacharacters.
    _run_obabel([input_protein.name, "-xr", "-O", receptor_pdbqt])
    _run_obabel(["-isdf", ligand_sdf, "-O", ligand_pdbqt])

    # Find pocket
    densest_cluster_points = _densest_pocket_points(input_protein.name)
    pocket_center = _pocket_center_via_pdb(densest_cluster_points, pocket_pdb)

    # run smina
    # NOTE(review): the UI slider may deliver a non-integer float, while the
    # smina option is documented as an int -- coerce before formatting.
    run_smina(
        ligand_pdbqt,
        receptor_pdbqt,
        docking_pose,
        pocket_center,
        [10, 10, 10],
        exhaustiveness=max(1, int(round(exhaustiveness))),
    )
    run_time = time.time() - start_time
    return [input_protein.name, docking_pose], run_time

# Display rules handed to the Molecule3D viewer.
# NOTE(review): presumably model 0 is the protein file and model 1 the docked
# pose, matching the order predict returns them -- confirm against
# gradio_molecule3d.
reps = [
    {"model": 0, "style": "cartoon", "color": "whiteCarbon"},
    {"model": 0, "resname": "UNK", "style": "stick", "color": "greenCarbon"},
    {"model": 0, "resname": "LIG", "style": "stick", "color": "greenCarbon"},
    {"model": 1, "style": "stick", "color": "greenCarbon"},
]

with gr.Blocks() as app:
    # Page header and short description.
    gr.Markdown("# LigSite + Smina")
    gr.Markdown(
        "Example model using LigSite and DBScan to find a binding pocket in the protein and then SMINA to dock the ligand in the found pocket."
    )

    # The three user inputs share one row.
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # Options go here; automated inference runs with their default values.
    # Further option widgets (e.g. gr.Checkbox, gr.Dropdown) can be added
    # alongside the slider.
    exhaustiveness = gr.Slider(minimum=1, maximum=10, value=1, label="Exhaustiveness")

    btn = gr.Button("Run Inference")

    # One pre-filled example: sequence, ligand SMILES and a bundled PDB file.
    gr.Examples(
        examples=[
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "input_test.pdb",
            ],
        ],
        inputs=[input_sequence, input_ligand, input_protein],
    )

    # Outputs: the 3D viewer and the elapsed time reported by predict.
    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(
        fn=predict,
        inputs=[input_sequence, input_ligand, input_protein, exhaustiveness],
        outputs=[out, run_time],
    )

app.launch()