# demo_plinder_smina
#
# Runtime error
#
# File size: 6,591 Bytes


import time

import gradio as gr

from gradio_molecule3d import Molecule3D

import sys
import os
import os
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
import pickle

from Bio.PDB import *
import requests
import subprocess

import mdtraj as md
from enspara import geometry
from sklearn.cluster import DBSCAN
import pandas as pd

# Startup shell commands, run once at import time in the order listed:
# print the working directory, list its contents, then touch ``ligand.sdf``
# in the current directory.
for _startup_command in ("pwd", "ls", "touch ligand.sdf"):
    os.system(_startup_command)

def run_smina(
    ligand_path,
    protein_path,
    out_path,
    pocket_center,
    pocket_size,
    num_poses=1,
    exhaustiveness=1,
    smina_path="./smina.static",
):
    """
    Perform docking with Smina.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Path to ligand PDBQT file that should be docked.
    protein_path: str or pathlib.Path
        Path to protein PDBQT file that should be docked to.
    out_path: str or pathlib.Path
        Path to which docking poses should be saved, SDF or PDB format.
    pocket_center: iterable of float or int
        Coordinates defining the center of the binding site
        (the first three entries are used as x, y, z).
    pocket_size: iterable of float or int
        Lengths of edges defining the binding site
        (the first three entries are used as x, y, z).
    num_poses: int
        Maximum number of poses to generate.
    exhaustiveness: int
        Accuracy of docking calculations.
    smina_path: str or pathlib.Path
        Executable to invoke. Defaults to the ``./smina.static`` binary in
        the current working directory.

    Returns
    -------
    output_text: str
        The output of the Smina calculation (its standard output).

    Raises
    ------
    subprocess.CalledProcessError
        If the executable exits with a non-zero status.
    FileNotFoundError
        If the executable cannot be found.
    """
    # Flag -> value, in the order the arguments are placed on the command line.
    options = {
        "--ligand": ligand_path,
        "--receptor": protein_path,
        "--out": out_path,
        "--center_x": pocket_center[0],
        "--center_y": pocket_center[1],
        "--center_z": pocket_center[2],
        "--size_x": pocket_size[0],
        "--size_y": pocket_size[1],
        "--size_z": pocket_size[2],
        "--num_modes": num_poses,
        "--exhaustiveness": exhaustiveness,
    }

    command = [str(smina_path)]
    for flag, value in options.items():
        command.extend((flag, str(value)))

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; text=True decodes the captured stdout to str.
    return subprocess.check_output(command, text=True)

# Directory that holds every intermediate and output file of a prediction.
_WORK_DIR = "/usr/src/app"

# Edge lengths of the docking box handed to smina.
_POCKET_SIZE = [10, 10, 10]


def _work_path(filename):
    """Return the path of *filename* inside the working directory."""
    return os.path.join(_WORK_DIR, filename)


def _run_to_file(command, out_path):
    """Run *command* (argument list, no shell) and save its stdout to *out_path*."""
    with open(out_path, "w") as handle:
        subprocess.run(command, stdout=handle, check=True)


def _write_ligand_sdf(smiles, sdf_path):
    """Build a hydrogenated 3D conformer from *smiles* and write it to *sdf_path*."""
    if not smiles or not smiles.strip():
        raise gr.Error("Please provide a ligand SMILES string.")

    mol = Chem.MolFromSmiles(smiles.strip())
    # MolFromSmiles signals a parse failure by returning None.
    if mol is None or mol.GetNumAtoms() == 0:
        raise gr.Error(f"Could not parse ligand SMILES: {smiles!r}")

    mol_h = Chem.AddHs(mol)
    # EmbedMolecule returns -1 when no conformer could be generated.
    if AllChem.EmbedMolecule(mol_h) == -1:
        raise gr.Error("Could not generate 3D coordinates for the ligand.")
    AllChem.MMFFOptimizeMolecule(mol_h)

    # Close the writer explicitly so the file is complete on disk before
    # obabel reads it.
    writer = Chem.SDWriter(sdf_path)
    try:
        writer.write(mol_h)
    finally:
        writer.close()


def _find_pocket_center(protein_path):
    """Return the mean coordinate of the largest DBSCAN cluster of pocket cells."""
    structure = md.load(protein_path)
    # run ligsite
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=structure)
    if len(pockets_xyz) == 0:
        raise gr.Error("No pocket cells were detected in the protein structure.")

    labels = DBSCAN(eps=0.15, min_samples=5).fit_predict(pockets_xyz)
    unique_labels, counts = np.unique(labels, return_counts=True)

    # Label -1 marks DBSCAN noise points; only real clusters are candidates.
    is_cluster = unique_labels != -1
    if not is_cluster.any():
        raise gr.Error("No binding pocket could be identified in the protein structure.")

    densest_label = unique_labels[is_cluster][np.argmax(counts[is_cluster])]
    cluster_points = pockets_xyz[labels == densest_label]

    # Store the cluster as a pseudo-atom PDB file (one 'PK' atom per cell).
    n_points = cluster_points.shape[0]
    top_df = pd.DataFrame()
    top_df['serial'] = list(range(n_points))
    top_df['name'] = 'PK'
    top_df['element'] = 'H'
    top_df['resSeq'] = list(range(n_points))
    top_df['resName'] = 'PCK'
    top_df['chainID'] = 0

    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    pocket_trj = md.Trajectory(xyz=cluster_points, topology=pocket_top)
    pocket_pdb = _work_path("pockets_dense.pdb")
    pocket_trj.save(pocket_pdb)

    # NOTE(review): the center is deliberately taken from the saved PDB file
    # rather than from cluster_points. mdtraj presumably converts its
    # nanometer coordinates to Angstrom on save, which is the unit smina's
    # box is expected in -- confirm before simplifying this round trip.
    pocket_structure = PDBParser().get_structure("X", pocket_pdb)
    coords = [atom.coord for atom in pocket_structure.get_atoms()]
    return np.mean(coords, axis=0)


def predict (input_sequence, input_ligand, input_protein):
    """
    Dock a ligand into the largest detected pocket of an uploaded protein.

    Parameters
    ----------
    input_sequence: str
        Protein sequence from the UI. Currently unused; kept so the
        signature matches the inputs wired to this callback.
    input_ligand: str
        Ligand as a SMILES string.
    input_protein: file-like object with a ``name`` attribute, or str
        Uploaded protein structure. Assumed to be in PDB format, since it
        is passed to ``pdb_merge`` unchanged.

    Returns
    -------
    output_path: str
        Path of the merged protein + docked ligand PDB file.
    run_time: float
        Wall-clock duration of the prediction in seconds.

    Raises
    ------
    gr.Error
        If an input is missing, the SMILES cannot be parsed or embedded,
        or no pocket is found.
    subprocess.CalledProcessError
        If obabel, smina or a pdb-tools command exits with an error.
    """
    start_time = time.time()

    if input_protein is None:
        raise gr.Error("Please upload a protein structure file.")
    # Accept both an uploaded-file object exposing ``.name`` and a plain path.
    protein_path = str(getattr(input_protein, "name", input_protein))

    ligand_sdf = _work_path("ligand.sdf")
    ligand_pdbqt = _work_path("ligand.pdbqt")
    receptor_pdbqt = _work_path("receptor.pdbqt")
    pose_pdb = _work_path("docking_pose.pdb")
    renamed_pose_pdb = _work_path("docking_pose_lig.pdb")
    output_pdb = _work_path("output.pdb")

    _write_ligand_sdf(input_ligand, ligand_sdf)

    # Argument lists instead of shell strings: the upload path is
    # user-influenced and may contain spaces or shell metacharacters.
    subprocess.run(["obabel", protein_path, "-xr", "-O", receptor_pdbqt], check=True)
    subprocess.run(["obabel", "-isdf", ligand_sdf, "-O", ligand_pdbqt], check=True)

    #Find pocket
    pocket_center = _find_pocket_center(protein_path)

    run_smina(ligand_pdbqt, receptor_pdbqt, pose_pdb, pocket_center, _POCKET_SIZE)

    # pdb-tools commands print their result to stdout instead of editing the
    # file in place, so the renamed pose must be captured into its own file.
    _run_to_file(["pdb_rplresname", "-UNL:LIG", pose_pdb], renamed_pose_pdb)
    # Merge with the uploaded structure: this function only ever writes
    # receptor.pdbqt, so there is no receptor.pdb of its own to merge.
    _run_to_file(["pdb_merge", protein_path, renamed_pose_pdb], output_pdb)

    run_time = time.time() - start_time
    return output_pdb, run_time

# Viewer representations: the whole model as a white-carbon cartoon, plus
# residues named "UNK" or "LIG" drawn as green-carbon sticks.
reps = [
    {"model": 0, "style": "cartoon", "color": "whiteCarbon"},
    *(
        {"model": 0, "resname": resname, "style": "stick", "color": "greenCarbon"}
        for resname in ("UNK", "LIG")
    ),
]

with gr.Blocks() as app:
    gr.Markdown("# Template for inference")
    gr.Markdown("Title, description, and other information about the model")

    # The three inputs share one row.
    with gr.Row():
        input_sequence = gr.Textbox(label="Input Protein sequence (FASTA)", lines=3)
        input_ligand = gr.Textbox(label="Input ligand SMILES", lines=3)
        input_protein = gr.File(label="Input protein monomer")

    # Extra option widgets (sliders, checkboxes, dropdowns) would be defined
    # here; automated inference runs with the default options.

    btn = gr.Button("Run Inference")

    # One example row: sequence, ligand SMILES, protein file.
    gr.Examples(
        examples=[
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "test_out.pdb",
            ],
        ],
        inputs=[input_sequence, input_ligand, input_protein],
    )

    # Outputs: the 3D structure viewer and the elapsed time.
    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(
        fn=predict,
        inputs=[input_sequence, input_ligand, input_protein],
        outputs=[out, run_time],
    )

app.launch()