import os
import pickle
import subprocess
import sys
import time

import gradio as gr
from gradio_molecule3d import Molecule3D
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
from Bio.PDB import *  # provides PDBParser used below
from Bio import PDB
import requests
import mdtraj as md
from enspara import geometry
from sklearn.cluster import DBSCAN
import pandas as pd

# All intermediate and output files live in this directory.
# NOTE(review): these paths are shared by every request, so concurrent calls
# to predict() would overwrite each other's files.
WORK_DIR = "/usr/src/app"
LIGAND_SDF = os.path.join(WORK_DIR, "ligand.sdf")
LIGAND_PDBQT = os.path.join(WORK_DIR, "ligand.pdbqt")
RECEPTOR_PDBQT = os.path.join(WORK_DIR, "receptor.pdbqt")
POCKET_PDB = os.path.join(WORK_DIR, "pockets_dense.pdb")
DOCKING_POSE_SDF = os.path.join(WORK_DIR, "docking_pose.sdf")

# DBSCAN parameters used to cluster the pocket cells returned by LigSite.
DBSCAN_EPS = 0.15
DBSCAN_MIN_SAMPLES = 5

# Edge lengths of the docking box passed to Smina.
POCKET_SIZE = [10, 10, 10]


def run_smina(
    ligand_path, protein_path, out_path, pocket_center, pocket_size, num_poses=1, exhaustiveness=1
):
    """
    Perform docking with Smina.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Path to ligand PDBQT file that should be docked.
    protein_path: str or pathlib.Path
        Path to protein PDBQT file that should be docked to.
    out_path: str or pathlib.Path
        Path to which docking poses should be saved, SDF or PDB format.
    pocket_center: iterable of float or int
        Coordinates defining the center of the binding site.
    pocket_size: iterable of float or int
        Lengths of edges defining the binding site.
    num_poses: int
        Maximum number of poses to generate.
    exhaustiveness: int
        Accuracy of docking calculations.

    Returns
    -------
    output_text: str
        The output of the Smina calculation.

    Raises
    ------
    subprocess.CalledProcessError
        If Smina exits with a non-zero status.
    """
    command = [
        "./smina.static",
        "--ligand", str(ligand_path),
        "--receptor", str(protein_path),
        "--out", str(out_path),
        "--center_x", str(pocket_center[0]),
        "--center_y", str(pocket_center[1]),
        "--center_z", str(pocket_center[2]),
        "--size_x", str(pocket_size[0]),
        "--size_y", str(pocket_size[1]),
        "--size_z", str(pocket_size[2]),
        # Both options are integer counts; coerce so that a float coming from
        # a UI widget (e.g. 3.0) is not passed through as "3.0".
        "--num_modes", str(int(num_poses)),
        "--exhaustiveness", str(int(exhaustiveness)),
    ]
    output_text = subprocess.check_output(
        command,
        universal_newlines=True,  # needed to capture output text
    )
    # Short pause kept from the original implementation before the caller
    # reads the output file.
    time.sleep(0.5)
    return output_text


def _run_obabel(args):
    """Run Open Babel with the given argument list, raising gr.Error on failure."""
    try:
        # Argument list instead of a shell string: paths with spaces or shell
        # metacharacters are passed through verbatim.
        subprocess.run(["obabel", *args], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error(f"Open Babel conversion failed: {err}") from err


def _prepare_ligand(smiles):
    """Build a 3D, MMFF-optimised ligand from SMILES and write it to LIGAND_SDF."""
    if not smiles or not smiles.strip():
        raise gr.Error("need ligand SMILES input")
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise gr.Error("could not parse ligand SMILES")

    mol_h = Chem.AddHs(mol)
    # EmbedMolecule returns -1 when no conformer could be generated.
    if AllChem.EmbedMolecule(mol_h) < 0:
        raise gr.Error("could not generate 3D coordinates for ligand")
    AllChem.MMFFOptimizeMolecule(mol_h)

    # Close the writer explicitly so the file is flushed before obabel reads it.
    writer = Chem.SDWriter(LIGAND_SDF)
    try:
        writer.write(mol_h)
    finally:
        writer.close()


def _find_pocket_center(protein_path):
    """Return the centre of the largest DBSCAN cluster of LigSite pocket cells."""
    pdb = md.load(protein_path)
    # run ligsite
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
    if len(pockets_xyz) == 0:
        raise gr.Error("no binding pocket found")

    dbscan = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES)
    labels = dbscan.fit_predict(pockets_xyz)

    # Find the unique clusters and their sizes
    unique_labels, counts = np.unique(labels, return_counts=True)

    # Exclude noise points (DBSCAN labels them -1)
    is_cluster = unique_labels != -1
    valid_clusters = unique_labels[is_cluster]
    valid_counts = counts[is_cluster]
    if valid_clusters.size == 0:
        # Every cell was classified as noise; argmax would fail on empty input.
        raise gr.Error("no binding pocket found")

    # Pick the cluster with the most points
    densest_cluster_label = valid_clusters[np.argmax(valid_counts)]
    densest_cluster_points = pockets_xyz[labels == densest_cluster_label]

    # write cluster to PDB as pseudo-atoms
    n_points = densest_cluster_points.shape[0]
    top_df = pd.DataFrame()
    top_df['serial'] = np.arange(n_points)
    top_df['name'] = 'PK'
    top_df['element'] = 'H'
    top_df['resSeq'] = np.arange(n_points)
    top_df['resName'] = 'PCK'
    top_df['chainID'] = 0
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
    pocket_trj.save(POCKET_PDB)

    # The centre is computed from the coordinates as written to the PDB file.
    # NOTE(review): presumably this round trip also converts mdtraj's internal
    # units to the PDB file's units - confirm before replacing it with a
    # direct np.mean over densest_cluster_points.
    parser = PDBParser()
    struc = parser.get_structure("X", POCKET_PDB)
    coords = [atom.coord for atom in struc.get_atoms()]
    return np.mean(coords, axis=0)


def predict(input_sequence, input_ligand, input_protein, exhaustiveness):
    """
    Main prediction function that calls ligsite and smina

    Parameters
    ----------
    input_sequence: str
        monomer sequence (not used by this model)
    input_ligand: str
        ligand as SMILES string
    input_protein: gradio.File
        Gradio file object to monomer protein structure as PDB
    exhaustiveness: int
        SMINA parameter

    Returns
    -------
    output_structures: list
        [path to input protein, path to docked ligand SDF]
    run_time: float
        run time of the program

    Raises
    ------
    gr.Error
        If an input is missing or invalid, a conversion or docking step
        fails, or no pocket is found.
    """
    start_time = time.time()

    if input_protein is None:
        raise gr.Error("need pdb input")
    # Accept both file-like objects exposing ``.name`` and plain path strings.
    protein_path = getattr(input_protein, "name", input_protein)

    _prepare_ligand(input_ligand)
    _run_obabel([protein_path, "-xr", "-O", RECEPTOR_PDBQT])
    _run_obabel(["-isdf", LIGAND_SDF, "-O", LIGAND_PDBQT])

    # Find pocket
    pocket_center = _find_pocket_center(protein_path)

    # run smina
    try:
        run_smina(
            LIGAND_PDBQT,
            RECEPTOR_PDBQT,
            DOCKING_POSE_SDF,
            pocket_center,
            POCKET_SIZE,
            exhaustiveness=int(exhaustiveness),
        )
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error(f"Smina docking failed: {err}") from err

    run_time = time.time() - start_time

    return [protein_path, DOCKING_POSE_SDF], run_time


with gr.Blocks() as app:

    gr.Markdown("# LigSite + Smina")

    gr.Markdown("Example model using LigSite and DBScan to find a binding pocket in the protein and then SMINA to dock the ligand in the found pocket.")
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here

    # for automated inference the default options are used
    # step=1 keeps the slider on whole numbers, matching the integer
    # exhaustiveness passed to Smina.
    exhaustiveness = gr.Slider(1, 10, value=1, step=1, label="Exhaustiveness")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    gr.Examples(
        [
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "input_test.pdb"
            ],
        ],
        [input_sequence, input_ligand, input_protein],
    )

    # Display styles for the 3D viewer: protein as cartoon, ligand as sticks.
    reps = [
        {
            "model": 0,
            "style": "cartoon",
            "color": "whiteCarbon",
        },
        {
            "model": 0,
            "resname": "UNK",
            "style": "stick",
            "color": "greenCarbon",
        },
        {
            "model": 0,
            "resname": "LIG",
            "style": "stick",
            "color": "greenCarbon",
        },
        {
            "model": 1,
            "style": "stick",
            "color": "greenCarbon",
        }
    ]

    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(predict, inputs=[input_sequence, input_ligand, input_protein, exhaustiveness], outputs=[out, run_time])

app.launch()