import time
import gradio as gr
from gradio_molecule3d import Molecule3D
import sys
import os
import string
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
import pickle
from Bio.PDB import *
# The star import above does not bind the name ``PDB``, so the classes used
# below are imported explicitly instead of being reached through ``PDB.<name>``.
from Bio.PDB.PDBIO import PDBIO
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.Structure import Structure as BioStructure
import requests
import subprocess
import mdtraj as md
from enspara import geometry
from sklearn.cluster import DBSCAN
import pandas as pd

# Fixed working files shared by every request.
LIGAND_SDF = "/usr/src/app/ligand.sdf"
LIGAND_PDBQT = "/usr/src/app/ligand.pdbqt"
RECEPTOR_PDBQT = "/usr/src/app/receptor.pdbqt"
POCKET_PDB = "/usr/src/app/pockets_dense.pdb"
DOCKED_POSE_PDB = "/usr/src/app/docking_pose.pdb"
RENAMED_POSE_PDB = "/usr/src/app/docking_pose_renamed.pdb"
OUTPUT_PDB = "/usr/src/app/output.pdb"

os.system("pwd")
os.system("ls")
os.system("touch ligand.sdf")


def run_smina(
    ligand_path, protein_path, out_path, pocket_center, pocket_size, num_poses=1, exhaustiveness=1
):
    """
    Perform docking with Smina.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Path to ligand PDBQT file that should be docked.
    protein_path: str or pathlib.Path
        Path to protein PDBQT file that should be docked to.
    out_path: str or pathlib.Path
        Path to which docking poses should be saved, SDF or PDB format.
    pocket_center: iterable of float or int
        Coordinates defining the center of the binding site.
    pocket_size: iterable of float or int
        Lengths of edges defining the binding site.
    num_poses: int
        Maximum number of poses to generate.
    exhaustiveness: int
        Accuracy of docking calculations.

    Returns
    -------
    output_text: str
        The output of the Smina calculation.

    Raises
    ------
    subprocess.CalledProcessError
        If the Smina executable exits with a non-zero status.
    """
    output_text = subprocess.check_output(
        [
            "./smina.static",
            "--ligand",
            str(ligand_path),
            "--receptor",
            str(protein_path),
            "--out",
            str(out_path),
            "--center_x",
            str(pocket_center[0]),
            "--center_y",
            str(pocket_center[1]),
            "--center_z",
            str(pocket_center[2]),
            "--size_x",
            str(pocket_size[0]),
            "--size_y",
            str(pocket_size[1]),
            "--size_z",
            str(pocket_size[2]),
            "--num_modes",
            str(num_poses),
            "--exhaustiveness",
            str(exhaustiveness),
        ],
        universal_newlines=True,  # needed to capture output text
    )
    return output_text


def _run_command(args, stdout=None):
    """Run an external tool without a shell and report failures in the UI."""
    try:
        subprocess.run(args, stdout=stdout, check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error(f"{args[0]} failed: {err}") from err


def _prepare_ligand(smiles):
    """Build a 3D conformer from a SMILES string and write it as SDF and PDBQT."""
    if not smiles or not smiles.strip():
        raise gr.Error("need ligand SMILES input")
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise gr.Error("could not parse ligand SMILES")
    mol = Chem.AddHs(mol)
    # EmbedMolecule returns -1 when no conformer could be generated.
    if AllChem.EmbedMolecule(mol) < 0:
        raise gr.Error("could not generate 3D coordinates for the ligand")
    AllChem.MMFFOptimizeMolecule(mol)

    # Close the writer explicitly so the file is complete before obabel reads it.
    writer = Chem.SDWriter(LIGAND_SDF)
    try:
        writer.write(mol)
    finally:
        writer.close()
    _run_command(["obabel", "-isdf", LIGAND_SDF, "-O", LIGAND_PDBQT])


def _find_pocket_center(protein_path):
    """Return the center of the largest DBSCAN cluster of LigSite pocket cells."""
    pdb = md.load(protein_path)
    # run ligsite
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
    if len(pockets_xyz) == 0:
        raise gr.Error("no pocket cells found in the protein")

    labels = DBSCAN(eps=0.15, min_samples=5).fit_predict(pockets_xyz)

    # Cluster labels and their sizes; DBSCAN labels noise points as -1.
    unique_labels, counts = np.unique(labels, return_counts=True)
    is_cluster = unique_labels != -1
    if not is_cluster.any():
        raise gr.Error("no binding pocket found in the protein")
    valid_clusters = unique_labels[is_cluster]
    valid_counts = counts[is_cluster]

    # Keep the cluster with the most points.
    densest_cluster_label = valid_clusters[np.argmax(valid_counts)]
    densest_cluster_points = pockets_xyz[labels == densest_cluster_label]

    # Write the pocket cells as pseudo-atoms to a PDB file.
    n_points = densest_cluster_points.shape[0]
    top_df = pd.DataFrame()
    top_df['serial'] = list(range(n_points))
    top_df['name'] = 'PK'
    top_df['element'] = 'H'
    top_df['resSeq'] = list(range(n_points))
    top_df['resName'] = 'PCK'
    top_df['chainID'] = 0
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
    pocket_trj.save(POCKET_PDB)

    # The center is taken from the coordinates as written to the PDB file.
    # NOTE(review): presumably this round trip is what puts the center into
    # the units Smina expects -- confirm before replacing it with a direct mean.
    parser = PDBParser()
    struc = parser.get_structure("X", POCKET_PDB)
    coords = [atom.coord for atom in struc.get_atoms()]
    return np.mean(coords, axis=0)


def _unused_chain_id(model):
    """Return a single-character chain id that *model* does not use yet."""
    for candidate in string.ascii_uppercase + string.ascii_lowercase + string.digits:
        if not model.has_id(candidate):
            return candidate
    raise gr.Error("no free chain identifier left for the docked ligand")


def _merge_docked_pose(protein_path):
    """Write the protein together with the renamed docking pose to OUTPUT_PDB."""
    parser = PDBParser(QUIET=True)
    protein_structure = parser.get_structure('Structure1', protein_path)
    ligand_structure = parser.get_structure('Structure2', RENAMED_POSE_PDB)

    combined_structure = BioStructure('Combined_Structure')
    for model in protein_structure:
        combined_structure.add(model)
    models = list(combined_structure)
    if not models:
        raise gr.Error("no model found in the protein structure")

    # Both parsed structures number their models from 0, so adding the ligand
    # model as-is is rejected as a duplicate id. Its chains are moved into the
    # first protein model instead, which is also the model the viewer shows.
    target_model = models[0]
    for model in ligand_structure:
        for chain in list(model):
            model.detach_child(chain.id)
            if target_model.has_id(chain.id):
                chain.id = _unused_chain_id(target_model)
            target_model.add(chain)

    io = PDBIO()
    io.set_structure(combined_structure)
    io.save(OUTPUT_PDB)


def predict(input_sequence, input_ligand, input_protein):
    """
    Dock a ligand into the largest detected pocket of an uploaded protein.

    Parameters
    ----------
    input_sequence: str
        Protein sequence from the UI; not used by this model.
    input_ligand: str
        Ligand as a SMILES string.
    input_protein: file-like object with a ``name`` attribute, or str
        Uploaded protein structure file.

    Returns
    -------
    output_path: str
        Path to the PDB file holding the protein and the docked ligand.
    run_time: float
        Wall-clock duration of the prediction in seconds.

    Raises
    ------
    gr.Error
        If an input is missing or invalid, no pocket is found, or an
        external tool fails.
    """
    start_time = time.time()

    if input_protein is None:
        raise gr.Error("need pdb input")
    # Accept both file objects exposing ``.name`` and plain path strings.
    protein_path = str(getattr(input_protein, "name", input_protein))

    _prepare_ligand(input_ligand)
    # Arguments are passed as a list so the uploaded file path is never
    # interpreted by a shell.
    _run_command(["obabel", protein_path, "-xr", "-O", RECEPTOR_PDBQT])

    # Find pocket
    pocket_center = _find_pocket_center(protein_path)

    try:
        run_smina(
            LIGAND_PDBQT,
            RECEPTOR_PDBQT,
            DOCKED_POSE_PDB,
            pocket_center,
            [10, 10, 10],
        )
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error(f"smina docking failed: {err}") from err

    # Rename the ligand residue from UNL to LIG so the viewer can select it.
    with open(RENAMED_POSE_PDB, "w") as renamed_file:
        _run_command(
            ["pdb_rplresname", "-UNL:LIG", DOCKED_POSE_PDB], stdout=renamed_file
        )

    _merge_docked_pose(protein_path)

    end_time = time.time()
    run_time = end_time - start_time
    return OUTPUT_PDB, run_time


with gr.Blocks() as app:
    gr.Markdown("# LigSite + Smina")
    gr.Markdown("Example model using LigSite and DBScan to find a binding pocket in the protein and then SMINA to dock the ligand in the found pocket.")
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here

    # for automated inference the default options are used
    # slider_option = gr.Slider(0,10, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    gr.Examples(
        [
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "test_out.pdb"
            ],
        ],
        [input_sequence, input_ligand, input_protein],
    )
    # Viewer styles: protein as cartoon, ligand residues (UNK / LIG) as sticks.
    reps = [
        {
            "model": 0,
            "style": "cartoon",
            "color": "whiteCarbon",
        },
        {
            "model": 0,
            "resname": "UNK",
            "style": "stick",
            "color": "greenCarbon",
        },
        {
            "model": 0,
            "resname": "LIG",
            "style": "stick",
            "color": "greenCarbon",
        }
    ]

    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(predict, inputs=[input_sequence, input_ligand, input_protein], outputs=[out, run_time])

app.launch()