"""Gradio app that docks a ligand (given as SMILES) into an uploaded protein.

Pipeline implemented by :func:`predict`:

1. Build a 3D conformer of the ligand with RDKit and write it as SDF.
2. Convert receptor and ligand to PDBQT with Open Babel (``obabel``).
3. Detect pocket cells with enspara, cluster them with DBSCAN and use the
   centre of the most populated cluster as the docking box centre.
4. Dock with Smina and merge receptor + pose into one PDB for the viewer.
"""
import os
import pickle
import subprocess
import sys
import time

import gradio as gr
from gradio_molecule3d import Molecule3D
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import DataStructs
from rdkit.Chem import RDConfig
from rdkit.Chem import rdBase
from Bio.PDB import *  # noqa: F401,F403 -- provides PDBParser used below
import requests
import mdtraj as md
from enspara import geometry
from sklearn.cluster import DBSCAN
import pandas as pd

# Fixed working files inside the container image.
LIGAND_SDF = "/usr/src/app/ligand.sdf"
LIGAND_PDBQT = "/usr/src/app/ligand.pdbqt"
RECEPTOR_PDB = "/usr/src/app/receptor.pdb"
RECEPTOR_PDBQT = "/usr/src/app/receptor.pdbqt"
POCKET_PDB = "/usr/src/app/pockets_dense.pdb"
DOCKING_POSE_PDB = "/usr/src/app/docking_pose.pdb"
DOCKING_POSE_LIG_PDB = "/usr/src/app/docking_pose_lig.pdb"
OUTPUT_PDB = "/usr/src/app/output.pdb"

# DBSCAN settings used to group pocket cells into candidate pockets.
DBSCAN_EPS = 0.15
DBSCAN_MIN_SAMPLES = 5

# Edge lengths of the docking box passed to Smina.
POCKET_SIZE = [10, 10, 10]

# Start-up diagnostics kept from the original script: print the working
# directory and its contents, and make sure a ligand.sdf exists in it.
os.system("pwd")
os.system("ls")
os.system("touch ligand.sdf")


def run_smina(
    ligand_path, protein_path, out_path, pocket_center, pocket_size,
    num_poses=1, exhaustiveness=1
):
    """
    Perform docking with Smina.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Path to ligand PDBQT file that should be docked.
    protein_path: str or pathlib.Path
        Path to protein PDBQT file that should be docked to.
    out_path: str or pathlib.Path
        Path to which docking poses should be saved, SDF or PDB format.
    pocket_center: iterable of float or int
        Coordinates defining the center of the binding site.
    pocket_size: iterable of float or int
        Lengths of edges defining the binding site.
    num_poses: int
        Maximum number of poses to generate.
    exhaustiveness: int
        Accuracy of docking calculations.

    Returns
    -------
    output_text: str
        The output of the Smina calculation.

    Raises
    ------
    subprocess.CalledProcessError
        If ``./smina.static`` exits with a non-zero status.
    """
    command = [
        "./smina.static",
        "--ligand", str(ligand_path),
        "--receptor", str(protein_path),
        "--out", str(out_path),
        "--center_x", str(pocket_center[0]),
        "--center_y", str(pocket_center[1]),
        "--center_z", str(pocket_center[2]),
        "--size_x", str(pocket_size[0]),
        "--size_y", str(pocket_size[1]),
        "--size_z", str(pocket_size[2]),
        "--num_modes", str(num_poses),
        "--exhaustiveness", str(exhaustiveness),
    ]
    # universal_newlines=True makes check_output return str instead of bytes.
    return subprocess.check_output(command, universal_newlines=True)


def _uploaded_path(uploaded_file):
    """Return the filesystem path of a Gradio upload (object with .name or str)."""
    if uploaded_file is None:
        raise gr.Error("Please upload a protein structure file.")
    return str(getattr(uploaded_file, "name", uploaded_file))


def _prepare_ligand(smiles):
    """Build a 3D, MMFF-optimised ligand from SMILES and write it to LIGAND_SDF."""
    if not smiles or not smiles.strip():
        raise gr.Error("Please provide a ligand SMILES string.")
    mol = Chem.MolFromSmiles(smiles.strip())
    if mol is None:
        # RDKit signals a parse failure by returning None, not by raising.
        raise gr.Error("Could not parse the ligand SMILES string.")

    mol = Chem.AddHs(mol)
    if AllChem.EmbedMolecule(mol) == -1:
        # -1 means no conformer was generated; optimising would fail next.
        raise gr.Error("Could not generate 3D coordinates for the ligand.")
    AllChem.MMFFOptimizeMolecule(mol)

    # Close explicitly so the file is flushed before obabel reads it.
    writer = Chem.SDWriter(LIGAND_SDF)
    try:
        writer.write(mol)
    finally:
        writer.close()


def _find_pocket_center(protein_path):
    """Return the centre of the most populated DBSCAN cluster of pocket cells."""
    structure = md.load(protein_path)
    # Pocket detection (ligsite, per the original comment).
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=structure)
    if len(pockets_xyz) == 0:
        raise gr.Error("No pocket cells were detected in the protein.")

    clustering = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES)
    labels = clustering.fit_predict(pockets_xyz)

    # Cluster sizes, ignoring DBSCAN's noise label (-1).
    unique_labels, counts = np.unique(labels, return_counts=True)
    is_cluster = unique_labels != -1
    if not is_cluster.any():
        raise gr.Error("No binding pocket could be identified in the protein.")
    densest_label = unique_labels[is_cluster][np.argmax(counts[is_cluster])]
    densest_points = pockets_xyz[labels == densest_label]

    # Write the pocket cells as pseudo-atoms and read them back.
    # NOTE(review): mdtraj coordinates are in nm while PDB files (and Smina)
    # use Angstrom, so this round trip presumably doubles as the unit
    # conversion -- confirm before replacing it with a direct mean.
    n_points = densest_points.shape[0]
    top_df = pd.DataFrame()
    top_df['serial'] = list(range(n_points))
    top_df['name'] = 'PK'
    top_df['element'] = 'H'
    top_df['resSeq'] = list(range(n_points))
    top_df['resName'] = 'PCK'
    top_df['chainID'] = 0
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    pocket_trj = md.Trajectory(xyz=densest_points, topology=pocket_top)
    pocket_trj.save(POCKET_PDB)

    pocket_structure = PDBParser().get_structure("X", POCKET_PDB)
    coords = [atom.coord for atom in pocket_structure.get_atoms()]
    return np.mean(coords, axis=0)


def _merge_complex(uploaded_protein_path):
    """Rename the docked ligand to LIG and merge it with the receptor into OUTPUT_PDB."""
    # pdb-tools write their result to stdout, so it has to be captured;
    # the original call discarded it and the rename never took effect.
    with open(DOCKING_POSE_LIG_PDB, "w") as renamed:
        subprocess.run(
            ["pdb_rplresname", "-UNL:LIG", DOCKING_POSE_PDB],
            stdout=renamed,
            check=True,
        )

    # Nothing in this script creates RECEPTOR_PDB; keep using it when the
    # deployment provides one, otherwise merge with the uploaded structure.
    receptor = RECEPTOR_PDB if os.path.exists(RECEPTOR_PDB) else uploaded_protein_path
    with open(OUTPUT_PDB, "w") as merged:
        subprocess.run(
            ["pdb_merge", receptor, DOCKING_POSE_LIG_PDB],
            stdout=merged,
            check=True,
        )


def predict(input_sequence, input_ligand, input_protein):
    """Dock ``input_ligand`` into ``input_protein`` and return the complex.

    Parameters
    ----------
    input_sequence: str
        Protein sequence from the UI; not used by the docking pipeline.
    input_ligand: str
        Ligand as a SMILES string.
    input_protein: uploaded file
        Gradio upload exposing the file path via ``.name`` (a plain path
        string is accepted as well).

    Returns
    -------
    tuple of (str, float)
        Path to the merged receptor/ligand PDB and the run time in seconds.

    Raises
    ------
    gr.Error
        For a missing upload, an invalid SMILES string, a failed 3D
        embedding, or when no pocket can be detected.
    subprocess.CalledProcessError
        If one of the external tools exits with a non-zero status.
    """
    start_time = time.time()
    protein_path = _uploaded_path(input_protein)

    _prepare_ligand(input_ligand)

    # Argument lists (no shell) keep odd characters in the upload path from
    # being interpreted by a shell.
    subprocess.run(
        ["obabel", protein_path, "-xr", "-O", RECEPTOR_PDBQT], check=True
    )
    subprocess.run(
        ["obabel", "-isdf", LIGAND_SDF, "-O", LIGAND_PDBQT], check=True
    )

    pocket_center = _find_pocket_center(protein_path)

    run_smina(
        LIGAND_PDBQT,
        RECEPTOR_PDBQT,
        DOCKING_POSE_PDB,
        pocket_center,
        POCKET_SIZE,
    )

    _merge_complex(protein_path)

    run_time = time.time() - start_time
    return OUTPUT_PDB, run_time


with gr.Blocks() as app:
    gr.Markdown("# Template for inference")
    gr.Markdown("Title, description, and other information about the model")
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here

    # for automated inference the default options are used
    # slider_option = gr.Slider(0,10, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    gr.Examples(
        [
            [
                "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "test_out.pdb"
            ],
        ],
        [input_sequence, input_ligand, input_protein],
    )

    # Viewer styles: protein as cartoon, ligand residues (UNK / LIG) as sticks.
    reps = [
        {
            "model": 0,
            "style": "cartoon",
            "color": "whiteCarbon",
        },
        {
            "model": 0,
            "resname": "UNK",
            "style": "stick",
            "color": "greenCarbon",
        },
        {
            "model": 0,
            "resname": "LIG",
            "style": "stick",
            "color": "greenCarbon",
        }
    ]

    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    btn.click(predict, inputs=[input_sequence, input_ligand, input_protein], outputs=[out, run_time])

if __name__ == "__main__":
    app.launch()