Spaces:
Sleeping
Sleeping
import hydra | |
import os | |
import pathlib | |
from pathlib import Path | |
import sys | |
import gradio as gr | |
import pandas as pd | |
from rdkit import Chem | |
from rdkit.Chem import RDConfig, Descriptors, Lipinski, Crippen | |
from deepscreen.predict import predict | |
sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score')) | |
import sascorer | |
# Process working directory captured at import time.
# Not referenced by any other code visible in this file.
ROOT: Path = Path.cwd()
# TODO: refactor caching with functools.lru_cache
# MOL_MAP = {}
# def cached_mol(smiles):
#     if smiles not in MOL_MAP:
#         MOL_MAP.update({smiles: Chem.MolFromSmiles(smiles)})
#     return MOL_MAP.get(smiles)
def sa_score(row):
    """Return the ``sascorer`` score for the SMILES stored in ``row['X1']``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with an ``'X1'`` entry
            holding a SMILES string.

    Returns:
        The value of ``sascorer.calculateScore`` for the parsed molecule, or
        ``None`` when RDKit cannot parse the SMILES.
    """
    mol = Chem.MolFromSmiles(row['X1'])
    # MolFromSmiles signals a parse failure by returning None; scoring None
    # would raise and abort the whole DataFrame.apply call.
    if mol is None:
        return None
    return sascorer.calculateScore(mol)
def mw(row):
    """Return ``Descriptors.MolWt`` for the SMILES stored in ``row['X1']``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with an ``'X1'`` entry
            holding a SMILES string.

    Returns:
        The molecular weight reported by RDKit, or ``None`` when RDKit cannot
        parse the SMILES.
    """
    mol = Chem.MolFromSmiles(row['X1'])
    # MolFromSmiles signals a parse failure by returning None; passing None on
    # would raise and abort the whole DataFrame.apply call.
    if mol is None:
        return None
    # Use the explicitly imported submodule rather than relying on
    # ``Chem.Descriptors`` being reachable as an attribute of ``Chem``.
    return Descriptors.MolWt(mol)
def hbd(row):
    """Return ``Lipinski.NumHDonors`` for the SMILES stored in ``row['X1']``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with an ``'X1'`` entry
            holding a SMILES string.

    Returns:
        The hydrogen-bond donor count reported by RDKit, or ``None`` when
        RDKit cannot parse the SMILES.
    """
    mol = Chem.MolFromSmiles(row['X1'])
    # MolFromSmiles signals a parse failure by returning None; passing None on
    # would raise and abort the whole DataFrame.apply call.
    if mol is None:
        return None
    return Lipinski.NumHDonors(mol)
def hba(row):
    """Return ``Lipinski.NumHAcceptors`` for the SMILES stored in ``row['X1']``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with an ``'X1'`` entry
            holding a SMILES string.

    Returns:
        The hydrogen-bond acceptor count reported by RDKit, or ``None`` when
        RDKit cannot parse the SMILES.
    """
    mol = Chem.MolFromSmiles(row['X1'])
    # MolFromSmiles signals a parse failure by returning None; passing None on
    # would raise and abort the whole DataFrame.apply call.
    if mol is None:
        return None
    return Lipinski.NumHAcceptors(mol)
def logp(row):
    """Return ``Crippen.MolLogP`` for the SMILES stored in ``row['X1']``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with an ``'X1'`` entry
            holding a SMILES string.

    Returns:
        The Crippen LogP estimate reported by RDKit, or ``None`` when RDKit
        cannot parse the SMILES.
    """
    mol = Chem.MolFromSmiles(row['X1'])
    # MolFromSmiles signals a parse failure by returning None; passing None on
    # would raise and abort the whole DataFrame.apply call.
    if mol is None:
        return None
    return Crippen.MolLogP(mol)
# Score label shown in the report tab -> row-wise scoring callable.
# Labels mapped to None have no implementation in this file.
SCORE_MAP = {
    'SAscore': sa_score,
    'RAscore': None,  # https://github.com/reymond-group/RAscore
    'SCScore': None,  # https://pubs.acs.org/doi/10.1021/acs.jcim.7b00622
    'LogP': logp,  # https://www.rdkit.org/docs/source/rdkit.Chem.Crippen.html
    'MW': mw,  # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html
    'HBD': hbd,  # https://www.rdkit.org/docs/source/rdkit.Chem.Lipinski.html
    'HBA': hba,  # https://www.rdkit.org/docs/source/rdkit.Chem.Lipinski.html
    'TopoPSA': None,  # http://mordred-descriptor.github.io/documentation/master/api/mordred.TopoPSA.html
}
# Filter label shown in the report tab -> filter callable.
# Every entry is currently None, i.e. no filter is implemented in this file.
FILTER_MAP = {
    'PAINS filter': None,
    "Lipinski's rule of five": None,  # https://gist.github.com/strets123/fdc4db6d450b66345f46
    'ADMET filter': None,
    'TCL filter': None,
}
# Task label shown in the UI -> value substituted into the `task=` config
# override and the checkpoint file name.
TASK_MAP = {
    'Drug-target interaction': 'binary',
    'Drug-target binding affinity': 'regression',
}
# Preset label shown in the UI -> value substituted into the `preset=` config
# override and the checkpoint file name.
PRESET_MAP = {
    'DeepDTA': 'deep_dta',
    'GraphDTA': 'graph_dta',
}
# Target-family label shown in the UI -> internal family key.
TARGET_FAMILY_MAP = {
    # Modes that do not name a single family directly.
    'Auto-detect': 'detect',
    'Manually-labelled': 'labelled',
    'Library-labelled': 'labelled',
    # Explicit family choices.
    'Kinases': 'kinases',
    'Non-kinase enzymes': 'non-kinase_enzymes',
    'Membrane receptors': 'membrane_receptors',
    'Nuclear receptors': 'nuclear_receptors',
    'Ion channels': 'ion_channels',
    'Other protein targets': 'other_protein_targets',
    # "(auto-detected)" labels map to the same keys as the explicit choices.
    'Kinases (auto-detected)': 'kinases',
    'Non-kinase enzymes (auto-detected)': 'non-kinase_enzymes',
    'Membrane receptors (auto-detected)': 'membrane_receptors',
    'Nuclear receptors (auto-detected)': 'nuclear_receptors',
    'Ion channels (auto-detected)': 'ion_channels',
    'Other protein targets (auto-detected)': 'other_protein_targets',
    'Indiscriminate': 'indiscriminate',
}
# Target library label shown in the UI -> CSV file name.
TARGET_LIBRARY_MAP = {
    'STITCH': 'stitch.csv',
    'Drug Repurposing Hub': 'drug_repurposing_hub.csv',
}
# Drug library label shown in the UI -> CSV file name.
DRUG_LIBRARY_MAP = {
    'ChEMBL': 'chembl.csv',
    'DrugBank': 'drug_bank.csv',
}
# Choices offered by the "Mode" radio button, in display order.
MODE_LIST = [
    'Drug screening',
    'Drug repurposing',
    'Drug-target pair',
]
def predictions_to_df(predictions):
    """Stack per-batch prediction records into a single DataFrame.

    Args:
        predictions: Iterable of objects accepted by ``pd.DataFrame`` (for
            example dicts of equal-length columns), one per prediction batch.

    Returns:
        A DataFrame with all batches concatenated row-wise under a fresh
        0..n-1 index. An empty DataFrame is returned when there are no
        batches, because ``pd.concat`` raises ``ValueError`` on an empty list.
    """
    frames = [pd.DataFrame(prediction) for prediction in predictions]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
def submit_predict(predict_data, task, preset, target_family): | |
task = TASK_MAP[task] | |
preset = PRESET_MAP[preset] | |
target_family = TARGET_FAMILY_MAP[target_family] | |
match target_family: | |
case 'labelled': | |
pass # target_family_list = ... | |
case 'detect': | |
pass # target_family_list = ... | |
case _: | |
target_family_list = [target_family] | |
prediction_df = pd.DataFrame() | |
for target_family in target_family_list: | |
with hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference"): | |
cfg = hydra.compose( | |
config_name="webserver_inference", | |
overrides=[ | |
f"task={task}", | |
f"preset={preset}", | |
f"ckpt_path=resources/checkpoints/{preset}-{task}-{target_family}.ckpt", | |
f"data.data_file='{str(predict_data)}'", | |
] | |
) | |
predictions, _ = predict(cfg) | |
prediction_df = pd.concat([prediction_df, predictions_to_df(predictions)]) | |
return [gr.DataFrame(value=prediction_df, visible=True), gr.Tabs(selected=1)] | |
def submit_report(df, score_list, filter_list):
    """Add the requested score columns to the prediction table.

    Args:
        df: Prediction DataFrame; it is modified in place, one new column per
            computed score.
        score_list: Selected score labels (keys of ``SCORE_MAP``).
        filter_list: Selected filter labels. They are only announced to the
            user; this function applies no filtering.

    Returns:
        A two-item list: an update hiding the original table and a visible
        ``gr.DataFrame`` holding ``df`` with the added score columns.
    """
    for filter_name in filter_list:
        gr.Info(f'Applying {filter_name}...')

    for score_name in score_list:
        score_fn = SCORE_MAP.get(score_name)
        if score_fn is None:
            # Some SCORE_MAP entries are None placeholders; DataFrame.apply
            # would raise TypeError on them.
            gr.Warning(f'{score_name} is not available yet and was skipped.')
            continue
        gr.Info(f'Calculating {score_name}...')
        # Row-wise application; the result becomes a column named after the score.
        df[score_name] = df.apply(score_fn, axis=1)

    return [gr.DataFrame(visible=False), gr.DataFrame(value=df, visible=True)]
def change_layout(mode):
    """Build the component updates for the selected inference mode.

    Args:
        mode: The selected mode label.

    Returns:
        A four-item list: three ``gr.Row`` visibility updates (exactly one
        visible) followed by a ``gr.Dropdown`` update carrying the
        target-family choices for that mode. ``None`` for an unrecognised
        mode.
    """
    # mode -> (visibility of the three rows, target-family choices)
    layouts = {
        "Drug screening": (
            (True, False, False),
            [
                'Auto-detect',
                'Kinases',
                'Non-kinase enzymes',
                'Membrane receptors',
                'Nuclear receptors',
                'Ion channels',
                'Other protein targets',
                'Indiscriminate',
            ],
        ),
        "Drug repurposing": (
            (False, True, False),
            ['Library-labelled', 'Indiscriminate'],
        ),
        "Drug-target pair": (
            (False, False, True),
            ['Auto-detect', 'Manually-labelled', 'Indiscriminate'],
        ),
    }
    if mode not in layouts:
        return None

    row_flags, family_choices = layouts[mode]
    updates = [gr.Row(visible=flag) for flag in row_flags]
    updates.append(gr.Dropdown(choices=family_choices))
    return updates
# Gradio UI: an "Inference" tab (id 0) for submitting a prediction job and a
# "Report" tab (id 1) for computing scores on the resulting table.
# NOTE(review): the nesting below was reconstructed from component order;
# confirm it against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft(spacing_size="sm", text_size='md'), title='DeepScreen') as demo:
    with gr.Tabs() as tabs:
        with gr.TabItem(label='Inference', id=0) as inference:
            gr.Markdown('''
# <center>DeepScreen Inference Service</center>
DeepScreen for predicting drug-target interaction/binding affinity.
''')
            mode = gr.Radio(label='Mode', choices=MODE_LIST, value='Drug screening')

            # One input row per mode; change_layout toggles their visibility.
            with gr.Row(visible=True) as drug_screening:
                with gr.Column():
                    target = gr.Textbox(label='Target FASTA sequence')
                    drug_library = gr.Dropdown(
                        label='Drug library',
                        choices=list(DRUG_LIBRARY_MAP.keys()),
                    )
                    # Modify the pd df directly with df['X2'] = target
            with gr.Row(visible=False) as drug_repurposing:
                with gr.Column():
                    drug = gr.Textbox(label='Drug SMILES sequence')
                    target_library = gr.Dropdown(
                        label='Target library',
                        choices=list(TARGET_LIBRARY_MAP.keys()),
                    )
                    # Modify the pd df directly with df['X1'] = drug
            with gr.Row(visible=False) as drug_target_pair:
                predict_data = gr.File(
                    label='Prediction dataset file',
                    file_count="single",
                    type='filepath',
                    height=50,
                )

            with gr.Row(visible=True):
                task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
                preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
                # Initial choices are the same list change_layout returns for
                # 'Drug screening', the default mode.
                target_family = gr.Dropdown(choices=[
                    'Auto-detect',
                    'Kinases',
                    'Non-kinase enzymes',
                    'Membrane receptors',
                    'Nuclear receptors',
                    'Ion channels',
                    'Other protein targets',
                    'Indiscriminate',
                ], label='Target family')
            with gr.Row(visible=True):
                predict_btn = gr.Button("Predict", variant="primary")

        with gr.TabItem(label='Report', id=1) as report:
            gr.Markdown('''
# <center>DeepScreen Virtual Screening Report</center>
Analytic report for virtual screening predictions.
''')
            with gr.Row():
                # Lists rather than dict views, consistent with the task and
                # preset dropdowns above.
                scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
                filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
            with gr.Row():
                # Both tables start hidden; the submit handlers make them visible.
                df_original = gr.Dataframe(type="pandas", interactive=False, height=500, visible=False)
                df_report = gr.Dataframe(type="pandas", interactive=False, height=500, visible=False)
            with gr.Row():
                clear_btn = gr.ClearButton()
                analyze_btn = gr.Button("Report", variant="primary")

    # Event wiring.
    mode.change(
        change_layout,
        mode,
        [drug_screening, drug_repurposing, drug_target_pair, target_family],
        show_progress=False,
    )
    predict_btn.click(
        fn=submit_predict,
        inputs=[predict_data, task, preset, target_family],
        outputs=[df_original, tabs],
    )
    analyze_btn.click(
        fn=submit_report,
        inputs=[df_original, scores, filters],
        outputs=[df_original, df_report],
    )

    # js = """function () {
    #     gradioURL = window.location.href
    #     if (!gradioURL.endsWith('?__theme=light')) {
    #         window.location.replace(gradioURL + '?__theme=light');
    #     }
    # }"""
    # On page load, drop the 'dark' class from <body> and reset the app background.
    js = """
() => {
document.body.classList.remove('dark');
document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
}
"""
    demo.load(None, None, None, js=js)

demo.close()
demo.launch(debug=True)