Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -23,6 +23,7 @@ from email_validator import validate_email, EmailNotValidError
|
|
23 |
import gradio as gr
|
24 |
import hydra
|
25 |
import pandas as pd
|
|
|
26 |
import requests
|
27 |
from requests.adapters import HTTPAdapter, Retry
|
28 |
from markdown import markdown
|
@@ -41,7 +42,7 @@ import panel as pn
|
|
41 |
from apscheduler.schedulers.background import BackgroundScheduler
|
42 |
from tinydb import TinyDB, Query
|
43 |
|
44 |
-
import swifter
|
45 |
from tqdm.auto import tqdm
|
46 |
|
47 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
@@ -719,15 +720,12 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
719 |
gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
|
720 |
return {analyze_btn: gr.Button(interactive=False)}
|
721 |
if 'X1' in df.columns:
|
722 |
-
df['Scaffold SMILES'] = df['X1'].
|
723 |
-
|
724 |
-
df['Scaffold'] = df['Scaffold SMILES'].swifter.progress_bar(
|
725 |
-
desc='Generating scaffold graphs...').apply(
|
726 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
727 |
# Add a new column with RDKit molecule objects
|
728 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
729 |
-
df['Compound'] = df['X1'].swifter.progress_bar(
|
730 |
-
desc='Generating molecular graphs...').apply(
|
731 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
732 |
|
733 |
# DF_FOR_REPORT = df.copy()
|
@@ -806,19 +804,16 @@ def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm
|
|
806 |
elif 'Y^' in df_html.columns:
|
807 |
job = 'Interaction Pair Inference'
|
808 |
if 'Compound' in df_html.columns:
|
809 |
-
df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
|
810 |
-
desc='Generating compound graph...').apply(
|
811 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
812 |
if 'Scaffold' in df_html.columns:
|
813 |
-
df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
|
814 |
-
desc='Generating scaffold graph...').apply(
|
815 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
816 |
|
817 |
df_html.rename(columns=column_aliases, inplace=True)
|
818 |
df_html.index.name = 'Index'
|
819 |
if 'Target FASTA' in df_html.columns:
|
820 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
|
821 |
-
desc='Processing FASTA...').apply(
|
822 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
823 |
|
824 |
num_cols = df_html.select_dtypes('number').columns
|
@@ -836,8 +831,7 @@ def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm
|
|
836 |
if 'Target ID' in df_html.columns:
|
837 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
838 |
if 'Target FASTA' in df_html.columns:
|
839 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
|
840 |
-
desc='Processing FASTA...').apply(
|
841 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
842 |
if 'Scaffold SMILES' in df_html.columns:
|
843 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
@@ -1091,13 +1085,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
|
|
1091 |
df_report = df.copy()
|
1092 |
try:
|
1093 |
for filter_name in filter_list:
|
1094 |
-
df_report[filter_name] = df_report['Compound'].swifter.progress_bar(
|
1095 |
-
desc=f"Calculating {filter_name}").apply(
|
1096 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
1097 |
|
1098 |
for score_name in score_list:
|
1099 |
-
df_report[score_name] = df_report['Compound'].swifter.progress_bar(
|
1100 |
-
desc=f"Calculating {score_name}").apply(
|
1101 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
1102 |
|
1103 |
# pie_chart = None
|
@@ -1713,8 +1705,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1713 |
def align_score(query):
|
1714 |
return aligner.align(processed_fasta, query).score
|
1715 |
|
1716 |
-
alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
|
1717 |
-
desc="Detecting protein family of the target...").apply(align_score)
|
1718 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
1719 |
return gr.Dropdown(value=row['protein_family'],
|
1720 |
info=f"Reason: Best BLASTP score ({row['score']}) "
|
@@ -2022,13 +2013,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2022 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
2023 |
validate_columns(infer_df, ['X1', 'X2'])
|
2024 |
|
2025 |
-
infer_df['X1_ERR'] = infer_df['X1'].
|
2026 |
validate_seq_str, regex=SMILES_PAT)
|
2027 |
if not infer_df['X1_ERR'].isna().all():
|
2028 |
raise ValueError(
|
2029 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
2030 |
|
2031 |
-
infer_df['X2_ERR'] = infer_df['X2'].
|
2032 |
validate_seq_str, regex=FASTA_PAT)
|
2033 |
if not infer_df['X2_ERR'].isna().all():
|
2034 |
raise ValueError(
|
@@ -2278,4 +2269,5 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2278 |
|
2279 |
if __name__ == "__main__":
|
2280 |
hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
|
|
|
2281 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|
|
|
23 |
import gradio as gr
|
24 |
import hydra
|
25 |
import pandas as pd
|
26 |
+
from pandarallel import pandarallel
|
27 |
import requests
|
28 |
from requests.adapters import HTTPAdapter, Retry
|
29 |
from markdown import markdown
|
|
|
42 |
from apscheduler.schedulers.background import BackgroundScheduler
|
43 |
from tinydb import TinyDB, Query
|
44 |
|
45 |
+
# import swifter
|
46 |
from tqdm.auto import tqdm
|
47 |
|
48 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
|
|
720 |
gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
|
721 |
return {analyze_btn: gr.Button(interactive=False)}
|
722 |
if 'X1' in df.columns:
|
723 |
+
df['Scaffold SMILES'] = df['X1'].parallel_apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
|
724 |
+
df['Scaffold'] = df['Scaffold SMILES'].parallel_apply(
|
|
|
|
|
725 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
726 |
# Add a new column with RDKit molecule objects
|
727 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
728 |
+
df['Compound'] = df['X1'].parallel_apply(
|
|
|
729 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
730 |
|
731 |
# DF_FOR_REPORT = df.copy()
|
|
|
804 |
elif 'Y^' in df_html.columns:
|
805 |
job = 'Interaction Pair Inference'
|
806 |
if 'Compound' in df_html.columns:
|
807 |
+
df_html['Compound'] = df_html['Compound'].parallel_apply(
|
|
|
808 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
809 |
if 'Scaffold' in df_html.columns:
|
810 |
+
df_html['Scaffold'] = df_html['Scaffold'].parallel_apply(
|
|
|
811 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
812 |
|
813 |
df_html.rename(columns=column_aliases, inplace=True)
|
814 |
df_html.index.name = 'Index'
|
815 |
if 'Target FASTA' in df_html.columns:
|
816 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
|
|
|
817 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
818 |
|
819 |
num_cols = df_html.select_dtypes('number').columns
|
|
|
831 |
if 'Target ID' in df_html.columns:
|
832 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
833 |
if 'Target FASTA' in df_html.columns:
|
834 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
|
|
|
835 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
836 |
if 'Scaffold SMILES' in df_html.columns:
|
837 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
|
|
1085 |
df_report = df.copy()
|
1086 |
try:
|
1087 |
for filter_name in filter_list:
|
1088 |
+
df_report[filter_name] = df_report['Compound'].parallel_apply(
|
|
|
1089 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
1090 |
|
1091 |
for score_name in score_list:
|
1092 |
+
df_report[score_name] = df_report['Compound'].parallel_apply(
|
|
|
1093 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
1094 |
|
1095 |
# pie_chart = None
|
|
|
1705 |
def align_score(query):
|
1706 |
return aligner.align(processed_fasta, query).score
|
1707 |
|
1708 |
+
alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
|
|
|
1709 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
1710 |
return gr.Dropdown(value=row['protein_family'],
|
1711 |
info=f"Reason: Best BLASTP score ({row['score']}) "
|
|
|
2013 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
2014 |
validate_columns(infer_df, ['X1', 'X2'])
|
2015 |
|
2016 |
+
infer_df['X1_ERR'] = infer_df['X1'].parallel_apply(
|
2017 |
validate_seq_str, regex=SMILES_PAT)
|
2018 |
if not infer_df['X1_ERR'].isna().all():
|
2019 |
raise ValueError(
|
2020 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
2021 |
|
2022 |
+
infer_df['X2_ERR'] = infer_df['X2'].parallel_apply(
|
2023 |
validate_seq_str, regex=FASTA_PAT)
|
2024 |
if not infer_df['X2_ERR'].isna().all():
|
2025 |
raise ValueError(
|
|
|
2269 |
|
2270 |
if __name__ == "__main__":
|
2271 |
hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
|
2272 |
+
pandarallel.initialize(progress_bar=True)
|
2273 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|