libokj committed on
Commit
c8b558a
·
1 Parent(s): de6cad6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -23
app.py CHANGED
@@ -23,6 +23,7 @@ from email_validator import validate_email, EmailNotValidError
23
  import gradio as gr
24
  import hydra
25
  import pandas as pd
 
26
  import requests
27
  from requests.adapters import HTTPAdapter, Retry
28
  from markdown import markdown
@@ -41,7 +42,7 @@ import panel as pn
41
  from apscheduler.schedulers.background import BackgroundScheduler
42
  from tinydb import TinyDB, Query
43
 
44
- import swifter
45
  from tqdm.auto import tqdm
46
 
47
  from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
@@ -719,15 +720,12 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
719
  gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
720
  return {analyze_btn: gr.Button(interactive=False)}
721
  if 'X1' in df.columns:
722
- df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
723
- desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
724
- df['Scaffold'] = df['Scaffold SMILES'].swifter.progress_bar(
725
- desc='Generating scaffold graphs...').apply(
726
  lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
727
  # Add a new column with RDKit molecule objects
728
  if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
729
- df['Compound'] = df['X1'].swifter.progress_bar(
730
- desc='Generating molecular graphs...').apply(
731
  lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
732
 
733
  # DF_FOR_REPORT = df.copy()
@@ -806,19 +804,16 @@ def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm
806
  elif 'Y^' in df_html.columns:
807
  job = 'Interaction Pair Inference'
808
  if 'Compound' in df_html.columns:
809
- df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
810
- desc='Generating compound graph...').apply(
811
  lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
812
  if 'Scaffold' in df_html.columns:
813
- df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
814
- desc='Generating scaffold graph...').apply(
815
  lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
816
 
817
  df_html.rename(columns=column_aliases, inplace=True)
818
  df_html.index.name = 'Index'
819
  if 'Target FASTA' in df_html.columns:
820
- df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
821
- desc='Processing FASTA...').apply(
822
  lambda x: wrap_text(x) if not pd.isna(x) else x)
823
 
824
  num_cols = df_html.select_dtypes('number').columns
@@ -836,8 +831,7 @@ def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm
836
  if 'Target ID' in df_html.columns:
837
  df_html.drop(['Target FASTA'], axis=1, inplace=True)
838
  if 'Target FASTA' in df_html.columns:
839
- df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
840
- desc='Processing FASTA...').apply(
841
  lambda x: wrap_text(x) if not pd.isna(x) else x)
842
  if 'Scaffold SMILES' in df_html.columns:
843
  df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
@@ -1091,13 +1085,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
1091
  df_report = df.copy()
1092
  try:
1093
  for filter_name in filter_list:
1094
- df_report[filter_name] = df_report['Compound'].swifter.progress_bar(
1095
- desc=f"Calculating {filter_name}").apply(
1096
  lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
1097
 
1098
  for score_name in score_list:
1099
- df_report[score_name] = df_report['Compound'].swifter.progress_bar(
1100
- desc=f"Calculating {score_name}").apply(
1101
  lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
1102
 
1103
  # pie_chart = None
@@ -1713,8 +1705,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1713
  def align_score(query):
1714
  return aligner.align(processed_fasta, query).score
1715
 
1716
- alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
1717
- desc="Detecting protein family of the target...").apply(align_score)
1718
  row = alignment_df.loc[alignment_df['score'].idxmax()]
1719
  return gr.Dropdown(value=row['protein_family'],
1720
  info=f"Reason: Best BLASTP score ({row['score']}) "
@@ -2022,13 +2013,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2022
  infer_df = pd.read_csv(drug_target_pair_upload)
2023
  validate_columns(infer_df, ['X1', 'X2'])
2024
 
2025
- infer_df['X1_ERR'] = infer_df['X1'].swifter.progress_bar(desc="Validating SMILES...").apply(
2026
  validate_seq_str, regex=SMILES_PAT)
2027
  if not infer_df['X1_ERR'].isna().all():
2028
  raise ValueError(
2029
  f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
2030
 
2031
- infer_df['X2_ERR'] = infer_df['X2'].swifter.progress_bar(desc="Validating FASTA...").apply(
2032
  validate_seq_str, regex=FASTA_PAT)
2033
  if not infer_df['X2_ERR'].isna().all():
2034
  raise ValueError(
@@ -2278,4 +2269,5 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2278
 
2279
  if __name__ == "__main__":
2280
  hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
 
2281
  demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
 
23
  import gradio as gr
24
  import hydra
25
  import pandas as pd
26
+ from pandarallel import pandarallel
27
  import requests
28
  from requests.adapters import HTTPAdapter, Retry
29
  from markdown import markdown
 
42
  from apscheduler.schedulers.background import BackgroundScheduler
43
  from tinydb import TinyDB, Query
44
 
45
+ # import swifter
46
  from tqdm.auto import tqdm
47
 
48
  from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
 
720
  gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
721
  return {analyze_btn: gr.Button(interactive=False)}
722
  if 'X1' in df.columns:
723
+ df['Scaffold SMILES'] = df['X1'].parallel_apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
724
+ df['Scaffold'] = df['Scaffold SMILES'].parallel_apply(
 
 
725
  lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
726
  # Add a new column with RDKit molecule objects
727
  if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
728
+ df['Compound'] = df['X1'].parallel_apply(
 
729
  lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
730
 
731
  # DF_FOR_REPORT = df.copy()
 
804
  elif 'Y^' in df_html.columns:
805
  job = 'Interaction Pair Inference'
806
  if 'Compound' in df_html.columns:
807
+ df_html['Compound'] = df_html['Compound'].parallel_apply(
 
808
  lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
809
  if 'Scaffold' in df_html.columns:
810
+ df_html['Scaffold'] = df_html['Scaffold'].parallel_apply(
 
811
  lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
812
 
813
  df_html.rename(columns=column_aliases, inplace=True)
814
  df_html.index.name = 'Index'
815
  if 'Target FASTA' in df_html.columns:
816
+ df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
 
817
  lambda x: wrap_text(x) if not pd.isna(x) else x)
818
 
819
  num_cols = df_html.select_dtypes('number').columns
 
831
  if 'Target ID' in df_html.columns:
832
  df_html.drop(['Target FASTA'], axis=1, inplace=True)
833
  if 'Target FASTA' in df_html.columns:
834
+ df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
 
835
  lambda x: wrap_text(x) if not pd.isna(x) else x)
836
  if 'Scaffold SMILES' in df_html.columns:
837
  df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
 
1085
  df_report = df.copy()
1086
  try:
1087
  for filter_name in filter_list:
1088
+ df_report[filter_name] = df_report['Compound'].parallel_apply(
 
1089
  lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
1090
 
1091
  for score_name in score_list:
1092
+ df_report[score_name] = df_report['Compound'].parallel_apply(
 
1093
  lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
1094
 
1095
  # pie_chart = None
 
1705
  def align_score(query):
1706
  return aligner.align(processed_fasta, query).score
1707
 
1708
+ alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
 
1709
  row = alignment_df.loc[alignment_df['score'].idxmax()]
1710
  return gr.Dropdown(value=row['protein_family'],
1711
  info=f"Reason: Best BLASTP score ({row['score']}) "
 
2013
  infer_df = pd.read_csv(drug_target_pair_upload)
2014
  validate_columns(infer_df, ['X1', 'X2'])
2015
 
2016
+ infer_df['X1_ERR'] = infer_df['X1'].parallel_apply(
2017
  validate_seq_str, regex=SMILES_PAT)
2018
  if not infer_df['X1_ERR'].isna().all():
2019
  raise ValueError(
2020
  f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
2021
 
2022
+ infer_df['X2_ERR'] = infer_df['X2'].parallel_apply(
2023
  validate_seq_str, regex=FASTA_PAT)
2024
  if not infer_df['X2_ERR'].isna().all():
2025
  raise ValueError(
 
2269
 
2270
  if __name__ == "__main__":
2271
  hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
2272
+ pandarallel.initialize(progress_bar=True)
2273
  demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)