libokj committed on
Commit
891f1d5
·
1 Parent(s): 7f274be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -31
app.py CHANGED
@@ -199,7 +199,7 @@ def mol_to_pharm3d(mol, mode='html'):
199
 
200
  feats = FEAT_FACTORY.GetFeaturesForMol(mol)
201
 
202
- view = View3DmolCell(width=400, height=250)
203
  for feat in feats:
204
  pos = feat.GetPos()
205
  color = _featColors.get(feat.GetFamily(), (.5, .5, .5))
@@ -861,12 +861,12 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
861
  orig_df['Target Family'] = None
862
  if orig_df['Target Family'].isna().any():
863
  orig_df.loc[orig_df['Target Family'].isna(), 'Target Family'] = (
864
- orig_df.loc[orig_df['Target Family'].isna(), 'X2'].swifter.apply(detect_family)
865
  )
866
  orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
867
  detect_family.cache_clear()
868
 
869
- orig_df['X1^'] = orig_df['X1'].swifter.apply(rdkit_canonicalize)
870
 
871
  orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
872
  annotated_df = orig_df[~orig_df['Y'].isna()].copy()
@@ -979,7 +979,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
979
  if "Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target" in opts:
980
  x2 = prediction_df['X2'].iloc[0]
981
  pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
982
- pos_compounds_df['FP'] = pos_compounds_df['X1'].swifter.apply(smiles_to_ecfp)
983
 
984
  @cache
985
  def max_sim(smiles):
@@ -988,13 +988,13 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
988
  prediction_df[[
989
  'Max. Tanimoto Similarity to Known Ligands',
990
  'Max. Sim. Ligand'
991
- ]] = prediction_df['X1'].swifter.apply(max_sim).apply(pd.Series)
992
 
993
  max_sim.cache_clear()
994
 
995
  if "Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound" in opts:
996
  x2 = prediction_df['X2'].iloc[0]
997
- prediction_df['X1^'] = prediction_df['X1'].swifter.apply(rdkit_canonicalize)
998
 
999
  @cache
1000
  def max_id(compound):
@@ -1003,7 +1003,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
1003
 
1004
  prediction_df[['Max. Sequence Identity to Known Targets of Hit Compound',
1005
  'Max. Id. Target']] = (
1006
- prediction_df['X1^'].swifter.apply(max_id).apply(pd.Series)
1007
  )
1008
  prediction_df.drop(['X1^'], axis=1, inplace=True)
1009
 
@@ -1012,7 +1012,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
1012
  # Advanced options for Target Protein Identification
1013
  if "Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set" in opts:
1014
  x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
1015
- prediction_df['FP'] = prediction_df['X1'].swifter.apply(smiles_to_ecfp)
1016
 
1017
  prediction_df[[
1018
  'Max. Tanimoto Similarity to Training Compounds',
@@ -1030,7 +1030,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
1030
  prediction_df[[
1031
  'Max. Sequence Identity to Known Targets of Input Compound',
1032
  'Max. Id. Target'
1033
- ]] = prediction_df['X2'].swifter.apply(max_id).apply(pd.Series)
1034
 
1035
  max_id.cache_clear()
1036
 
@@ -1046,7 +1046,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
1046
  prediction_df[[
1047
  'Max. Tanimoto Similarity to Known Ligands of Identified Target',
1048
  'Max. Sim. Ligand'
1049
- ]] = prediction_df['X2'].swifter.apply(max_sim).apply(pd.Series)
1050
 
1051
  max_sim.cache_clear()
1052
 
@@ -1100,10 +1100,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
1100
 
1101
  if 'X1' in df.columns:
1102
  if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
1103
- df['Compound'] = df['X1'].swifter.apply(
1104
  lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
1105
- df['Scaffold'] = df['Compound'].swifter.apply(MurckoScaffold.GetScaffoldForMol)
1106
- df['Scaffold SMILES'] = df['Scaffold'].swifter.apply(lambda x: Chem.MolToSmiles(x))
1107
  df['Pharmacophore'] = None
1108
  if task == 'Compound-Protein Binding Affinity':
1109
  # Convert Y^ from pIC50 to IC50
@@ -1121,9 +1121,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
1121
 
1122
  def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(track_tqdm=True)):
1123
  df_html = df.copy(deep=True)
 
1124
  column_aliases = COLUMN_ALIASES.copy()
1125
  cols_left = list(pd.Index([
1126
- 'ID1', 'ID2', 'Compound', 'Scaffold', 'Pharmacophore', 'X1', 'Scaffold SMILES', 'X2', 'Y^'
1127
  ]).intersection(df_html.columns))
1128
  # cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
1129
  # df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
@@ -1151,17 +1152,17 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1151
  columns_unique = None
1152
 
1153
  if 'Exclude Pharmacophore 3D' not in opts:
1154
- df_html['Pharmacophore'] = df_html['Compound'].swifter.apply(
1155
  lambda x: mol_to_pharm3d(x) if not pd.isna(x) else x)
1156
 
1157
  if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
1158
- df_html['Compound'] = df_html['Compound'].swifter.apply(
1159
  lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
1160
  else:
1161
  df_html.drop(['Compound'], axis=1, inplace=True)
1162
 
1163
  if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
1164
- df_html['Scaffold'] = df_html['Scaffold'].swifter.apply(
1165
  lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
1166
  else:
1167
  df_html.drop(['Scaffold'], axis=1, inplace=True)
@@ -1196,7 +1197,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1196
  df_html.rename(columns=column_aliases, inplace=True)
1197
  df_html.index.name = 'Index'
1198
  if 'Target FASTA' in df_html.columns:
1199
- df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
1200
  lambda x: wrap_text(x) if not pd.isna(x) else x)
1201
 
1202
  num_cols = df_html.select_dtypes('number').columns
@@ -1207,8 +1208,6 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1207
  if columns_unique is not None:
1208
  unique_df = df_html.loc[:, columns_unique].iloc[[0]].copy()
1209
  df_html = df_html.loc[:, ~columns_unique]
1210
- df_html.dropna(how='all', axis=1, inplace=True)
1211
- unique_df.dropna(how='all', axis=1, inplace=True)
1212
 
1213
  if not file:
1214
  if 'Compound ID' in df_html.columns:
@@ -1216,7 +1215,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1216
  if 'Target ID' in df_html.columns:
1217
  df_html.drop(['Target FASTA'], axis=1, inplace=True)
1218
  if 'Target FASTA' in df_html.columns:
1219
- df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
1220
  lambda x: wrap_text(x) if not pd.isna(x) else x)
1221
  if 'Scaffold SMILES' in df_html.columns:
1222
  df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
@@ -1300,10 +1299,9 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1300
  report_table = pn.widgets.Tabulator(
1301
  df_html, formatters=formatters,
1302
  frozen_columns=[
1303
- 'Index', 'Target ID', 'Compound ID', 'Compound Name', 'Compound'
1304
  ],
1305
- disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30
1306
- )
1307
 
1308
  for i, col in enumerate(num_cols):
1309
  cmap = sns.light_palette(num_col_colors[i], as_cmap=True)
@@ -1340,7 +1338,6 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1340
 
1341
  .tabulator-cell {
1342
  overflow: visible !important;
1343
- align-content: center !important;
1344
  }
1345
 
1346
  .tabulator-cell:hover {
@@ -1384,7 +1381,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1384
  raw_css=[panel_css],
1385
  js_files={'panel_custom': 'static/panel.js', '3Dmol': 'static/3Dmol-min.js'},
1386
  # js_modules={'3Dmol': 'static/3Dmol-min.js'},
1387
- inline=True,
1388
  )
1389
 
1390
  template = pn.template.VanillaTemplate(
@@ -1493,11 +1490,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
1493
  df_report = df.copy()
1494
  try:
1495
  for filter_name in filter_list:
1496
- df_report[filter_name] = df_report['Compound'].swifter.apply(
1497
  lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
1498
 
1499
  for score_name in score_list:
1500
- df_report[score_name] = df_report['Compound'].swifter.apply(
1501
  lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
1502
 
1503
  return (create_html_report(df_report, file=None, task=task), df_report,
@@ -2183,7 +2180,7 @@ higher similarities usually correspond to greater prediction confidence.<br>
2183
  alignment = aligner.align(processed_fasta, query)
2184
  return alignment.score / max(len(processed_fasta), len(query))
2185
 
2186
- alignment_df['score'] = alignment_df['X2'].swifter.apply(align_score)
2187
  row = alignment_df.loc[alignment_df['score'].idxmax()]
2188
  family = str(row['Target Family']).title()
2189
  return gr.Dropdown(value=family,
@@ -2515,13 +2512,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2515
  infer_df = pd.read_csv(drug_target_pair_upload)
2516
  validate_columns(infer_df, ['X1', 'X2'])
2517
 
2518
- infer_df['X1_ERR'] = infer_df['X1'].swifter.apply(
2519
  validate_seq_str, regex=SMILES_PAT)
2520
  if not infer_df['X1_ERR'].isna().all():
2521
  raise ValueError(
2522
  f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
2523
 
2524
- infer_df['X2_ERR'] = infer_df['X2'].swifter.apply(
2525
  validate_seq_str, regex=FASTA_PAT)
2526
  if not infer_df['X2_ERR'].isna().all():
2527
  raise ValueError(
 
199
 
200
  feats = FEAT_FACTORY.GetFeaturesForMol(mol)
201
 
202
+ view = View3DmolCell(width=320, height=200)
203
  for feat in feats:
204
  pos = feat.GetPos()
205
  color = _featColors.get(feat.GetFamily(), (.5, .5, .5))
 
861
  orig_df['Target Family'] = None
862
  if orig_df['Target Family'].isna().any():
863
  orig_df.loc[orig_df['Target Family'].isna(), 'Target Family'] = (
864
+ orig_df.loc[orig_df['Target Family'].isna(), 'X2'].parallel_apply(detect_family)
865
  )
866
  orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
867
  detect_family.cache_clear()
868
 
869
+ orig_df['X1^'] = orig_df['X1'].parallel_apply(rdkit_canonicalize)
870
 
871
  orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
872
  annotated_df = orig_df[~orig_df['Y'].isna()].copy()
 
979
  if "Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target" in opts:
980
  x2 = prediction_df['X2'].iloc[0]
981
  pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
982
+ pos_compounds_df['FP'] = pos_compounds_df['X1'].parallel_apply(smiles_to_ecfp)
983
 
984
  @cache
985
  def max_sim(smiles):
 
988
  prediction_df[[
989
  'Max. Tanimoto Similarity to Known Ligands',
990
  'Max. Sim. Ligand'
991
+ ]] = prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)
992
 
993
  max_sim.cache_clear()
994
 
995
  if "Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound" in opts:
996
  x2 = prediction_df['X2'].iloc[0]
997
+ prediction_df['X1^'] = prediction_df['X1'].parallel_apply(rdkit_canonicalize)
998
 
999
  @cache
1000
  def max_id(compound):
 
1003
 
1004
  prediction_df[['Max. Sequence Identity to Known Targets of Hit Compound',
1005
  'Max. Id. Target']] = (
1006
+ prediction_df['X1^'].parallel_apply(max_id).apply(pd.Series)
1007
  )
1008
  prediction_df.drop(['X1^'], axis=1, inplace=True)
1009
 
 
1012
  # Advanced options for Target Protein Identification
1013
  if "Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set" in opts:
1014
  x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
1015
+ prediction_df['FP'] = prediction_df['X1'].parallel_apply(smiles_to_ecfp)
1016
 
1017
  prediction_df[[
1018
  'Max. Tanimoto Similarity to Training Compounds',
 
1030
  prediction_df[[
1031
  'Max. Sequence Identity to Known Targets of Input Compound',
1032
  'Max. Id. Target'
1033
+ ]] = prediction_df['X2'].parallel_apply(max_id).apply(pd.Series)
1034
 
1035
  max_id.cache_clear()
1036
 
 
1046
  prediction_df[[
1047
  'Max. Tanimoto Similarity to Known Ligands of Identified Target',
1048
  'Max. Sim. Ligand'
1049
+ ]] = prediction_df['X2'].parallel_apply(max_sim).apply(pd.Series)
1050
 
1051
  max_sim.cache_clear()
1052
 
 
1100
 
1101
  if 'X1' in df.columns:
1102
  if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
1103
+ df['Compound'] = df['X1'].parallel_apply(
1104
  lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
1105
+ df['Scaffold'] = df['Compound'].parallel_apply(MurckoScaffold.GetScaffoldForMol)
1106
+ df['Scaffold SMILES'] = df['Scaffold'].parallel_apply(lambda x: Chem.MolToSmiles(x))
1107
  df['Pharmacophore'] = None
1108
  if task == 'Compound-Protein Binding Affinity':
1109
  # Convert Y^ from pIC50 to IC50
 
1121
 
1122
  def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(track_tqdm=True)):
1123
  df_html = df.copy(deep=True)
1124
+ df_html.dropna(how='all', axis=1, inplace=True)
1125
  column_aliases = COLUMN_ALIASES.copy()
1126
  cols_left = list(pd.Index([
1127
+ 'ID1', 'ID2', 'Compound', 'Scaffold', 'Pharmacophore', 'X1', 'Scaffold SMILES', 'X2', 'Y^'
1128
  ]).intersection(df_html.columns))
1129
  # cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
1130
  # df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
 
1152
  columns_unique = None
1153
 
1154
  if 'Exclude Pharmacophore 3D' not in opts:
1155
+ df_html['Pharmacophore'] = df_html['Compound'].parallel_apply(
1156
  lambda x: mol_to_pharm3d(x) if not pd.isna(x) else x)
1157
 
1158
  if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
1159
+ df_html['Compound'] = df_html['Compound'].parallel_apply(
1160
  lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
1161
  else:
1162
  df_html.drop(['Compound'], axis=1, inplace=True)
1163
 
1164
  if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
1165
+ df_html['Scaffold'] = df_html['Scaffold'].parallel_apply(
1166
  lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
1167
  else:
1168
  df_html.drop(['Scaffold'], axis=1, inplace=True)
 
1197
  df_html.rename(columns=column_aliases, inplace=True)
1198
  df_html.index.name = 'Index'
1199
  if 'Target FASTA' in df_html.columns:
1200
+ df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
1201
  lambda x: wrap_text(x) if not pd.isna(x) else x)
1202
 
1203
  num_cols = df_html.select_dtypes('number').columns
 
1208
  if columns_unique is not None:
1209
  unique_df = df_html.loc[:, columns_unique].iloc[[0]].copy()
1210
  df_html = df_html.loc[:, ~columns_unique]
 
 
1211
 
1212
  if not file:
1213
  if 'Compound ID' in df_html.columns:
 
1215
  if 'Target ID' in df_html.columns:
1216
  df_html.drop(['Target FASTA'], axis=1, inplace=True)
1217
  if 'Target FASTA' in df_html.columns:
1218
+ df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
1219
  lambda x: wrap_text(x) if not pd.isna(x) else x)
1220
  if 'Scaffold SMILES' in df_html.columns:
1221
  df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
 
1299
  report_table = pn.widgets.Tabulator(
1300
  df_html, formatters=formatters,
1301
  frozen_columns=[
1302
+ 'Index', 'Target ID', 'Compound ID', 'Compound'
1303
  ],
1304
+ disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
 
1305
 
1306
  for i, col in enumerate(num_cols):
1307
  cmap = sns.light_palette(num_col_colors[i], as_cmap=True)
 
1338
 
1339
  .tabulator-cell {
1340
  overflow: visible !important;
 
1341
  }
1342
 
1343
  .tabulator-cell:hover {
 
1381
  raw_css=[panel_css],
1382
  js_files={'panel_custom': 'static/panel.js', '3Dmol': 'static/3Dmol-min.js'},
1383
  # js_modules={'3Dmol': 'static/3Dmol-min.js'},
1384
+ inline=True
1385
  )
1386
 
1387
  template = pn.template.VanillaTemplate(
 
1490
  df_report = df.copy()
1491
  try:
1492
  for filter_name in filter_list:
1493
+ df_report[filter_name] = df_report['Compound'].parallel_apply(
1494
  lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
1495
 
1496
  for score_name in score_list:
1497
+ df_report[score_name] = df_report['Compound'].parallel_apply(
1498
  lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
1499
 
1500
  return (create_html_report(df_report, file=None, task=task), df_report,
 
2180
  alignment = aligner.align(processed_fasta, query)
2181
  return alignment.score / max(len(processed_fasta), len(query))
2182
 
2183
+ alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
2184
  row = alignment_df.loc[alignment_df['score'].idxmax()]
2185
  family = str(row['Target Family']).title()
2186
  return gr.Dropdown(value=family,
 
2512
  infer_df = pd.read_csv(drug_target_pair_upload)
2513
  validate_columns(infer_df, ['X1', 'X2'])
2514
 
2515
+ infer_df['X1_ERR'] = infer_df['X1'].parallel_apply(
2516
  validate_seq_str, regex=SMILES_PAT)
2517
  if not infer_df['X1_ERR'].isna().all():
2518
  raise ValueError(
2519
  f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
2520
 
2521
+ infer_df['X2_ERR'] = infer_df['X2'].parallel_apply(
2522
  validate_seq_str, regex=FASTA_PAT)
2523
  if not infer_df['X2_ERR'].isna().all():
2524
  raise ValueError(