libokj committed on
Commit
4ca35f6
·
1 Parent(s): 396277e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -20
app.py CHANGED
@@ -173,6 +173,10 @@ TASK_MAP = {
173
  TASK_METRIC_MAP = {
174
  'DTI': 'AUROC',
175
  'DTA': 'CI',
 
 
 
 
176
  }
177
 
178
  PRESET_MAP = {
@@ -744,7 +748,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
744
  orig_df = orig_df.merge(df_training[['X1', 'X2', 'Y']], on=['X1', 'X2'], how='left', indicator=False)
745
  annotated_df = orig_df[~orig_df['Y'].isna()].copy()
746
  annotated_df.rename(columns={'Y': 'Y^'}, inplace=True)
747
- annotated_df['Prediction Source'] = 'Training Data'
748
  # Resave the unannotated data
749
  unannotated_df = orig_df[orig_df['Y'].isna()].drop(['Y', 'Target Family'], axis=1)
750
  if not unannotated_df.empty:
@@ -775,7 +779,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
775
 
776
  predictions, _ = predict(cfg)
777
  predictions = pd.concat([pd.DataFrame(prediction) for prediction in predictions], ignore_index=True)
778
- predictions['Prediction Source'] = f'{preset} ({target_family})'
779
  prediction_df = pd.concat([prediction_df, predictions])
780
 
781
  else:
@@ -811,7 +815,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
811
 
812
  predictions, _ = predict(cfg)
813
  predictions = pd.concat([pd.DataFrame(prediction) for prediction in predictions], ignore_index=True)
814
- predictions['Prediction Source'] = f'{preset} ({family})'
815
  prediction_df = pd.concat([prediction_df, predictions])
816
 
817
  prediction_df = prediction_df.merge(orig_df, on=['X1', 'X2'], how='left', indicator=False)
@@ -903,6 +907,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
903
  df['Scaffold'] = df['Compound'].parallel_apply(MurckoScaffold.GetScaffoldForMol)
904
  df['Scaffold SMILES'] = df['Scaffold'].parallel_apply(lambda x: Chem.MolToSmiles(x))
905
 
 
 
 
 
906
 
907
  # DF_FOR_REPORT = df.copy()
908
 
@@ -932,21 +940,19 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
932
  df_html = df.copy(deep=True)
933
  column_aliases = COLUMN_ALIASES.copy()
934
  cols_left = list(pd.Index(
935
- ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']).intersection(df_html.columns))
936
  cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
937
  df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
938
 
939
  if isinstance(task, str):
940
  column_aliases.update({
941
- 'Y': 'Actual Interaction Probability' if task == 'Compound-Protein Interaction'
942
- else 'Actual Binding Affinity pIC50 [nM]',
943
- 'Y^': 'Predicted Interaction Probability' if task == 'Compound-Protein Interaction'
944
- else 'Predicted Binding Affinity (pIC50 [nM])'
945
  })
946
 
947
- ascending = True if column_aliases['Y^'] == 'Predicted Binding Affinity' else False
948
  df_html = df_html.sort_values(
949
- [col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
950
  )
951
 
952
  if not file:
@@ -1016,7 +1022,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
1016
  lambda x: wrap_text(x) if not pd.isna(x) else x)
1017
  if 'Scaffold SMILES' in df_html.columns:
1018
  df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
1019
- styled_df = df_html.style.format(precision=3)
1020
 
1021
  for i, col in enumerate(num_cols):
1022
  if col in df_html.columns:
@@ -1457,7 +1463,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1457
  "Interaction prediction provides you binding probability score between the target of "
1458
  "interest and each compound in the library, "
1459
  "while affinity prediction directly estimates their binding strength measured using "
1460
- "pIC<sub>50</sub> in units of nM."
1461
  )
1462
  drug_screen_task = gr.Dropdown(
1463
  list(TASK_MAP.keys()),
@@ -1561,7 +1567,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1561
  HelpTip(
1562
  "Interaction prediction provides you binding probability score between the target of "
1563
  "interest and each compound in the library, while affinity prediction directly "
1564
- "estimates their binding strength measured using pIC<sub>50</sub> in units of nM."
 
1565
  )
1566
  target_identify_task = gr.Dropdown(
1567
  list(TASK_MAP.keys()),
@@ -1694,7 +1701,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1694
  "Interaction prediction provides you binding probability score "
1695
  "between the target of interest and each compound in the library, "
1696
  "while affinity prediction directly estimates their binding strength "
1697
- "measured using pIC<sub>50</sub> in units of nM."
1698
  )
1699
  pair_infer_task = gr.Dropdown(
1700
  list(TASK_MAP.keys()),
@@ -2059,7 +2066,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2059
  gr.Warning('Please enter a valid SMILES for model recommendation.')
2060
  return None
2061
  if family == 'Family-Specific Auto-Recommendation':
2062
- return None
2063
 
2064
  if family == 'General':
2065
  seen_compounds = pd.read_csv(
@@ -2456,9 +2463,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2456
 
2457
 
2458
  def inquire_task(df):
2459
- if 'Y' in df.columns:
2460
- label = 'actual CPI/CPA labels (`Y`)'
2461
- elif 'Y^' in df.columns:
2462
  label = 'predicted CPI/CPA labels (`Y^`)'
2463
  else:
2464
  return {analyze_btn: gr.Button(interactive=True),
@@ -2519,9 +2524,9 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2519
  y_colname = 'Y^'
2520
  if isinstance(task, str):
2521
  if task == 'Compound-Protein Interaction':
2522
- y_colname = 'Y^_prob'
2523
  elif task == 'Compound-Protein Binding Affinity':
2524
- y_colname = 'Y^_pIC50'
2525
  try:
2526
  now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
2527
  filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
 
173
  TASK_METRIC_MAP = {
174
  'DTI': 'AUROC',
175
  'DTA': 'CI',
176
+ 'Compound-Protein Interaction': 'AUROC',
177
+ 'Compound-Protein Binding Affinity': 'CI',
178
+ 'CPI': 'DTI',
179
+ 'CPA': 'DTA',
180
  }
181
 
182
  PRESET_MAP = {
 
748
  orig_df = orig_df.merge(df_training[['X1', 'X2', 'Y']], on=['X1', 'X2'], how='left', indicator=False)
749
  annotated_df = orig_df[~orig_df['Y'].isna()].copy()
750
  annotated_df.rename(columns={'Y': 'Y^'}, inplace=True)
751
+ annotated_df['Source'] = 'Database'
752
  # Resave the unannotated data
753
  unannotated_df = orig_df[orig_df['Y'].isna()].drop(['Y', 'Target Family'], axis=1)
754
  if not unannotated_df.empty:
 
779
 
780
  predictions, _ = predict(cfg)
781
  predictions = pd.concat([pd.DataFrame(prediction) for prediction in predictions], ignore_index=True)
782
+ predictions['Source'] = f'Predicted ({preset} {target_family})'
783
  prediction_df = pd.concat([prediction_df, predictions])
784
 
785
  else:
 
815
 
816
  predictions, _ = predict(cfg)
817
  predictions = pd.concat([pd.DataFrame(prediction) for prediction in predictions], ignore_index=True)
818
+ predictions['Source'] = f'Predicted ({preset} {family})'
819
  prediction_df = pd.concat([prediction_df, predictions])
820
 
821
  prediction_df = prediction_df.merge(orig_df, on=['X1', 'X2'], how='left', indicator=False)
 
907
  df['Scaffold'] = df['Compound'].parallel_apply(MurckoScaffold.GetScaffoldForMol)
908
  df['Scaffold SMILES'] = df['Scaffold'].parallel_apply(lambda x: Chem.MolToSmiles(x))
909
 
910
+ if task == 'Compound-Protein Binding Affinity':
911
+ # Convert Y^ from pIC50 to IC50
912
+ if 'Y^' in df.columns:
913
+ df['Y^'] = 10 ** (-df['Y^'])
914
 
915
  # DF_FOR_REPORT = df.copy()
916
 
 
940
  df_html = df.copy(deep=True)
941
  column_aliases = COLUMN_ALIASES.copy()
942
  cols_left = list(pd.Index(
943
+ ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y^']).intersection(df_html.columns))
944
  cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
945
  df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
946
 
947
  if isinstance(task, str):
948
  column_aliases.update({
949
+ 'Y^': 'Interaction Probability' if task == 'Compound-Protein Interaction'
950
+ else 'Binding Affinity (IC50 [nM])'
 
 
951
  })
952
 
953
+ ascending = True if column_aliases['Y^'] == 'Binding Affinity (IC50 [nM])' else False
954
  df_html = df_html.sort_values(
955
+ [col for col in ['Y^'] if col in df_html.columns], ascending=ascending
956
  )
957
 
958
  if not file:
 
1022
  lambda x: wrap_text(x) if not pd.isna(x) else x)
1023
  if 'Scaffold SMILES' in df_html.columns:
1024
  df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
1025
+ styled_df = df_html.fillna('').style.format(precision=3)
1026
 
1027
  for i, col in enumerate(num_cols):
1028
  if col in df_html.columns:
 
1463
  "Interaction prediction provides you binding probability score between the target of "
1464
  "interest and each compound in the library, "
1465
  "while affinity prediction directly estimates their binding strength measured using "
1466
+ "half maximal inhibitory concentration (IC<sub>50</sub>) in units of nM."
1467
  )
1468
  drug_screen_task = gr.Dropdown(
1469
  list(TASK_MAP.keys()),
 
1567
  HelpTip(
1568
  "Interaction prediction provides you binding probability score between the target of "
1569
  "interest and each compound in the library, while affinity prediction directly "
1570
+ "estimates their binding strength measured using "
1571
+ "half maximal inhibitory concentration (IC<sub>50</sub>) in units of nM."
1572
  )
1573
  target_identify_task = gr.Dropdown(
1574
  list(TASK_MAP.keys()),
 
1701
  "Interaction prediction provides you binding probability score "
1702
  "between the target of interest and each compound in the library, "
1703
  "while affinity prediction directly estimates their binding strength "
1704
+ "measured using half maximal inhibitory concentration (IC<sub>50</sub>) in units of nM."
1705
  )
1706
  pair_infer_task = gr.Dropdown(
1707
  list(TASK_MAP.keys()),
 
2066
  gr.Warning('Please enter a valid SMILES for model recommendation.')
2067
  return None
2068
  if family == 'Family-Specific Auto-Recommendation':
2069
+ return 'Family-Specific Auto-Recommendation'
2070
 
2071
  if family == 'General':
2072
  seen_compounds = pd.read_csv(
 
2463
 
2464
 
2465
  def inquire_task(df):
2466
+ if 'Y^' in df.columns:
 
 
2467
  label = 'predicted CPI/CPA labels (`Y^`)'
2468
  else:
2469
  return {analyze_btn: gr.Button(interactive=True),
 
2524
  y_colname = 'Y^'
2525
  if isinstance(task, str):
2526
  if task == 'Compound-Protein Interaction':
2527
+ y_colname = 'Y_prob'
2528
  elif task == 'Compound-Protein Binding Affinity':
2529
+ y_colname = 'Y_IC50'
2530
  try:
2531
  now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
2532
  filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"