DeepSEQreen_NAR_fb

Sleeping

App Files Files Community

libokj committed on Apr 18, 2024

Commit

4ca35f6

1 Parent(s): 396277e

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -20

app.py CHANGED Viewed

@@ -173,6 +173,10 @@ TASK_MAP = {
 # Lookup from a task code ('DTI' / 'DTA') to a short label ('AUROC' / 'CI');
 # presumably the evaluation metric associated with each task — confirm
 # against the code that reads this table.
 TASK_METRIC_MAP = {
     'DTI': 'AUROC',
     'DTA': 'CI',
 }
 PRESET_MAP = {
@@ -744,7 +748,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
     orig_df = orig_df.merge(df_training[['X1', 'X2', 'Y']], on=['X1', 'X2'], how='left', indicator=False)
     annotated_df = orig_df[~orig_df['Y'].isna()].copy()
     annotated_df.rename(columns={'Y': 'Y^'}, inplace=True)
-    annotated_df['Prediction Source'] = 'Training Data'
     # Resave the unannotated data
     unannotated_df = orig_df[orig_df['Y'].isna()].drop(['Y', 'Target Family'], axis=1)
     if not unannotated_df.empty:
@@ -775,7 +779,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
             predictions, _ = predict(cfg)
             predictions = pd.concat([pd.DataFrame(prediction) for prediction in predictions], ignore_index=True)
-            predictions['Prediction Source'] = f'{preset} ({target_family})'
             prediction_df = pd.concat([prediction_df, predictions])
         else:
@@ -811,7 +815,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
                 predictions, _ = predict(cfg)
                 predictions = pd.concat([pd.DataFrame(prediction) for prediction in predictions], ignore_index=True)
-                predictions['Prediction Source'] = f'{preset} ({family})'
                 prediction_df = pd.concat([prediction_df, predictions])
         prediction_df = prediction_df.merge(orig_df, on=['X1', 'X2'], how='left', indicator=False)
@@ -903,6 +907,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
             df['Scaffold'] = df['Compound'].parallel_apply(MurckoScaffold.GetScaffoldForMol)
             df['Scaffold SMILES'] = df['Scaffold'].parallel_apply(lambda x: Chem.MolToSmiles(x))
         # DF_FOR_REPORT = df.copy()
@@ -932,21 +940,19 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
     df_html = df.copy(deep=True)
     column_aliases = COLUMN_ALIASES.copy()
     cols_left = list(pd.Index(
-        ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']).intersection(df_html.columns))
     cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
     df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
     if isinstance(task, str):
         column_aliases.update({
-            'Y': 'Actual Interaction Probability' if task == 'Compound-Protein Interaction'
-            else 'Actual Binding Affinity pIC50 [nM]',
-            'Y^': 'Predicted Interaction Probability' if task == 'Compound-Protein Interaction'
-            else 'Predicted Binding Affinity (pIC50 [nM])'
         })
-    ascending = True if column_aliases['Y^'] == 'Predicted Binding Affinity' else False
     df_html = df_html.sort_values(
-        [col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
     )
     if not file:
@@ -1016,7 +1022,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
                 lambda x: wrap_text(x) if not pd.isna(x) else x)
         if 'Scaffold SMILES' in df_html.columns:
             df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
-        styled_df = df_html.style.format(precision=3)
         for i, col in enumerate(num_cols):
             if col in df_html.columns:
@@ -1457,7 +1463,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                         "Interaction prediction provides you binding probability score between the target of "
                         "interest and each compound in the library, "
                         "while affinity prediction directly estimates their binding strength measured using "
-                        "pIC<sub>50</sub> in units of nM."
                     )
                     drug_screen_task = gr.Dropdown(
                         list(TASK_MAP.keys()),
@@ -1561,7 +1567,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                         HelpTip(
                             "Interaction prediction provides you binding probability score between the target of "
                             "interest and each compound in the library, while affinity prediction directly "
-                            "estimates their binding strength measured using pIC<sub>50</sub> in units of nM."
                         )
                         target_identify_task = gr.Dropdown(
                             list(TASK_MAP.keys()),
@@ -1694,7 +1701,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                         "Interaction prediction provides you binding probability score "
                         "between the target of interest and each compound in the library, "
                         "while affinity prediction directly estimates their binding strength "
-                        "measured using pIC<sub>50</sub> in units of nM."
                     )
                     pair_infer_task = gr.Dropdown(
                         list(TASK_MAP.keys()),
@@ -2059,7 +2066,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
             gr.Warning('Please enter a valid SMILES for model recommendation.')
             return None
         if family == 'Family-Specific Auto-Recommendation':
-            return None
         if family == 'General':
             seen_compounds = pd.read_csv(
@@ -2456,9 +2463,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     def inquire_task(df):
-        if 'Y' in df.columns:
-            label = 'actual CPI/CPA labels (`Y`)'
-        elif 'Y^' in df.columns:
             label = 'predicted CPI/CPA labels (`Y^`)'
         else:
             return {analyze_btn: gr.Button(interactive=True),
@@ -2519,9 +2524,9 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
         y_colname = 'Y^'
         if isinstance(task, str):
             if task == 'Compound-Protein Interaction':
-                y_colname = 'Y^_prob'
             elif task == 'Compound-Protein Binding Affinity':
-                y_colname = 'Y^_pIC50'
         try:
             now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
             filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"

 # Lookup table keyed by task identifier. The 'DTI' / 'DTA' codes and the two
 # long-form task names map to 'AUROC' / 'CI' (presumably the evaluation metric
 # for each task — confirm against the code that reads this table).
 # NOTE(review): 'CPI' and 'CPA' map to the codes 'DTI' / 'DTA' rather than to
 # 'AUROC' / 'CI' like every other entry — confirm that callers resolve these
 # two keys with a second lookup; otherwise they look misplaced in this table.
 TASK_METRIC_MAP = {
     'DTI': 'AUROC',
     'DTA': 'CI',
+    'Compound-Protein Interaction': 'AUROC',
+    'Compound-Protein Binding Affinity': 'CI',
+    'CPI': 'DTI',
+    'CPA': 'DTA',
 }
 PRESET_MAP = {
     orig_df = orig_df.merge(df_training[['X1', 'X2', 'Y']], on=['X1', 'X2'], how='left', indicator=False)
     annotated_df = orig_df[~orig_df['Y'].isna()].copy()
     annotated_df.rename(columns={'Y': 'Y^'}, inplace=True)
+    annotated_df['Source'] = 'Database'
     # Resave the unannotated data
     unannotated_df = orig_df[orig_df['Y'].isna()].drop(['Y', 'Target Family'], axis=1)
     if not unannotated_df.empty:
             predictions, _ = predict(cfg)
             predictions = pd.concat([pd.DataFrame(prediction) for prediction in predictions], ignore_index=True)
+            predictions['Source'] = f'Predicted ({preset} {target_family})'
             prediction_df = pd.concat([prediction_df, predictions])
         else:
                 predictions, _ = predict(cfg)
                 predictions = pd.concat([pd.DataFrame(prediction) for prediction in predictions], ignore_index=True)
+                predictions['Source'] = f'Predicted ({preset} {family})'
                 prediction_df = pd.concat([prediction_df, predictions])
         prediction_df = prediction_df.merge(orig_df, on=['X1', 'X2'], how='left', indicator=False)
             df['Scaffold'] = df['Compound'].parallel_apply(MurckoScaffold.GetScaffoldForMol)
             df['Scaffold SMILES'] = df['Scaffold'].parallel_apply(lambda x: Chem.MolToSmiles(x))
+        if task == 'Compound-Protein Binding Affinity':
+            # Convert Y^ from pIC50 to IC50
+            if 'Y^' in df.columns:
+                df['Y^'] = 10 ** (-df['Y^'])
         # DF_FOR_REPORT = df.copy()
     df_html = df.copy(deep=True)
     column_aliases = COLUMN_ALIASES.copy()
     cols_left = list(pd.Index(
+        ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y^']).intersection(df_html.columns))
     cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
     df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
     if isinstance(task, str):
         column_aliases.update({
+            'Y^': 'Interaction Probability' if task == 'Compound-Protein Interaction'
+            else 'Binding Affinity (IC50 [nM])'
         })
+    ascending = True if column_aliases['Y^'] == 'Binding Affinity (IC50 [nM])' else False
     df_html = df_html.sort_values(
+        [col for col in ['Y^'] if col in df_html.columns], ascending=ascending
     )
     if not file:
                 lambda x: wrap_text(x) if not pd.isna(x) else x)
         if 'Scaffold SMILES' in df_html.columns:
             df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
+        styled_df = df_html.fillna('').style.format(precision=3)
         for i, col in enumerate(num_cols):
             if col in df_html.columns:
                         "Interaction prediction provides you binding probability score between the target of "
                         "interest and each compound in the library, "
                         "while affinity prediction directly estimates their binding strength measured using "
+                        "half maximal inhibitory concentration (IC<sub>50</sub>) in units of nM."
                     )
                     drug_screen_task = gr.Dropdown(
                         list(TASK_MAP.keys()),
                         HelpTip(
                             "Interaction prediction provides you binding probability score between the target of "
                             "interest and each compound in the library, while affinity prediction directly "
+                            "estimates their binding strength measured using "
+                            "half maximal inhibitory concentration (IC<sub>50</sub>) in units of nM."
                         )
                         target_identify_task = gr.Dropdown(
                             list(TASK_MAP.keys()),
                         "Interaction prediction provides you binding probability score "
                         "between the target of interest and each compound in the library, "
                         "while affinity prediction directly estimates their binding strength "
+                        "measured using half maximal inhibitory concentration (IC<sub>50</sub>) in units of nM."
                     )
                     pair_infer_task = gr.Dropdown(
                         list(TASK_MAP.keys()),
             gr.Warning('Please enter a valid SMILES for model recommendation.')
             return None
         if family == 'Family-Specific Auto-Recommendation':
+            return 'Family-Specific Auto-Recommendation'
         if family == 'General':
             seen_compounds = pd.read_csv(
     def inquire_task(df):
+        if 'Y^' in df.columns:
             label = 'predicted CPI/CPA labels (`Y^`)'
         else:
             return {analyze_btn: gr.Button(interactive=True),
         y_colname = 'Y^'
         if isinstance(task, str):
             if task == 'Compound-Protein Interaction':
+                y_colname = 'Y_prob'
             elif task == 'Compound-Protein Binding Affinity':
+                y_colname = 'Y_IC50'
         try:
             now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
             filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"