Update app.py
app.py CHANGED
@@ -199,7 +199,7 @@ def mol_to_pharm3d(mol, mode='html'):

     feats = FEAT_FACTORY.GetFeaturesForMol(mol)

-    view = View3DmolCell(width=
+    view = View3DmolCell(width=320, height=200)
     for feat in feats:
         pos = feat.GetPos()
         color = _featColors.get(feat.GetFamily(), (.5, .5, .5))
@@ -861,12 +861,12 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
         orig_df['Target Family'] = None
     if orig_df['Target Family'].isna().any():
         orig_df.loc[orig_df['Target Family'].isna(), 'Target Family'] = (
-            orig_df.loc[orig_df['Target Family'].isna(), 'X2'].
+            orig_df.loc[orig_df['Target Family'].isna(), 'X2'].parallel_apply(detect_family)
         )
     orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
     detect_family.cache_clear()

-    orig_df['X1^'] = orig_df['X1'].
+    orig_df['X1^'] = orig_df['X1'].parallel_apply(rdkit_canonicalize)

     orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
     annotated_df = orig_df[~orig_df['Y'].isna()].copy()
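This hunk, and most of the ones below, switch a pandas Series call to pandarallel's `parallel_apply` (the removed lines are cut off at the method name in this view). A minimal sketch of the pattern, assuming pandarallel is initialized once as the library documents; `detect_family_stub` is a placeholder, not the app's `detect_family`:

```python
import pandas as pd
from pandarallel import pandarallel

# pandarallel must be initialized once before parallel_apply is available.
pandarallel.initialize(progress_bar=True)

def detect_family_stub(fasta: str) -> str:
    # Placeholder for the app's cached detect_family(); always returns one label.
    return 'kinase'

df = pd.DataFrame({'X2': ['MKWVTFISLL', 'MVLSPADKTN']})
# Same call shape as the diff: Series.parallel_apply(func) fans the rows out to worker processes.
df['Target Family'] = df['X2'].parallel_apply(detect_family_stub)
print(df)
```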
@@ -979,7 +979,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
         if "Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target" in opts:
             x2 = prediction_df['X2'].iloc[0]
             pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
-            pos_compounds_df['FP'] = pos_compounds_df['X1'].
+            pos_compounds_df['FP'] = pos_compounds_df['X1'].parallel_apply(smiles_to_ecfp)

             @cache
             def max_sim(smiles):
@@ -988,13 +988,13 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
             prediction_df[[
                 'Max. Tanimoto Similarity to Known Ligands',
                 'Max. Sim. Ligand'
-            ]] = prediction_df['X1'].
+            ]] = prediction_df['X1'].parallel_apply(max_sim).apply(pd.Series)

             max_sim.cache_clear()

         if "Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound" in opts:
             x2 = prediction_df['X2'].iloc[0]
-            prediction_df['X1^'] = prediction_df['X1'].
+            prediction_df['X1^'] = prediction_df['X1'].parallel_apply(rdkit_canonicalize)

             @cache
             def max_id(compound):
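The `smiles_to_ecfp` helper and the body of `max_sim` are defined elsewhere in app.py and not shown in this diff; the sketch below only illustrates the kind of fingerprint/Tanimoto lookup they appear to perform (radius, bit size, and tie-breaking are assumptions), returning a `(similarity, ligand)` pair that `.apply(pd.Series)` then splits into two columns:

```python
from rdkit import Chem
from rdkit.Chem import AllChem, DataStructs

def smiles_to_ecfp_sketch(smiles, radius=2, n_bits=2048):
    # ECFP4-style Morgan bit vector; parameters are assumptions, not the app's.
    mol = Chem.MolFromSmiles(smiles)
    return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits) if mol else None

known_ligands = ['CCO', 'c1ccccc1O', 'CC(=O)Oc1ccccc1C(=O)O']   # toy "known ligands"
known_fps = [smiles_to_ecfp_sketch(s) for s in known_ligands]

def max_sim_sketch(smiles):
    # Return (max Tanimoto similarity, most similar known ligand).
    sims = DataStructs.BulkTanimotoSimilarity(smiles_to_ecfp_sketch(smiles), known_fps)
    best = max(range(len(sims)), key=sims.__getitem__)
    return sims[best], known_ligands[best]

print(max_sim_sketch('CC(=O)Nc1ccc(O)cc1'))  # paracetamol vs. the toy ligand list
```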
@@ -1003,7 +1003,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info

             prediction_df[['Max. Sequence Identity to Known Targets of Hit Compound',
                            'Max. Id. Target']] = (
-                prediction_df['X1^'].
+                prediction_df['X1^'].parallel_apply(max_id).apply(pd.Series)
             )
             prediction_df.drop(['X1^'], axis=1, inplace=True)

@@ -1012,7 +1012,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
         # Advanced options for Target Protein Identification
         if "Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set" in opts:
             x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
-            prediction_df['FP'] = prediction_df['X1'].
+            prediction_df['FP'] = prediction_df['X1'].parallel_apply(smiles_to_ecfp)

             prediction_df[[
                 'Max. Tanimoto Similarity to Training Compounds',
@@ -1030,7 +1030,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
             prediction_df[[
                 'Max. Sequence Identity to Known Targets of Input Compound',
                 'Max. Id. Target'
-            ]] = prediction_df['X2'].
+            ]] = prediction_df['X2'].parallel_apply(max_id).apply(pd.Series)

             max_id.cache_clear()

@@ -1046,7 +1046,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
             prediction_df[[
                 'Max. Tanimoto Similarity to Known Ligands of Identified Target',
                 'Max. Sim. Ligand'
-            ]] = prediction_df['X2'].
+            ]] = prediction_df['X2'].parallel_apply(max_sim).apply(pd.Series)

             max_sim.cache_clear()

@@ -1100,10 +1100,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):

     if 'X1' in df.columns:
         if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
-            df['Compound'] = df['X1'].
+            df['Compound'] = df['X1'].parallel_apply(
                 lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
-            df['Scaffold'] = df['Compound'].
-            df['Scaffold SMILES'] = df['Scaffold'].
+            df['Scaffold'] = df['Compound'].parallel_apply(MurckoScaffold.GetScaffoldForMol)
+            df['Scaffold SMILES'] = df['Scaffold'].parallel_apply(lambda x: Chem.MolToSmiles(x))
         df['Pharmacophore'] = None
     if task == 'Compound-Protein Binding Affinity':
         # Convert Y^ from pIC50 to IC50
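The three new `update_df` lines build a Compound, Scaffold, and Scaffold SMILES chain with RDKit. A standalone version using plain RDKit calls (the diff wraps the molecule with `PandasTools._MolPlusFingerprint`, omitted here):

```python
from rdkit import Chem
from rdkit.Chem.Scaffolds import MurckoScaffold

smiles = 'CC(=O)Oc1ccccc1C(=O)O'                    # aspirin
mol = Chem.MolFromSmiles(smiles)                    # "Compound"
scaffold = MurckoScaffold.GetScaffoldForMol(mol)    # "Scaffold": Bemis-Murcko core
print(Chem.MolToSmiles(scaffold))                   # "Scaffold SMILES" -> c1ccccc1
```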
@@ -1121,9 +1121,10 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t

 def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(track_tqdm=True)):
     df_html = df.copy(deep=True)
+    df_html.dropna(how='all', axis=1, inplace=True)
     column_aliases = COLUMN_ALIASES.copy()
     cols_left = list(pd.Index([
-
+        'ID1', 'ID2', 'Compound', 'Scaffold', 'Pharmacophore', 'X1', 'Scaffold SMILES', 'X2', 'Y^'
     ]).intersection(df_html.columns))
     # cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
     # df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
@@ -1151,17 +1152,17 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
     columns_unique = None

     if 'Exclude Pharmacophore 3D' not in opts:
-        df_html['Pharmacophore'] = df_html['Compound'].
+        df_html['Pharmacophore'] = df_html['Compound'].parallel_apply(
             lambda x: mol_to_pharm3d(x) if not pd.isna(x) else x)

     if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
-        df_html['Compound'] = df_html['Compound'].
+        df_html['Compound'] = df_html['Compound'].parallel_apply(
             lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
     else:
         df_html.drop(['Compound'], axis=1, inplace=True)

     if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
-        df_html['Scaffold'] = df_html['Scaffold'].
+        df_html['Scaffold'] = df_html['Scaffold'].parallel_apply(
             lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
     else:
         df_html.drop(['Scaffold'], axis=1, inplace=True)
@@ -1196,7 +1197,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
     df_html.rename(columns=column_aliases, inplace=True)
     df_html.index.name = 'Index'
     if 'Target FASTA' in df_html.columns:
-        df_html['Target FASTA'] = df_html['Target FASTA'].
+        df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
             lambda x: wrap_text(x) if not pd.isna(x) else x)

     num_cols = df_html.select_dtypes('number').columns
@@ -1207,8 +1208,6 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
     if columns_unique is not None:
         unique_df = df_html.loc[:, columns_unique].iloc[[0]].copy()
         df_html = df_html.loc[:, ~columns_unique]
-        df_html.dropna(how='all', axis=1, inplace=True)
-        unique_df.dropna(how='all', axis=1, inplace=True)

     if not file:
         if 'Compound ID' in df_html.columns:
@@ -1216,7 +1215,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
         if 'Target ID' in df_html.columns:
             df_html.drop(['Target FASTA'], axis=1, inplace=True)
         if 'Target FASTA' in df_html.columns:
-            df_html['Target FASTA'] = df_html['Target FASTA'].
+            df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
                 lambda x: wrap_text(x) if not pd.isna(x) else x)
         if 'Scaffold SMILES' in df_html.columns:
             df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
@@ -1300,10 +1299,9 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
     report_table = pn.widgets.Tabulator(
         df_html, formatters=formatters,
         frozen_columns=[
-            'Index', 'Target ID', 'Compound ID', 'Compound
+            'Index', 'Target ID', 'Compound ID', 'Compound'
         ],
-        disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30
-    )
+        disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)

     for i, col in enumerate(num_cols):
         cmap = sns.light_palette(num_col_colors[i], as_cmap=True)
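For reference, the Tabulator arguments touched here are standard Panel options: `frozen_columns` pins the listed columns during horizontal scrolling, `disabled=True` makes the table read-only, and `pagination='local'` with `page_size=30` paginates in the browser. A minimal sketch, independent of this app's dataframe and formatters:

```python
import pandas as pd
import panel as pn

pn.extension('tabulator')

df = pd.DataFrame({'Index': range(3), 'Compound ID': list('abc'), 'Score': [0.1, 0.5, 0.9]})

table = pn.widgets.Tabulator(
    df,
    frozen_columns=['Index', 'Compound ID'],   # stay visible while scrolling horizontally
    disabled=True,                             # view-only, no cell editing
    pagination='local', page_size=30,          # client-side pagination
)
table.servable()
```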
@@ -1340,7 +1338,6 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t

     .tabulator-cell {
         overflow: visible !important;
-        align-content: center !important;
     }

     .tabulator-cell:hover {
@@ -1384,7 +1381,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
         raw_css=[panel_css],
         js_files={'panel_custom': 'static/panel.js', '3Dmol': 'static/3Dmol-min.js'},
         # js_modules={'3Dmol': 'static/3Dmol-min.js'},
-        inline=True
+        inline=True
     )

     template = pn.template.VanillaTemplate(
@@ -1493,11 +1490,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
    df_report = df.copy()
    try:
        for filter_name in filter_list:
-            df_report[filter_name] = df_report['Compound'].
+            df_report[filter_name] = df_report['Compound'].parallel_apply(
                lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)

        for score_name in score_list:
-            df_report[score_name] = df_report['Compound'].
+            df_report[score_name] = df_report['Compound'].parallel_apply(
                lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)

        return (create_html_report(df_report, file=None, task=task), df_report,
@@ -2183,7 +2180,7 @@ higher similarities usually correspond to greater prediction confidence.<br>
        alignment = aligner.align(processed_fasta, query)
        return alignment.score / max(len(processed_fasta), len(query))

-    alignment_df['score'] = alignment_df['X2'].
+    alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
    row = alignment_df.loc[alignment_df['score'].idxmax()]
    family = str(row['Target Family']).title()
    return gr.Dropdown(value=family,
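`align_score` above divides the Biopython alignment score by the longer sequence length. A self-contained sketch with a default `PairwiseAligner` (the aligner's mode, substitution matrix, and gap penalties in app.py may be configured differently):

```python
from Bio import Align

aligner = Align.PairwiseAligner()   # defaults; app.py may configure this differently

def align_score_sketch(processed_fasta: str, query: str) -> float:
    # Same shape as the diff: score of the best alignment, normalized by the longer length.
    alignment = aligner.align(processed_fasta, query)
    return alignment.score / max(len(processed_fasta), len(query))

print(align_score_sketch('MKWVTFISLLFLFSSAYS', 'MKWVTFISLLLLFSSAYS'))
```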
@@ -2515,13 +2512,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
        infer_df = pd.read_csv(drug_target_pair_upload)
        validate_columns(infer_df, ['X1', 'X2'])

-        infer_df['X1_ERR'] = infer_df['X1'].
+        infer_df['X1_ERR'] = infer_df['X1'].parallel_apply(
            validate_seq_str, regex=SMILES_PAT)
        if not infer_df['X1_ERR'].isna().all():
            raise ValueError(
                f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")

-        infer_df['X2_ERR'] = infer_df['X2'].
+        infer_df['X2_ERR'] = infer_df['X2'].parallel_apply(
            validate_seq_str, regex=FASTA_PAT)
        if not infer_df['X2_ERR'].isna().all():
            raise ValueError(