Update app.py
app.py CHANGED
@@ -45,7 +45,7 @@ import panel as pn
 from apscheduler.schedulers.background import BackgroundScheduler
 from tinydb import TinyDB, Query

-
+import swifter
 from tqdm.auto import tqdm

 from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
@@ -160,7 +160,7 @@ visibility: hidden


 class View3DmolCell(py3Dmol.view):
-def __init__(self, width=
+def __init__(self, width=320, height=200):
 divid = "3dmolviewer_UNIQUEID"
 self.uniqueid = None
 if isinstance(width, int):
@@ -861,12 +861,12 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
 orig_df['Target Family'] = None
 if orig_df['Target Family'].isna().any():
 orig_df.loc[orig_df['Target Family'].isna(), 'Target Family'] = (
-orig_df.loc[orig_df['Target Family'].isna(), 'X2'].
+orig_df.loc[orig_df['Target Family'].isna(), 'X2'].swifter.apply(detect_family)
 )
 orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
 detect_family.cache_clear()

-orig_df['X1^'] = orig_df['X1'].
+orig_df['X1^'] = orig_df['X1'].swifter.apply(rdkit_canonicalize)

 orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
 annotated_df = orig_df[~orig_df['Y'].isna()].copy()
@@ -979,7 +979,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
 if "Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target" in opts:
 x2 = prediction_df['X2'].iloc[0]
 pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
-pos_compounds_df['FP'] = pos_compounds_df['X1'].
+pos_compounds_df['FP'] = pos_compounds_df['X1'].swifter.apply(smiles_to_ecfp)

 @cache
 def max_sim(smiles):
@@ -988,13 +988,13 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
 prediction_df[[
 'Max. Tanimoto Similarity to Known Ligands',
 'Max. Sim. Ligand'
-]] = prediction_df['X1'].
+]] = prediction_df['X1'].swifter.apply(max_sim).apply(pd.Series)

 max_sim.cache_clear()

 if "Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound" in opts:
 x2 = prediction_df['X2'].iloc[0]
-prediction_df['X1^'] = prediction_df['X1'].
+prediction_df['X1^'] = prediction_df['X1'].swifter.apply(rdkit_canonicalize)

 @cache
 def max_id(compound):
@@ -1003,7 +1003,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info

 prediction_df[['Max. Sequence Identity to Known Targets of Hit Compound',
 'Max. Id. Target']] = (
-prediction_df['X1^'].
+prediction_df['X1^'].swifter.apply(max_id).apply(pd.Series)
 )
 prediction_df.drop(['X1^'], axis=1, inplace=True)

@@ -1012,7 +1012,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
 # Advanced options for Target Protein Identification
 if "Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set" in opts:
 x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
-prediction_df['FP'] = prediction_df['X1'].
+prediction_df['FP'] = prediction_df['X1'].swifter.apply(smiles_to_ecfp)

 prediction_df[[
 'Max. Tanimoto Similarity to Training Compounds',
@@ -1030,7 +1030,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
 prediction_df[[
 'Max. Sequence Identity to Known Targets of Input Compound',
 'Max. Id. Target'
-]] = prediction_df['X2'].
+]] = prediction_df['X2'].swifter.apply(max_id).apply(pd.Series)

 max_id.cache_clear()

@@ -1046,7 +1046,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
 prediction_df[[
 'Max. Tanimoto Similarity to Known Ligands of Identified Target',
 'Max. Sim. Ligand'
-]] = prediction_df['X2'].
+]] = prediction_df['X2'].swifter.apply(max_sim).apply(pd.Series)

 max_sim.cache_clear()

@@ -1100,10 +1100,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):

 if 'X1' in df.columns:
 if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
-df['Compound'] = df['X1'].
+df['Compound'] = df['X1'].swifter.apply(
 lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
-df['Scaffold'] = df['Compound'].
-df['Scaffold SMILES'] = df['Scaffold'].
+df['Scaffold'] = df['Compound'].swifter.apply(MurckoScaffold.GetScaffoldForMol)
+df['Scaffold SMILES'] = df['Scaffold'].swifter.apply(lambda x: Chem.MolToSmiles(x))
 df['Pharmacophore'] = None
 if task == 'Compound-Protein Binding Affinity':
 # Convert Y^ from pIC50 to IC50
@@ -1121,10 +1121,9 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):

 def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(track_tqdm=True)):
 df_html = df.copy(deep=True)
-df_html.dropna(how='all', axis=1, inplace=True)
 column_aliases = COLUMN_ALIASES.copy()
 cols_left = list(pd.Index([
-
+'ID1', 'ID2', 'Compound', 'Scaffold', 'Pharmacophore', 'X1', 'Scaffold SMILES', 'X2', 'Y^'
 ]).intersection(df_html.columns))
 # cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
 # df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
@@ -1152,17 +1151,17 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
 columns_unique = None

 if 'Exclude Pharmacophore 3D' not in opts:
-df_html['Pharmacophore'] = df_html['Compound'].
+df_html['Pharmacophore'] = df_html['Compound'].swifter.apply(
 lambda x: mol_to_pharm3d(x) if not pd.isna(x) else x)

 if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
-df_html['Compound'] = df_html['Compound'].
+df_html['Compound'] = df_html['Compound'].swifter.apply(
 lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
 else:
 df_html.drop(['Compound'], axis=1, inplace=True)

 if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
-df_html['Scaffold'] = df_html['Scaffold'].
+df_html['Scaffold'] = df_html['Scaffold'].swifter.apply(
 lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
 else:
 df_html.drop(['Scaffold'], axis=1, inplace=True)
@@ -1197,7 +1196,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
 df_html.rename(columns=column_aliases, inplace=True)
 df_html.index.name = 'Index'
 if 'Target FASTA' in df_html.columns:
-df_html['Target FASTA'] = df_html['Target FASTA'].
+df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
 lambda x: wrap_text(x) if not pd.isna(x) else x)

 num_cols = df_html.select_dtypes('number').columns
@@ -1208,6 +1207,8 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
 if columns_unique is not None:
 unique_df = df_html.loc[:, columns_unique].iloc[[0]].copy()
 df_html = df_html.loc[:, ~columns_unique]
+df_html.dropna(how='all', axis=1, inplace=True)
+unique_df.dropna(how='all', axis=1, inplace=True)

 if not file:
 if 'Compound ID' in df_html.columns:
@@ -1215,11 +1216,17 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
 if 'Target ID' in df_html.columns:
 df_html.drop(['Target FASTA'], axis=1, inplace=True)
 if 'Target FASTA' in df_html.columns:
-df_html['Target FASTA'] = df_html['Target FASTA'].
+df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
 lambda x: wrap_text(x) if not pd.isna(x) else x)
 if 'Scaffold SMILES' in df_html.columns:
 df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)

+# FIXME: Temporarily drop pharmacophore column before an image solution is found
+if 'Pharmacophore' in df_html.columns:
+df_html.drop(['Pharmacophore'], axis=1, inplace=True)
+if unique_df is not None and 'Pharmacophore' in unique_df.columns:
+unique_df.drop(['Pharmacophore'], axis=1, inplace=True)
+
 styled_df = df_html.fillna('').style.format(precision=3)

 for i, col in enumerate(num_cols):
@@ -1293,9 +1300,10 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
 report_table = pn.widgets.Tabulator(
 df_html, formatters=formatters,
 frozen_columns=[
-'Index', 'Target ID', 'Compound ID', 'Compound'
+'Index', 'Target ID', 'Compound ID', 'Compound Name', 'Compound'
 ],
-disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30
+disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30
+)

 for i, col in enumerate(num_cols):
 cmap = sns.light_palette(num_col_colors[i], as_cmap=True)
@@ -1332,6 +1340,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t

 .tabulator-cell {
 overflow: visible !important;
+align-content: center !important;
 }

 .tabulator-cell:hover {
@@ -1375,7 +1384,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
 raw_css=[panel_css],
 js_files={'panel_custom': 'static/panel.js', '3Dmol': 'static/3Dmol-min.js'},
 # js_modules={'3Dmol': 'static/3Dmol-min.js'},
-inline=True
+inline=True,
 )

 template = pn.template.VanillaTemplate(
@@ -1484,11 +1493,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
 df_report = df.copy()
 try:
 for filter_name in filter_list:
-df_report[filter_name] = df_report['Compound'].
+df_report[filter_name] = df_report['Compound'].swifter.apply(
 lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)

 for score_name in score_list:
-df_report[score_name] = df_report['Compound'].
+df_report[score_name] = df_report['Compound'].swifter.apply(
 lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)

 return (create_html_report(df_report, file=None, task=task), df_report,
@@ -2174,7 +2183,7 @@ higher similarities usually correspond to greater prediction confidence.<br>
 alignment = aligner.align(processed_fasta, query)
 return alignment.score / max(len(processed_fasta), len(query))

-alignment_df['score'] = alignment_df['X2'].
+alignment_df['score'] = alignment_df['X2'].swifter.apply(align_score)
 row = alignment_df.loc[alignment_df['score'].idxmax()]
 family = str(row['Target Family']).title()
 return gr.Dropdown(value=family,
@@ -2506,13 +2515,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
 infer_df = pd.read_csv(drug_target_pair_upload)
 validate_columns(infer_df, ['X1', 'X2'])

-infer_df['X1_ERR'] = infer_df['X1'].
+infer_df['X1_ERR'] = infer_df['X1'].swifter.apply(
 validate_seq_str, regex=SMILES_PAT)
 if not infer_df['X1_ERR'].isna().all():
 raise ValueError(
 f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")

-infer_df['X2_ERR'] = infer_df['X2'].
+infer_df['X2_ERR'] = infer_df['X2'].swifter.apply(
 validate_seq_str, regex=FASTA_PAT)
 if not infer_df['X2_ERR'].isna().all():
 raise ValueError(
@@ -2818,12 +2827,16 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES

 csv_generate.click(
 lambda: gr.File(visible=True), outputs=csv_download_file,
-).then(
-
+).then(
+fn=create_csv_report_file, inputs=[report_df, file_for_report, report_task, csv_sep],
+outputs=csv_download_file, show_progress='full'
+)
 html_generate.click(
 lambda: gr.File(visible=True), outputs=html_download_file,
-).then(
-
+).then(
+fn=create_html_report_file, inputs=[report_df, file_for_report, report_task, html_opts],
+outputs=html_download_file, show_progress='full'
+)

 if __name__ == "__main__":
 pandarallel.initialize()
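This commit routes the app's per-row pandas operations through swifter.apply and adds the matching import swifter. A minimal sketch of that pattern, on hypothetical toy data rather than the app's real columns: importing swifter registers a .swifter accessor on pandas Series and DataFrames, and .swifter.apply() keeps the usual pandas apply call shape (extra keyword arguments pass through, as in the validate_seq_str, regex=SMILES_PAT calls above), so existing call sites only need the accessor inserted.

import pandas as pd
import swifter  # noqa: F401 -- imported for its side effect of registering the .swifter accessor

# Hypothetical toy column; the app's real X1 values are SMILES strings.
df = pd.DataFrame({"X1": ["CCO", "c1ccccc1", "CC(=O)O"]})

# Same call shape as df["X1"].apply(len); swifter estimates the cost and
# picks between a plain pandas apply and a parallel/vectorized path.
df["len"] = df["X1"].swifter.apply(len)
print(df)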
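The final hunk fills in the second step of the chained Gradio events that produce the CSV and HTML downloads. A minimal sketch of that click/then pattern with a hypothetical stand-in callback (not the app's real create_csv_report_file or create_html_report_file): the first callback only reveals the download widget, and .then() runs the slower file-producing callback afterwards, so the UI responds immediately while the file is still being built.

import gradio as gr

def make_file():
    # Hypothetical output path for the demo; returns a filepath for gr.File.
    path = "demo.txt"
    with open(path, "w") as f:
        f.write("generated after the widget became visible\n")
    return path

with gr.Blocks() as demo:
    generate = gr.Button("Generate file")
    download = gr.File(visible=False)

    # Step 1: make the download widget visible; step 2 (.then) builds the file.
    generate.click(
        lambda: gr.File(visible=True), outputs=download,
    ).then(
        fn=make_file, outputs=download, show_progress='full'
    )

if __name__ == "__main__":
    demo.launch()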