Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -45,7 +45,7 @@ import panel as pn
|
|
45 |
from apscheduler.schedulers.background import BackgroundScheduler
|
46 |
from tinydb import TinyDB, Query
|
47 |
|
48 |
-
|
49 |
from tqdm.auto import tqdm
|
50 |
|
51 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
@@ -156,7 +156,6 @@ display: inline-block !important;
|
|
156 |
footer {
|
157 |
visibility: hidden
|
158 |
}
|
159 |
-
|
160 |
"""
|
161 |
|
162 |
|
@@ -192,7 +191,11 @@ def rgb_to_hex(rgb):
|
|
192 |
def mol_to_pharm3d(mol, mode='html'):
|
193 |
if mol is None:
|
194 |
return
|
195 |
-
AllChem.Compute2DCoords(mol)
|
|
|
|
|
|
|
|
|
196 |
|
197 |
feats = FEAT_FACTORY.GetFeaturesForMol(mol)
|
198 |
|
@@ -291,13 +294,23 @@ COLUMN_ALIASES = {
|
|
291 |
}
|
292 |
|
293 |
DRUG_SCRENN_CPI_OPTS = [
|
294 |
-
'
|
295 |
-
'
|
296 |
-
'
|
297 |
]
|
298 |
|
299 |
DRUG_SCRENN_CPA_OPTS = [
|
300 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
]
|
302 |
|
303 |
pd.set_option('display.float_format', '{:.3f}'.format)
|
@@ -383,6 +396,13 @@ def max_tanimoto_similarity(smi, seen_smiles_with_fp):
|
|
383 |
return {'Max. Tanimoto Similarity': sims[idx], 'Max. Tanimoto Similarity Compound': compound}
|
384 |
|
385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
386 |
def max_sequence_identity(seq, seen_fastas):
|
387 |
if seq is None or seen_fastas is None or seen_fastas.empty:
|
388 |
return {'Max. Sequence Identity': 0, 'Max. Sequence Identity Target': None}
|
@@ -395,20 +415,12 @@ def max_sequence_identity(seq, seen_fastas):
|
|
395 |
target = id2
|
396 |
return {'Max. Sequence Identity': 1, 'Max. Sequence Identity Target': target}
|
397 |
|
398 |
-
|
399 |
-
aligner.mode = 'local'
|
400 |
max_iden = 0
|
401 |
target = None
|
402 |
for fasta in seen_fastas['X2'].values:
|
403 |
-
|
404 |
-
|
405 |
-
if identity == 1:
|
406 |
-
target = fasta
|
407 |
-
if 'ID2' in seen_fastas.columns:
|
408 |
-
id2 = seen_fastas.loc[seen_fastas['X2'] == fasta, 'ID2'].values[0]
|
409 |
-
if pd.notnull(id2) and id2 != '':
|
410 |
-
target = id2
|
411 |
-
return {'Max. Sequence Identity': 1, 'Max. Sequence Identity Target': target}
|
412 |
if identity > max_iden:
|
413 |
max_iden = identity
|
414 |
target = fasta
|
@@ -416,7 +428,10 @@ def max_sequence_identity(seq, seen_fastas):
|
|
416 |
id2 = seen_fastas.loc[seen_fastas['X2'] == fasta, 'ID2'].values[0]
|
417 |
if pd.notnull(id2) and id2 != '':
|
418 |
target = id2
|
|
|
|
|
419 |
|
|
|
420 |
return {'Max. Sequence Identity': max_iden, 'Max. Sequence Identity Target': target}
|
421 |
|
422 |
|
@@ -846,12 +861,12 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
|
|
846 |
orig_df['Target Family'] = None
|
847 |
if orig_df['Target Family'].isna().any():
|
848 |
orig_df.loc[orig_df['Target Family'].isna(), 'Target Family'] = (
|
849 |
-
orig_df.loc[orig_df['Target Family'].isna(), 'X2'].
|
850 |
)
|
851 |
orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
|
852 |
detect_family.cache_clear()
|
853 |
|
854 |
-
orig_df['X1^'] = orig_df['X1'].
|
855 |
|
856 |
orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
|
857 |
annotated_df = orig_df[~orig_df['Y'].isna()].copy()
|
@@ -952,66 +967,88 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
|
|
952 |
df_list = [prediction_df, annotated_df]
|
953 |
prediction_df = pd.concat([df for df in df_list if not df.empty], ignore_index=True)
|
954 |
|
955 |
-
|
956 |
-
|
957 |
-
|
958 |
-
family_smiles_df['FP'] = family_smiles_df['X1'].parallel_apply(smiles_to_ecfp)
|
959 |
-
|
960 |
-
@cache
|
961 |
-
def max_sim(smi):
|
962 |
-
return max_tanimoto_similarity(smi, family_smiles_df)['Max. Tanimoto Similarity']
|
963 |
|
964 |
-
|
965 |
-
|
966 |
-
|
967 |
-
|
968 |
-
max_sim.cache_clear()
|
969 |
|
970 |
-
if "
|
971 |
x2 = prediction_df['X2'].iloc[0]
|
972 |
pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
|
973 |
-
pos_compounds_df['FP'] = pos_compounds_df['X1'].apply(smiles_to_ecfp)
|
974 |
|
975 |
@cache
|
976 |
def max_sim(smiles):
|
977 |
return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
|
978 |
|
979 |
-
prediction_df[[
|
980 |
-
|
981 |
-
|
982 |
-
)
|
|
|
983 |
max_sim.cache_clear()
|
984 |
|
985 |
-
if "
|
986 |
x2 = prediction_df['X2'].iloc[0]
|
987 |
-
prediction_df['X1^'] = prediction_df['X1'].
|
988 |
|
989 |
@cache
|
990 |
-
def
|
991 |
-
|
992 |
-
return max_sequence_identity(x2, seen_fastas=
|
993 |
|
994 |
-
prediction_df[['Max. Sequence Identity to Known
|
995 |
-
'Max.
|
996 |
-
prediction_df['X1^'].
|
997 |
)
|
998 |
prediction_df.drop(['X1^'], axis=1, inplace=True)
|
999 |
|
1000 |
-
|
1001 |
|
1002 |
-
|
1003 |
-
|
1004 |
-
|
|
|
1005 |
|
1006 |
-
|
1007 |
-
|
1008 |
-
|
|
|
1009 |
|
1010 |
-
|
1011 |
-
|
1012 |
-
|
1013 |
-
|
1014 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1015 |
|
1016 |
prediction_df.drop(['N'], axis=1).to_csv(predictions_file, index=False, na_rep='')
|
1017 |
status = "COMPLETED"
|
@@ -1063,10 +1100,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
1063 |
|
1064 |
if 'X1' in df.columns:
|
1065 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
1066 |
-
df['Compound'] = df['X1'].
|
1067 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
1068 |
-
df['Scaffold'] = df['Compound'].
|
1069 |
-
df['Scaffold SMILES'] = df['Scaffold'].
|
1070 |
|
1071 |
if task == 'Compound-Protein Binding Affinity':
|
1072 |
# Convert Y^ from pIC50 to IC50
|
@@ -1114,17 +1151,17 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1114 |
columns_unique = None
|
1115 |
|
1116 |
if 'Exclude Pharmacophore 3D' not in opts:
|
1117 |
-
df_html['Pharmacophore'] = df_html['Compound'].
|
1118 |
lambda x: mol_to_pharm3d(x) if not pd.isna(x) else x)
|
1119 |
|
1120 |
if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
|
1121 |
-
df_html['Compound'] = df_html['Compound'].
|
1122 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
1123 |
else:
|
1124 |
df_html.drop(['Compound'], axis=1, inplace=True)
|
1125 |
|
1126 |
if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
|
1127 |
-
df_html['Scaffold'] = df_html['Scaffold'].
|
1128 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
1129 |
else:
|
1130 |
df_html.drop(['Scaffold'], axis=1, inplace=True)
|
@@ -1138,15 +1175,20 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1138 |
if any(col in df_html.columns for col in ['Y^', 'Y']):
|
1139 |
job = 'Target Protein Identification'
|
1140 |
category = 'Target Family'
|
1141 |
-
columns_unique = df_html.columns.isin(
|
1142 |
-
|
|
|
|
|
|
|
1143 |
|
1144 |
elif n_compound >= 2 and n_protein == 1:
|
1145 |
unique_entity = 'Target of Interest'
|
1146 |
if any(col in df_html.columns for col in ['Y^', 'Y']):
|
1147 |
job = 'Drug Hit Screening'
|
1148 |
category = 'Scaffold SMILES'
|
1149 |
-
columns_unique = df_html.columns.isin(
|
|
|
|
|
1150 |
|
1151 |
elif 'Y^' in df_html.columns:
|
1152 |
job = 'Interaction Pair Inference'
|
@@ -1154,7 +1196,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1154 |
df_html.rename(columns=column_aliases, inplace=True)
|
1155 |
df_html.index.name = 'Index'
|
1156 |
if 'Target FASTA' in df_html.columns:
|
1157 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].
|
1158 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
1159 |
|
1160 |
num_cols = df_html.select_dtypes('number').columns
|
@@ -1172,7 +1214,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1172 |
if 'Target ID' in df_html.columns:
|
1173 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
1174 |
if 'Target FASTA' in df_html.columns:
|
1175 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].
|
1176 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
1177 |
if 'Scaffold SMILES' in df_html.columns:
|
1178 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
@@ -1248,9 +1290,9 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1248 |
|
1249 |
report_table = pn.widgets.Tabulator(
|
1250 |
df_html, formatters=formatters,
|
1251 |
-
frozen_columns=[
|
1252 |
-
'Target ID', 'Compound ID', 'Compound'
|
1253 |
-
]
|
1254 |
disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
|
1255 |
|
1256 |
for i, col in enumerate(num_cols):
|
@@ -1279,71 +1321,15 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1279 |
# Remove keys with empty values
|
1280 |
pie_charts = {k: v for k, v in pie_charts.items() if any(v)}
|
1281 |
|
1282 |
-
|
1283 |
-
|
1284 |
-
|
1285 |
-
|
1286 |
-
|
1287 |
-
|
1288 |
-
|
1289 |
-
|
1290 |
-
|
1291 |
-
}
|
1292 |
-
|
1293 |
-
.tabulator-cell:hover {
|
1294 |
-
z-index: 1000 !important;
|
1295 |
-
}
|
1296 |
-
|
1297 |
-
.tabulator-cell.tabulator-frozen:hover {
|
1298 |
-
z-index: 1000 !important;
|
1299 |
-
}
|
1300 |
-
|
1301 |
-
.image-zoom-viewer {
|
1302 |
-
display: inline-block;
|
1303 |
-
overflow: visible;
|
1304 |
-
z-index: 1000;
|
1305 |
-
}
|
1306 |
-
|
1307 |
-
.image-zoom-viewer::after {
|
1308 |
-
content: "";
|
1309 |
-
top: 0;
|
1310 |
-
left: 0;
|
1311 |
-
width: 100%;
|
1312 |
-
height: 100%;
|
1313 |
-
pointer-events: none;
|
1314 |
-
}
|
1315 |
-
|
1316 |
-
.image-zoom-viewer:hover::after {
|
1317 |
-
pointer-events: all;
|
1318 |
-
}
|
1319 |
-
|
1320 |
-
/* When hovering over the container, scale its child (the SVG) */
|
1321 |
-
.tabulator-cell:hover .image-zoom-viewer svg {
|
1322 |
-
padding: 3px;
|
1323 |
-
position: absolute;
|
1324 |
-
background-color: rgba(250, 250, 250, 0.854);
|
1325 |
-
box-shadow: 0 0 10px rgba(0, 0, 0, 0.618);
|
1326 |
-
border-radius: 3px;
|
1327 |
-
transform: scale(3); /* Scale up the SVG */
|
1328 |
-
transition: transform 0.3s ease;
|
1329 |
-
pointer-events: none; /* Prevents the SVG from blocking mouse interactions */
|
1330 |
-
z-index: 1000;
|
1331 |
-
}
|
1332 |
-
|
1333 |
-
.image-zoom-viewer svg {
|
1334 |
-
display: block; /* SVG is a block-level element for proper scaling */
|
1335 |
-
z-index: 1000;
|
1336 |
-
}
|
1337 |
-
|
1338 |
-
.image-zoom-viewer:hover {
|
1339 |
-
z-index: 1000;
|
1340 |
-
}
|
1341 |
-
"""
|
1342 |
-
|
1343 |
-
pn.extension(raw_css=[pn_css], js_files={
|
1344 |
-
'3Dmol': './3Dmol-min.js',
|
1345 |
-
'panel_custom': './panel.js',
|
1346 |
-
})
|
1347 |
|
1348 |
template = pn.template.VanillaTemplate(
|
1349 |
title=f'DeepSEQreen {job} Report',
|
@@ -1359,7 +1345,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1359 |
if unique_df is not None:
|
1360 |
unique_table = pn.widgets.Tabulator(unique_df, formatters=formatters, sizing_mode='stretch_width',
|
1361 |
show_index=False, disabled=True,
|
1362 |
-
frozen_columns=['Compound ID', 'Compound', '
|
1363 |
# if pie_charts:
|
1364 |
# unique_table.width = 640
|
1365 |
stats_pane.append(pn.Column(f'### {unique_entity}', unique_table))
|
@@ -1451,11 +1437,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
|
|
1451 |
df_report = df.copy()
|
1452 |
try:
|
1453 |
for filter_name in filter_list:
|
1454 |
-
df_report[filter_name] = df_report['Compound'].
|
1455 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
1456 |
|
1457 |
for score_name in score_list:
|
1458 |
-
df_report[score_name] = df_report['Compound'].
|
1459 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
1460 |
|
1461 |
return (create_html_report(df_report, file=None, task=task), df_report,
|
@@ -1667,16 +1653,25 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1667 |
label='OR Upload Your Own Library', variant='primary')
|
1668 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
1669 |
|
1670 |
-
|
1671 |
-
|
1672 |
-
|
1673 |
-
|
1674 |
-
|
1675 |
-
|
1676 |
-
|
1677 |
-
|
1678 |
-
|
1679 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1680 |
with gr.Row():
|
1681 |
with gr.Column():
|
1682 |
drug_screen_email = gr.Textbox(
|
@@ -1777,14 +1772,24 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1777 |
target_library_upload_btn = gr.UploadButton(
|
1778 |
label='OR Upload Your Own Library', variant='primary')
|
1779 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
1780 |
-
|
1781 |
-
|
1782 |
-
|
1783 |
-
|
1784 |
-
|
1785 |
-
|
1786 |
-
|
1787 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1788 |
with gr.Row():
|
1789 |
with gr.Column():
|
1790 |
target_identify_email = gr.Textbox(
|
@@ -1823,9 +1828,11 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1823 |
label='Step 1. Select Pair Input Type and Input',
|
1824 |
value='Upload a CSV file containing paired compound-protein data')
|
1825 |
with gr.Column() as pair_upload:
|
1826 |
-
gr.File(
|
1827 |
-
|
1828 |
-
|
|
|
|
|
1829 |
with gr.Row():
|
1830 |
infer_csv_prompt = gr.Button(
|
1831 |
value="Upload Your Own Dataset Below",
|
@@ -1833,27 +1840,50 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1833 |
with gr.Column():
|
1834 |
infer_pair = gr.File(
|
1835 |
label='Upload CSV File Containing Paired Records',
|
1836 |
-
file_count="single",
|
|
|
|
|
|
|
1837 |
with gr.Column(visible=False) as pair_generate:
|
1838 |
with gr.Row():
|
1839 |
-
gr.File(
|
1840 |
-
|
1841 |
-
|
1842 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1843 |
with gr.Row():
|
1844 |
-
gr.File(
|
1845 |
-
|
1846 |
-
|
1847 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1848 |
with gr.Row():
|
1849 |
infer_library_prompt = gr.Button(
|
1850 |
value="Upload Your Own Libraries Below",
|
1851 |
-
visible=False,
|
|
|
|
|
1852 |
with gr.Row():
|
1853 |
-
infer_drug = gr.File(
|
1854 |
-
|
1855 |
-
|
1856 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1857 |
|
1858 |
with gr.Row():
|
1859 |
with gr.Column(min_width=200):
|
@@ -1862,10 +1892,12 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1862 |
"If the proteins in the target library of interest "
|
1863 |
"all belong to the same protein family, manually selecting the family is supported."
|
1864 |
)
|
|
|
1865 |
pair_infer_target_family = gr.Dropdown(
|
1866 |
choices=list(TARGET_FAMILY_MAP.keys()),
|
1867 |
value='General',
|
1868 |
-
label='Step 2. Select Target Family (Optional)'
|
|
|
1869 |
|
1870 |
with gr.Column(min_width=200):
|
1871 |
HelpTip(
|
@@ -1877,15 +1909,17 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1877 |
pair_infer_task = gr.Dropdown(
|
1878 |
list(TASK_MAP.keys()),
|
1879 |
label='Step 3. Select a Prediction Task',
|
1880 |
-
value='Compound-Protein Interaction'
|
|
|
1881 |
|
1882 |
with gr.Column(min_width=200):
|
1883 |
-
HelpTip(
|
1884 |
-
|
1885 |
-
|
1886 |
pair_infer_preset = gr.Dropdown(
|
1887 |
list(PRESET_MAP.keys()),
|
1888 |
-
label='Step 4. Select a Preset Model'
|
|
|
1889 |
# infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
|
1890 |
# variant='primary')
|
1891 |
pair_infer_opts = gr.CheckboxGroup(visible=False)
|
@@ -2093,7 +2127,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
2093 |
alignment = aligner.align(processed_fasta, query)
|
2094 |
return alignment.score / max(len(processed_fasta), len(query))
|
2095 |
|
2096 |
-
alignment_df['score'] = alignment_df['X2'].
|
2097 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
2098 |
family = str(row['Target Family']).title()
|
2099 |
return gr.Dropdown(value=family,
|
@@ -2119,6 +2153,12 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
2119 |
show_progress='hidden'
|
2120 |
)
|
2121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
2122 |
|
2123 |
def example_fill(input_type):
|
2124 |
return {target_id: 'Q16539',
|
@@ -2419,13 +2459,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2419 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
2420 |
validate_columns(infer_df, ['X1', 'X2'])
|
2421 |
|
2422 |
-
infer_df['X1_ERR'] = infer_df['X1'].
|
2423 |
validate_seq_str, regex=SMILES_PAT)
|
2424 |
if not infer_df['X1_ERR'].isna().all():
|
2425 |
raise ValueError(
|
2426 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
2427 |
|
2428 |
-
infer_df['X2_ERR'] = infer_df['X2'].
|
2429 |
validate_seq_str, regex=FASTA_PAT)
|
2430 |
if not infer_df['X2_ERR'].isna().all():
|
2431 |
raise ValueError(
|
@@ -2757,7 +2797,7 @@ if __name__ == "__main__":
|
|
2757 |
db.update({'status': 'FAILED'}, Job.id == job['id'])
|
2758 |
|
2759 |
scheduler = BackgroundScheduler()
|
2760 |
-
scheduler.add_job(check_expiry, 'interval', hours=1)
|
2761 |
scheduler.start()
|
2762 |
|
2763 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|
|
|
45 |
from apscheduler.schedulers.background import BackgroundScheduler
|
46 |
from tinydb import TinyDB, Query
|
47 |
|
48 |
+
import swifter
|
49 |
from tqdm.auto import tqdm
|
50 |
|
51 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
|
|
156 |
footer {
|
157 |
visibility: hidden
|
158 |
}
|
|
|
159 |
"""
|
160 |
|
161 |
|
|
|
191 |
def mol_to_pharm3d(mol, mode='html'):
|
192 |
if mol is None:
|
193 |
return
|
194 |
+
# AllChem.Compute2DCoords(mol)
|
195 |
+
mol = Chem.AddHs(mol)
|
196 |
+
params = AllChem.ETKDGv3()
|
197 |
+
params.randomSeed = 0xf00d # for reproducibility
|
198 |
+
AllChem.EmbedMolecule(mol, params)
|
199 |
|
200 |
feats = FEAT_FACTORY.GetFeaturesForMol(mol)
|
201 |
|
|
|
294 |
}
|
295 |
|
296 |
DRUG_SCRENN_CPI_OPTS = [
|
297 |
+
'Calculate Max. Sequence Identity between the Input Target and Targets in the Training Set',
|
298 |
+
'Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target',
|
299 |
+
'Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound',
|
300 |
]
|
301 |
|
302 |
DRUG_SCRENN_CPA_OPTS = [
|
303 |
+
'Calculate Max. Sequence Identity between the Input Target and Targets in the Training Set',
|
304 |
+
]
|
305 |
+
|
306 |
+
TARGET_IDENTIFY_CPI_OPTS = [
|
307 |
+
'Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set',
|
308 |
+
'Calculate Max. Sequence Identity between the Identified Target and Known Targets of the Input Compound',
|
309 |
+
'Calculate Max. Tanimoto Similarity between the Input Compound and Known Ligands of the Identified Target',
|
310 |
+
]
|
311 |
+
|
312 |
+
TARGET_IDENTIFY_CPA_OPTS = [
|
313 |
+
'Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set',
|
314 |
]
|
315 |
|
316 |
pd.set_option('display.float_format', '{:.3f}'.format)
|
|
|
396 |
return {'Max. Tanimoto Similarity': sims[idx], 'Max. Tanimoto Similarity Compound': compound}
|
397 |
|
398 |
|
399 |
+
def alignment_score(query, target):
|
400 |
+
aligner = PairwiseAligner()
|
401 |
+
aligner.mode = 'local'
|
402 |
+
alignment = aligner.align(query, target)
|
403 |
+
return alignment.score / max(len(query), len(target))
|
404 |
+
|
405 |
+
|
406 |
def max_sequence_identity(seq, seen_fastas):
|
407 |
if seq is None or seen_fastas is None or seen_fastas.empty:
|
408 |
return {'Max. Sequence Identity': 0, 'Max. Sequence Identity Target': None}
|
|
|
415 |
target = id2
|
416 |
return {'Max. Sequence Identity': 1, 'Max. Sequence Identity Target': target}
|
417 |
|
418 |
+
cached_alignment_score = cache(alignment_score)
|
|
|
419 |
max_iden = 0
|
420 |
target = None
|
421 |
for fasta in seen_fastas['X2'].values:
|
422 |
+
identity = cached_alignment_score(seq, fasta)
|
423 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
if identity > max_iden:
|
425 |
max_iden = identity
|
426 |
target = fasta
|
|
|
428 |
id2 = seen_fastas.loc[seen_fastas['X2'] == fasta, 'ID2'].values[0]
|
429 |
if pd.notnull(id2) and id2 != '':
|
430 |
target = id2
|
431 |
+
if max_iden == 1:
|
432 |
+
break
|
433 |
|
434 |
+
cached_alignment_score.cache_clear()
|
435 |
return {'Max. Sequence Identity': max_iden, 'Max. Sequence Identity Target': target}
|
436 |
|
437 |
|
|
|
861 |
orig_df['Target Family'] = None
|
862 |
if orig_df['Target Family'].isna().any():
|
863 |
orig_df.loc[orig_df['Target Family'].isna(), 'Target Family'] = (
|
864 |
+
orig_df.loc[orig_df['Target Family'].isna(), 'X2'].swifter.apply(detect_family)
|
865 |
)
|
866 |
orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
|
867 |
detect_family.cache_clear()
|
868 |
|
869 |
+
orig_df['X1^'] = orig_df['X1'].swifter.apply(rdkit_canonicalize)
|
870 |
|
871 |
orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
|
872 |
annotated_df = orig_df[~orig_df['Y'].isna()].copy()
|
|
|
967 |
df_list = [prediction_df, annotated_df]
|
968 |
prediction_df = pd.concat([df for df in df_list if not df.empty], ignore_index=True)
|
969 |
|
970 |
+
# Advanced options for Drug Hit Screening
|
971 |
+
if "Calculate Max. Sequence Identity between the Input Target and Targets in the Training Set" in opts:
|
972 |
+
x2 = prediction_df['X2'].iloc[0]
|
|
|
|
|
|
|
|
|
|
|
973 |
|
974 |
+
prediction_df[[
|
975 |
+
'Max. Sequence Identity to Training Targets',
|
976 |
+
'Max. Id. Training Target'
|
977 |
+
]] = pd.Series(max_sequence_identity(x2, df_training))
|
|
|
978 |
|
979 |
+
if "Calculate Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target" in opts:
|
980 |
x2 = prediction_df['X2'].iloc[0]
|
981 |
pos_compounds_df = df_training.loc[(df_training['X2'] == x2) & (df_training['Y'] == 1)].copy()
|
982 |
+
pos_compounds_df['FP'] = pos_compounds_df['X1'].swifter.apply(smiles_to_ecfp)
|
983 |
|
984 |
@cache
|
985 |
def max_sim(smiles):
|
986 |
return max_tanimoto_similarity(smiles, seen_smiles_with_fp=pos_compounds_df)
|
987 |
|
988 |
+
prediction_df[[
|
989 |
+
'Max. Tanimoto Similarity to Known Ligands',
|
990 |
+
'Max. Sim. Ligand'
|
991 |
+
]] = prediction_df['X1'].swifter.apply(max_sim).apply(pd.Series)
|
992 |
+
|
993 |
max_sim.cache_clear()
|
994 |
|
995 |
+
if "Calculate Max. Sequence Identity between the Input Target and Known Targets of Hit Compound" in opts:
|
996 |
x2 = prediction_df['X2'].iloc[0]
|
997 |
+
prediction_df['X1^'] = prediction_df['X1'].swifter.apply(rdkit_canonicalize)
|
998 |
|
999 |
@cache
|
1000 |
+
def max_id(compound):
|
1001 |
+
pos_targets_df = df_training.loc[df_training['X1'] == compound]
|
1002 |
+
return max_sequence_identity(x2, seen_fastas=pos_targets_df)
|
1003 |
|
1004 |
+
prediction_df[['Max. Sequence Identity to Known Targets of Hit Compound',
|
1005 |
+
'Max. Id. Target']] = (
|
1006 |
+
prediction_df['X1^'].swifter.apply(max_id).apply(pd.Series)
|
1007 |
)
|
1008 |
prediction_df.drop(['X1^'], axis=1, inplace=True)
|
1009 |
|
1010 |
+
max_id.cache_clear()
|
1011 |
|
1012 |
+
# Advanced options for Target Protein Identification
|
1013 |
+
if "Calculate Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set" in opts:
|
1014 |
+
x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
|
1015 |
+
prediction_df['FP'] = prediction_df['X1'].swifter.apply(smiles_to_ecfp)
|
1016 |
|
1017 |
+
prediction_df[[
|
1018 |
+
'Max. Tanimoto Similarity to Training Compounds',
|
1019 |
+
'Max. Sim. Training Compound'
|
1020 |
+
]] = pd.Series(max_tanimoto_similarity(x1, df_training))
|
1021 |
|
1022 |
+
if "Calculate Max. Sequence Identity between the Identified Target and Known Targets of the Input Compound" in opts:
|
1023 |
+
x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
|
1024 |
+
pos_targets_df = df_training.loc[(df_training['X1'] == x1) & (df_training['Y'] == 1)].copy()
|
1025 |
+
|
1026 |
+
@cache
|
1027 |
+
def max_id(fasta):
|
1028 |
+
return max_sequence_identity(fasta, seen_fastas=pos_targets_df)
|
1029 |
+
|
1030 |
+
prediction_df[[
|
1031 |
+
'Max. Sequence Identity to Known Targets of Input Compound',
|
1032 |
+
'Max. Id. Target'
|
1033 |
+
]] = prediction_df['X2'].swifter.apply(max_id).apply(pd.Series)
|
1034 |
+
|
1035 |
+
max_id.cache_clear()
|
1036 |
+
|
1037 |
+
if "Calculate Max. Tanimoto Similarity between the Input Compound and Known Ligands of the Identified Target" in opts:
|
1038 |
+
x1 = rdkit_canonicalize(prediction_df['X1'].iloc[0])
|
1039 |
+
|
1040 |
+
@cache
|
1041 |
+
def max_sim(fasta):
|
1042 |
+
pos_targets_df = df_training.loc[(df_training['X2'] == fasta) & (df_training['Y'] == 1)].copy()
|
1043 |
+
pos_targets_df['FP'] = pos_targets_df['X1'].swifter.apply(smiles_to_ecfp)
|
1044 |
+
return max_tanimoto_similarity(x1, seen_smiles_with_fp=pos_targets_df)
|
1045 |
+
|
1046 |
+
prediction_df[[
|
1047 |
+
'Max. Tanimoto Similarity to Known Ligands of Identified Target',
|
1048 |
+
'Max. Sim. Ligand'
|
1049 |
+
]] = prediction_df['X2'].swifter.apply(max_sim).apply(pd.Series)
|
1050 |
+
|
1051 |
+
max_sim.cache_clear()
|
1052 |
|
1053 |
prediction_df.drop(['N'], axis=1).to_csv(predictions_file, index=False, na_rep='')
|
1054 |
status = "COMPLETED"
|
|
|
1100 |
|
1101 |
if 'X1' in df.columns:
|
1102 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
1103 |
+
df['Compound'] = df['X1'].swifter.apply(
|
1104 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
1105 |
+
df['Scaffold'] = df['Compound'].swifter.apply(MurckoScaffold.GetScaffoldForMol)
|
1106 |
+
df['Scaffold SMILES'] = df['Scaffold'].swifter.apply(lambda x: Chem.MolToSmiles(x))
|
1107 |
|
1108 |
if task == 'Compound-Protein Binding Affinity':
|
1109 |
# Convert Y^ from pIC50 to IC50
|
|
|
1151 |
columns_unique = None
|
1152 |
|
1153 |
if 'Exclude Pharmacophore 3D' not in opts:
|
1154 |
+
df_html['Pharmacophore'] = df_html['Compound'].swifter.apply(
|
1155 |
lambda x: mol_to_pharm3d(x) if not pd.isna(x) else x)
|
1156 |
|
1157 |
if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
|
1158 |
+
df_html['Compound'] = df_html['Compound'].swifter.apply(
|
1159 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
1160 |
else:
|
1161 |
df_html.drop(['Compound'], axis=1, inplace=True)
|
1162 |
|
1163 |
if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
|
1164 |
+
df_html['Scaffold'] = df_html['Scaffold'].swifter.apply(
|
1165 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
1166 |
else:
|
1167 |
df_html.drop(['Scaffold'], axis=1, inplace=True)
|
|
|
1175 |
if any(col in df_html.columns for col in ['Y^', 'Y']):
|
1176 |
job = 'Target Protein Identification'
|
1177 |
category = 'Target Family'
|
1178 |
+
columns_unique = df_html.columns.isin(
|
1179 |
+
['ID1', 'Pharmacophore', 'Compound', 'Scaffold', 'X1', 'Scaffold SMILES',
|
1180 |
+
'Max. Tanimoto Similarity to Training Compounds', 'Max. Sim. Training Compound']
|
1181 |
+
+ list(FILTER_MAP.keys()) + list(SCORE_MAP.keys())
|
1182 |
+
)
|
1183 |
|
1184 |
elif n_compound >= 2 and n_protein == 1:
|
1185 |
unique_entity = 'Target of Interest'
|
1186 |
if any(col in df_html.columns for col in ['Y^', 'Y']):
|
1187 |
job = 'Drug Hit Screening'
|
1188 |
category = 'Scaffold SMILES'
|
1189 |
+
columns_unique = df_html.columns.isin(
|
1190 |
+
['X2', 'ID2', 'Max. Sequence Identity to Training Targets', 'Max. Id. Training Target']
|
1191 |
+
)
|
1192 |
|
1193 |
elif 'Y^' in df_html.columns:
|
1194 |
job = 'Interaction Pair Inference'
|
|
|
1196 |
df_html.rename(columns=column_aliases, inplace=True)
|
1197 |
df_html.index.name = 'Index'
|
1198 |
if 'Target FASTA' in df_html.columns:
|
1199 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
|
1200 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
1201 |
|
1202 |
num_cols = df_html.select_dtypes('number').columns
|
|
|
1214 |
if 'Target ID' in df_html.columns:
|
1215 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
1216 |
if 'Target FASTA' in df_html.columns:
|
1217 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
|
1218 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
1219 |
if 'Scaffold SMILES' in df_html.columns:
|
1220 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
|
|
1290 |
|
1291 |
report_table = pn.widgets.Tabulator(
|
1292 |
df_html, formatters=formatters,
|
1293 |
+
frozen_columns=[
|
1294 |
+
'Index', 'Target ID', 'Compound ID', 'Compound'
|
1295 |
+
],
|
1296 |
disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
|
1297 |
|
1298 |
for i, col in enumerate(num_cols):
|
|
|
1321 |
# Remove keys with empty values
|
1322 |
pie_charts = {k: v for k, v in pie_charts.items() if any(v)}
|
1323 |
|
1324 |
+
pn.extension(
|
1325 |
+
css_files=[
|
1326 |
+
'./static/panel.css',
|
1327 |
+
],
|
1328 |
+
js_files={
|
1329 |
+
'3Dmol': './static/3Dmol-min.js',
|
1330 |
+
'panel_custom': './static/panel.js'
|
1331 |
+
}
|
1332 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1333 |
|
1334 |
template = pn.template.VanillaTemplate(
|
1335 |
title=f'DeepSEQreen {job} Report',
|
|
|
1345 |
if unique_df is not None:
|
1346 |
unique_table = pn.widgets.Tabulator(unique_df, formatters=formatters, sizing_mode='stretch_width',
|
1347 |
show_index=False, disabled=True,
|
1348 |
+
frozen_columns=['Compound ID', 'Compound', 'Target ID'])
|
1349 |
# if pie_charts:
|
1350 |
# unique_table.width = 640
|
1351 |
stats_pane.append(pn.Column(f'### {unique_entity}', unique_table))
|
|
|
1437 |
df_report = df.copy()
|
1438 |
try:
|
1439 |
for filter_name in filter_list:
|
1440 |
+
df_report[filter_name] = df_report['Compound'].swifter.apply(
|
1441 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
1442 |
|
1443 |
for score_name in score_list:
|
1444 |
+
df_report[score_name] = df_report['Compound'].swifter.apply(
|
1445 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
1446 |
|
1447 |
return (create_html_report(df_report, file=None, task=task), df_report,
|
|
|
1653 |
label='OR Upload Your Own Library', variant='primary')
|
1654 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
1655 |
|
1656 |
+
with gr.Column():
|
1657 |
+
HelpTip("""
|
1658 |
+
<b>Max. Sequence Identity between the Input Target and Targets in the Training Set</b>:
|
1659 |
+
this serves as an indicator of the prediction applicability/reliability –
|
1660 |
+
higher similarities indicate more reliable predictions (preferably > 0.85).<br>
|
1661 |
+
<b>Max. Tanimoto Similarity between the Hit Compound and Known Ligands of the Input Target</b>:
|
1662 |
+
this serves as an indicator of both the confidence level and novelty of the predicted hit compounds –
|
1663 |
+
higher similarities suggest greater confidence, while lower Tanimoto similarities may indicate the novelty
|
1664 |
+
of the identified hit compounds compared to known drugs or true interacting compounds of the input target.<br>
|
1665 |
+
<b>Max. Sequence Identity between the Input Target and Known Targets of Hit Compound</b>:
|
1666 |
+
this serves as an additional indicator of the confidence level of the predicted hit compounds –
|
1667 |
+
higher identities usually lead to greater confidence in the predictions.<br>
|
1668 |
+
""")
|
1669 |
+
# Checkbox group offering the extra analyses listed in DRUG_SCRENN_CPI_OPTS.
drug_screen_opts = gr.CheckboxGroup(
    label="Step 6. Select Additional Options",
    choices=DRUG_SCRENN_CPI_OPTS,
    # Adjacent string literals are concatenated verbatim, so the first one
    # must end with a space to keep the two sentences apart in the UI.
    info="Experimental features - may increase the job computation time. "
         "See the Help Tip on the right or the Documentation for detailed explanation."
)
|
1675 |
with gr.Row():
|
1676 |
with gr.Column():
|
1677 |
drug_screen_email = gr.Textbox(
|
|
|
1772 |
target_library_upload_btn = gr.UploadButton(
|
1773 |
label='OR Upload Your Own Library', variant='primary')
|
1774 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
1775 |
+
with gr.Column():
|
1776 |
+
HelpTip("""
|
1777 |
+
<b>Max. Tanimoto Similarity between the Input Compound and Compounds in the Training Set</b>:
|
1778 |
+
this serves as an indicator of prediction applicability and reliability –
|
1779 |
+
higher similarities indicate more reliable predictions (ideally > 0.85).<br>
|
1780 |
+
<b>Max. Sequence Identity between the Identified Target and Known Targets of the Input Compound</b>:
|
1781 |
+
this serves as an indicator of prediction confidence for the potential targets –
|
1782 |
+
higher similarities typically imply higher confidence levels.<br>
|
1783 |
+
<b>Max. Tanimoto Similarity between the Input Compound and Known Ligands of the Identified Target</b>:
|
1784 |
+
this serves as an additional indicator of the confidence level in the predicted potential targets –
|
1785 |
+
higher similarities usually correspond to greater prediction confidence.<br>
|
1786 |
+
""")
|
1787 |
+
target_identify_opts = gr.CheckboxGroup(
|
1788 |
+
choices=TARGET_IDENTIFY_CPI_OPTS,
|
1789 |
+
label='Step 6. Select Additional Options',
|
1790 |
+
info="Experimental features - may increase the job computation time. "
|
1791 |
+
"See the Help Tip on the right or the Documentation for detailed explanation."
|
1792 |
+
)
|
1793 |
with gr.Row():
|
1794 |
with gr.Column():
|
1795 |
target_identify_email = gr.Textbox(
|
|
|
1828 |
label='Step 1. Select Pair Input Type and Input',
|
1829 |
value='Upload a CSV file containing paired compound-protein data')
|
1830 |
with gr.Column() as pair_upload:
|
1831 |
+
gr.File(
|
1832 |
+
label="Example CSV dataset",
|
1833 |
+
value="data/examples/interaction_pair_inference.csv",
|
1834 |
+
interactive=False
|
1835 |
+
)
|
1836 |
with gr.Row():
|
1837 |
infer_csv_prompt = gr.Button(
|
1838 |
value="Upload Your Own Dataset Below",
|
|
|
1840 |
with gr.Column():
|
1841 |
infer_pair = gr.File(
|
1842 |
label='Upload CSV File Containing Paired Records',
|
1843 |
+
file_count="single",
|
1844 |
+
type='filepath',
|
1845 |
+
visible=True
|
1846 |
+
)
|
1847 |
with gr.Column(visible=False) as pair_generate:
|
1848 |
with gr.Row():
|
1849 |
+
gr.File(
|
1850 |
+
label='Example SDF compound library',
|
1851 |
+
value='data/examples/compound_library.sdf',
|
1852 |
+
interactive=False
|
1853 |
+
)
|
1854 |
+
gr.File(
|
1855 |
+
label='Example FASTA target library',
|
1856 |
+
value='data/examples/target_library.fasta',
|
1857 |
+
interactive=False
|
1858 |
+
)
|
1859 |
with gr.Row():
|
1860 |
+
gr.File(
|
1861 |
+
label='Example CSV compound library',
|
1862 |
+
value='data/examples/compound_library.csv',
|
1863 |
+
interactive=False
|
1864 |
+
)
|
1865 |
+
gr.File(
|
1866 |
+
label='Example CSV target library',
|
1867 |
+
value='data/examples/target_library.csv',
|
1868 |
+
interactive=False
|
1869 |
+
)
|
1870 |
with gr.Row():
|
1871 |
infer_library_prompt = gr.Button(
|
1872 |
value="Upload Your Own Libraries Below",
|
1873 |
+
visible=False,
|
1874 |
+
variant='secondary'
|
1875 |
+
)
|
1876 |
with gr.Row():
|
1877 |
+
infer_drug = gr.File(
|
1878 |
+
label='Upload SDF/CSV File Containing Multiple Compounds',
|
1879 |
+
file_count="single",
|
1880 |
+
type='filepath'
|
1881 |
+
)
|
1882 |
+
infer_target = gr.File(
|
1883 |
+
label='Upload FASTA/CSV File Containing Multiple Targets',
|
1884 |
+
file_count="single",
|
1885 |
+
type='filepath'
|
1886 |
+
)
|
1887 |
|
1888 |
with gr.Row():
|
1889 |
with gr.Column(min_width=200):
|
|
|
1892 |
"If the proteins in the target library of interest "
|
1893 |
"all belong to the same protein family, manually selecting the family is supported."
|
1894 |
)
|
1895 |
+
|
1896 |
pair_infer_target_family = gr.Dropdown(
|
1897 |
choices=list(TARGET_FAMILY_MAP.keys()),
|
1898 |
value='General',
|
1899 |
+
label='Step 2. Select Target Family (Optional)'
|
1900 |
+
)
|
1901 |
|
1902 |
with gr.Column(min_width=200):
|
1903 |
HelpTip(
|
|
|
1909 |
pair_infer_task = gr.Dropdown(
|
1910 |
list(TASK_MAP.keys()),
|
1911 |
label='Step 3. Select a Prediction Task',
|
1912 |
+
value='Compound-Protein Interaction'
|
1913 |
+
)
|
1914 |
|
1915 |
with gr.Column(min_width=200):
|
1916 |
+
HelpTip(
|
1917 |
+
"Select your preferred model. Please refer to documentation for detailed benchmark results."
|
1918 |
+
)
|
1919 |
pair_infer_preset = gr.Dropdown(
|
1920 |
list(PRESET_MAP.keys()),
|
1921 |
+
label='Step 4. Select a Preset Model'
|
1922 |
+
)
|
1923 |
# infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
|
1924 |
# variant='primary')
|
1925 |
pair_infer_opts = gr.CheckboxGroup(visible=False)
|
|
|
2127 |
alignment = aligner.align(processed_fasta, query)
|
2128 |
return alignment.score / max(len(processed_fasta), len(query))
|
2129 |
|
2130 |
+
alignment_df['score'] = alignment_df['X2'].swifter.apply(align_score)
|
2131 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
2132 |
family = str(row['Target Family']).title()
|
2133 |
return gr.Dropdown(value=family,
|
|
|
2153 |
show_progress='hidden'
|
2154 |
)
|
2155 |
|
2156 |
+
# When the prediction task changes, swap the choices shown in the target
# identification options group: the binding-affinity task gets the CPA option
# set, any other task gets the CPI option set. Both sets must be the
# TARGET_IDENTIFY_* lists, since the group is created with
# TARGET_IDENTIFY_CPI_OPTS; the drug screening lists belong to another tab.
target_identify_task.select(
    fn=lambda task, opts: gr.CheckboxGroup(choices=TARGET_IDENTIFY_CPA_OPTS)
    if task == 'Compound-Protein Binding Affinity' else gr.CheckboxGroup(choices=TARGET_IDENTIFY_CPI_OPTS),
    inputs=[target_identify_task, target_identify_opts], outputs=target_identify_opts,
    show_progress='hidden'
)
|
2162 |
|
2163 |
def example_fill(input_type):
|
2164 |
return {target_id: 'Q16539',
|
|
|
2459 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
2460 |
validate_columns(infer_df, ['X1', 'X2'])
|
2461 |
|
2462 |
+
infer_df['X1_ERR'] = infer_df['X1'].swifter.apply(
|
2463 |
validate_seq_str, regex=SMILES_PAT)
|
2464 |
if not infer_df['X1_ERR'].isna().all():
|
2465 |
raise ValueError(
|
2466 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
2467 |
|
2468 |
+
infer_df['X2_ERR'] = infer_df['X2'].swifter.apply(
|
2469 |
validate_seq_str, regex=FASTA_PAT)
|
2470 |
if not infer_df['X2_ERR'].isna().all():
|
2471 |
raise ValueError(
|
|
|
2797 |
db.update({'status': 'FAILED'}, Job.id == job['id'])
|
2798 |
|
2799 |
scheduler = BackgroundScheduler()
|
2800 |
+
scheduler.add_job(check_expiry, 'interval', hours=1, timezone=pytz.utc)
|
2801 |
scheduler.start()
|
2802 |
|
2803 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|