libokj committed on
Commit
9b0c1e4
·
1 Parent(s): d113635

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -2030
app.py CHANGED
@@ -1,2043 +1,50 @@
1
- from datetime import datetime
2
- import hashlib
3
- import itertools
4
- import json
5
- import textwrap
6
- import threading
7
- from math import pi
8
- from uuid import uuid4
9
 
10
- import io
11
- import os
12
- import pathlib
13
- from pathlib import Path
14
- import sys
15
 
16
- import numpy as np
17
- from Bio import SeqIO
18
- from Bio.Align import PairwiseAligner
19
- # from email_validator import validate_email
20
- import gradio as gr
21
- import hydra
22
- import pandas as pd
23
- import requests
24
- from rdkit.Chem.PandasTools import _MolPlusFingerprint
25
- from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
26
- from requests.adapters import HTTPAdapter, Retry
27
- from rdkit import Chem
28
- from rdkit.Chem import RDConfig, Descriptors, Draw, Lipinski, Crippen, PandasTools
29
- from rdkit.Chem.Scaffolds import MurckoScaffold
30
- import seaborn as sns
31
 
32
- from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
33
- from bokeh.palettes import Category20c_20
34
- from bokeh.plotting import figure
35
- from bokeh.transform import cumsum
36
- from bokeh.resources import INLINE
37
- import panel as pn
38
 
39
- import swifter
40
- from tqdm.auto import tqdm
41
 
42
- from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
43
- from deepscreen.predict import predict
 
 
 
 
44
 
45
- sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
46
- import sascorer
47
 
48
# Directory the process was started from.
ROOT = Path.cwd()

# DF_FOR_REPORT = pd.DataFrame()

# Render floats with three decimals wherever pandas uses its display formatter.
pd.set_option('display.float_format', '{:.3f}'.format)
# Global drawing options applied whenever PandasTools renders a molecule:
# SVG output, transparent background, thin bonds, explicit methyl groups,
# single-colour wedge bonds, the CDK atom palette and a 128x80 canvas.
PandasTools.molRepresentation = 'svg'
PandasTools.drawOptions = Draw.rdMolDraw2D.MolDrawOptions()
PandasTools.drawOptions.clearBackground = False
PandasTools.drawOptions.bondLineWidth = 1
PandasTools.drawOptions.explicitMethyl = True
PandasTools.drawOptions.singleColourWedgeBonds = True
PandasTools.drawOptions.useCDKAtomPalette()
PandasTools.molSize = (128, 80)

# Shared HTTP session; requests over http:// and https:// are retried up to
# five times (with backoff) on 500/502/503/504 responses.
SESSION = requests.Session()
ADAPTER = HTTPAdapter(max_retries=Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]))
SESSION.mount('http://', ADAPTER)
SESSION.mount('https://', ADAPTER)

# SCHEDULER = BackgroundScheduler()

# URL template; `{query}` is filled in by the caller.
UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'

# NOTE(review): presumably the row limit for user-uploaded datasets — the
# enforcing code is not visible in this part of the file; confirm.
CUSTOM_DATASET_MAX_LEN = 10_000
72
-
73
- CSS = """
74
- .help-tip {
75
- position: absolute;
76
- display: inline-block;
77
- top: 16px;
78
- right: 0px;
79
- text-align: center;
80
- border-radius: 40%;
81
- /* border: 2px solid darkred; background-color: #8B0000;*/
82
- width: 24px;
83
- height: 24px;
84
- font-size: 16px;
85
- line-height: 26px;
86
- cursor: default;
87
- transition: all 0.5s cubic-bezier(0.55, 0, 0.1, 1);
88
- z-index: 100 !important;
89
- }
90
-
91
- .help-tip:hover {
92
- cursor: pointer;
93
- /*background-color: #ccc;*/
94
- }
95
-
96
- .help-tip:before {
97
- content: '?';
98
- font-weight: 700;
99
- color: #8B0000;
100
- z-index: 100 !important;
101
- }
102
-
103
- .help-tip p {
104
- visibility: hidden;
105
- opacity: 0;
106
- text-align: left;
107
- background-color: #EFDDE3;
108
- padding: 20px;
109
- width: 300px;
110
- position: absolute;
111
- border-radius: 4px;
112
- right: -4px;
113
- color: #494F5A;
114
- font-size: 13px;
115
- line-height: normal;
116
- transform: scale(0.7);
117
- transform-origin: 100% 0%;
118
- transition: all 0.5s cubic-bezier(0.55, 0, 0.1, 1);
119
- z-index: 100;
120
- }
121
-
122
- .help-tip:hover p {
123
- cursor: default;
124
- visibility: visible;
125
- opacity: 1;
126
- transform: scale(1.0);
127
- }
128
-
129
- .help-tip p:before {
130
- position: absolute;
131
- content: '';
132
- width: 0;
133
- height: 0;
134
- border: 6px solid transparent;
135
- border-bottom-color: #EFDDE3;
136
- right: 10px;
137
- top: -12px;
138
- }
139
-
140
- .help-tip p:after {
141
- width: 100%;
142
- height: 40px;
143
- content: '';
144
- position: absolute;
145
- top: -5px;
146
- left: 0;
147
- }
148
-
149
- .upload_button {
150
- background-color: #008000;
151
- }
152
-
153
- .absolute {
154
- position: absolute;
155
- }
156
-
157
- .example {
158
- padding: 0;
159
- background: none;
160
- border: none;
161
- text-decoration: underline;
162
- box-shadow: none;
163
- text-align: left !important;
164
- display: inline-block !important;
165
- }
166
-
167
- footer {
168
- visibility: hidden
169
- }
170
 
 
171
  """
 
 
172
 
 
 
 
 
 
 
 
173
 
174
class HelpTip:
    """Factory for a hoverable help bubble.

    Instantiating this class does not produce a ``HelpTip`` object:
    ``__new__`` returns a ``gr.HTML`` component whose markup carries the
    ``help-tip`` CSS class.

    Args:
        text: Content placed inside the bubble's ``<p>`` element. It is
            interpolated into the markup as-is (no HTML escaping).
    """

    def __new__(cls, text):
        return gr.HTML(
            # elem_classes="absolute",
            # The wrapper <div> is closed explicitly so the component emits
            # well-formed markup instead of relying on browser auto-closing.
            value=f'<div class="help-tip"><p>{text}</p></div>',
        )
180
-
181
-
182
def sa_score(mol):
    """Return the score ``sascorer.calculateScore`` assigns to ``mol``."""
    score = sascorer.calculateScore(mol)
    return score
184
-
185
-
186
def mw(mol):
    """Return the molecular weight of ``mol`` as computed by RDKit's ``MolWt``."""
    weight = Descriptors.MolWt(mol)
    return weight
188
-
189
-
190
def mr(mol):
    """Return the molar refractivity of ``mol`` (RDKit ``Crippen.MolMR``)."""
    refractivity = Crippen.MolMR(mol)
    return refractivity
192
-
193
-
194
def hbd(mol):
    """Return the hydrogen-bond donor count of ``mol`` (RDKit ``Lipinski.NumHDonors``)."""
    donors = Lipinski.NumHDonors(mol)
    return donors
196
-
197
-
198
def hba(mol):
    """Return the hydrogen-bond acceptor count of ``mol`` (RDKit ``Lipinski.NumHAcceptors``)."""
    acceptors = Lipinski.NumHAcceptors(mol)
    return acceptors
200
-
201
-
202
def logp(mol):
    """Return the LogP estimate of ``mol`` (RDKit ``Crippen.MolLogP``)."""
    log_p = Crippen.MolLogP(mol)
    return log_p
204
-
205
-
206
def atom(mol):
    """Return the atom count of ``mol`` (RDKit ``CalcNumAtoms``)."""
    n_atoms = CalcNumAtoms(mol)
    return n_atoms
208
-
209
-
210
def heavy_atom(mol):
    """Return the heavy-atom count of ``mol`` (RDKit ``CalcNumHeavyAtoms``)."""
    n_heavy = CalcNumHeavyAtoms(mol)
    return n_heavy
212
-
213
-
214
def rotatable_bond(mol):
    """Return the rotatable-bond count of ``mol`` (RDKit ``CalcNumRotatableBonds``)."""
    n_rotatable = CalcNumRotatableBonds(mol)
    return n_rotatable
216
-
217
-
218
def tpsa(mol):
    """Return the topological polar surface area of ``mol`` (RDKit ``CalcTPSA``)."""
    surface_area = CalcTPSA(mol)
    return surface_area
220
-
221
-
222
def lipinski(mol):
    """
    Check Lipinski's Rule of Five.

    A molecule passes when none of the following limits is exceeded:
        Hydrogen bond donors <= 5
        Hydrogen bond acceptors <= 10
        Molecular weight <= 500 daltons
        logP <= 5

    Returns True when the molecule passes, False otherwise.
    """
    # The checks short-circuit in the listed order: the first violated limit
    # stops the evaluation.
    violated = (
        hbd(mol) > 5
        or hba(mol) > 10
        or mw(mol) > 500
        or logp(mol) > 5
    )
    return not violated
240
-
241
-
242
def reos(mol):
    """
    Rapid Elimination Of Swill (REOS) filter.

    A molecule passes when every property lies within its range, bounds
    included:
        Molecular weight between 200 and 500
        LogP between -5.0 and +5.0
        H-bond donor count between 0 and 5
        H-bond acceptor count between 0 and 10
        Formal charge between -2 and +2
        Rotatable bond count between 0 and 8
        Heavy atom count between 15 and 50

    Returns True when the molecule passes, False otherwise.
    """
    # Bounds are inclusive: with strict comparisons a molecule with, e.g.,
    # zero H-bond donors or zero rotatable bonds would be rejected although
    # 0 is inside the documented range.
    criteria = (
        (mw, 200, 500),
        (logp, -5.0, 5.0),
        (hbd, 0, 5),
        (hba, 0, 10),
        # Net formal charge of the molecule; part of the documented filter.
        (Chem.GetFormalCharge, -2, 2),
        (rotatable_bond, 0, 8),
        (heavy_atom, 15, 50),
    )
    for descriptor, lower, upper in criteria:
        if not lower <= descriptor(mol) <= upper:
            return False
    return True
267
-
268
-
269
def ghose(mol):
    """
    Ghose drug-likeness filter.

    A molecule passes when every property lies strictly inside its range:
        Molecular weight between 160 and 480
        LogP between -0.4 and +5.6
        Atom count between 20 and 70
        Molar refractivity between 40 and 130

    Returns True when the molecule passes, False otherwise.
    """
    # (descriptor, exclusive lower bound, exclusive upper bound), evaluated
    # in order; the first out-of-range property ends the check.
    ranges = (
        (mw, 160, 480),
        (logp, -0.4, 5.6),
        (atom, 20, 70),
        (mr, 40, 130),
    )
    for descriptor, lower, upper in ranges:
        if not lower < descriptor(mol) < upper:
            return False
    return True
287
-
288
-
289
def veber(mol):
    """
    Veber rule-of-thumb filter for orally active drugs
    (Veber et al., J Med Chem. 2002; 45(12): 2615-23):
        Rotatable bonds <= 10
        Topological polar surface area <= 140

    Returns True when both limits are respected, False otherwise.
    """
    # (descriptor, inclusive upper limit), evaluated in order.
    limits = (
        (rotatable_bond, 10),
        (tpsa, 140),
    )
    for descriptor, upper in limits:
        if not descriptor(mol) <= upper:
            return False
    return True
302
-
303
-
304
def rule_of_three(mol):
    """
    Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
        Molecular weight <= 300
        LogP <= 3
        H-bond donor <= 3
        H-bond acceptor count <= 3
        Rotatable bond count <= 3

    Returns True when every limit is respected, False otherwise.
    """
    # (descriptor, inclusive upper limit), evaluated in order; the first
    # exceeded limit ends the check.
    limits = (
        (mw, 300),
        (logp, 3),
        (hbd, 3),
        (hba, 3),
        (rotatable_bond, 3),
    )
    for descriptor, upper in limits:
        if not descriptor(mol) <= upper:
            return False
    return True
325
-
326
-
327
- # def smarts_filter():
328
- # alerts = Chem.MolFromSmarts("enter one smart here")
329
- # detected_alerts = []
330
- # for smiles in data['X1']:
331
- # mol = Chem.MolFromSmiles(smiles)
332
- # detected_alerts.append(mol.HasSubstructMatch(alerts))
333
-
334
-
335
# Report column name -> function computing that numeric property for one molecule.
SCORE_MAP = {
    'SAscore': sa_score,
    'LogP': logp,
    'Molecular Weight': mw,
    'Number of Heavy Atoms': heavy_atom,
    'Molar Refractivity': mr,
    'H-Bond Donor Count': hbd,
    'H-Bond Acceptor Count': hba,
    'Rotatable Bond Count': rotatable_bond,
    'Topological Polar Surface Area': tpsa,
}

# Report column name -> pass/fail filter function applied to one molecule.
FILTER_MAP = {
    # TODO support number_of_violations
    'REOS': reos,
    "Lipinski's Rule of Five": lipinski,
    'Ghose': ghose,
    'Rule of Three': rule_of_three,
    'Veber': veber,
    # 'PAINS': pains,
}

# Task label received from the UI -> short task code used in the Hydra
# overrides and in checkpoint file names.
TASK_MAP = {
    'Compound-protein interaction': 'DTI',
    'Compound-protein binding affinity': 'DTA',
}

# Task code -> metric name.
# NOTE(review): not referenced in the visible part of the file; presumably
# the benchmark metric shown per task — confirm against the caller.
TASK_METRIC_MAP = {
    'DTI': 'AUROC',
    'DTA': 'CI',
}

# Model label received from the UI -> preset name used in the Hydra
# overrides and in checkpoint file names.
PRESET_MAP = {
    'DeepDTA': 'deep_dta',
    'DeepConvDTI': 'deep_conv_dti',
    'GraphDTA': 'graph_dta',
    'MGraphDTA': 'm_graph_dta',
    'HyperAttentionDTI': 'hyper_attention_dti',
    'MolTrans': 'mol_trans',
    'TransformerCPI': 'transformer_cpi',
    'TransformerCPI2': 'transformer_cpi_2',
    'DrugBAN': 'drug_ban',
    'DrugVQA-Seq': 'drug_vqa'
}

# Target-family label received from the UI -> suffix used in checkpoint file names.
TARGET_FAMILY_MAP = {
    'General': 'general',
    'Kinase': 'kinase',
    'Non-Kinase Enzyme': 'non_kinase_enzyme',
    'Membrane Receptor': 'membrane_receptor',
    'Nuclear Receptor': 'nuclear_receptor',
    'Ion Channel': 'ion_channel',
    'Others': 'others',
}

# Library label -> CSV file name.
# NOTE(review): the directory these files live in is not visible here.
TARGET_LIBRARY_MAP = {
    'DrugBank (Human)': 'drugbank_targets.csv',
    'ChEMBL33 (Human)': 'ChEMBL33_human_proteins.csv',
}

# Library label -> CSV file name (same caveat as TARGET_LIBRARY_MAP).
DRUG_LIBRARY_MAP = {
    'DrugBank (Human)': 'drugbank_compounds.csv',
    'Drug Repurposing Hub': 'drug_repurposing_hub.csv'
}

# Dataset column name -> human-readable column header used when rendering
# reports. The 'Y' and 'Y^' entries are overwritten in place by
# submit_predict() and create_html_report() depending on the selected task,
# so this dict is shared mutable state.
COLUMN_ALIASES = {
    'X1': 'Compound SMILES',
    'X2': 'Target FASTA',
    'ID1': 'Compound ID',
    'ID2': 'Target ID',
    'Y': 'Actual CPI/CPA',
    'Y^': 'Predicted CPI/CPA',
}
408
-
409
-
410
def validate_columns(df, mandatory_cols):
    """Ensure that every mandatory column is present in ``df``.

    Args:
        df: Object exposing a ``columns`` collection (e.g. a DataFrame).
        mandatory_cols: Iterable of column names that must be present.

    Returns:
        None when all mandatory columns are present.

    Raises:
        ValueError: If at least one mandatory column is absent. The message
            names only the columns that are actually missing.
    """
    missing_cols = [col for col in mandatory_cols if col not in df.columns]
    if missing_cols:
        # Report the missing columns, not the full mandatory list, so the
        # user can see exactly what has to be added to the dataset.
        missing_text = ', '.join(repr(col) for col in missing_cols)
        raise ValueError(
            "The following mandatory columns are missing "
            f"in the uploaded dataset: {missing_text}."
        )
418
-
419
-
420
def process_target_fasta(sequence):
    """Extract the first record's residues from FASTA-formatted text.

    A leading header line (starting with ``>``) is dropped, the remaining
    lines are concatenated, and anything from the next ``>`` onwards (i.e.
    further records) is discarded.

    Args:
        sequence: FASTA text, or a bare sequence optionally spanning
            several lines.

    Returns:
        The residues of the first record as a single string.

    Raises:
        gr.Error: If ``sequence`` is empty or cannot be processed.
    """
    try:
        if sequence:
            # splitlines() handles "\n", "\r\n" and "\r" alike; stripping each
            # line keeps stray carriage returns or padding out of the result.
            lines = [line.strip() for line in sequence.strip().splitlines()]
            if lines[0].startswith(">"):
                lines = lines[1:]
            return ''.join(lines).split(">")[0]
            # record = list(SeqIO.parse(io.StringIO(sequence), "fasta"))[0]
            # return str(record.seq)
        else:
            raise ValueError('Empty FASTA sequence.')
    except Exception as e:
        raise gr.Error(f'Failed to process FASTA due to error: {str(e)}') from e
433
-
434
-
435
def send_email(receiver, msg):
    """Placeholder for e-mail notification; currently does nothing and returns None."""
    return None
437
-
438
-
439
def submit_predict(predict_filepath, task, preset, target_family, flag, state, progress=gr.Progress(track_tqdm=True)):
    """Run a prediction job and write its results to a CSV file.

    Args:
        predict_filepath: Path of the CSV dataset to predict on.
        task: Task label; must be a key of ``TASK_MAP``.
        preset: Model label; must be a key of ``PRESET_MAP``.
        target_family: Target-family label; must be a key of ``TARGET_FAMILY_MAP``.
        flag: Falsy to skip the job; otherwise its value is used as the job id.
        state: Current run state, echoed back unchanged when ``flag`` is falsy.
        progress: Gradio progress tracker (not referenced in the body).

    Returns:
        A dict keyed by the Gradio components ``file_for_report``,
        ``run_state`` and ``report_upload_flag`` (defined outside this
        block). On failure only ``run_state`` is returned, after a warning
        has been shown.
    """
    if flag:
        try:
            job_id = flag
            global COLUMN_ALIASES
            task = TASK_MAP[task]
            if not preset:
                # Raised inside the try block, so it is reported through the
                # warning in the except clause below.
                raise gr.Error('Please select a model.')
            preset = PRESET_MAP[preset]
            target_family = TARGET_FAMILY_MAP[target_family]
            # email_hash = hashlib.sha256(email.encode()).hexdigest()
            # Mutates the module-level alias table so later report rendering
            # labels Y / Y^ according to the selected task.
            COLUMN_ALIASES.update({
                'Y': 'Actual Interaction Probability' if task == 'DTI' else 'Actual Binding Affinity',
                'Y^': 'Predicted Interaction Probability' if task == 'DTI' else 'Predicted Binding Affinity'
            })

            # target_family_list = [target_family]
            # for family in target_family_list:

            # try:
            prediction_df = pd.DataFrame()
            with hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference"):
                # The checkpoint is selected from the preset, task and target family.
                cfg = hydra.compose(
                    config_name="webserver_inference",
                    overrides=[f"task={task}",
                               f"preset={preset}",
                               f"ckpt_path=resources/checkpoints/{preset}-{task}-{target_family}.ckpt",
                               f"data.data_file='{str(predict_filepath)}'"])

                predictions, _ = predict(cfg)
                predictions = [pd.DataFrame(prediction) for prediction in predictions]
                prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
                prediction_df.set_index('N', inplace=True)
                # Re-read the input, keeping only the columns that are not in
                # the list below, and attach them to the predictions by index.
                orig_df = pd.read_csv(
                    predict_filepath,
                    usecols=lambda x: x not in ['X1', 'ID1', 'Compound', 'Scaffold', 'Scaffold SMILES',
                                                'X2', 'ID2',
                                                'Y', 'Y^']
                )
                prediction_df = pd.merge(prediction_df, orig_df, left_index=True, right_index=True, how='left')

                predictions_file = f'temp/{job_id}_predictions.csv'
                prediction_df.to_csv(predictions_file)

            return {file_for_report: predictions_file,
                    run_state: False,
                    report_upload_flag: False}
        except Exception as e:
            # Any failure is downgraded to a UI warning and the run state is reset.
            gr.Warning(f"Prediction job failed due to error: {str(e)}")
            return {run_state: False}
    else:
        return {run_state: state}
491
- #
492
- # except Exception as e:
493
- # raise gr.Error(str(e))
494
-
495
- # email_lock = Path(f"outputs/{email_hash}.lock")
496
- # with open(email_lock, "w") as file:
497
- # record = {
498
- # "email": email,
499
- # "job_id": job_id
500
- # }
501
- # json.dump(record, file)
502
- # def run_predict():
503
- # TODO per-user submit usage
504
- # # email_lock = Path(f"outputs/{email_hash}.lock")
505
- # # with open(email_lock, "w") as file:
506
- # # record = {
507
- # # "email": email,
508
- # # "job_id": job_id
509
- # # }
510
- # # json.dump(record, file)
511
- #
512
- # job_lock = DATA_PATH / f"outputs/{job_id}.lock"
513
- # with open(job_lock, "w") as file:
514
- # pass
515
- #
516
- # try:
517
- # prediction_df = pd.DataFrame()
518
- # for family in target_family_list:
519
- # with hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference"):
520
- # cfg = hydra.compose(
521
- # config_name="webserver_inference",
522
- # overrides=[f"task={task}",
523
- # f"preset={preset}",
524
- # f"ckpt_path=resources/checkpoints/{preset}-{task}-{family}.ckpt",
525
- # f"data.data_file='{str(predict_dataset)}'"])
526
- #
527
- # predictions, _ = predict(cfg)
528
- # predictions = [pd.DataFrame(prediction) for prediction in predictions]
529
- # prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
530
- # prediction_df.to_csv(f'outputs/{job_id}.csv')
531
- # # email_lock.unlink()
532
- # job_lock.unlink()
533
- #
534
- # msg = (f'Your DeepSEQcreen prediction job (id: {job_id}) completed successfully. You may retrieve the '
535
- # f'results and generate an analytical report at {URL} using the job id within 48 hours.')
536
- # gr.Info(msg)
537
- # except Exception as e:
538
- # msg = (f'Your DeepSEQcreen prediction job (id: {job_id}) failed due to an error: "{str(e)}." You may '
539
- # f'reach out to the author about the error through email ([email protected]).')
540
- # raise gr.Error(str(e))
541
- # finally:
542
- # send_email(email, msg)
543
- #
544
- # # Run "predict" asynchronously
545
- # threading.Thread(target=run_predict).start()
546
- #
547
- # msg = (f'Your DeepSEQcreen prediction job (id: {job_id}) started running. You may retrieve the results '
548
- # f'and generate an analytical report at {URL} using the job id once the job is done. Only one job '
549
- # f'per user is allowed at the same time.')
550
- # send_email(email, msg)
551
-
552
- # # Return the job id first
553
- # return [
554
- # gr.Blocks(visible=False),
555
- # gr.Markdown(f"Your prediction job is running... "
556
- # f"You may stay on this page or come back later to retrieve the results "
557
- # f"Once you receive our email notification."),
558
- # ]
559
-
560
-
561
def update_df(file, progress=gr.Progress(track_tqdm=True)):
    """Load a dataset CSV and prepare it for report generation.

    Args:
        file: Path of the CSV file to load.
        progress: Gradio progress tracker (not referenced in the body).

    Returns:
        A dict keyed by Gradio components defined outside this block. When
        the file is usable it carries the HTML preview (``html_report``), the
        loaded frame (``raw_df``), a copy of it (``report_df``) and an
        enabled ``analyze_btn``; otherwise only a disabled ``analyze_btn``.
    """
    # global DF_FOR_REPORT
    if file and Path(file).is_file():
        df = pd.read_csv(file)
        if 'N' in df.columns:
            df.set_index('N', inplace=True)
        if not any(col in ['X1', 'X2'] for col in df.columns):
            gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
            return {analyze_btn: gr.Button(interactive=False)}
        # if df['X1'].nunique() > 1:
        if 'X1' in df.columns:
            # Derive the Murcko scaffold SMILES and a drawable molecule object for each row.
            df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
                desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
            df['Scaffold'] = df['Scaffold SMILES'].swifter.progress_bar(
                desc='Generating scaffold graphs...').apply(
                lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
            # Add a new column with RDKit molecule objects
            if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
                df['Compound'] = df['X1'].swifter.progress_bar(
                    desc='Generating molecular graphs...').apply(
                    lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles)))

        # DF_FOR_REPORT = df.copy()

        # pie_chart = None
        # value = None
        # if 'Y^' in DF_FOR_REPORT.columns:
        #     value = 'Y^'
        # elif 'Y' in DF_FOR_REPORT.columns:
        #     value = 'Y'

        # if value:
        #     if DF_FOR_REPORT['X1'].nunique() > 1 >= DF_FOR_REPORT['X2'].nunique():
        #         pie_chart = create_pie_chart(DF_FOR_REPORT, category='Scaffold SMILES', value=value, top_k=100)
        #     elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
        #         pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)

        return {html_report: create_html_report(df),
                raw_df: df,
                report_df: df.copy(),
                analyze_btn: gr.Button(interactive=True)}  # pie_chart
    else:
        return {analyze_btn: gr.Button(interactive=False)}
604
-
605
-
606
def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm=True)):
    """Render a dataset either as an inline HTML preview or as a saved report.

    Args:
        df: DataFrame to render; it is copied and never modified.
        file: When falsy, an HTML snippet previewing the top 30 rows is
            returned. Otherwise a full Panel report is saved to this path.
        task: Optional task label (a key of ``TASK_MAP``). When given as a
            string, the ``'Y'``/``'Y^'`` entries of the module-level
            ``COLUMN_ALIASES`` are updated to match the task.
        progress: Gradio progress tracker (not referenced in the body).

    Returns:
        The preview HTML string when ``file`` is falsy, otherwise ``file``.
    """
    df_html = df.copy(deep=True)
    # email_hash = hashlib.sha256(email.encode()).hexdigest()

    # Column layout: identifiers/labels first, raw SMILES/FASTA last.
    cols_left = list(pd.Index(
        ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']).intersection(df_html.columns))
    cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
    df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]

    if isinstance(task, str):
        task = TASK_MAP[task]
        COLUMN_ALIASES.update({
            'Y': 'Actual Interaction Probability' if task == 'DTI' else 'Actual Binding Affinity',
            'Y^': 'Predicted Interaction Probability' if task == 'DTI' else 'Predicted Binding Affinity'
        })

    # Binding affinities are sorted ascending, everything else descending.
    ascending = COLUMN_ALIASES['Y^'] == 'Predicted Binding Affinity'
    sort_cols = [col for col in ['Y', 'Y^'] if col in df_html.columns]
    if sort_cols:
        # Guarded: datasets without labels/predictions have nothing to sort
        # by, and an empty key list is not accepted by every pandas version.
        df_html = df_html.sort_values(sort_cols, ascending=ascending)

    if not file:
        # The preview is titled "Top 30 Records", so keep exactly 30 rows.
        df_html = df_html.iloc[:30]

    # Remove repeated info for one-against-N tasks to save visual and physical space
    job = 'Chemical Property'
    unique_entity = 'Unique Entity'
    unique_df = None
    category = None
    columns_unique = None
    if 'X1' in df_html.columns and 'X2' in df_html.columns:
        n_compound = df_html['X1'].nunique()
        n_protein = df_html['X2'].nunique()

        if n_compound == 1 and n_protein >= 2:
            unique_entity = 'Compound of Interest'
            if any(col in df_html.columns for col in ['Y^', 'Y']):
                job = 'Target Protein Identification'
                category = 'Target Family'
            columns_unique = df_html.columns.isin(['X1', 'ID1', 'Scaffold', 'Compound', 'Scaffold SMILES']
                                                  + list(FILTER_MAP.keys()) + list(SCORE_MAP.keys()))

        elif n_compound >= 2 and n_protein == 1:
            unique_entity = 'Target of Interest'
            if any(col in df_html.columns for col in ['Y^', 'Y']):
                job = 'Drug Hit Screening'
                category = 'Scaffold SMILES'
            columns_unique = df_html.columns.isin(['X2', 'ID2'])

        elif 'Y^' in df_html.columns:
            job = 'Interaction Pair Inference'
    if 'Compound' in df_html.columns:
        df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
            desc='Generating compound graph...').apply(
            lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
    if 'Scaffold' in df_html.columns:
        df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
            desc='Generating scaffold graph...').apply(
            lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)

    # Renaming keeps the column order, so the positional `columns_unique`
    # mask computed above stays aligned with the columns.
    df_html.rename(columns=COLUMN_ALIASES, inplace=True)
    df_html.index.name = 'Index'
    if 'Target FASTA' in df_html.columns:
        df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
            desc='Processing FASTA...').apply(
            lambda x: wrap_text(x) if not pd.isna(x) else x)

    num_cols = df_html.select_dtypes('number').columns
    num_col_colors = sns.color_palette('husl', len(num_cols))
    bool_cols = df_html.select_dtypes(bool).columns
    bool_col_colors = {True: 'lightgreen', False: 'lightpink'}

    if columns_unique is not None:
        # Split the columns describing the single shared entity into a one-row table.
        unique_df = df_html.loc[:, columns_unique].iloc[[0]].copy()
        df_html = df_html.loc[:, ~columns_unique]

    if not file:
        # Hide the raw SMILES/FASTA when an ID column already identifies the
        # entity. errors='ignore' covers datasets where the raw column is
        # absent or was moved into `unique_df`.
        if 'Compound ID' in df_html.columns:
            df_html = df_html.drop(columns=['Compound SMILES'], errors='ignore')
        if 'Target ID' in df_html.columns:
            df_html = df_html.drop(columns=['Target FASTA'], errors='ignore')
        if 'Target FASTA' in df_html.columns:
            # NOTE(review): the FASTA column was already passed through
            # wrap_text above; this second pass is kept from the original —
            # confirm whether it is needed.
            df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
                desc='Processing FASTA...').apply(
                lambda x: wrap_text(x) if not pd.isna(x) else x)
        if 'Scaffold SMILES' in df_html.columns:
            df_html = df_html.drop(columns=['Scaffold SMILES'], errors='ignore')
        styled_df = df_html.style.format(precision=3)

        for i, col in enumerate(num_cols):
            if col in df_html.columns:
                if col not in ['Predicted Binding Affinity', 'Actual Binding Affinity']:
                    styled_df = styled_df.background_gradient(
                        subset=[col], cmap=sns.light_palette(num_col_colors[i], as_cmap=True))
                else:
                    # Reversed palette for binding-affinity columns.
                    styled_df = styled_df.background_gradient(
                        subset=[col], cmap=sns.light_palette(num_col_colors[i], as_cmap=True).reversed())

        # Style only the boolean columns still present in this table; some
        # may have been moved into `unique_df`.
        table_bool_cols = list(df_html.columns.intersection(bool_cols))
        if table_bool_cols:
            styled_df = styled_df.applymap(
                lambda val: f'background-color: {bool_col_colors[val]}', subset=table_bool_cols)

        table_html = styled_df.to_html()
        unique_html = ''
        if unique_df is not None:
            if 'Target FASTA' in unique_df.columns:
                unique_df['Target FASTA'] = unique_df['Target FASTA'].str.replace('\n', '<br>')
            unique_bool_cols = list(unique_df.columns.intersection(bool_cols))
            if unique_bool_cols:
                unique_df = unique_df.style.applymap(
                    lambda val: f"background-color: {bool_col_colors[val]}", subset=unique_bool_cols)
            unique_html = (f'<div style="font-family: Courier !important;">'
                           f'{unique_df.to_html(escape=False, index=False)}</div>')

        return (f'<div style="font-size: 16px; font-weight: bold;">{job} Report Preview (Top 30 Records)</div>'
                f'<div style="overflow-x:auto; font-family: Courier !important;">{unique_html}</div>'
                f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')

    else:
        bool_formatters = {col: BooleanFormatter() for col in bool_cols}
        float_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('floating').columns}
        other_formatters = {
            'Predicted Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
            'Actual Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
            'Compound': HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>'),
            'Scaffold': HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>'),
            'Target FASTA': {'type': 'textarea', 'width': 60},
            # Values that look like a UniProt accession link straight to the
            # entry, anything else to a UniProt search. The alternation is
            # grouped so that ^ and $ anchor both accession patterns.
            'Target ID': HTMLTemplateFormatter(
                template='<a href="<% '
                         'if (/^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$/.test(value)) '
                         '{ %>https://www.uniprot.org/uniprotkb/<%= value %><% } '
                         'else { %>https://www.uniprot.org/uniprotkb?query=<%= value %><% } '
                         '%>" target="_blank"><%= value %></a>'),
            'Compound ID': HTMLTemplateFormatter(
                template='<a href="https://pubchem.ncbi.nlm.nih.gov/compound/<%= value %>" '
                         'target="_blank"><%= value %></a>')
        }
        formatters = {**bool_formatters, **float_formatters, **other_formatters}

        # html = df.to_html(file)
        # return html

        report_table = pn.widgets.Tabulator(
            df_html, formatters=formatters,
            frozen_columns=['Index', 'Target ID', 'Compound ID', 'Compound', 'Scaffold'],
            disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)

        for i, col in enumerate(num_cols):
            if col not in ['Predicted Binding Affinity', 'Actual Binding Affinity']:
                if col not in ['Predicted Interaction Probability', 'Actual Interaction Probability']:
                    report_table.style.background_gradient(
                        subset=df_html.columns == col, cmap=sns.light_palette(num_col_colors[i], as_cmap=True))
                else:
                    # Probability columns use the 'progress' formatter instead of a gradient.
                    continue
            else:
                report_table.style.background_gradient(
                    subset=df_html.columns == col, cmap=sns.light_palette(num_col_colors[i], as_cmap=True).reversed())

        # One list of pie charts (top 10 / 30 / 100 / all) per score column.
        pie_charts = {}
        for y in df_html.columns.intersection(['Predicted Interaction Probability', 'Actual Interaction Probability',
                                               'Predicted Binding Affinity', 'Actual Binding Affinity']):
            pie_charts[y] = []
            for k in [10, 30, 100]:
                if k < len(df_html):
                    pie_charts[y].append(create_pie_chart(df_html, category=category, value=y, top_k=k))
            pie_charts[y].append(create_pie_chart(df_html, category=category, value=y, top_k=len(df_html)))

        # Remove keys with empty values
        pie_charts = {k: v for k, v in pie_charts.items() if any(v)}

        pn_css = """
        .tabulator {
            font-family: Courier New !important;
            font-weight: normal !important;
            font-size: 12px !important;
        }

        .tabulator-cell {
            overflow: visible !important;
        }

        .tabulator-cell:hover {
            z-index: 1000 !important;
        }

        .tabulator-cell.tabulator-frozen:hover {
            z-index: 1000 !important;
        }

        .image-zoom-viewer {
            display: inline-block;
            overflow: visible;
            z-index: 1000;
        }

        .image-zoom-viewer::after {
            content: "";
            top: 0;
            left: 0;
            width: 100%;
            height: 100%;
            pointer-events: none;
        }

        .image-zoom-viewer:hover::after {
            pointer-events: all;
        }

        /* When hovering over the container, scale its child (the SVG) */
        .tabulator-cell:hover .image-zoom-viewer svg {
            padding: 3px;
            position: absolute;
            background-color: rgba(250, 250, 250, 0.854);
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.618);
            border-radius: 3px;
            transform: scale(3); /* Scale up the SVG */
            transition: transform 0.3s ease;
            pointer-events: none; /* Prevents the SVG from blocking mouse interactions */
            z-index: 1000;
        }

        .image-zoom-viewer svg {
            display: block; /* SVG is a block-level element for proper scaling */
            z-index: 1000;
        }

        .image-zoom-viewer:hover {
            z-index: 1000;
        }

        """

        pn.extension(raw_css=[pn_css])

        template = pn.template.VanillaTemplate(
            title=f'DeepSEQreen {job} Report',
            sidebar=[],
            favicon='deepseqreen.svg',
            logo='deepseqreen.svg',
            header_background='#F3F5F7',
            header_color='#4372c4',
            busy_indicator=None,
        )

        stats_pane = pn.Row()
        if unique_df is not None:
            unique_table = pn.widgets.Tabulator(unique_df, formatters=formatters, sizing_mode='stretch_width',
                                                show_index=False, disabled=True,
                                                frozen_columns=['Compound ID', 'Compound', 'Scaffold'])
            # if pie_charts:
            #     unique_table.width = 640
            stats_pane.append(pn.Column(f'### {unique_entity}', unique_table))
        if pie_charts:
            for score_name, figure_list in pie_charts.items():
                stats_pane.append(
                    pn.Column(f'### {category} by Top {score_name}',
                              pn.Tabs(*figure_list, tabs_location='above'))
                    # pn.Card(pn.Row(v), title=f'{category} by Top {k}')
                )

        if stats_pane:
            template.main.append(pn.Card(stats_pane,
                                         sizing_mode='stretch_width', title='Summary Statistics', margin=10))

        template.main.append(
            pn.Card(report_table, title=f'{job} Results',  # width=1200,
                    margin=10)
        )

        template.save(file, resources=INLINE)
        return file
875
-
876
-
877
def create_pie_chart(df, category, value, top_k):
    """Build a Bokeh pie chart of category frequencies among the top-k rows.

    Args:
        df: Table holding the ``category`` and ``value`` columns.
        category: Column whose values are counted.
        value: Column used to rank rows; the ``top_k`` largest are kept.
        top_k: Number of top-ranked rows to include.

    Returns:
        The Bokeh figure, or None when ``category`` or ``value`` is not a
        column of ``df``.
    """
    if not (category in df and value in df):
        return

    top_rows = df.nlargest(top_k, value)
    counts = top_rows[category].value_counts()
    data = pd.DataFrame({category: counts.index, 'value': counts.values})
    data['proportion'] = data['value'] / data['value'].sum()

    # Merge rows with proportion less than 0.2% into one row
    is_minor = data['proportion'] < 0.002
    if is_minor.any():
        merged_row = data[is_minor].sum()
        merged_row[category] = '...'
        data = pd.concat([data[~is_minor], pd.DataFrame(merged_row).T])
    data['angle'] = data['proportion'] * 2 * pi

    # Colours are assigned over all categories of `df` (not only the top-k
    # rows), cycling through the 20-colour palette.
    all_categories = df[category].unique()
    color_dict = dict(zip(all_categories, itertools.cycle(Category20c_20)))
    color_dict['...'] = '#636363'
    data['color'] = data[category].map(color_dict)

    tooltips = [
        (f"{category}", f"@{{{category}}}"),
        ("Count", "@value"),
        ("Percentage", "@proportion{0.0%}")
    ]

    if category == 'Scaffold SMILES':
        # Attach the scaffold drawing so it can be shown in the hover tooltip.
        scaffold_lookup = top_rows[['Scaffold SMILES', 'Scaffold']].drop_duplicates()
        data = data.merge(scaffold_lookup, how='left',
                          left_on='Scaffold SMILES', right_on='Scaffold SMILES')
        tooltips.append(("Scaffold", "<div>@{Scaffold}{safe}</div>"))

    chart_name = f"Top {top_k}" if top_k < len(df) else 'All'
    p = figure(height=384, width=960, name=chart_name, sizing_mode='stretch_height',
               toolbar_location=None, tools="hover", tooltips=tooltips, x_range=(-0.4, 0.4))

    def shorten(label, max_length=60):
        # Keep legend entries readable by cutting long labels.
        if len(label) <= max_length:
            return label
        return label[:max_length] + "..."

    data['legend_field'] = data[category].apply(shorten)

    p.add_layout(Legend(padding=0, margin=0), 'right')
    p.wedge(x=0, y=1, radius=0.3,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field='legend_field', source=data)

    # Limit the number of legend items to 20 and add "..." if there are more than 20 items
    if len(p.legend.items) > 20:
        kept_items = p.legend.items[:20]
        kept_items.append(LegendItem(label="..."))
        p.legend.items = kept_items

    # Strip everything but the pie itself and its legend.
    p.legend.label_text_font_size = "10pt"
    p.legend.label_text_font = "courier"
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None
    p.outline_line_width = 0
    p.min_border = 0
    p.margin = 0

    return p
938
-
939
-
940
def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_tqdm=True)):
    """Add the selected filter/score columns to a copy of ``df`` and render a report.

    Args:
        df: Data frame with a ``Compound`` column; it is copied, never mutated.
        score_list: Keys of ``SCORE_MAP`` to evaluate on each compound.
        filter_list: Keys of ``FILTER_MAP`` to evaluate on each compound.
        task: Passed through unchanged to ``create_html_report``.
        progress: Gradio progress tracker (tqdm-tracking) injected by the UI.

    Returns:
        ``(html_report, df_report, gr.File, gr.File)`` with both file
        components hidden, or four ``None`` values if any step raised (a
        Gradio warning is shown in that case).
    """
    df_report = df.copy()
    try:
        # Filters are computed before scores; every selected name becomes a
        # new column derived from the 'Compound' column.
        for selected, registry in ((filter_list, FILTER_MAP), (score_list, SCORE_MAP)):
            for name in selected:
                compounds = df_report['Compound']
                labelled = compounds.swifter.progress_bar(desc=f"Calculating {name}")
                # Missing compounds are passed through untouched.
                df_report[name] = labelled.apply(
                    lambda mol: mol if pd.isna(mol) else registry[name](mol))

        html = create_html_report(df_report, file=None, task=task)
        return html, df_report, gr.File(visible=False), gr.File(visible=False)

    except Exception as e:
        gr.Warning(f'Failed to report results due to error: {str(e)}')
        return None, None, None, None
972
-
973
-
974
- # def check_job_status(job_id):
975
- # job_lock = DATA_PATH / f"{job_id}.lock"
976
- # job_file = DATA_PATH / f"{job_id}.csv"
977
- # if job_lock.is_file():
978
- # return {gr.Markdown(f"Your job ({job_id}) is still running... "
979
- # f"You may stay on this page or come back later to retrieve the results "
980
- # f"Once you receive our email notification."),
981
- # None,
982
- # None
983
- # }
984
- # elif job_file.is_file():
985
- # return {gr.Markdown(f"Your job ({job_id}) is done! Redirecting you to generate reports..."),
986
- # gr.Tabs(selected=3),
987
- # gr.File(str(job_lock))}
988
-
989
-
990
def wrap_text(text, line_length=60):
    """Re-wrap FASTA (or plain) text to a fixed line width.

    Text whose first non-whitespace character is ``>`` is treated as FASTA:
    every line starting with ``>`` is kept verbatim as a record header, and
    the sequence lines that follow are concatenated (internal whitespace
    removed) and re-wrapped to ``line_length`` characters. Any other string
    is wrapped as ordinary text with ``textwrap``.

    Args:
        text: The text to wrap. Non-string values are returned unchanged.
        line_length: Maximum characters per output line.

    Returns:
        The wrapped text, or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text

    wrapper = textwrap.TextWrapper(width=line_length)
    stripped = text.strip()
    if not stripped.startswith('>'):
        return wrapper.fill(text)

    # Parse line by line so that only a '>' at the start of a line opens a new
    # record; a '>' inside a header description must not split the record.
    # splitlines() also absorbs '\r\n' endings from uploaded files.
    records = []
    for line in stripped.splitlines():
        if line.startswith('>'):
            records.append([line[1:], []])
        else:
            # Drop whitespace so TextWrapper sees one unbroken residue string.
            records[-1][1].append(''.join(line.split()))

    return '\n'.join(
        f">{header}\n{wrapper.fill(''.join(chunks))}" for header, chunks in records
    )
1008
-
1009
-
1010
def unwrap_text(text):
    """Return ``text`` stripped of surrounding whitespace with all newlines removed.

    This is the inverse of line wrapping: a wrapped block is collapsed back
    onto a single line.
    """
    # The previous body referenced `text.strip` without calling it and
    # misspelled `replace`, so every call raised AttributeError.
    return text.strip().replace('\n', '')
1012
-
1013
-
1014
def drug_library_from_sdf(sdf_path):
    """Load an SDF file into a data frame with ``PandasTools.LoadSDF``.

    The loader is asked to put SMILES in column ``X1``, molecule objects in
    column ``Compound``, and to include fingerprints.
    """
    load_options = {
        'smilesName': 'X1',
        'molColName': 'Compound',
        'includeFingerprints': True,
    }
    return PandasTools.LoadSDF(sdf_path, **load_options)
1019
-
1020
-
1021
def process_target_library_upload(library_upload):
    """Read an uploaded target library (CSV or FASTA) into a data frame.

    Args:
        library_upload: Path of the uploaded file, as a string.

    Returns:
        A data frame that passed ``validate_columns(df, ['X2'])``.

    Raises:
        gr.Error: If the file extension is neither ``.csv`` nor ``.fasta``.
    """
    # Compare extensions case-insensitively so 'LIBRARY.CSV' or 'x.FASTA'
    # are accepted as well.
    lowered = library_upload.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(library_upload)
    elif lowered.endswith('.fasta'):
        df = target_library_from_fasta(library_upload)
    else:
        raise gr.Error('Currently only CSV and FASTA files are supported as target libraries.')
    validate_columns(df, ['X2'])
    return df
1030
-
1031
-
1032
def process_drug_library_upload(library_upload):
    """Read an uploaded compound library (CSV or SDF) into a data frame.

    Args:
        library_upload: Path of the uploaded file, as a string.

    Returns:
        A data frame that passed ``validate_columns(df, ['X1'])``.

    Raises:
        gr.Error: If the file extension is neither ``.csv`` nor ``.sdf``.
    """
    # Compare extensions case-insensitively so 'LIBRARY.CSV' or 'x.SDF'
    # are accepted as well.
    lowered = library_upload.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(library_upload)
    elif lowered.endswith('.sdf'):
        df = drug_library_from_sdf(library_upload)
    else:
        raise gr.Error('Currently only CSV and SDF files are supported as drug libraries.')
    validate_columns(df, ['X1'])
    return df
1041
-
1042
-
1043
def target_library_from_fasta(fasta_path):
    """Parse a FASTA file into a data frame with one row per record.

    Column ``ID2`` holds each record's ``id`` and column ``X2`` holds its
    sequence converted to ``str``.
    """
    identifiers, sequences = [], []
    for record in SeqIO.parse(fasta_path, "fasta"):
        identifiers.append(record.id)
        sequences.append(str(record.seq))
    return pd.DataFrame({'ID2': identifiers, 'X2': sequences})
1049
-
1050
-
1051
# Colours reused across the theme overrides below.
_ACCENT_BLUE = '#4372c4'
_SURFACE_BLUE = '#dfe6f0'

# Gradio Base theme with small spacing, medium text and the overrides below.
theme = gr.themes.Base(spacing_size="sm", text_size='md').set(
    # Surfaces
    background_fill_primary=_SURFACE_BLUE,
    background_fill_secondary=_SURFACE_BLUE,
    checkbox_label_background_fill=_SURFACE_BLUE,
    checkbox_label_background_fill_hover=_SURFACE_BLUE,
    checkbox_background_color='white',
    input_background_fill='white',
    code_background_fill='white',
    # Borders
    checkbox_border_color=_ACCENT_BLUE,
    border_color_primary=_ACCENT_BLUE,
    border_color_accent=_ACCENT_BLUE,
    button_secondary_border_color=_ACCENT_BLUE,
    input_border_color=_ACCENT_BLUE,
    panel_border_color=_ACCENT_BLUE,
    block_border_color=None,
    # Buttons
    button_primary_background_fill=_ACCENT_BLUE,
    button_primary_text_color='white',
    # Text
    body_text_color=_ACCENT_BLUE,
    block_title_text_color=_ACCENT_BLUE,
    block_label_text_color=_ACCENT_BLUE,
    block_info_text_color='#505358',
)
1073
-
1074
- with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
1075
- run_state = gr.State(value=False)
1076
- screen_flag = gr.State(value=False)
1077
- identify_flag = gr.State(value=False)
1078
- infer_flag = gr.State(value=False)
1079
- report_upload_flag = gr.State(value=False)
1080
-
1081
- with gr.Tabs() as tabs:
1082
- with gr.TabItem(label='Drug Hit Screening', id=0):
1083
- gr.Markdown('''
1084
- # <center>Drug Hit Screening</center>
1085
- <center>
1086
- To predict interactions or binding affinities of a single target against a compound library.
1087
- </center>
1088
- ''')
1089
- with gr.Blocks() as screen_block:
1090
- with gr.Column() as screen_page:
1091
- with gr.Row():
1092
- with gr.Column():
1093
- HelpTip(
1094
- "Enter (paste) a amino acid sequence below manually or upload a FASTA file. "
1095
- "If multiple entities are in the FASTA, only the first will be used. "
1096
- "Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
1097
- "the sequence."
1098
- )
1099
- target_input_type = gr.Dropdown(
1100
- label='Step 1. Select Target Input Type and Input',
1101
- choices=['Sequence', 'UniProt ID', 'Gene symbol'],
1102
- info='Enter (paste) a FASTA string below manually or upload a FASTA file.',
1103
- value='Sequence',
1104
- scale=4, interactive=True
1105
- )
1106
-
1107
- with gr.Row():
1108
- target_id = gr.Textbox(show_label=False, visible=False,
1109
- interactive=True, scale=4,
1110
- info='Enter a UniProt ID and query.')
1111
- target_gene = gr.Textbox(
1112
- show_label=False, visible=False,
1113
- interactive=True, scale=4,
1114
- info='Enter a gene symbol and query.')
1115
- target_organism = gr.Textbox(
1116
- info='Organism scientific name (default: Homo sapiens).',
1117
- placeholder='Homo sapiens', show_label=False,
1118
- visible=False, interactive=True, scale=4, )
1119
- target_upload_btn = gr.UploadButton(label='Upload a FASTA File', type='binary',
1120
- visible=True, variant='primary',
1121
- size='lg')
1122
- target_paste_markdown = gr.Button(value='OR Paste Your Sequence Below', visible=True)
1123
- target_query_btn = gr.Button(value='Query the Sequence', variant='primary',
1124
- visible=False, scale=4)
1125
- # with gr.Row():
1126
- # example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
1127
- # example_gene = gr.Button(value='Example: MAPK14', elem_classes='example', visible=False)
1128
- example_fasta = gr.Button(value='Example: MAPK14 (Q16539)', elem_classes='example')
1129
- target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
1130
- # with gr.Row():
1131
- # with gr.Column():
1132
- # with gr.Column():
1133
- # gr.File(label='Example FASTA file',
1134
- # value='data/examples/MAPK14.fasta', interactive=False)
1135
-
1136
- with gr.Row():
1137
- with gr.Column():
1138
- HelpTip(
1139
- "Click Auto-detect to identify the protein family using sequence alignment. "
1140
- "This optional step allows applying a family-specific model instead of a all-family "
1141
- "model (general). "
1142
- "Manually select general if the alignment results are unsatisfactory."
1143
- )
1144
- drug_screen_target_family = gr.Dropdown(
1145
- choices=list(TARGET_FAMILY_MAP.keys()),
1146
- value='General',
1147
- label='Step 2. Select Target Family (Optional)', interactive=True)
1148
- # with gr.Column(scale=1, min_width=24):
1149
-
1150
- with gr.Row():
1151
- with gr.Column():
1152
- target_family_detect_btn = gr.Button(value='OR Let Us Auto-Detect for You',
1153
- variant='primary')
1154
-
1155
- with gr.Row():
1156
- with gr.Column():
1157
- HelpTip(
1158
- "Select a preset compound library (e.g., DrugBank). "
1159
- "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
1160
- "or use an SDF file (Max. 10,000 compounds per task). Example CSV and SDF files are "
1161
- "provided below and can be downloaded by clicking the lower right corner."
1162
- )
1163
- drug_library = gr.Dropdown(label='Step 3. Select a Preset Compound Library',
1164
- choices=list(DRUG_LIBRARY_MAP.keys()))
1165
- with gr.Row():
1166
- gr.File(label='Example SDF compound library',
1167
- value='data/examples/compound_library.sdf', interactive=False)
1168
- gr.File(label='Example CSV compound library',
1169
- value='data/examples/compound_library.csv', interactive=False)
1170
- drug_library_upload_btn = gr.UploadButton(
1171
- label='OR Upload Your Own Library', variant='primary')
1172
- drug_library_upload = gr.File(label='Custom compound library file', visible=False)
1173
- with gr.Row():
1174
- with gr.Column():
1175
- HelpTip(
1176
- "Interaction prediction provides you binding probability score between the target of "
1177
- "interest and each compound in the library, "
1178
- "while affinity prediction directly estimates their binding strength measured using "
1179
- "IC50."
1180
- )
1181
- drug_screen_task = gr.Dropdown(
1182
- list(TASK_MAP.keys()),
1183
- label='Step 4. Select the Prediction Task You Want to Conduct',
1184
- value='Compound-protein interaction')
1185
-
1186
- with gr.Row():
1187
- with gr.Column():
1188
- HelpTip(
1189
- "Select your preferred model, or click Recommend for the best-performing model based "
1190
- "on the selected task, family, and whether the target was trained. "
1191
- "Please refer to documentation for detailed benchamrk results."
1192
- )
1193
- drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
1194
- label='Step 5. Select a Preset Model')
1195
- screen_preset_recommend_btn = gr.Button(
1196
- value='OR Let Us Recommend for You', variant='primary')
1197
- with gr.Row():
1198
- with gr.Column():
1199
- drug_screen_email = gr.Textbox(
1200
- label='Step 6. Input Your Email Address (Optional)',
1201
- info="Your email address will be used to notify you about the completion of your job."
1202
- )
1203
-
1204
- with gr.Row(visible=True):
1205
- with gr.Column():
1206
- # drug_screen_clr_btn = gr.ClearButton(size='lg')
1207
- drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
1208
- # TODO Modify the pd df directly with df['X2'] = target
1209
-
1210
- screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
1211
- screen_waiting = gr.Markdown("""
1212
- <center>Your job is running... It might take a few minutes.
1213
- When it's done, you will be redirected to the report page.
1214
- Meanwhile, please leave the page on.</center>
1215
- """, visible=False)
1216
-
1217
- with gr.TabItem(label='Target protein identification', id=1):
1218
- gr.Markdown('''
1219
- # <center>Target Protein Identification</center>
1220
-
1221
- <center>
1222
- To predict interactions or binding affinities of a single compound against a protein library.
1223
- </center>
1224
- ''')
1225
- with gr.Blocks() as identify_block:
1226
- with gr.Column() as identify_page:
1227
- with gr.Row():
1228
- with gr.Column():
1229
- HelpTip(
1230
- "Enter (paste) a compound SMILES below manually or upload a SDF file. "
1231
- "If multiple entities are in the SDF, only the first will be used. "
1232
- "SMILES can be obtained by searching for the compound of interest in databases such "
1233
- "as NCBI, PubChem and and ChEMBL."
1234
- )
1235
- compound_type = gr.Dropdown(
1236
- label='Step 1. Select Compound Input Type and Input',
1237
- choices=['SMILES', 'SDF'],
1238
- info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
1239
- value='SMILES',
1240
- interactive=True)
1241
- compound_upload_btn = gr.UploadButton(label='OR Upload a SDF File', variant='primary',
1242
- type='binary', visible=False)
1243
-
1244
- compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
1245
- example_drug = gr.Button(value='Example: Aspirin', elem_classes='example')
1246
-
1247
- with gr.Row():
1248
- with gr.Column():
1249
- HelpTip(
1250
- "By default, models trained on all protein families (general) will be applied. "
1251
- # "If the proteins in the target library of interest all belong to the same protein "
1252
- # "family, manually selecting the family is supported."
1253
- )
1254
- target_identify_target_family = gr.Dropdown(choices=['General'],
1255
- value='General',
1256
- label='Step 2. Select Target Family ('
1257
- 'Optional)')
1258
-
1259
- with gr.Row():
1260
- with gr.Column():
1261
- HelpTip(
1262
- "Select a preset target library (e.g., ChEMBL33_human_proteins). "
1263
- "Alternatively, upload a CSV file with a column named X2 containing target protein "
1264
- "sequences, or use an FASTA file (Max. 10,000 targets per task). "
1265
- "Example CSV and SDF files are provided below "
1266
- "and can be downloaded by clicking the lower right corner."
1267
- )
1268
- target_library = gr.Dropdown(label='Step 3. Select a Preset Target Library',
1269
- choices=list(TARGET_LIBRARY_MAP.keys()))
1270
- with gr.Row():
1271
- gr.File(label='Example FASTA target library',
1272
- value='data/examples/target_library.fasta', interactive=False)
1273
- gr.File(label='Example CSV target library',
1274
- value='data/examples/target_library.csv', interactive=False)
1275
- target_library_upload_btn = gr.UploadButton(
1276
- label='OR Upload Your Own Library', variant='primary')
1277
- target_library_upload = gr.File(label='Custom target library file', visible=False)
1278
-
1279
- with gr.Row():
1280
- with gr.Column():
1281
- HelpTip(
1282
- "Interaction prediction provides you binding probability score between the target of "
1283
- "interest and each compound in the library, "
1284
- "while affinity prediction directly estimates their binding strength measured using "
1285
- "IC50."
1286
- )
1287
- target_identify_task = gr.Dropdown(
1288
- list(TASK_MAP.keys()),
1289
- label='Step 4. Select the Prediction Task You Want to Conduct',
1290
- value='Compound-protein interaction')
1291
-
1292
- with gr.Row():
1293
- with gr.Column():
1294
- HelpTip(
1295
- "Select your preferred model, or click Recommend for the best-performing model based "
1296
- "on the selected task, family, and whether the compound was trained. "
1297
- "Please refer to documentation for detailed benchamrk results."
1298
- )
1299
- target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
1300
- label='Step 5. Select a Preset Model')
1301
- identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
1302
- variant='primary')
1303
-
1304
- with gr.Row():
1305
- with gr.Column():
1306
- target_identify_email = gr.Textbox(
1307
- label='Step 6. Input Your Email Address (Optional)',
1308
- info="Your email address will be used to notify you about the completion of your job."
1309
- )
1310
-
1311
- with gr.Row(visible=True):
1312
- # target_identify_clr_btn = gr.ClearButton(size='lg')
1313
- target_identify_btn = gr.Button(value='SUBMIT THE IDENTIFICATION JOB', variant='primary',
1314
- size='lg')
1315
-
1316
- identify_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
1317
- identify_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
1318
- f"When it's done, you will be redirected to the report page. "
1319
- f"Meanwhile, please leave the page on.",
1320
- visible=False)
1321
- with gr.TabItem(label='Interaction pair inference', id=2):
1322
- gr.Markdown('''
1323
- # <center>Interaction Pair Inference</center>
1324
- <center>To predict interactions or binding affinities between up to 10,000 paired compound-protein data.</center>
1325
- ''')
1326
- with gr.Blocks() as infer_block:
1327
- with gr.Column() as infer_page:
1328
- HelpTip(
1329
- "A custom interation pair dataset can be a CSV file with 2 required columns "
1330
- "(X1 for smiles and X2 for sequences) "
1331
- "and optionally 2 ID columns (ID1 for compound ID and ID2 for target ID), "
1332
- "or generated from a FASTA file containing multiple "
1333
- "sequences and a SDF file containing multiple compounds. "
1334
- "Currently, a maximum of 10,000 pairs is supported, "
1335
- "which means that the size of CSV file or "
1336
- "the product of the two library sizes should not exceed 10,000."
1337
- )
1338
- infer_type = gr.Dropdown(
1339
- choices=['Upload a CSV file containing paired compound-protein data',
1340
- 'Upload a compound library and a target library'],
1341
- label='Step 1. Select Pair Input Type and Input',
1342
- value='Upload a CSV file containing paired compound-protein data')
1343
- with gr.Column() as pair_upload:
1344
- gr.File(label="Example CSV dataset",
1345
- value="data/examples/interaction_pair_inference.csv",
1346
- interactive=False)
1347
- with gr.Row():
1348
- infer_csv_prompt = gr.Button(value="Upload Your Own Dataset Below",
1349
- visible=True)
1350
- with gr.Column():
1351
- infer_data_for_predict = gr.File(
1352
- label='Upload CSV File Containing Paired Records',
1353
- file_count="single", type='filepath', visible=True)
1354
- with gr.Column(visible=False) as pair_generate:
1355
- with gr.Row():
1356
- gr.File(label='Example SDF compound library',
1357
- value='data/examples/compound_library.sdf', interactive=False)
1358
- gr.File(label='Example FASTA target library',
1359
- value='data/examples/target_library.fasta', interactive=False)
1360
- with gr.Row():
1361
- gr.File(label='Example CSV compound library',
1362
- value='data/examples/compound_library.csv', interactive=False)
1363
- gr.File(label='Example CSV target library',
1364
- value='data/examples/target_library.csv', interactive=False)
1365
- with gr.Row():
1366
- infer_library_prompt = gr.Button(value="Upload Your Own Libraries Below",
1367
- visible=False)
1368
- with gr.Row():
1369
- infer_drug = gr.File(label='Upload SDF/CSV File Containing Multiple Compounds',
1370
- file_count="single", type='filepath')
1371
- infer_target = gr.File(label='Upload FASTA/CSV File Containing Multiple Targets',
1372
- file_count="single", type='filepath')
1373
-
1374
- with gr.Row():
1375
- with gr.Column():
1376
- HelpTip(
1377
- "By default, models trained on all protein families (general) will be applied. "
1378
- "If the proteins in the target library of interest "
1379
- "all belong to the same protein family, manually selecting the family is supported."
1380
- )
1381
- pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
1382
- value='General',
1383
- label='Step 2. Select Target Family (Optional)')
1384
-
1385
- with gr.Row():
1386
- with gr.Column():
1387
- HelpTip(
1388
- "Interaction prediction provides you binding probability score "
1389
- "between the target of interest and each compound in the library, "
1390
- "while affinity prediction directly estimates their binding strength "
1391
- "measured using IC50."
1392
- )
1393
- pair_infer_task = gr.Dropdown(
1394
- list(TASK_MAP.keys()),
1395
- label='Step 3. Select the Prediction Task You Want to Conduct',
1396
- value='Compound-protein interaction')
1397
-
1398
- with gr.Row():
1399
- with gr.Column():
1400
- HelpTip("Select your preferred model. "
1401
- "Please refer to documentation for detailed benchmark results."
1402
- )
1403
- pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()),
1404
- label='Step 4. Select a Preset Model')
1405
- # infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
1406
- # variant='primary')
1407
-
1408
- with gr.Row():
1409
- pair_infer_email = gr.Textbox(
1410
- label='Step 5. Input Your Email Address (Optional)',
1411
- info="Your email address will be used to notify you about the completion of your job."
1412
- )
1413
-
1414
- with gr.Row(visible=True):
1415
- # pair_infer_clr_btn = gr.ClearButton(size='lg')
1416
- pair_infer_btn = gr.Button(value='SUBMIT THE INFERENCE JOB', variant='primary', size='lg')
1417
-
1418
- infer_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
1419
- f"When it's done, you will be redirected to the report page. "
1420
- f"Meanwhile, please leave the page on.",
1421
- visible=False)
1422
-
1423
- with gr.TabItem(label='Chemical property report', id=3):
1424
- with gr.Blocks() as report:
1425
- gr.Markdown('''
1426
- # <center>Chemical Property Report</center>
1427
-
1428
- To compute chemical properties for the predictions of drug hit screening,
1429
- target protein identification, and interaction pair inference.
1430
-
1431
- You may also upload your own dataset using a CSV file containing
1432
- one required column `X1` for compound SMILES.
1433
-
1434
- The page shows only a preview report displaying at most 30 records
1435
- (with top predicted CPI/CPA if reporting results from a prediction job).
1436
-
1437
- Please first `Preview` the report, then `Generate` and download a CSV report
1438
- or an interactive HTML report below if you wish to access the full report.
1439
- ''')
1440
- with gr.Row():
1441
- with gr.Column():
1442
- file_for_report = gr.File(interactive=True, type='filepath')
1443
- report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
1444
- label='Specify the Task for the Labels in the Upload Dataset')
1445
- raw_df = gr.State(value=pd.DataFrame())
1446
- report_df = gr.State(value=pd.DataFrame())
1447
- scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
1448
- filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
1449
-
1450
- with gr.Row():
1451
- # clear_btn = gr.ClearButton(size='lg')
1452
- analyze_btn = gr.Button('Preview Top 30 Records', variant='primary', size='lg',
1453
- interactive=False)
1454
-
1455
- with gr.Row():
1456
- with gr.Column(scale=3):
1457
- html_report = gr.HTML() # label='Results', visible=True)
1458
- ranking_pie_chart = gr.Plot(visible=False)
1459
-
1460
- with gr.Row():
1461
- with gr.Column():
1462
- csv_generate = gr.Button(value='Generate CSV Report',
1463
- interactive=False, variant='primary')
1464
- csv_download_file = gr.File(label='Download CSV Report', visible=False)
1465
- with gr.Column():
1466
- html_generate = gr.Button(value='Generate HTML Report',
1467
- interactive=False, variant='primary')
1468
- html_download_file = gr.File(label='Download HTML Report', visible=False)
1469
-
1470
-
1471
def target_input_type_select(input_type):
    """Build the component updates for the chosen target input type.

    Returns a list of eight component updates, in the order the caller
    wires its outputs: the dropdown (info text), the upload button, the
    three text boxes (each reset to ``''``), the query button, the code
    box (reset to ``''``) and the final button. Returns ``None`` for an
    unrecognised ``input_type``.
    """
    if input_type not in ('UniProt ID', 'Gene symbol', 'Sequence'):
        return None

    by_uniprot = input_type == 'UniProt ID'
    by_gene = input_type == 'Gene symbol'
    by_sequence = input_type == 'Sequence'

    # Only the 'Sequence' mode shows an info hint on the dropdown.
    if by_sequence:
        hint = 'Enter (paste) a FASTA string below manually or upload a FASTA file.'
    else:
        hint = ''

    return [
        gr.Dropdown(info=hint),
        gr.UploadButton(visible=by_sequence),
        gr.Textbox(visible=by_uniprot, value=''),
        gr.Textbox(visible=by_gene, value=''),
        gr.Textbox(visible=by_gene, value=''),
        gr.Button(visible=not by_sequence),
        gr.Code(value=''),
        gr.Button(visible=by_sequence),
    ]
1500
-
1501
-
1502
- target_input_type.select(
1503
- fn=target_input_type_select,
1504
- inputs=target_input_type,
1505
- outputs=[
1506
- target_input_type, target_upload_btn,
1507
- target_id, target_gene, target_organism, target_query_btn,
1508
- target_fasta, target_paste_markdown
1509
- ],
1510
- show_progress=False
1511
- )
1512
-
1513
-
1514
- def uniprot_query(input_type, uid, gene, organism='Human'):
1515
- fasta_seq = ''
1516
-
1517
- match input_type:
1518
- case 'UniProt ID':
1519
- query = f"{uid.strip()}.fasta"
1520
- case 'Gene symbol':
1521
- organism = organism if organism else 'Human'
1522
- query = f'search?query=organism_name:{organism.strip()}+AND+gene:{gene.strip()}&format=fasta'
1523
-
1524
- try:
1525
- fasta = SESSION.get(UNIPROT_ENDPOINT.format(query=query))
1526
- fasta.raise_for_status()
1527
- fasta_seq = fasta.text
1528
- except Exception as e:
1529
- raise gr.Warning(f"Failed to query FASTA from UniProt database due to {str(e)}")
1530
- finally:
1531
- return fasta_seq
1532
-
1533
def process_fasta_upload(fasta_upload):
    """Decode an uploaded FASTA payload to text.

    Returns the decoded string, or ``''`` (after showing a Gradio warning)
    when decoding fails.
    """
    try:
        return fasta_upload.decode()
    except Exception as e:
        gr.Warning(f"Please upload a valid FASTA file. Error: {str(e)}")
        return ''
1540
-
1541
- target_upload_btn.upload(fn=process_fasta_upload, inputs=target_upload_btn, outputs=target_fasta)
1542
- target_query_btn.click(uniprot_query,
1543
- inputs=[target_input_type, target_id, target_gene, target_organism],
1544
- outputs=target_fasta)
1545
-
1546
-
1547
def target_family_detect(fasta, progress=gr.Progress(track_tqdm=True)):
    """Pick the protein family of the best-aligning reference sequence.

    The input FASTA is aligned (``PairwiseAligner`` with ``scoring='blastp'``
    and ``mode='local'``) against every ``X2`` sequence in the reference CSV,
    and the row with the highest alignment score decides the family.

    Args:
        fasta: FASTA text of the query target.
        progress: Gradio progress tracker (tqdm-tracking) injected by the UI.

    Returns:
        A ``gr.Dropdown`` update whose value is the capitalised family name
        and whose info text gives the score and the matching ``ID2``.
    """
    aligner = PairwiseAligner(scoring='blastp', mode='local')
    alignment_df = pd.read_csv('data/target_libraries/ChEMBL33_all_spe_single_prot_info.csv')

    # The query does not change between rows, so process the FASTA once
    # instead of once per library sequence.
    target_sequence = process_target_fasta(fasta)

    def align_score(query):
        return aligner.align(target_sequence, query).score

    alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
        desc="Detecting protein family of the target...").apply(align_score)
    row = alignment_df.loc[alignment_df['score'].idxmax()]
    return gr.Dropdown(value=row['protein_family'].capitalize(),
                       info=f"Reason: Best BLASTP score ({row['score']}) "
                            f"with {row['ID2']} from family {row['protein_family']}")
1560
-
1561
-
1562
- target_family_detect_btn.click(fn=target_family_detect, inputs=target_fasta, outputs=drug_screen_target_family)
1563
-
1564
- # target_fasta.focus(fn=wrap_text, inputs=target_fasta, outputs=target_fasta, show_progress=False)
1565
- target_fasta.blur(fn=wrap_text, inputs=target_fasta, outputs=target_fasta, show_progress=False)
1566
-
1567
- drug_library_upload_btn.upload(fn=lambda x: [
1568
- x.name, gr.Dropdown(value=Path(x.name).name, choices=list(DRUG_LIBRARY_MAP.keys()) + [Path(x.name).name])
1569
- ], inputs=drug_library_upload_btn, outputs=[drug_library_upload, drug_library])
1570
-
1571
-
1572
def example_fill(input_type):
    """Return the MAPK14 (Q16539) example values keyed by output component.

    ``input_type`` is accepted because the click handler passes it, but it
    is not read here.
    """
    # NOTE(review): the FASTA literal starts with a newline right after the
    # opening quotes, so the value does not begin with '>' -- confirm that
    # downstream FASTA handling tolerates the leading blank line.
    return {target_id: 'Q16539',
            target_gene: 'MAPK14',
            target_organism: 'Human',
            target_fasta: """
>sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
SIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQ
KLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMT
GYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVG
TPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAA
QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
"""}
1585
-
1586
-
1587
- example_fasta.click(fn=example_fill, inputs=target_input_type, outputs=[
1588
- target_id, target_gene, target_organism, target_fasta], show_progress=False)
1589
-
1590
-
1591
- # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1592
- # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1593
-
1594
-
1595
def screen_recommend_model(fasta, family, task):
    """Recommend the best-scoring benchmarked model for a screening job.

    The target is classified as "Seen Target" or "Unseen Target" by looking
    its processed sequence up in the seen-targets CSVs. The benchmark table
    is then filtered by family, scenario and model type, and the row with
    the highest value of the task's metric is recommended.

    Args:
        fasta: FASTA text of the target; an empty value triggers a warning.
        family: ``'General'`` or a key of ``TARGET_FAMILY_MAP``.
        task: A key of ``TASK_MAP``.

    Returns:
        A ``gr.Dropdown`` update selecting the recommended model with an
        explanatory info text, or ``None`` when no recommendation is possible.
    """
    task = TASK_MAP[task]
    score = TASK_METRIC_MAP[task]

    # Validate the input before touching any file on disk.
    if not fasta:
        gr.Warning('Please enter a valid FASTA for model recommendation.')
        return None

    benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
    # Process the FASTA once; it is compared against every seen-targets table.
    target_sequence = process_target_fasta(fasta)

    def scenario_for(split_prefix):
        """Return the seen/unseen label of the target for one training split."""
        seen_targets = pd.read_csv(
            f'data/benchmarks/seen_targets/{split_prefix}_{task.lower()}_random_split.csv')
        if target_sequence in seen_targets['X2'].values:
            return "Seen Target"
        return "Unseen Target"

    def benchmark_rows(family_name, scenario, model_type):
        """Select the benchmark rows for one family/scenario/type combination."""
        return benchmark_df[(benchmark_df['Family'] == family_name)
                            & (benchmark_df['Scenario'] == scenario)
                            & (benchmark_df['Type'] == model_type)]

    # Both branches need the scenario with respect to the general split.
    scenario_general = scenario_for('all_families_full')

    if family == 'General':
        filtered_df = benchmark_rows('All Families', scenario_general, 'General')
    else:
        scenario_family = scenario_for(TARGET_FAMILY_MAP[family])
        # General and family-specific models compete on the same family.
        filtered_df = pd.concat([
            benchmark_rows(family, scenario_general, 'General'),
            benchmark_rows(family, scenario_family, 'Family'),
        ])

    # idxmax() raises on an empty selection; report it instead of crashing.
    if filtered_df.empty:
        gr.Warning('No benchmark results are available for the selected task and family.')
        return None

    row = filtered_df.loc[filtered_df[score].idxmax()]

    return gr.Dropdown(value=row['Model'],
                       info=f"Reason: {row['Scenario']} in training; we recommend the model "
                            f"with the best {score} ({float(row[score]):.3f}) "
                            f"in the {row['Scenario']} scenario on {row['Family']}.")
1644
-
1645
-
1646
- screen_preset_recommend_btn.click(fn=screen_recommend_model,
1647
- inputs=[target_fasta, drug_screen_target_family, drug_screen_task],
1648
- outputs=drug_screen_preset)
1649
-
1650
-
1651
def compound_input_type_select(input_type):
    """Hide the upload button for ``'SMILES'`` input and show it for ``'SDF'``.

    Returns a ``gr.Button`` visibility update, or ``None`` for any other
    input type.
    """
    if input_type == 'SMILES':
        return gr.Button(visible=False)
    if input_type == 'SDF':
        return gr.Button(visible=True)
    return None
1657
-
1658
-
1659
- compound_type.select(fn=compound_input_type_select,
1660
- inputs=compound_type, outputs=compound_upload_btn, show_progress=False)
1661
-
1662
-
1663
def compound_upload_process(input_type, input_upload):
    """Turn an uploaded compound payload into a SMILES string.

    For ``'SMILES'`` the bytes are decoded as-is; for ``'SDF'`` the first
    molecule yielded by ``Chem.ForwardSDMolSupplier`` is converted with
    ``Chem.MolToSmiles``. Any failure shows a Gradio warning and yields
    ``''``; an unrecognised ``input_type`` also yields ``''``.
    """
    smiles = ''
    try:
        if input_type == 'SMILES':
            smiles = input_upload.decode()
        elif input_type == 'SDF':
            supplier = Chem.ForwardSDMolSupplier(io.BytesIO(input_upload))
            first_molecule = next(supplier)
            smiles = Chem.MolToSmiles(first_molecule)
    except Exception as e:
        gr.Warning(f"Please upload a valid {input_type} file. Error: {str(e)}")
    return smiles
1675
-
1676
-
1677
- compound_upload_btn.upload(fn=compound_upload_process,
1678
- inputs=[compound_type, compound_upload_btn],
1679
- outputs=compound_smiles)
1680
-
1681
- example_drug.click(fn=lambda: 'CC(=O)Oc1ccccc1C(=O)O', outputs=compound_smiles, show_progress=False)
1682
-
1683
- target_library_upload_btn.upload(fn=lambda x: [
1684
- x.name, gr.Dropdown(value=Path(x.name).name, choices=list(TARGET_LIBRARY_MAP.keys()) + [Path(x.name).name])
1685
- ], inputs=target_library_upload_btn, outputs=[target_library_upload, target_library])
1686
-
1687
-
1688
def identify_recommend_model(smiles, task):
    """Recommend a target-identification model preset for a query compound.

    The compound is classified as seen or unseen by looking up its
    canonicalized SMILES in the benchmark's seen-drug list; the general model
    with the best task metric for that scenario is then selected.

    Args:
        smiles: Query compound SMILES; empty input triggers a warning.
        task: Task label, mapped through ``TASK_MAP``.

    Returns:
        A ``gr.Dropdown`` update carrying the recommended model and the
        reason, or ``None`` when no SMILES was given.
    """
    task = TASK_MAP[task]
    score = TASK_METRIC_MAP[task]

    # Validate before touching the benchmark files so an empty query is cheap.
    if not smiles:
        gr.Warning('Please enter a valid SMILES for model recommendation.')
        return None

    benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
    seen_drugs = pd.read_csv(
        f'data/benchmarks/seen_drugs/all_families_full_{task.lower()}_random_split.csv')

    if rdkit_canonicalize(smiles) in seen_drugs['X1'].values:
        scenario = "Seen Compound"
    else:
        scenario = "Unseen Compound"

    filtered_df = benchmark_df[(benchmark_df['Family'] == 'All Families')
                               & (benchmark_df['Scenario'] == scenario)
                               & (benchmark_df['Type'] == 'General')]

    row = filtered_df.loc[filtered_df[score].idxmax()]

    # ".3f" (three decimals); the previous "3f" spec meant width 3 with the
    # default six decimals.
    return gr.Dropdown(value=row['Model'],
                       info=f"Reason: {scenario} in training; choosing the model "
                            f"with the best {score} ({float(row[score]):.3f}) "
                            f"in the {scenario} scenario.")
1714
-
1715
-
1716
# Fill the target-identification preset dropdown with the recommended model.
identify_preset_recommend_btn.click(
    fn=identify_recommend_model,
    inputs=[
        compound_smiles,
        target_identify_task,
    ],
    outputs=target_identify_preset,
)
1719
-
1720
-
1721
def infer_type_change(upload_type):
    """Switch the pair-inference form between library mode and CSV mode.

    Returns a dict of component updates that shows the columns and prompt for
    the selected mode, hides the other mode's, and clears the staged data and
    both library inputs. Returns ``None`` for an unrecognized ``upload_type``.
    """
    library_mode = "Upload a compound library and a target library"
    csv_mode = "Upload a CSV file containing paired compound-protein data"

    if upload_type == library_mode:
        use_libraries = True
    elif upload_type == csv_mode:
        use_libraries = False
    else:
        return None

    return {
        pair_upload: gr.Column(visible=not use_libraries),
        pair_generate: gr.Column(visible=use_libraries),
        infer_data_for_predict: None,
        infer_drug: None,
        infer_target: None,
        infer_csv_prompt: gr.Button(visible=not use_libraries),
        infer_library_prompt: gr.Button(visible=use_libraries),
    }
1744
-
1745
-
1746
# Re-layout the pair-inference form when the upload mode changes.
infer_type.select(
    fn=infer_type_change,
    inputs=infer_type,
    outputs=[
        pair_upload,
        pair_generate,
        infer_data_for_predict,
        infer_drug,
        infer_target,
        infer_csv_prompt,
        infer_library_prompt,
    ],
)
1749
-
1750
-
1751
def drug_screen_validate(fasta, library, library_upload, state, progress=gr.Progress(track_tqdm=True)):
    """Validate a drug hit screening request and stage its input CSV.

    Args:
        fasta: Target protein input, normalized by ``process_target_fasta``.
        library: Name of a built-in compound library, or anything else to use
            ``library_upload``.
        library_upload: Uploaded compound library.
        state: Current session run state; truthy means a job is running.
        progress: Gradio progress tracker (not used directly here).

    Returns:
        A dict of component updates. On success it carries the staged CSV
        path plus the new job id for ``screen_flag`` and ``run_state``; on
        failure ``screen_flag`` is ``False``.
    """
    if state:
        gr.Warning('You have another prediction job '
                   '(drug hit screening, target protein identification, or interaction pair inference) '
                   'running in the session right now. '
                   'Please submit another job when your current job has finished.')
        return {screen_flag: False,
                run_state: state}

    try:
        fasta = process_target_fasta(fasta)
        err = validate_seq_str(fasta, FASTA_PAT)
        if err:
            raise ValueError(f'Found error(s) in your target fasta input: {err}')

        if library in DRUG_LIBRARY_MAP.keys():
            screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
        else:
            screen_df = process_drug_library_upload(library_upload)
            # NOTE(review): the cap is applied to uploads only; the nesting is
            # inferred from the message wording -- confirm. Also confirm
            # whether ">=" (rejecting exactly the maximum) is intended.
            if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
                raise gr.Error(f'The uploaded compound library has more records '
                               f'than the allowed maximum ({CUSTOM_DATASET_MAX_LEN}).')

        # Every compound is paired with the single query target.
        screen_df['X2'] = fasta

        job_id = uuid4()
        temp_file = Path(f'temp/{job_id}_input.csv').resolve()
        screen_df.to_csv(temp_file, index=False)
        if not temp_file.is_file():
            raise SystemError('Failed to create temporary files. Please try again later.')

        return {screen_data_for_predict: str(temp_file),
                screen_flag: job_id,
                run_state: job_id}
    except Exception as e:
        # Any failure is reported as a warning and clears the run state.
        gr.Warning(f'Failed to submit the job due to error: {str(e)}')
        return {screen_flag: False,
                run_state: False}
1788
-
1789
-
1790
def target_identify_validate(smiles, library, library_upload, state, progress=gr.Progress(track_tqdm=True)):
    """Validate a target identification request and stage its input CSV.

    Args:
        smiles: Query compound SMILES (surrounding whitespace is stripped).
        library: Name of a built-in target library, or anything else to use
            ``library_upload``.
        library_upload: Uploaded target library.
        state: Current session run state; truthy means a job is running.
        progress: Gradio progress tracker (not used directly here).

    Returns:
        A dict of component updates. On success it carries the staged CSV
        path plus the new job id for ``identify_flag`` and ``run_state``; on
        failure ``identify_flag`` is ``False``.
    """
    if state:
        gr.Warning('You have another prediction job '
                   '(drug hit screening, target protein identification, or interaction pair inference) '
                   'running in the session right now. '
                   'Please submit another job when your current job has finished.')
        return {identify_flag: False,
                run_state: state}

    try:
        smiles = smiles.strip()
        err = validate_seq_str(smiles, SMILES_PAT)
        if err:
            # The input validated here is the compound SMILES, not a FASTA.
            raise ValueError(f'Found error(s) in your compound SMILES input: {err}')

        if library in TARGET_LIBRARY_MAP.keys():
            identify_df = pd.read_csv(Path('data/target_libraries', TARGET_LIBRARY_MAP[library]))
        else:
            identify_df = process_target_library_upload(library_upload)
            # NOTE(review): the cap is applied to uploads only; the nesting is
            # inferred from the message wording -- confirm.
            if len(identify_df) >= CUSTOM_DATASET_MAX_LEN:
                raise gr.Error(f'The uploaded target library has more records '
                               f'than the allowed maximum ({CUSTOM_DATASET_MAX_LEN}).')

        # Every target is paired with the single query compound.
        identify_df['X1'] = smiles

        job_id = uuid4()
        temp_file = Path(f'temp/{job_id}_input.csv').resolve()
        identify_df.to_csv(temp_file, index=False)
        if not temp_file.is_file():
            raise SystemError('Failed to create temporary files. Please try again later.')

        return {identify_data_for_predict: str(temp_file),
                identify_flag: job_id,
                run_state: job_id}
    except Exception as e:
        # Any failure is reported as a warning and clears the run state.
        gr.Warning(f'Failed to submit the job due to error: {str(e)}')
        return {identify_flag: False,
                run_state: False}
1827
-
1828
-
1829
def pair_infer_validate(drug_target_pair_upload, drug_upload, target_upload, state,
                        progress=gr.Progress(track_tqdm=True)):
    """Validate a pair-inference request and stage its input data.

    Accepts either a paired CSV with ``X1``/``X2`` columns, or a compound
    library plus a target library whose Cartesian product is generated.

    Args:
        drug_target_pair_upload: Paired compound-protein CSV, if provided.
        drug_upload: Compound library upload (used with ``target_upload``).
        target_upload: Target library upload (used with ``drug_upload``).
        state: Current session run state; truthy means a job is running.
        progress: Gradio progress tracker (not used directly here).

    Returns:
        A dict of component updates. On success it carries the data path plus
        the new job id for ``infer_flag`` and ``run_state``; on failure
        ``infer_flag`` is ``False``.
    """
    if state:
        gr.Warning('You have another prediction job '
                   '(drug hit screening, target protein identification, or interaction pair inference) '
                   'running in the session right now. '
                   'Please submit another job when your current job has finished.')
        return {infer_flag: False,
                run_state: state}

    size_error = (f'The uploaded/generated compound-protein pair dataset has more records '
                  f'than the allowed maximum ({CUSTOM_DATASET_MAX_LEN}).')

    try:
        job_id = uuid4()
        if drug_target_pair_upload:
            infer_df = pd.read_csv(drug_target_pair_upload)
            validate_columns(infer_df, ['X1', 'X2'])

            # Enforce the cap before the per-row validation. Previously the
            # check sat after the return and was never reached.
            if len(infer_df) >= CUSTOM_DATASET_MAX_LEN:
                raise gr.Error(size_error)

            infer_df['X1_ERR'] = infer_df['X1'].swifter.progress_bar(desc="Validating SMILES...").apply(
                validate_seq_str, regex=SMILES_PAT)
            if not infer_df['X1_ERR'].isna().all():
                raise ValueError(
                    f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")

            infer_df['X2_ERR'] = infer_df['X2'].swifter.progress_bar(desc="Validating FASTA...").apply(
                validate_seq_str, regex=FASTA_PAT)
            if not infer_df['X2_ERR'].isna().all():
                raise ValueError(
                    f"Encountered invalid FASTA:\n{infer_df[~infer_df['X2_ERR'].isna()][['X2', 'X2_ERR']]}")

            # The uploaded file itself is passed on to prediction.
            data_path = str(drug_target_pair_upload)

        elif drug_upload and target_upload:
            drug_df = process_drug_library_upload(drug_upload)
            target_df = process_target_library_upload(target_upload)

            drug_df.drop_duplicates(subset=['X1'], inplace=True)
            target_df.drop_duplicates(subset=['X2'], inplace=True)

            # The product has len(drug_df) * len(target_df) rows; reject an
            # oversized request before materializing it.
            if len(drug_df) * len(target_df) >= CUSTOM_DATASET_MAX_LEN:
                raise gr.Error(size_error)

            infer_df = pd.DataFrame(list(itertools.product(drug_df['X1'], target_df['X2'])),
                                    columns=['X1', 'X2'])
            infer_df = infer_df.merge(drug_df, on='X1').merge(target_df, on='X2')

            temp_file = Path(f'temp/{job_id}_input.csv').resolve()
            infer_df.to_csv(temp_file, index=False)
            if not temp_file.is_file():
                # Previously this case fell through and returned None.
                raise SystemError('Failed to create temporary files. Please try again later.')
            data_path = str(temp_file)

        else:
            raise gr.Error('Should upload a compound-protein pair dataset, or '
                           'upload both a compound library and a target library.')

        return {infer_data_for_predict: data_path,
                infer_flag: job_id,
                run_state: job_id}

    except Exception as e:
        # Any failure is reported as a warning and clears the run state.
        gr.Warning(f'Failed to submit the job due to error: {str(e)}')
        return {infer_flag: False,
                run_state: False}
1892
-
1893
-
1894
def _wire_prediction_job(button, validate_fn, validate_inputs, data_state, flag,
                         page, waiting, task, preset, family):
    """Register the validate -> wait -> predict -> report chain on ``button``."""
    button.click(
        fn=validate_fn,
        inputs=validate_inputs,
        outputs=[data_state, flag, run_state],
    ).then(
        # Swap the form for the waiting message while the job runs.
        fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)],
        outputs=[page, waiting],
    ).then(
        fn=submit_predict,
        inputs=[data_state, task, preset, family, flag, run_state],
        outputs=[file_for_report, run_state, report_upload_flag],
    ).then(
        # Restore the form and select tab 3.
        fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
        outputs=[page, waiting, tabs],
    )


_wire_prediction_job(
    drug_screen_btn, drug_screen_validate,
    [target_fasta, drug_library, drug_library_upload, run_state],
    screen_data_for_predict, screen_flag, screen_page, screen_waiting,
    drug_screen_task, drug_screen_preset, drug_screen_target_family,
)

_wire_prediction_job(
    target_identify_btn, target_identify_validate,
    [compound_smiles, target_library, target_library_upload, run_state],
    identify_data_for_predict, identify_flag, identify_page, identify_waiting,
    target_identify_task, target_identify_preset, target_identify_target_family,
)

_wire_prediction_job(
    pair_infer_btn, pair_infer_validate,
    [infer_data_for_predict, infer_drug, infer_target, run_state],
    infer_data_for_predict, infer_flag, infer_page, infer_waiting,
    pair_infer_task, pair_infer_preset, pair_infer_target_family,
)
1944
-
1945
- # TODO background job from these 3 pipelines to update file_for_report
1946
def inquire_task(df, upload_flag):
    """Decide whether the user must pick a report task for an uploaded dataset.

    Args:
        df: The dataset whose columns are inspected.
        upload_flag: Truthy when the dataset came from a user upload.

    Returns:
        A dict of component updates. Without an upload the task dropdown is
        hidden. With an upload that has a ``Y`` or ``Y^`` column, the
        dropdown is shown and the analyze/generate buttons are disabled.
        Otherwise those buttons are enabled.
    """
    if not upload_flag:
        return {report_task: gr.Dropdown(visible=False)}

    # Actual labels take precedence over predicted ones.
    if 'Y' in df.columns:
        label = 'actual CPI/CPA labels (`Y`)'
    elif 'Y^' in df.columns:
        label = 'predicted CPI/CPA labels (`Y^`)'
    else:
        return {analyze_btn: gr.Button(interactive=True),
                csv_generate: gr.Button(interactive=True),
                html_generate: gr.Button(interactive=True)}

    task_prompt = gr.Dropdown(visible=True,
                              info=f'Found {label} in your uploaded dataset. '
                                   'Is it compound-target interaction or binding affinity?')
    return {report_task: task_prompt,
            html_report: '',
            analyze_btn: gr.Button(interactive=False),
            csv_generate: gr.Button(interactive=False),
            html_generate: gr.Button(interactive=False)}
1966
-
1967
# Mark the report file as user-uploaded.
file_for_report.upload(fn=lambda: True, outputs=report_upload_flag)

# On a new report file: reload the data, reset the export controls, then
# ask for the task type if needed.
file_for_report.change(
    fn=update_df,
    inputs=file_for_report,
    outputs=[html_report, raw_df, report_df, analyze_btn],
).success(
    fn=lambda: [gr.Button(interactive=False)]*2 + [gr.File(visible=False)]*2 + [gr.Dropdown(visible=False)],
    outputs=[csv_generate, html_generate, csv_download_file, html_download_file, report_task],
).then(
    fn=inquire_task,
    inputs=[raw_df, report_upload_flag],
    outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate],
)

# Clearing the file hides the task dropdown and resets the upload flag.
file_for_report.clear(
    fn=lambda: [gr.Dropdown(visible=False, value=None), False],
    outputs=[report_task, report_upload_flag],
)

# Build the report, then enable both export buttons on success.
analyze_btn.click(
    fn=submit_report,
    inputs=[raw_df, scores, filters, report_task],
    outputs=[html_report, report_df, csv_download_file, html_download_file],
).success(
    fn=lambda: [gr.Button(interactive=True)] * 2,
    outputs=[csv_generate, html_generate],
)

# Choosing a task enables the analyze button.
report_task.select(
    fn=lambda: gr.Button(interactive=True),
    outputs=analyze_btn,
)
1988
-
1989
-
1990
def create_csv_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
    """Write the report table to a timestamped CSV under ``reports/``.

    The ``Compound`` and ``Scaffold`` columns are dropped before export.

    Returns:
        A ``gr.File`` pointing at the CSV, or ``None`` after showing a
        warning if anything fails.
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        source_stem = Path(file_report.name).stem
        csv_path = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.csv"

        exportable = df.drop(labels=['Compound', 'Scaffold'], axis=1)
        exportable.to_csv(csv_path, index=False)

        return gr.File(csv_path)
    except Exception as e:
        gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
        return None
2000
-
2001
-
2002
def create_html_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
    """Render the report table to a timestamped HTML file under ``reports/``.

    Returns:
        A visible ``gr.File`` pointing at the HTML report, or ``None`` after
        showing a warning if anything fails.
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        source_stem = Path(file_report.name).stem
        html_path = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.html"
        create_html_report(df, html_path)
        return gr.File(html_path, visible=True)
    except Exception as e:
        gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
        return None
2011
-
2012
-
2013
# A changed report makes both generate buttons visible again.
html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])

# Each generate button hides itself, reveals its download slot, then builds
# the file.
for generate_btn, download_file, build_report in (
    (csv_generate, csv_download_file, create_csv_report_file),
    (html_generate, html_download_file, create_html_report_file),
):
    generate_btn.click(
        lambda: [gr.Button(visible=False), gr.File(visible=True)],
        outputs=[generate_btn, download_file],
    ).then(
        fn=build_report,
        inputs=[report_df, file_for_report],
        outputs=download_file,
        show_progress='full',
    )
2022
-
2023
- # screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
2024
- # every=5)
2025
- # identify_waiting.change(fn=check_job_status, inputs=run_state, outputs=[identify_waiting, tabs, file_for_report],
2026
- # every=5)
2027
- # pair_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
2028
- # every=5)
2029
-
2030
- # demo.load(None, None, None, js="() => {document.body.classList.remove('dark')}")
2031
-
2032
if __name__ == "__main__":
    # Each block gets its own request queue capped at three entries.
    for block in (screen_block, identify_block, infer_block, report):
        block.queue(max_size=3)

    # SCHEDULER.add_job(func=file_cleanup(), trigger="interval", seconds=60)
    # SCHEDULER.start()

    demo.launch(show_api=False)
 
1
+ from email.utils import formatdate, make_msgid
2
+ from email.mime.multipart import MIMEMultipart
3
+ from email.mime.text import MIMEText
4
+ import smtplib
5
+ from markdown import markdown
 
 
 
6
 
 
 
 
 
 
7
 
8
def send_email(receiver, job_info):
    """Email a DeepSEQreen job status notification to ``receiver``.

    The message is ``multipart/alternative`` with a plain-text part and an
    HTML part rendered from the same Markdown template.

    SMTP settings are read from the environment so that no credential lives
    in the source:

    - ``DEEPSEQREEN_SMTP_HOST`` (default ``smtpdm.aliyun.com``)
    - ``DEEPSEQREEN_SMTP_PORT`` (default ``80``)
    - ``DEEPSEQREEN_SMTP_USER`` (required; also used as the sender address)
    - ``DEEPSEQREEN_SMTP_PASSWORD`` (required)

    Args:
        receiver: Recipient email address.
        job_info: Mapping providing the template fields ``status``, ``id``,
            ``type``, ``start_time``, ``end_time``, ``expiry_time`` and
            ``error``.

    Raises:
        RuntimeError: If the SMTP user or password is not configured.
        KeyError: If ``job_info`` lacks a template field.
        smtplib.SMTPException: If login or delivery fails.
    """
    import os  # local import: keeps this block self-contained

    email_serv = os.environ.get("DEEPSEQREEN_SMTP_HOST", "smtpdm.aliyun.com")
    email_port = int(os.environ.get("DEEPSEQREEN_SMTP_PORT", "80"))
    email_addr = os.environ.get("DEEPSEQREEN_SMTP_USER")
    email_pass = os.environ.get("DEEPSEQREEN_SMTP_PASSWORD")
    if not email_addr or not email_pass:
        raise RuntimeError(
            "SMTP credentials are not configured; set DEEPSEQREEN_SMTP_USER "
            "and DEEPSEQREEN_SMTP_PASSWORD.")

    # Markdown template; kept unindented so no line renders as a code block.
    email_form = (
        "\n"
        "Dear user,\n"
        "\n"
        "Your DeepSEQreen job is {status}.\n"
        "\n"
        "**Job details:**\n"
        "\n"
        "- Job id: {id}\n"
        "- Job type: {type}\n"
        "- Start time: {start_time}\n"
        "- End time: {end_time}\n"
        "- Expiry time: {expiry_time}\n"
        "- Error: {error}\n"
        "\n"
        "Please visit the [DeepSEQreen web server](https://www.ciddr-lab.ac.cn/deepseqreen/) "
        "to check the job status or retrieve the results.\n"
        "\n"
        "Best,\n"
        "\n"
        "CIDDR Team\n"
    )
    # Build the whole message before connecting, so a bad job_info cannot
    # leave an SMTP session open.
    body_text = email_form.format(**job_info)

    msg = MIMEMultipart("alternative")
    msg["From"] = email_addr
    msg["To"] = receiver
    msg["Subject"] = f"DeepSEQreen Job {job_info['status']}: {job_info['id']}"
    msg["Date"] = formatdate(localtime=True)
    msg["Message-ID"] = make_msgid()

    # In multipart/alternative the last part is the preferred one, so the
    # plain text goes first and the HTML rendering last.
    msg.attach(MIMEText(body_text, 'plain'))
    msg.attach(MIMEText(markdown(body_text), 'html'))

    # The context manager closes the session even if login or sending fails.
    with smtplib.SMTP(email_serv, email_port) as server:
        # NOTE(review): STARTTLS stays disabled as before, so the login is
        # sent unencrypted -- confirm whether the provider offers a TLS port.
        server.login(email_addr, email_pass)
        server.sendmail(email_addr, receiver, msg.as_string())
49
 
50
if __name__ == "__main__":
    # Manual smoke test; guarded so importing this module does not send mail.
    # NOTE(review): the receiver address looks garbled in this view -- confirm
    # the intended recipient before running.
    send_email('xinran.[email protected]', {'id': 'a1b2c3d', 'type': 'Drug Hit Screening', 'status': 'RUNNING', 'start_time': '2021-10-10 10:00:00', 'end_time': 'TBD', 'expiry_time': 'TBD', 'error': 'TBD'})