DeepSEQreen_NAR_fb

Sleeping

App Files Files Community

libokj commited on Mar 14, 2024

Commit

9b0c1e4

1 Parent(s): d113635

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -2030

app.py CHANGED Viewed

@@ -1,2043 +1,50 @@
-from datetime import datetime
-import hashlib
-import itertools
-import json
-import textwrap
-import threading
-from math import pi
-from uuid import uuid4
-import io
-import os
-import pathlib
-from pathlib import Path
-import sys
-import numpy as np
-from Bio import SeqIO
-from Bio.Align import PairwiseAligner
-# from email_validator import validate_email
-import gradio as gr
-import hydra
-import pandas as pd
-import requests
-from rdkit.Chem.PandasTools import _MolPlusFingerprint
-from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
-from requests.adapters import HTTPAdapter, Retry
-from rdkit import Chem
-from rdkit.Chem import RDConfig, Descriptors, Draw, Lipinski, Crippen, PandasTools
-from rdkit.Chem.Scaffolds import MurckoScaffold
-import seaborn as sns
-from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
-from bokeh.palettes import Category20c_20
-from bokeh.plotting import figure
-from bokeh.transform import cumsum
-from bokeh.resources import INLINE
-import panel as pn
-import swifter
-from tqdm.auto import tqdm
-from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
-from deepscreen.predict import predict
-sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
-import sascorer
-ROOT = Path.cwd()
-# DF_FOR_REPORT = pd.DataFrame()
-pd.set_option('display.float_format', '{:.3f}'.format)
-PandasTools.molRepresentation = 'svg'
-PandasTools.drawOptions = Draw.rdMolDraw2D.MolDrawOptions()
-PandasTools.drawOptions.clearBackground = False
-PandasTools.drawOptions.bondLineWidth = 1
-PandasTools.drawOptions.explicitMethyl = True
-PandasTools.drawOptions.singleColourWedgeBonds = True
-PandasTools.drawOptions.useCDKAtomPalette()
-PandasTools.molSize = (128, 80)
-SESSION = requests.Session()
-ADAPTER = HTTPAdapter(max_retries=Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]))
-SESSION.mount('http://', ADAPTER)
-SESSION.mount('https://', ADAPTER)
-# SCHEDULER = BackgroundScheduler()
-UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
-CUSTOM_DATASET_MAX_LEN = 10_000
-CSS = """
-.help-tip {
-  position: absolute;
-  display: inline-block;
-  top: 16px;
-  right: 0px;
-  text-align: center;
-  border-radius: 40%;
-  /* border: 2px solid darkred; background-color: #8B0000;*/
-  width: 24px;
-  height: 24px;
-  font-size: 16px;
-  line-height: 26px;
-  cursor: default;
-  transition: all 0.5s cubic-bezier(0.55, 0, 0.1, 1);
-  z-index: 100 !important;
-}
-.help-tip:hover {
-  cursor: pointer;
-  /*background-color: #ccc;*/
-}
-.help-tip:before {
-  content: '?';
-  font-weight: 700;
-  color: #8B0000;
-  z-index: 100 !important;
-}
-.help-tip p {
-  visibility: hidden;
-  opacity: 0;
-  text-align: left;
-  background-color: #EFDDE3;
-  padding: 20px;
-  width: 300px;
-  position: absolute;
-  border-radius: 4px;
-  right: -4px;
-  color: #494F5A;
-  font-size: 13px;
-  line-height: normal;
-  transform: scale(0.7);
-  transform-origin: 100% 0%;
-  transition: all 0.5s cubic-bezier(0.55, 0, 0.1, 1);
-  z-index: 100;
-}
-.help-tip:hover p {
-  cursor: default;
-  visibility: visible;
-  opacity: 1;
-  transform: scale(1.0);
-}
-.help-tip p:before {
-  position: absolute;
-  content: '';
-  width: 0;
-  height: 0;
-  border: 6px solid transparent;
-  border-bottom-color: #EFDDE3;
-  right: 10px;
-  top: -12px;
-}
-.help-tip p:after {
-  width: 100%;
-  height: 40px;
-  content: '';
-  position: absolute;
-  top: -5px;
-  left: 0;
-}
-.upload_button {
-  background-color: #008000;
-}
-.absolute {
-  position: absolute;
-}
-.example {
-padding: 0;
-background: none;
-border: none;
-text-decoration: underline;
-box-shadow: none;
-text-align: left !important;
-display: inline-block !important;
-}
-footer {
-visibility: hidden
-}
 """
-class HelpTip:
-    def __new__(cls, text):
-        return gr.HTML(
-            # elem_classes="absolute",
-            value=f'<div class="help-tip"><p>{text}</p>',
-        )
-def sa_score(mol):
-    return sascorer.calculateScore(mol)
-def mw(mol):
-    return Chem.Descriptors.MolWt(mol)
-def mr(mol):
-    return Crippen.MolMR(mol)
-def hbd(mol):
-    return Lipinski.NumHDonors(mol)
-def hba(mol):
-    return Lipinski.NumHAcceptors(mol)
-def logp(mol):
-    return Crippen.MolLogP(mol)
-def atom(mol):
-    return CalcNumAtoms(mol)
-def heavy_atom(mol):
-    return CalcNumHeavyAtoms(mol)
-def rotatable_bond(mol):
-    return CalcNumRotatableBonds((mol))
-def tpsa(mol):
-    return CalcTPSA((mol))
-def lipinski(mol):
-    """
-    Lipinski's rules:
-    Hydrogen bond donors <= 5
-    Hydrogen bond acceptors <= 10
-    Molecular weight <= 500 daltons
-    logP <= 5
-    """
-    if hbd(mol) > 5:
-        return False
-    elif hba(mol) > 10:
-        return False
-    elif mw(mol) > 500:
-        return False
-    elif logp(mol) > 5:
-        return False
-    else:
-        return True
-def reos(mol):
-    """
-    Rapid Elimination Of Swill filter:
-    Molecular weight between 200 and 500
-    LogP between -5.0 and +5.0
-    H-bond donor count between 0 and 5
-    H-bond acceptor count between 0 and 10
-    Formal charge between -2 and +2
-    Rotatable bond count between 0 and 8
-    Heavy atom count between 15 and 50
-    """
-    if not 200 < mw(mol) < 500:
-        return False
-    elif not -5.0 < logp(mol) < 5.0:
-        return False
-    elif not 0 < hbd(mol) < 5:
-        return False
-    elif not 0 < hba(mol) < 10:
-        return False
-    elif not 0 < rotatable_bond(mol) < 8:
-        return False
-    elif not 15 < heavy_atom(mol) < 50:
-        return False
-    else:
-        return True
-def ghose(mol):
-    """
-    Ghose drug like filter:
-    Molecular weight between 160 and 480
-    LogP between -0.4 and +5.6
-    Atom count between 20 and 70
-    Molar refractivity between 40 and 130
-    """
-    if not 160 < mw(mol) < 480:
-        return False
-    elif not -0.4 < logp(mol) < 5.6:
-        return False
-    elif not 20 < atom(mol) < 70:
-        return False
-    elif not 40 < mr(mol) < 130:
-        return False
-    else:
-        return True
-def veber(mol):
-    """
-    The Veber filter is a rule of thumb filter for orally active drugs described in
-    Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
-    Rotatable bonds <= 10
-    Topological polar surface area <= 140
-    """
-    if not rotatable_bond(mol) <= 10:
-        return False
-    elif not tpsa(mol) <= 140:
-        return False
-    else:
-        return True
-def rule_of_three(mol):
-    """
-    Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
-    Molecular weight <= 300
-    LogP <= 3
-    H-bond donor <= 3
-    H-bond acceptor count <= 3
-    Rotatable bond count <= 3
-    """
-    if not mw(mol) <= 300:
-        return False
-    elif not logp(mol) <= 3:
-        return False
-    elif not hbd(mol) <= 3:
-        return False
-    elif not hba(mol) <= 3:
-        return False
-    elif not rotatable_bond(mol) <= 3:
-        return False
-    else:
-        return True
-# def smarts_filter():
-#     alerts = Chem.MolFromSmarts("enter one smart here")
-#     detected_alerts = []
-#     for smiles in data['X1']:
-#         mol = Chem.MolFromSmiles(smiles)
-#         detected_alerts.append(mol.HasSubstructMatch(alerts))
-SCORE_MAP = {
-    'SAscore': sa_score,
-    'LogP': logp,
-    'Molecular Weight': mw,
-    'Number of Heavy Atoms': heavy_atom,
-    'Molar Refractivity': mr,
-    'H-Bond Donor Count': hbd,
-    'H-Bond Acceptor Count': hba,
-    'Rotatable Bond Count': rotatable_bond,
-    'Topological Polar Surface Area': tpsa,
-}
-FILTER_MAP = {
-    # TODO support number_of_violations
-    'REOS': reos,
-    "Lipinski's Rule of Five": lipinski,
-    'Ghose': ghose,
-    'Rule of Three': rule_of_three,
-    'Veber': veber,
-    # 'PAINS': pains,
-}
-TASK_MAP = {
-    'Compound-protein interaction': 'DTI',
-    'Compound-protein binding affinity': 'DTA',
-}
-TASK_METRIC_MAP = {
-    'DTI': 'AUROC',
-    'DTA': 'CI',
-}
-PRESET_MAP = {
-    'DeepDTA': 'deep_dta',
-    'DeepConvDTI': 'deep_conv_dti',
-    'GraphDTA': 'graph_dta',
-    'MGraphDTA': 'm_graph_dta',
-    'HyperAttentionDTI': 'hyper_attention_dti',
-    'MolTrans': 'mol_trans',
-    'TransformerCPI': 'transformer_cpi',
-    'TransformerCPI2': 'transformer_cpi_2',
-    'DrugBAN': 'drug_ban',
-    'DrugVQA-Seq': 'drug_vqa'
-}
-TARGET_FAMILY_MAP = {
-    'General': 'general',
-    'Kinase': 'kinase',
-    'Non-Kinase Enzyme': 'non_kinase_enzyme',
-    'Membrane Receptor': 'membrane_receptor',
-    'Nuclear Receptor': 'nuclear_receptor',
-    'Ion Channel': 'ion_channel',
-    'Others': 'others',
-}
-TARGET_LIBRARY_MAP = {
-    'DrugBank (Human)': 'drugbank_targets.csv',
-    'ChEMBL33 (Human)': 'ChEMBL33_human_proteins.csv',
-}
-DRUG_LIBRARY_MAP = {
-    'DrugBank (Human)': 'drugbank_compounds.csv',
-    'Drug Repurposing Hub': 'drug_repurposing_hub.csv'
-}
-COLUMN_ALIASES = {
-    'X1': 'Compound SMILES',
-    'X2': 'Target FASTA',
-    'ID1': 'Compound ID',
-    'ID2': 'Target ID',
-    'Y': 'Actual CPI/CPA',
-    'Y^': 'Predicted CPI/CPA',
-}
-def validate_columns(df, mandatory_cols):
-    missing_cols = [col for col in mandatory_cols if col not in df.columns]
-    if missing_cols:
-        error_message = (f"The following mandatory columns are missing "
-                         f"in the uploaded dataset: {str(mandatory_cols).strip('[]')}.")
-        raise ValueError(error_message)
-    else:
-        return
-def process_target_fasta(sequence):
-    try:
-        if sequence:
-            lines = sequence.strip().split("\n")
-            if lines[0].startswith(">"):
-                lines = lines[1:]
-            return ''.join(lines).split(">")[0]
-            # record = list(SeqIO.parse(io.StringIO(sequence), "fasta"))[0]
-            # return str(record.seq)
-        else:
-            raise ValueError('Empty FASTA sequence.')
-    except Exception as e:
-        raise gr.Error(f'Failed to process FASTA due to error: {str(e)}')
-def send_email(receiver, msg):
-    pass
-def submit_predict(predict_filepath, task, preset, target_family, flag, state, progress=gr.Progress(track_tqdm=True)):
-    if flag:
-        try:
-            job_id = flag
-            global COLUMN_ALIASES
-            task = TASK_MAP[task]
-            if not preset:
-                raise gr.Error('Please select a model.')
-            preset = PRESET_MAP[preset]
-            target_family = TARGET_FAMILY_MAP[target_family]
-            # email_hash = hashlib.sha256(email.encode()).hexdigest()
-            COLUMN_ALIASES.update({
-                'Y': 'Actual Interaction Probability' if task == 'DTI' else 'Actual Binding Affinity',
-                'Y^': 'Predicted Interaction Probability' if task == 'DTI' else 'Predicted Binding Affinity'
-            })
-            # target_family_list = [target_family]
-            # for family in target_family_list:
-            # try:
-            prediction_df = pd.DataFrame()
-            with hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference"):
-                cfg = hydra.compose(
-                    config_name="webserver_inference",
-                    overrides=[f"task={task}",
-                               f"preset={preset}",
-                               f"ckpt_path=resources/checkpoints/{preset}-{task}-{target_family}.ckpt",
-                               f"data.data_file='{str(predict_filepath)}'"])
-                predictions, _ = predict(cfg)
-                predictions = [pd.DataFrame(prediction) for prediction in predictions]
-                prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
-                prediction_df.set_index('N', inplace=True)
-                orig_df = pd.read_csv(
-                    predict_filepath,
-                    usecols=lambda x: x not in ['X1', 'ID1', 'Compound', 'Scaffold', 'Scaffold SMILES',
-                                                'X2', 'ID2',
-                                                'Y', 'Y^']
-                )
-                prediction_df = pd.merge(prediction_df, orig_df, left_index=True, right_index=True, how='left')
-                predictions_file = f'temp/{job_id}_predictions.csv'
-                prediction_df.to_csv(predictions_file)
-                return {file_for_report: predictions_file,
-                        run_state: False,
-                        report_upload_flag: False}
-        except Exception as e:
-            gr.Warning(f"Prediction job failed due to error: {str(e)}")
-            return {run_state: False}
-    else:
-        return {run_state: state}
-        #
-        # except Exception as e:
-        #     raise gr.Error(str(e))
-    # email_lock = Path(f"outputs/{email_hash}.lock")
-    # with open(email_lock, "w") as file:
-    #     record = {
-    #         "email": email,
-    #         "job_id": job_id
-    #     }
-    #     json.dump(record, file)
-    # def run_predict():
-    # TODO per-user submit usage
-    #     # email_lock = Path(f"outputs/{email_hash}.lock")
-    #     # with open(email_lock, "w") as file:
-    #     #     record = {
-    #     #         "email": email,
-    #     #         "job_id": job_id
-    #     #     }
-    #     #     json.dump(record, file)
-    #
-    #     job_lock = DATA_PATH / f"outputs/{job_id}.lock"
-    #     with open(job_lock, "w") as file:
-    #         pass
-    #
-    #     try:
-    #         prediction_df = pd.DataFrame()
-    #         for family in target_family_list:
-    #             with hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference"):
-    #                 cfg = hydra.compose(
-    #                     config_name="webserver_inference",
-    #                     overrides=[f"task={task}",
-    #                                f"preset={preset}",
-    #                                f"ckpt_path=resources/checkpoints/{preset}-{task}-{family}.ckpt",
-    #                                f"data.data_file='{str(predict_dataset)}'"])
-    #
-    #             predictions, _ = predict(cfg)
-    #             predictions = [pd.DataFrame(prediction) for prediction in predictions]
-    #             prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
-    #         prediction_df.to_csv(f'outputs/{job_id}.csv')
-    #         # email_lock.unlink()
-    #         job_lock.unlink()
-    #
-    #         msg = (f'Your DeepSEQcreen prediction job (id: {job_id}) completed successfully. You may retrieve the '
-    #                f'results and generate an analytical report at {URL} using the job id within 48 hours.')
-    #         gr.Info(msg)
-    #     except Exception as e:
-    #         msg = (f'Your DeepSEQcreen prediction job (id: {job_id}) failed due to an error: "{str(e)}." You may '
-    #                f'reach out to the author about the error through email ([email protected]).')
-    #         raise gr.Error(str(e))
-    #     finally:
-    #         send_email(email, msg)
-    #
-    # # Run "predict" asynchronously
-    # threading.Thread(target=run_predict).start()
-    #
-    # msg = (f'Your DeepSEQcreen prediction job (id: {job_id}) started running. You may retrieve the results '
-    #        f'and generate an analytical report at {URL} using the job id once the job is done. Only one job '
-    #        f'per user is allowed at the same time.')
-    # send_email(email, msg)
-    # # Return the job id first
-    # return [
-    #     gr.Blocks(visible=False),
-    #     gr.Markdown(f"Your prediction job is running... "
-    #                 f"You may stay on this page or come back later to retrieve the results "
-    #                 f"Once you receive our email notification."),
-    # ]
-def update_df(file, progress=gr.Progress(track_tqdm=True)):
-    # global DF_FOR_REPORT
-    if file and Path(file).is_file():
-        df = pd.read_csv(file)
-        if 'N' in df.columns:
-            df.set_index('N', inplace=True)
-        if not any(col in ['X1', 'X2'] for col in df.columns):
-            gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
-            return {analyze_btn: gr.Button(interactive=False)}
-        # if df['X1'].nunique() > 1:
-        if 'X1' in df.columns:
-            df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
-                desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
-            df['Scaffold'] = df['Scaffold SMILES'].swifter.progress_bar(
-                desc='Generating scaffold graphs...').apply(
-                lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
-            # Add a new column with RDKit molecule objects
-            if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
-                df['Compound'] = df['X1'].swifter.progress_bar(
-                    desc='Generating molecular graphs...').apply(
-                    lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
-        # DF_FOR_REPORT = df.copy()
-        # pie_chart = None
-        # value = None
-        # if 'Y^' in DF_FOR_REPORT.columns:
-        #     value = 'Y^'
-        # elif 'Y' in DF_FOR_REPORT.columns:
-        #     value = 'Y'
-        # if value:
-        #     if DF_FOR_REPORT['X1'].nunique() > 1 >= DF_FOR_REPORT['X2'].nunique():
-        #         pie_chart = create_pie_chart(DF_FOR_REPORT, category='Scaffold SMILES', value=value, top_k=100)
-        #     elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
-        #         pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
-        return {html_report: create_html_report(df),
-                raw_df: df,
-                report_df: df.copy(),
-                analyze_btn: gr.Button(interactive=True)}  # pie_chart
-    else:
-        return {analyze_btn: gr.Button(interactive=False)}
-def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm=True)):
-    df_html = df.copy(deep=True)
-    # email_hash = hashlib.sha256(email.encode()).hexdigest()
-    cols_left = list(pd.Index(
-        ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']).intersection(df_html.columns))
-    cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
-    df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
-    if isinstance(task, str):
-        task = TASK_MAP[task]
-        COLUMN_ALIASES.update({
-            'Y': 'Actual Interaction Probability' if task == 'DTI' else 'Actual Binding Affinity',
-            'Y^': 'Predicted Interaction Probability' if task == 'DTI' else 'Predicted Binding Affinity'
-        })
-    ascending = True if COLUMN_ALIASES['Y^'] == 'Predicted Binding Affinity' else False
-    df_html = df_html.sort_values(
-        [col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
-    )
-    if not file:
-        df_html = df_html.iloc[:31]
-    # Remove repeated info for one-against-N tasks to save visual and physical space
-    job = 'Chemical Property'
-    unique_entity = 'Unique Entity'
-    unique_df = None
-    category = None
-    columns_unique = None
-    if 'X1' in df_html.columns and 'X2' in df_html.columns:
-        n_compound = df_html['X1'].nunique()
-        n_protein = df_html['X2'].nunique()
-        if n_compound == 1 and n_protein >= 2:
-            unique_entity = 'Compound of Interest'
-            if any(col in df_html.columns for col in ['Y^', 'Y']):
-                job = 'Target Protein Identification'
-                category = 'Target Family'
-            columns_unique = df_html.columns.isin(['X1', 'ID1', 'Scaffold', 'Compound', 'Scaffold SMILES']
-                                                  + list(FILTER_MAP.keys()) + list(SCORE_MAP.keys()))
-        elif n_compound >= 2 and n_protein == 1:
-            unique_entity = 'Target of Interest'
-            if any(col in df_html.columns for col in ['Y^', 'Y']):
-                job = 'Drug Hit Screening'
-                category = 'Scaffold SMILES'
-            columns_unique = df_html.columns.isin(['X2', 'ID2'])
-        elif 'Y^' in df_html.columns:
-            job = 'Interaction Pair Inference'
-    if 'Compound' in df_html.columns:
-        df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
-            desc='Generating compound graph...').apply(
-            lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
-    if 'Scaffold' in df_html.columns:
-        df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
-            desc='Generating scaffold graph...').apply(
-            lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
-    df_html.rename(columns=COLUMN_ALIASES, inplace=True)
-    df_html.index.name = 'Index'
-    if 'Target FASTA' in df_html.columns:
-        df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
-            desc='Processing FASTA...').apply(
-            lambda x: wrap_text(x) if not pd.isna(x) else x)
-    num_cols = df_html.select_dtypes('number').columns
-    num_col_colors = sns.color_palette('husl', len(num_cols))
-    bool_cols = df_html.select_dtypes(bool).columns
-    bool_col_colors = {True: 'lightgreen', False: 'lightpink'}
-    if columns_unique is not None:
-        unique_df = df_html.loc[:, columns_unique].iloc[[0]].copy()
-        df_html = df_html.loc[:, ~columns_unique]
-    if not file:
-        if 'Compound ID' in df_html.columns:
-            df_html.drop(['Compound SMILES'], axis=1, inplace=True)
-        if 'Target ID' in df_html.columns:
-            df_html.drop(['Target FASTA'], axis=1, inplace=True)
-        if 'Target FASTA' in df_html.columns:
-            df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
-                desc='Processing FASTA...').apply(
-                lambda x: wrap_text(x) if not pd.isna(x) else x)
-        if 'Scaffold SMILES' in df_html.columns:
-            df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
-        styled_df = df_html.style.format(precision=3)
-        for i, col in enumerate(num_cols):
-            if col in df_html.columns:
-                if col not in ['Predicted Binding Affinity', 'Actual Binding Affinity']:
-                    styled_df = styled_df.background_gradient(
-                        subset=[col], cmap=sns.light_palette(num_col_colors[i], as_cmap=True))
-                else:
-                    styled_df = styled_df.background_gradient(
-                        subset=[col], cmap=sns.light_palette(num_col_colors[i], as_cmap=True).reversed())
-        if any(df_html.columns.isin(bool_cols)):
-            styled_df.applymap(lambda val: f'background-color: {bool_col_colors[val]}', subset=bool_cols)
-        table_html = styled_df.to_html()
-        unique_html = ''
-        if unique_df is not None:
-            if 'Target FASTA' in unique_df.columns:
-                unique_df['Target FASTA'] = unique_df['Target FASTA'].str.replace('\n', '<br>')
-            if any(unique_df.columns.isin(bool_cols)):
-                unique_df = unique_df.style.applymap(
-                    lambda val: f"background-color: {bool_col_colors[val]}", subset=bool_cols)
-            unique_html = (f'<div style="font-family: Courier !important;">'
-                           f'{unique_df.to_html(escape=False, index=False)}</div>')
-        return (f'<div style="font-size: 16px; font-weight: bold;">{job} Report Preview (Top 30 Records)</div>'
-                f'<div style="overflow-x:auto; font-family: Courier !important;">{unique_html}</div>'
-                f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')
-    else:
-        bool_formatters = {col: BooleanFormatter() for col in bool_cols}
-        float_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('floating').columns}
-        other_formatters = {
-            'Predicted Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
-            'Actual Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
-            'Compound': HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>'),
-            'Scaffold': HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>'),
-            'Target FASTA': {'type': 'textarea', 'width': 60},
-            'Target ID': HTMLTemplateFormatter(
-                template='<a href="<% '
-                         'if (/^[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}$/.test(value)) '
-                         '{ %>https://www.uniprot.org/uniprotkb/<%= value %><% } '
-                         'else { %>https://www.uniprot.org/uniprotkb?query=<%= value %><% } '
-                         '%>" target="_blank"><%= value %></a>'),
-            'Compound ID': HTMLTemplateFormatter(
-                template='<a href="https://pubchem.ncbi.nlm.nih.gov/compound/<%= value %>" '
-                         'target="_blank"><%= value %></a>')
-        }
-        formatters = {**bool_formatters, **float_formatters, **other_formatters}
-        # html = df.to_html(file)
-        # return html
-        report_table = pn.widgets.Tabulator(
-            df_html, formatters=formatters,
-            frozen_columns=['Index', 'Target ID', 'Compound ID', 'Compound', 'Scaffold'],
-            disabled=True, sizing_mode='stretch_both', pagination='local', page_size=30)
-        for i, col in enumerate(num_cols):
-            if col not in ['Predicted Binding Affinity', 'Actual Binding Affinity']:
-                if col not in ['Predicted Interaction Probability', 'Actual Interaction Probability']:
-                    report_table.style.background_gradient(
-                        subset=df_html.columns == col, cmap=sns.light_palette(num_col_colors[i], as_cmap=True))
-                else:
-                    continue
-            else:
-                report_table.style.background_gradient(
-                    subset=df_html.columns == col, cmap=sns.light_palette(num_col_colors[i], as_cmap=True).reversed())
-        pie_charts = {}
-        for y in df_html.columns.intersection(['Predicted Interaction Probability', 'Actual Interaction Probability',
-                                               'Predicted Binding Affinity', 'Actual Binding Affinity']):
-            pie_charts[y] = []
-            for k in [10, 30, 100]:
-                if k < len(df_html):
-                    pie_charts[y].append(create_pie_chart(df_html, category=category, value=y, top_k=k))
-            pie_charts[y].append(create_pie_chart(df_html, category=category, value=y, top_k=len(df_html)))
-        # Remove keys with empty values
-        pie_charts = {k: v for k, v in pie_charts.items() if any(v)}
-        pn_css = """
-        .tabulator {
-            font-family: Courier New !important;
-            font-weight: normal !important;
-            font-size: 12px !important;
-        }
-        .tabulator-cell {
-            overflow: visible !important;
-        }
-        .tabulator-cell:hover {
-            z-index: 1000 !important;
-        }
-        .tabulator-cell.tabulator-frozen:hover {
-            z-index: 1000 !important;
-        }
-        .image-zoom-viewer {
-            display: inline-block;
-            overflow: visible;
-            z-index: 1000;
-        }
-        .image-zoom-viewer::after {
-            content: "";
-            top: 0;
-            left: 0;
-            width: 100%;
-            height: 100%;
-            pointer-events: none;
-        }
-        .image-zoom-viewer:hover::after {
-            pointer-events: all;
-        }
-        /* When hovering over the container, scale its child (the SVG) */
-        .tabulator-cell:hover .image-zoom-viewer svg {
-            padding: 3px;
-            position: absolute;
-            background-color: rgba(250, 250, 250, 0.854);
-            box-shadow: 0 0 10px rgba(0, 0, 0, 0.618);
-            border-radius: 3px;
-            transform: scale(3); /* Scale up the SVG */
-            transition: transform 0.3s ease;
-            pointer-events: none; /* Prevents the SVG from blocking mouse interactions */
-            z-index: 1000;
-        }
-        .image-zoom-viewer svg {
-            display: block; /* SVG is a block-level element for proper scaling */
-            z-index: 1000;
-        }
-        .image-zoom-viewer:hover {
-            z-index: 1000;
-        }
-        """
-        pn.extension(raw_css=[pn_css])
-        template = pn.template.VanillaTemplate(
-            title=f'DeepSEQreen {job} Report',
-            sidebar=[],
-            favicon='deepseqreen.svg',
-            logo='deepseqreen.svg',
-            header_background='#F3F5F7',
-            header_color='#4372c4',
-            busy_indicator=None,
-        )
-        stats_pane = pn.Row()
-        if unique_df is not None:
-            unique_table = pn.widgets.Tabulator(unique_df, formatters=formatters, sizing_mode='stretch_width',
-                                                show_index=False, disabled=True,
-                                                frozen_columns=['Compound ID', 'Compound', 'Scaffold'])
-            # if pie_charts:
-            #     unique_table.width = 640
-            stats_pane.append(pn.Column(f'### {unique_entity}', unique_table))
-        if pie_charts:
-            for score_name, figure_list in pie_charts.items():
-                stats_pane.append(
-                    pn.Column(f'### {category} by Top {score_name}',
-                              pn.Tabs(*figure_list, tabs_location='above'))
-                    # pn.Card(pn.Row(v), title=f'{category} by Top {k}')
-                )
-        if stats_pane:
-            template.main.append(pn.Card(stats_pane,
-                                         sizing_mode='stretch_width', title='Summary Statistics', margin=10))
-        template.main.append(
-            pn.Card(report_table, title=f'{job} Results',  # width=1200,
-                    margin=10)
-        )
-        template.save(file, resources=INLINE)
-        return file
def create_pie_chart(df, category, value, top_k):
    """Draw a Bokeh pie chart of `category` counts among the top-`top_k` rows.

    Rows are ranked with ``df.nlargest(top_k, value)``; the chart shows how
    often each `category` value occurs among them.

    Args:
        df: DataFrame holding both the `category` and `value` columns.
        category: Name of the column whose values become pie slices.
        value: Name of the column used to rank rows.
        top_k: Number of highest-`value` rows to include.

    Returns:
        The configured Bokeh figure, or ``None`` when either column is
        missing from `df`.
    """
    if not (category in df and value in df):
        return None

    leaders = df.nlargest(top_k, value)
    counts = leaders[category].value_counts()
    data = pd.DataFrame({category: counts.index, 'value': counts.values})
    data['proportion'] = data['value'] / data['value'].sum()

    # Slices under 0.2% are collapsed into a single '...' slice.
    rare = data['proportion'] < 0.002
    if any(rare):
        collapsed = data[rare].sum()
        collapsed[category] = '...'
        data = pd.concat([data[~rare], pd.DataFrame(collapsed).T])
    data['angle'] = data['proportion'] * 2 * pi

    # Colours are assigned from every category in the full frame, not just
    # the top-k subset (presumably so a category keeps its colour across
    # charts built with different top_k); the palette repeats past 20 entries.
    all_categories = df[category].unique()
    n_categories = len(all_categories)
    palette = (Category20c_20 * (n_categories // 20 + 1))[:n_categories]
    color_dict = dict(zip(all_categories, palette))
    color_dict['...'] = '#636363'
    data['color'] = data[category].map(color_dict)

    tooltips = [
        (f"{category}", f"@{{{category}}}"),
        ("Count", "@value"),
        ("Percentage", "@proportion{0.0%}")
    ]
    if category == 'Scaffold SMILES':
        # Attach the 'Scaffold' column so the hover tooltip can render it.
        scaffold_lookup = leaders[['Scaffold SMILES', 'Scaffold']].drop_duplicates()
        data = data.merge(scaffold_lookup, how='left',
                          left_on='Scaffold SMILES', right_on='Scaffold SMILES')
        tooltips.append(("Scaffold", "<div>@{Scaffold}{safe}</div>"))

    chart_name = f"Top {top_k}" if top_k < len(df) else 'All'
    p = figure(height=384, width=960, name=chart_name, sizing_mode='stretch_height',
               toolbar_location=None, tools="hover", tooltips=tooltips, x_range=(-0.4, 0.4))

    def shorten(label, max_length=60):
        # Keep legend entries readable by cutting long labels.
        if len(label) <= max_length:
            return label
        return label[:max_length] + "..."

    data['legend_field'] = data[category].apply(shorten)

    p.add_layout(Legend(padding=0, margin=0), 'right')
    p.wedge(x=0, y=1, radius=0.3,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field='legend_field', source=data)

    # Show at most 20 legend entries, followed by a "..." placeholder.
    if len(p.legend.items) > 20:
        p.legend.items = [*p.legend.items[:20], LegendItem(label="...")]

    p.legend.label_text_font_size = "10pt"
    p.legend.label_text_font = "courier"

    # Strip axes, grid, outline and padding so only the pie remains.
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None
    p.outline_line_width = 0
    p.min_border = 0
    p.margin = 0
    return p
def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_tqdm=True)):
    """Add the requested filter/score columns and render the HTML report.

    Works on a copy of `df`. Each name in `filter_list` and then in
    `score_list` becomes a new column computed from the 'Compound' column
    with the matching callable from FILTER_MAP / SCORE_MAP; missing
    compounds are passed through unchanged.

    Args:
        df: Input DataFrame containing a 'Compound' column.
        score_list: Keys of SCORE_MAP to compute.
        filter_list: Keys of FILTER_MAP to compute.
        task: Forwarded to `create_html_report`.
        progress: Not referenced in the body; presumably present so Gradio
            can mirror the tqdm progress bars -- TODO confirm.

    Returns:
        A 4-tuple ``(html_report, df_report, gr.File, gr.File)`` on success
        (both files hidden), or four ``None`` values after a warning is shown
        when anything fails.
    """
    df_report = df.copy()

    def compute_column(name, registry):
        # One swifter-backed pass over the compounds for a single property.
        def per_compound(mol):
            return mol if pd.isna(mol) else registry[name](mol)

        return (
            df_report['Compound']
            .swifter.progress_bar(desc=f"Calculating {name}")
            .apply(per_compound)
        )

    try:
        for filter_name in filter_list:
            df_report[filter_name] = compute_column(filter_name, FILTER_MAP)
        for score_name in score_list:
            df_report[score_name] = compute_column(score_name, SCORE_MAP)

        html = create_html_report(df_report, file=None, task=task)
        return html, df_report, gr.File(visible=False), gr.File(visible=False)
    except Exception as e:
        # UI boundary: surface the failure as a warning instead of crashing.
        gr.Warning(f'Failed to report results due to error: {str(e)}')
        return None, None, None, None
-# def check_job_status(job_id):
-#     job_lock = DATA_PATH / f"{job_id}.lock"
-#     job_file = DATA_PATH / f"{job_id}.csv"
-#     if job_lock.is_file():
-#         return {gr.Markdown(f"Your job ({job_id}) is still running... "
-#                             f"You may stay on this page or come back later to retrieve the results "
-#                             f"Once you receive our email notification."),
-#                 None,
-#                 None
-#                 }
-#     elif job_file.is_file():
-#         return {gr.Markdown(f"Your job ({job_id}) is done! Redirecting you to generate reports..."),
-#                 gr.Tabs(selected=3),
-#                 gr.File(str(job_lock))}
def wrap_text(text, line_length=60):
    """Wrap a string to `line_length` columns, keeping FASTA headers intact.

    Non-string input is returned unchanged. Text starting with '>' is split
    on '>' into records: the first line of each record is kept as its header
    and the remaining lines are joined and re-wrapped. Any other string is
    wrapped as a whole.

    Args:
        text: Value to wrap; only `str` instances are modified.
        line_length: Maximum width of each wrapped line.

    Returns:
        The wrapped string, or `text` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text
    if not text.startswith('>'):
        return textwrap.fill(text, width=line_length)

    wrapped_records = []
    # Splitting on '>' yields empty pieces (e.g. before the first header); skip them.
    for record in filter(None, text.split('>')):
        header, *sequence_lines = record.split('\n')
        sequence = textwrap.fill(''.join(sequence_lines), width=line_length)
        wrapped_records.append(f">{header}\n{sequence}")
    return '\n'.join(wrapped_records)
def unwrap_text(text):
    """Strip surrounding whitespace and remove every newline from `text`.

    The previous body, ``text.strip.replece('\\n', '')``, never called
    ``strip`` and misspelled ``replace``, so it raised AttributeError on
    every call.

    Args:
        text: Value to unwrap; only `str` instances are modified.

    Returns:
        `text` on a single line, or `text` itself when it is not a string
        (mirroring how `wrap_text` passes non-strings through).
    """
    if not isinstance(text, str):
        return text
    return text.strip().replace('\n', '')
def drug_library_from_sdf(sdf_path):
    """Load an SDF file into a DataFrame through `PandasTools.LoadSDF`.

    SMILES strings are written to the 'X1' column and molecule objects to
    the 'Compound' column; fingerprints are requested as well.

    Args:
        sdf_path: Path of the SDF file to read.

    Returns:
        The DataFrame produced by `PandasTools.LoadSDF`.
    """
    load_options = {
        'smilesName': 'X1',
        'molColName': 'Compound',
        'includeFingerprints': True,
    }
    library = PandasTools.LoadSDF(sdf_path, **load_options)
    return library
def process_target_library_upload(library_upload):
    """Read an uploaded target library (CSV or FASTA) into a DataFrame.

    The file type is chosen from the extension, compared case-insensitively
    so that uploads such as ``LIB.CSV`` or ``targets.FASTA`` are accepted
    rather than rejected as unsupported.

    Args:
        library_upload: Path of the uploaded file.

    Returns:
        The loaded DataFrame, after `validate_columns(df, ['X2'])` has been
        applied (presumably checking that the 'X2' column exists -- verify
        against `validate_columns`).

    Raises:
        gr.Error: If the extension is neither ``.csv`` nor ``.fasta``.
    """
    # Lower-case only the copy used for matching; the original path is what gets opened.
    filename = str(library_upload).lower()
    if filename.endswith('.csv'):
        df = pd.read_csv(library_upload)
    elif filename.endswith('.fasta'):
        df = target_library_from_fasta(library_upload)
    else:
        raise gr.Error('Currently only CSV and FASTA files are supported as target libraries.')
    validate_columns(df, ['X2'])
    return df
def process_drug_library_upload(library_upload):
    """Read an uploaded compound library (CSV or SDF) into a DataFrame.

    The file type is chosen from the extension, compared case-insensitively
    so that uploads such as ``LIB.CSV`` or ``compounds.SDF`` are accepted
    rather than rejected as unsupported.

    Args:
        library_upload: Path of the uploaded file.

    Returns:
        The loaded DataFrame, after `validate_columns(df, ['X1'])` has been
        applied (presumably checking that the 'X1' column exists -- verify
        against `validate_columns`).

    Raises:
        gr.Error: If the extension is neither ``.csv`` nor ``.sdf``.
    """
    # Lower-case only the copy used for matching; the original path is what gets opened.
    filename = str(library_upload).lower()
    if filename.endswith('.csv'):
        df = pd.read_csv(library_upload)
    elif filename.endswith('.sdf'):
        df = drug_library_from_sdf(library_upload)
    else:
        raise gr.Error('Currently only CSV and SDF files are supported as drug libraries.')
    validate_columns(df, ['X1'])
    return df
def target_library_from_fasta(fasta_path):
    """Parse a FASTA file into a two-column target library DataFrame.

    Args:
        fasta_path: Path of the FASTA file to read.

    Returns:
        A DataFrame with one row per record: 'ID2' holds the record id and
        'X2' the sequence as a plain string.
    """
    identifiers = []
    sequences = []
    for record in SeqIO.parse(fasta_path, "fasta"):
        identifiers.append(record.id)
        sequences.append(str(record.seq))
    return pd.DataFrame({'ID2': identifiers, 'X2': sequences})
# Application theme: the Gradio Base theme with colour overrides, grouped
# below by the colour value they share.
theme = gr.themes.Base(spacing_size="sm", text_size='md').set(
    # Page and checkbox-label backgrounds.
    **dict.fromkeys(
        (
            'background_fill_primary',
            'background_fill_secondary',
            'checkbox_label_background_fill',
            'checkbox_label_background_fill_hover',
        ),
        '#dfe6f0',
    ),
    # Borders, primary button fill and most text.
    **dict.fromkeys(
        (
            'checkbox_border_color',
            'border_color_primary',
            'border_color_accent',
            'button_primary_background_fill',
            'button_secondary_border_color',
            'body_text_color',
            'block_title_text_color',
            'block_label_text_color',
            'input_border_color',
            'panel_border_color',
        ),
        '#4372c4',
    ),
    # White surfaces and primary button text.
    **dict.fromkeys(
        (
            'checkbox_background_color',
            'button_primary_text_color',
            'input_background_fill',
            'code_background_fill',
        ),
        'white',
    ),
    block_info_text_color='#505358',
    block_border_color=None,
)
-with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
-    run_state = gr.State(value=False)
-    screen_flag = gr.State(value=False)
-    identify_flag = gr.State(value=False)
-    infer_flag = gr.State(value=False)
-    report_upload_flag = gr.State(value=False)
-    with gr.Tabs() as tabs:
-        with gr.TabItem(label='Drug Hit Screening', id=0):
-            gr.Markdown('''
-                # <center>Drug Hit Screening</center>
-                <center>
-                To predict interactions or binding affinities of a single target against a compound library.
-                </center>
-                    ''')
-            with gr.Blocks() as screen_block:
-                with gr.Column() as screen_page:
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Enter (paste) a amino acid sequence below manually or upload a FASTA file. "
-                                "If multiple entities are in the FASTA, only the first will be used. "
-                                "Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
-                                "the sequence."
-                            )
-                            target_input_type = gr.Dropdown(
-                                label='Step 1. Select Target Input Type and Input',
-                                choices=['Sequence', 'UniProt ID', 'Gene symbol'],
-                                info='Enter (paste) a FASTA string below manually or upload a FASTA file.',
-                                value='Sequence',
-                                scale=4, interactive=True
-                            )
-                    with gr.Row():
-                        target_id = gr.Textbox(show_label=False, visible=False,
-                                               interactive=True, scale=4,
-                                               info='Enter a UniProt ID and query.')
-                        target_gene = gr.Textbox(
-                            show_label=False, visible=False,
-                            interactive=True, scale=4,
-                            info='Enter a gene symbol and query.')
-                        target_organism = gr.Textbox(
-                            info='Organism scientific name (default: Homo sapiens).',
-                            placeholder='Homo sapiens', show_label=False,
-                            visible=False, interactive=True, scale=4, )
-                        target_upload_btn = gr.UploadButton(label='Upload a FASTA File', type='binary',
-                                                            visible=True, variant='primary',
-                                                            size='lg')
-                        target_paste_markdown = gr.Button(value='OR Paste Your Sequence Below', visible=True)
-                        target_query_btn = gr.Button(value='Query the Sequence', variant='primary',
-                                                     visible=False, scale=4)
-                    # with gr.Row():
-                    #     example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
-                    #     example_gene = gr.Button(value='Example: MAPK14', elem_classes='example', visible=False)
-                    example_fasta = gr.Button(value='Example: MAPK14 (Q16539)', elem_classes='example')
-                    target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
-                    # with gr.Row():
-                    #     with gr.Column():
-                    # with gr.Column():
-                    #     gr.File(label='Example FASTA file',
-                    #             value='data/examples/MAPK14.fasta', interactive=False)
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Click Auto-detect to identify the protein family using sequence alignment. "
-                                "This optional step allows applying a family-specific model instead of a all-family "
-                                "model (general). "
-                                "Manually select general if the alignment results are unsatisfactory."
-                            )
-                            drug_screen_target_family = gr.Dropdown(
-                                choices=list(TARGET_FAMILY_MAP.keys()),
-                                value='General',
-                                label='Step 2. Select Target Family (Optional)', interactive=True)
-                            # with gr.Column(scale=1, min_width=24):
-                    with gr.Row():
-                        with gr.Column():
-                            target_family_detect_btn = gr.Button(value='OR Let Us Auto-Detect for You',
-                                                                 variant='primary')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Select a preset compound library (e.g., DrugBank). "
-                                "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
-                                "or use an SDF file (Max. 10,000 compounds per task). Example CSV and SDF files are "
-                                "provided below and can be downloaded by clicking the lower right corner."
-                            )
-                            drug_library = gr.Dropdown(label='Step 3. Select a Preset Compound Library',
-                                                       choices=list(DRUG_LIBRARY_MAP.keys()))
-                            with gr.Row():
-                                gr.File(label='Example SDF compound library',
-                                        value='data/examples/compound_library.sdf', interactive=False)
-                                gr.File(label='Example CSV compound library',
-                                        value='data/examples/compound_library.csv', interactive=False)
-                            drug_library_upload_btn = gr.UploadButton(
-                                label='OR Upload Your Own Library', variant='primary')
-                            drug_library_upload = gr.File(label='Custom compound library file', visible=False)
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Interaction prediction provides you binding probability score between the target of "
-                                "interest and each compound in the library, "
-                                "while affinity prediction directly estimates their binding strength measured using "
-                                "IC50."
-                            )
-                            drug_screen_task = gr.Dropdown(
-                                list(TASK_MAP.keys()),
-                                label='Step 4. Select the Prediction Task You Want to Conduct',
-                                value='Compound-protein interaction')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Select your preferred model, or click Recommend for the best-performing model based "
-                                "on the selected task, family, and whether the target was trained. "
-                                "Please refer to documentation for detailed benchamrk results."
-                            )
-                            drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
-                                                             label='Step 5. Select a Preset Model')
-                            screen_preset_recommend_btn = gr.Button(
-                                value='OR Let Us Recommend for You', variant='primary')
-                    with gr.Row():
-                        with gr.Column():
-                            drug_screen_email = gr.Textbox(
-                                label='Step 6. Input Your Email Address (Optional)',
-                                info="Your email address will be used to notify you about the completion of your job."
-                            )
-                    with gr.Row(visible=True):
-                        with gr.Column():
-                            # drug_screen_clr_btn = gr.ClearButton(size='lg')
-                            drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
-                    # TODO Modify the pd df directly with df['X2'] = target
-            screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
-            screen_waiting = gr.Markdown("""
-            <center>Your job is running... It might take a few minutes.
-            When it's done, you will be redirected to the report page.
-            Meanwhile, please leave the page on.</center>
-            """, visible=False)
-        with gr.TabItem(label='Target protein identification', id=1):
-            gr.Markdown('''
-                # <center>Target Protein Identification</center>
-                <center>
-                To predict interactions or binding affinities of a single compound against a protein library.
-                </center>
-                ''')
-            with gr.Blocks() as identify_block:
-                with gr.Column() as identify_page:
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Enter (paste) a compound SMILES below manually or upload a SDF file. "
-                                "If multiple entities are in the SDF, only the first will be used. "
-                                "SMILES can be obtained by searching for the compound of interest in databases such "
-                                "as NCBI, PubChem and and ChEMBL."
-                            )
-                            compound_type = gr.Dropdown(
-                                label='Step 1. Select Compound Input Type and Input',
-                                choices=['SMILES', 'SDF'],
-                                info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
-                                value='SMILES',
-                                interactive=True)
-                            compound_upload_btn = gr.UploadButton(label='OR Upload a SDF File', variant='primary',
-                                                                  type='binary', visible=False)
-                    compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
-                    example_drug = gr.Button(value='Example: Aspirin', elem_classes='example')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "By default, models trained on all protein families (general) will be applied. "
-                                # "If the proteins in the target library of interest all belong to the same protein "
-                                # "family, manually selecting the family is supported."
-                            )
-                            target_identify_target_family = gr.Dropdown(choices=['General'],
-                                                                        value='General',
-                                                                        label='Step 2. Select Target Family ('
-                                                                              'Optional)')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Select a preset target library (e.g., ChEMBL33_human_proteins). "
-                                "Alternatively, upload a CSV file with a column named X2 containing target protein "
-                                "sequences, or use an FASTA file (Max. 10,000 targets per task). "
-                                "Example CSV and SDF files are provided below "
-                                "and can be downloaded by clicking the lower right corner."
-                            )
-                            target_library = gr.Dropdown(label='Step 3. Select a Preset Target Library',
-                                                         choices=list(TARGET_LIBRARY_MAP.keys()))
-                            with gr.Row():
-                                gr.File(label='Example FASTA target library',
-                                        value='data/examples/target_library.fasta', interactive=False)
-                                gr.File(label='Example CSV target library',
-                                        value='data/examples/target_library.csv', interactive=False)
-                            target_library_upload_btn = gr.UploadButton(
-                                label='OR Upload Your Own Library', variant='primary')
-                            target_library_upload = gr.File(label='Custom target library file', visible=False)
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Interaction prediction provides you binding probability score between the target of "
-                                "interest and each compound in the library, "
-                                "while affinity prediction directly estimates their binding strength measured using "
-                                "IC50."
-                            )
-                            target_identify_task = gr.Dropdown(
-                                list(TASK_MAP.keys()),
-                                label='Step 4. Select the Prediction Task You Want to Conduct',
-                                value='Compound-protein interaction')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Select your preferred model, or click Recommend for the best-performing model based "
-                                "on the selected task, family, and whether the compound was trained. "
-                                "Please refer to documentation for detailed benchamrk results."
-                            )
-                            target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
-                                                                 label='Step 5. Select a Preset Model')
-                            identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
-                                                                      variant='primary')
-                    with gr.Row():
-                        with gr.Column():
-                            target_identify_email = gr.Textbox(
-                                label='Step 6. Input Your Email Address (Optional)',
-                                info="Your email address will be used to notify you about the completion of your job."
-                            )
-                    with gr.Row(visible=True):
-                        # target_identify_clr_btn = gr.ClearButton(size='lg')
-                        target_identify_btn = gr.Button(value='SUBMIT THE IDENTIFICATION JOB', variant='primary',
-                                                        size='lg')
-            identify_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
-            identify_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
-                                           f"When it's done, you will be redirected to the report page. "
-                                           f"Meanwhile, please leave the page on.",
-                                           visible=False)
-        with gr.TabItem(label='Interaction pair inference', id=2):
-            gr.Markdown('''
-# <center>Interaction Pair Inference</center>
-<center>To predict interactions or binding affinities between up to 10,000 paired compound-protein data.</center>
-''')
-            with gr.Blocks() as infer_block:
-                with gr.Column() as infer_page:
-                    HelpTip(
-                        "A custom interation pair dataset can be a CSV file with 2 required columns "
-                        "(X1 for smiles and X2 for sequences) "
-                        "and optionally 2 ID columns (ID1 for compound ID and ID2 for target ID), "
-                        "or generated from a FASTA file containing multiple "
-                        "sequences and a SDF file containing multiple compounds. "
-                        "Currently, a maximum of 10,000 pairs is supported, "
-                        "which means that the size of CSV file or "
-                        "the product of the two library sizes should not exceed 10,000."
-                    )
-                    infer_type = gr.Dropdown(
-                        choices=['Upload a CSV file containing paired compound-protein data',
-                                 'Upload a compound library and a target library'],
-                        label='Step 1. Select Pair Input Type and Input',
-                        value='Upload a CSV file containing paired compound-protein data')
-                    with gr.Column() as pair_upload:
-                        gr.File(label="Example CSV dataset",
-                                value="data/examples/interaction_pair_inference.csv",
-                                interactive=False)
-                        with gr.Row():
-                            infer_csv_prompt = gr.Button(value="Upload Your Own Dataset Below",
-                                                         visible=True)
-                        with gr.Column():
-                            infer_data_for_predict = gr.File(
-                                label='Upload CSV File Containing Paired Records',
-                                file_count="single", type='filepath', visible=True)
-                    with gr.Column(visible=False) as pair_generate:
-                        with gr.Row():
-                            gr.File(label='Example SDF compound library',
-                                    value='data/examples/compound_library.sdf', interactive=False)
-                            gr.File(label='Example FASTA target library',
-                                    value='data/examples/target_library.fasta', interactive=False)
-                        with gr.Row():
-                            gr.File(label='Example CSV compound library',
-                                    value='data/examples/compound_library.csv', interactive=False)
-                            gr.File(label='Example CSV target library',
-                                    value='data/examples/target_library.csv', interactive=False)
-                        with gr.Row():
-                            infer_library_prompt = gr.Button(value="Upload Your Own Libraries Below",
-                                                             visible=False)
-                        with gr.Row():
-                            infer_drug = gr.File(label='Upload SDF/CSV File Containing Multiple Compounds',
-                                                 file_count="single", type='filepath')
-                            infer_target = gr.File(label='Upload FASTA/CSV File Containing Multiple Targets',
-                                                   file_count="single", type='filepath')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "By default, models trained on all protein families (general) will be applied. "
-                                "If the proteins in the target library of interest "
-                                "all belong to the same protein family, manually selecting the family is supported."
-                            )
-                            pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
-                                                                   value='General',
-                                                                   label='Step 2. Select Target Family (Optional)')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip(
-                                "Interaction prediction provides you binding probability score "
-                                "between the target of interest and each compound in the library, "
-                                "while affinity prediction directly estimates their binding strength "
-                                "measured using IC50."
-                            )
-                            pair_infer_task = gr.Dropdown(
-                                list(TASK_MAP.keys()),
-                                label='Step 3. Select the Prediction Task You Want to Conduct',
-                                value='Compound-protein interaction')
-                    with gr.Row():
-                        with gr.Column():
-                            HelpTip("Select your preferred model. "
-                                    "Please refer to documentation for detailed benchmark results."
-                                    )
-                            pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()),
-                                                            label='Step 4. Select a Preset Model')
-                            # infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
-                            #                                        variant='primary')
-                    with gr.Row():
-                        pair_infer_email = gr.Textbox(
-                            label='Step 5. Input Your Email Address (Optional)',
-                            info="Your email address will be used to notify you about the completion of your job."
-                        )
-                    with gr.Row(visible=True):
-                        # pair_infer_clr_btn = gr.ClearButton(size='lg')
-                        pair_infer_btn = gr.Button(value='SUBMIT THE INFERENCE JOB', variant='primary', size='lg')
-            infer_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
-                                        f"When it's done, you will be redirected to the report page. "
-                                        f"Meanwhile, please leave the page on.",
-                                        visible=False)
-        with gr.TabItem(label='Chemical property report', id=3):
-            with gr.Blocks() as report:
-                gr.Markdown('''
-                # <center>Chemical Property Report</center>
-                To compute chemical properties for the predictions of drug hit screening,
-                target protein identification, and interaction pair inference.
-                You may also upload your own dataset using a CSV file containing
-                one required column `X1` for compound SMILES.
-                The page shows only a preview report displaying at most 30 records
-                (with top predicted CPI/CPA if reporting results from a prediction job).
-                Please first `Preview` the report, then `Generate` and download a CSV report
-                or an interactive HTML report below if you wish to access the full report.
-                ''')
-                with gr.Row():
-                    with gr.Column():
-                        file_for_report = gr.File(interactive=True, type='filepath')
-                        report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
-                                                  label='Specify the Task for the Labels in the Upload Dataset')
-                    raw_df = gr.State(value=pd.DataFrame())
-                    report_df = gr.State(value=pd.DataFrame())
-                    scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
-                    filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
-                with gr.Row():
-                    # clear_btn = gr.ClearButton(size='lg')
-                    analyze_btn = gr.Button('Preview Top 30 Records', variant='primary', size='lg',
-                                            interactive=False)
-                with gr.Row():
-                    with gr.Column(scale=3):
-                        html_report = gr.HTML()  # label='Results', visible=True)
-                    ranking_pie_chart = gr.Plot(visible=False)
-                with gr.Row():
-                    with gr.Column():
-                        csv_generate = gr.Button(value='Generate CSV Report',
-                                                 interactive=False, variant='primary')
-                        csv_download_file = gr.File(label='Download CSV Report', visible=False)
-                    with gr.Column():
-                        html_generate = gr.Button(value='Generate HTML Report',
-                                                  interactive=False, variant='primary')
-                        html_download_file = gr.File(label='Download HTML Report', visible=False)
-    def target_input_type_select(input_type):
-        match input_type:
-            case 'UniProt ID':
-                return [gr.Dropdown(info=''),
-                        gr.UploadButton(visible=False),
-                        gr.Textbox(visible=True, value=''),
-                        gr.Textbox(visible=False, value=''),
-                        gr.Textbox(visible=False, value=''),
-                        gr.Button(visible=True),
-                        gr.Code(value=''),
-                        gr.Button(visible=False)]
-            case 'Gene symbol':
-                return [gr.Dropdown(info=''),
-                        gr.UploadButton(visible=False),
-                        gr.Textbox(visible=False, value=''),
-                        gr.Textbox(visible=True, value=''),
-                        gr.Textbox(visible=True, value=''),
-                        gr.Button(visible=True),
-                        gr.Code(value=''),
-                        gr.Button(visible=False)]
-            case 'Sequence':
-                return [gr.Dropdown(info='Enter (paste) a FASTA string below manually or upload a FASTA file.'),
-                        gr.UploadButton(visible=True),
-                        gr.Textbox(visible=False, value=''),
-                        gr.Textbox(visible=False, value=''),
-                        gr.Textbox(visible=False, value=''),
-                        gr.Button(visible=False),
-                        gr.Code(value=''),
-                        gr.Button(visible=True)]
-    # Re-render the target input widgets whenever a different input type is selected; the order of
-    # `outputs` must match the order of the list returned by target_input_type_select.
-    target_input_type.select(
-        fn=target_input_type_select,
-        inputs=target_input_type,
-        outputs=[
-            target_input_type, target_upload_btn,
-            target_id, target_gene, target_organism, target_query_btn,
-            target_fasta, target_paste_markdown
-        ],
-        show_progress=False
-    )
-    def uniprot_query(input_type, uid, gene, organism='Human'):
-        fasta_seq = ''
-        match input_type:
-            case 'UniProt ID':
-                query = f"{uid.strip()}.fasta"
-            case 'Gene symbol':
-                organism = organism if organism else 'Human'
-                query = f'search?query=organism_name:{organism.strip()}+AND+gene:{gene.strip()}&format=fasta'
-        try:
-            fasta = SESSION.get(UNIPROT_ENDPOINT.format(query=query))
-            fasta.raise_for_status()
-            fasta_seq = fasta.text
-        except Exception as e:
-            raise gr.Warning(f"Failed to query FASTA from UniProt database due to {str(e)}")
-        finally:
-            return fasta_seq
-    def process_fasta_upload(fasta_upload):
-        fasta = ''
-        try:
-            fasta = fasta_upload.decode()
-        except Exception as e:
-            gr.Warning(f"Please upload a valid FASTA file. Error: {str(e)}")
-            return fasta
-    # Fill the FASTA editor from an uploaded file.
-    target_upload_btn.upload(fn=process_fasta_upload, inputs=target_upload_btn, outputs=target_fasta)
-    # Fill the FASTA editor from a UniProt lookup by ID or by gene symbol and organism.
-    target_query_btn.click(uniprot_query,
-                           inputs=[target_input_type, target_id, target_gene, target_organism],
-                           outputs=target_fasta)
-    def target_family_detect(fasta, progress=gr.Progress(track_tqdm=True)):
-        aligner = PairwiseAligner(scoring='blastp', mode='local')
-        alignment_df = pd.read_csv('data/target_libraries/ChEMBL33_all_spe_single_prot_info.csv')
-        def align_score(query):
-            return aligner.align(process_target_fasta(fasta), query).score
-        alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
-            desc="Detecting protein family of the target...").apply(align_score)
-        row = alignment_df.loc[alignment_df['score'].idxmax()]
-        return gr.Dropdown(value=row['protein_family'].capitalize(),
-                           info=f"Reason: Best BLASTP score ({row['score']}) "
-                                f"with {row['ID2']} from family {row['protein_family']}")
-    # Run family detection on the current FASTA and write the suggestion into the family dropdown.
-    target_family_detect_btn.click(fn=target_family_detect, inputs=target_fasta, outputs=drug_screen_target_family)
-    # target_fasta.focus(fn=wrap_text, inputs=target_fasta, outputs=target_fasta, show_progress=False)
-    # Pass the FASTA text through wrap_text whenever the editor loses focus.
-    target_fasta.blur(fn=wrap_text, inputs=target_fasta, outputs=target_fasta, show_progress=False)
-    # On a library upload: keep the uploaded file's path and select its file name in the library
-    # dropdown, appended as an extra choice after the DRUG_LIBRARY_MAP keys.
-    drug_library_upload_btn.upload(fn=lambda x: [
-        x.name, gr.Dropdown(value=Path(x.name).name, choices=list(DRUG_LIBRARY_MAP.keys()) + [Path(x.name).name])
-    ], inputs=drug_library_upload_btn, outputs=[drug_library_upload, drug_library])
-    def example_fill(input_type):
-        # Return example values (UniProt ID, gene symbol, organism and FASTA record for Q16539 / MAPK14)
-        # keyed by the components they populate. `input_type` is accepted but not used.
-        return {target_id: 'Q16539',
-                target_gene: 'MAPK14',
-                target_organism: 'Human',
-                target_fasta: """
->sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
-MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
-SIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQ
-KLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMT
-GYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVG
-TPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAA
-QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
-"""}
-    # Populate the ID, gene, organism and FASTA inputs with the example values from example_fill.
-    example_fasta.click(fn=example_fill, inputs=target_input_type, outputs=[
-        target_id, target_gene, target_organism, target_fasta], show_progress=False)
-    # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
-    # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
-    def screen_recommend_model(fasta, family, task):
-        task = TASK_MAP[task]
-        score = TASK_METRIC_MAP[task]
-        benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
-        if not fasta:
-            gr.Warning('Please enter a valid FASTA for model recommendation.')
-            return None
-        if family == 'General':
-            seen_targets = pd.read_csv(
-                f'data/benchmarks/seen_targets/all_families_full_{task.lower()}_random_split.csv')
-            if process_target_fasta(fasta) in seen_targets['X2'].values:
-                scenario = "Seen Target"
-            else:
-                scenario = "Unseen Target"
-            filtered_df = benchmark_df[(benchmark_df['Family'] == 'All Families')
-                                       & (benchmark_df['Scenario'] == scenario)
-                                       & (benchmark_df['Type'] == 'General')]
-        else:
-            seen_targets_general = pd.read_csv(
-                f'data/benchmarks/seen_targets/all_families_full_{task.lower()}_random_split.csv')
-            if process_target_fasta(fasta) in seen_targets_general['X2'].values:
-                scenario_general = "Seen Target"
-            else:
-                scenario_general = "Unseen Target"
-            seen_targets_family = pd.read_csv(
-                f'data/benchmarks/seen_targets/{TARGET_FAMILY_MAP[family]}_{task.lower()}_random_split.csv')
-            if process_target_fasta(fasta) in seen_targets_family['X2'].values:
-                scenario_family = "Seen Target"
-            else:
-                scenario_family = "Unseen Target"
-            filtered_df_general = benchmark_df[(benchmark_df['Family'] == family)
-                                               & (benchmark_df['Scenario'] == scenario_general)
-                                               & (benchmark_df['Type'] == 'General')]
-            filtered_df_family = benchmark_df[(benchmark_df['Family'] == family)
-                                              & (benchmark_df['Scenario'] == scenario_family)
-                                              & (benchmark_df['Type'] == 'Family')]
-            filtered_df = pd.concat([filtered_df_general, filtered_df_family])
-        row = filtered_df.loc[filtered_df[score].idxmax()]
-        return gr.Dropdown(value=row['Model'],
-                           info=f"Reason: {row['Scenario']} in training; we recommend the model "
-                                f"with the best {score} ({float(row[score]):.3f}) "
-                                f"in the {row['Scenario']} scenario on {row['Family']}.")
-    # Write the recommended model for the current target, family and task into the preset dropdown.
-    screen_preset_recommend_btn.click(fn=screen_recommend_model,
-                                      inputs=[target_fasta, drug_screen_target_family, drug_screen_task],
-                                      outputs=drug_screen_preset)
-    def compound_input_type_select(input_type):
-        match input_type:
-            case 'SMILES':
-                return gr.Button(visible=False)
-            case 'SDF':
-                return gr.Button(visible=True)
-    # Toggle the compound upload button according to the selected compound input type.
-    compound_type.select(fn=compound_input_type_select,
-                         inputs=compound_type, outputs=compound_upload_btn, show_progress=False)
-    def compound_upload_process(input_type, input_upload):
-        smiles = ''
-        try:
-            match input_type:
-                case 'SMILES':
-                    smiles = input_upload.decode()
-                case 'SDF':
-                    suppl = Chem.ForwardSDMolSupplier(io.BytesIO(input_upload))
-                    smiles = Chem.MolToSmiles(next(suppl))
-        except Exception as e:
-            gr.Warning(f"Please upload a valid {input_type} file. Error: {str(e)}")
-        return smiles
-    # Fill the SMILES box from an uploaded SMILES or SDF file.
-    compound_upload_btn.upload(fn=compound_upload_process,
-                               inputs=[compound_type, compound_upload_btn],
-                               outputs=compound_smiles)
-    # Fill the SMILES box with a fixed example SMILES string.
-    example_drug.click(fn=lambda: 'CC(=O)Oc1ccccc1C(=O)O', outputs=compound_smiles, show_progress=False)
-    # On a library upload: keep the uploaded file's path and select its file name in the library
-    # dropdown, appended as an extra choice after the TARGET_LIBRARY_MAP keys.
-    target_library_upload_btn.upload(fn=lambda x: [
-        x.name, gr.Dropdown(value=Path(x.name).name, choices=list(TARGET_LIBRARY_MAP.keys()) + [Path(x.name).name])
-    ], inputs=target_library_upload_btn, outputs=[target_library_upload, target_library])
-    def identify_recommend_model(smiles, task):
-        task = TASK_MAP[task]
-        score = TASK_METRIC_MAP[task]
-        benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
-        if not smiles:
-            gr.Warning('Please enter a valid SMILES for model recommendation.')
-            return None
-        seen_drugs = pd.read_csv(
-            f'data/benchmarks/seen_drugs/all_families_full_{task.lower()}_random_split.csv')
-        if rdkit_canonicalize(smiles) in seen_drugs['X1'].values:
-            scenario = "Seen Compound"
-        else:
-            scenario = "Unseen Compound"
-        filtered_df = benchmark_df[(benchmark_df['Family'] == 'All Families')
-                                   & (benchmark_df['Scenario'] == scenario)
-                                   & (benchmark_df['Type'] == 'General')]
-        row = filtered_df.loc[filtered_df[score].idxmax()]
-        return gr.Dropdown(value=row['Model'],
-                           info=f"Reason: {scenario} in training; choosing the model "
-                                f"with the best {score} ({float(row[score]):3f}) "
-                                f"in the {scenario} scenario.")
-    # Write the recommended model for the current compound and task into the preset dropdown.
-    identify_preset_recommend_btn.click(fn=identify_recommend_model,
-                                        inputs=[compound_smiles, target_identify_task],
-                                        outputs=target_identify_preset)
-    def infer_type_change(upload_type):
-        match upload_type:
-            case "Upload a compound library and a target library":
-                return {
-                    pair_upload: gr.Column(visible=False),
-                    pair_generate: gr.Column(visible=True),
-                    infer_data_for_predict: None,
-                    infer_drug: None,
-                    infer_target: None,
-                    infer_csv_prompt: gr.Button(visible=False),
-                    infer_library_prompt: gr.Button(visible=True),
-                }
-        match upload_type:
-            case "Upload a CSV file containing paired compound-protein data":
-                return {
-                    pair_upload: gr.Column(visible=True),
-                    pair_generate: gr.Column(visible=False),
-                    infer_data_for_predict: None,
-                    infer_drug: None,
-                    infer_target: None,
-                    infer_csv_prompt: gr.Button(visible=True),
-                    infer_library_prompt: gr.Button(visible=False),
-                }
-    # Swap the visible upload column and prompt, and reset the uploads, when the inference input mode changes.
-    infer_type.select(fn=infer_type_change, inputs=infer_type,
-                      outputs=[pair_upload, pair_generate, infer_data_for_predict, infer_drug, infer_target,
-                               infer_csv_prompt, infer_library_prompt])
-    def drug_screen_validate(fasta, library, library_upload, state, progress=gr.Progress(track_tqdm=True)):
-        if not state:
-            try:
-                fasta = process_target_fasta(fasta)
-                err = validate_seq_str(fasta, FASTA_PAT)
-                if err:
-                    raise ValueError(f'Found error(s) in your target fasta input: {err}')
-                if library in DRUG_LIBRARY_MAP.keys():
-                    screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
-                else:
-                    screen_df = process_drug_library_upload(library_upload)
-                    if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
-                        raise gr.Error(f'The uploaded compound library has more records '
-                                       f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
-                screen_df['X2'] = fasta
-                job_id = uuid4()
-                temp_file = Path(f'temp/{job_id}_input.csv').resolve()
-                screen_df.to_csv(temp_file, index=False)
-                if temp_file.is_file():
-                    return {screen_data_for_predict: str(temp_file),
-                            screen_flag: job_id,
-                            run_state: job_id}
-                else:
-                    raise SystemError('Failed to create temporary files. Please try again later.')
-            except Exception as e:
-                gr.Warning(f'Failed to submit the job due to error: {str(e)}')
-                return {screen_flag: False,
-                        run_state: False}
-        else:
-            gr.Warning('You have another prediction job '
-                       '(drug hit screening, target protein identification, or interation pair inference) '
-                       'running in the session right now. '
-                       'Please submit another job when your current job has finished.')
-            return {screen_flag: False,
-                    run_state: state}
-    def target_identify_validate(smiles, library, library_upload, state, progress=gr.Progress(track_tqdm=True)):
-        if not state:
-            try:
-                smiles = smiles.strip()
-                err = validate_seq_str(smiles, SMILES_PAT)
-                if err:
-                    raise ValueError(f'Found error(s) in your target fasta input: {err}')
-                if library in TARGET_LIBRARY_MAP.keys():
-                    identify_df = pd.read_csv(Path('data/target_libraries', TARGET_LIBRARY_MAP[library]))
-                else:
-                    identify_df = process_target_library_upload(library_upload)
-                    if len(identify_df) >= CUSTOM_DATASET_MAX_LEN:
-                        raise gr.Error(f'The uploaded target library has more records '
-                                       f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
-                identify_df['X1'] = smiles
-                job_id = uuid4()
-                temp_file = Path(f'temp/{job_id}_input.csv').resolve()
-                identify_df.to_csv(temp_file, index=False)
-                if temp_file.is_file():
-                    return {identify_data_for_predict: str(temp_file),
-                            identify_flag: job_id,
-                            run_state: job_id}
-                else:
-                    raise SystemError('Failed to create temporary files. Please try again later.')
-            except Exception as e:
-                gr.Warning(f'Failed to submit the job due to error: {str(e)}')
-                return {identify_flag: False,
-                        run_state: False}
-        else:
-            gr.Warning('You have another prediction job '
-                       '(drug hit screening, target protein identification, or interation pair inference) '
-                       'running in the session right now. '
-                       'Please submit another job when your current job has finished.')
-            return {identify_flag: False,
-                    run_state: state}
-            # return {identify_flag: False}
-    def pair_infer_validate(drug_target_pair_upload, drug_upload, target_upload, state,
-                            progress=gr.Progress(track_tqdm=True)):
-        if not state:
-            try:
-                job_id = uuid4()
-                if drug_target_pair_upload:
-                    infer_df = pd.read_csv(drug_target_pair_upload)
-                    validate_columns(infer_df, ['X1', 'X2'])
-                    infer_df['X1_ERR'] = infer_df['X1'].swifter.progress_bar(desc="Validating SMILES...").apply(
-                        validate_seq_str, regex=SMILES_PAT)
-                    if not infer_df['X1_ERR'].isna().all():
-                        raise ValueError(
-                            f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
-                    infer_df['X2_ERR'] = infer_df['X2'].swifter.progress_bar(desc="Validating FASTA...").apply(
-                        validate_seq_str, regex=FASTA_PAT)
-                    if not infer_df['X2_ERR'].isna().all():
-                        raise ValueError(
-                            f"Encountered invalid FASTA:\n{infer_df[~infer_df['X2_ERR'].isna()][['X2', 'X2_ERR']]}")
-                    return {infer_data_for_predict: str(drug_target_pair_upload),
-                            infer_flag: job_id,
-                            run_state: job_id}
-                elif drug_upload and target_upload:
-                    drug_df = process_drug_library_upload(drug_upload)
-                    target_df = process_target_library_upload(target_upload)
-                    drug_df.drop_duplicates(subset=['X1'], inplace=True)
-                    target_df.drop_duplicates(subset=['X2'], inplace=True)
-                    infer_df = pd.DataFrame(list(itertools.product(drug_df['X1'], target_df['X2'])),
-                                            columns=['X1', 'X2'])
-                    infer_df = infer_df.merge(drug_df, on='X1').merge(target_df, on='X2')
-                    temp_file = Path(f'temp/{job_id}_input.csv').resolve()
-                    infer_df.to_csv(temp_file, index=False)
-                    if temp_file.is_file():
-                        return {infer_data_for_predict: str(temp_file),
-                                infer_flag: job_id,
-                                run_state: job_id}
-                else:
-                    raise gr.Error('Should upload a compound-protein pair dataset,or '
-                                   'upload both a compound library and a target library.')
-                if len(infer_df) >= CUSTOM_DATASET_MAX_LEN:
-                    raise gr.Error(f'The uploaded/generated compound-protein pair dataset has more records '
-                                   f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
-            except Exception as e:
-                gr.Warning(f'Failed to submit the job due to error: {str(e)}')
-                return {infer_flag: False,
-                        run_state: False}
-        else:
-            gr.Warning('You have another prediction job '
-                       '(drug hit screening, target protein identification, or interation pair inference) '
-                       'running in the session right now. '
-                       'Please submit another job when your current job has finished.')
-            return {infer_flag: False,
-                    run_state: state}
-    # Each of the three submit buttons runs the same four-step chain: validate the inputs and stage
-    # the job, swap the page for its waiting notice, run submit_predict (which outputs to the report
-    # file, run state and upload flag), then restore the page and select the tab with id 3.
-    # NOTE(review): the follow-up steps are chained with `.then`, not `.success` — presumably
-    # submit_predict checks the flag itself when validation failed; confirm.
-    # Drug hit screening pipeline.
-    drug_screen_btn.click(
-        fn=drug_screen_validate,
-        inputs=[target_fasta, drug_library, drug_library_upload, run_state],  # , drug_screen_email],
-        outputs=[screen_data_for_predict, screen_flag, run_state]
-    ).then(
-        fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)],
-        outputs=[screen_page, screen_waiting]
-    ).then(
-        fn=submit_predict,
-        inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
-                drug_screen_target_family, screen_flag, run_state],  # , drug_screen_email],
-        outputs=[file_for_report, run_state, report_upload_flag]
-    ).then(
-        fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
-        outputs=[screen_page, screen_waiting, tabs]
-    )
-    # Target protein identification pipeline.
-    target_identify_btn.click(
-        fn=target_identify_validate,
-        inputs=[compound_smiles, target_library, target_library_upload, run_state],  # , drug_screen_email],
-        outputs=[identify_data_for_predict, identify_flag, run_state]
-    ).then(
-        fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)],
-        outputs=[identify_page, identify_waiting]
-    ).then(
-        fn=submit_predict,
-        inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
-                target_identify_target_family, identify_flag, run_state],  # , target_identify_email],
-        outputs=[file_for_report, run_state, report_upload_flag]
-    ).then(
-        fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
-        outputs=[identify_page, identify_waiting, tabs]
-    )
-    # Interaction pair inference pipeline.
-    pair_infer_btn.click(
-        fn=pair_infer_validate,
-        inputs=[infer_data_for_predict, infer_drug, infer_target, run_state],  # , drug_screen_email],
-        outputs=[infer_data_for_predict, infer_flag, run_state]
-    ).then(
-        fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)],
-        outputs=[infer_page, infer_waiting]
-    ).then(
-        fn=submit_predict,
-        inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
-                pair_infer_target_family, infer_flag, run_state],  # , pair_infer_email],
-        outputs=[file_for_report, run_state, report_upload_flag]
-    ).then(
-        fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
-        outputs=[infer_page, infer_waiting, tabs]
-    )
-    # TODO background job from these 3 pipelines to update file_for_report
-    def inquire_task(df, upload_flag):
-        if upload_flag:
-            if 'Y' in df.columns:
-                label = 'actual CPI/CPA labels (`Y`)'
-            elif 'Y^' in df.columns:
-                label = 'predicted CPI/CPA labels (`Y^`)'
-            else:
-                return {analyze_btn: gr.Button(interactive=True),
-                        csv_generate: gr.Button(interactive=True),
-                        html_generate: gr.Button(interactive=True)}
-            return {report_task: gr.Dropdown(visible=True,
-                                             info=f'Found {label} in your uploaded dataset. '
-                                                  'Is it compound-target interaction or binding affinity?'),
-                    html_report: '',
-                    analyze_btn: gr.Button(interactive=False),
-                    csv_generate: gr.Button(interactive=False),
-                    html_generate: gr.Button(interactive=False)}
-        else:
-            return {report_task: gr.Dropdown(visible=False)}
-    # An upload through the file widget sets report_upload_flag to True.
-    file_for_report.upload(
-        fn=lambda: True, outputs=report_upload_flag
-    )
-    # When the report file changes: reload it via update_df; on success disable both generate
-    # buttons and hide the download widgets and the task dropdown; then run inquire_task on the
-    # loaded data.
-    file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
-        html_report, raw_df, report_df, analyze_btn]).success(
-        fn=lambda: [gr.Button(interactive=False)]*2 + [gr.File(visible=False)]*2 + [gr.Dropdown(visible=False)],
-        outputs=[csv_generate, html_generate, csv_download_file, html_download_file, report_task]
-    ).then(
-        fn=inquire_task, inputs=[raw_df, report_upload_flag],
-        outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate]
-    )
-    # Clearing the file hides and resets the task dropdown and unsets the upload flag.
-    file_for_report.clear(fn=lambda: [gr.Dropdown(visible=False, value=None), False],
-                          outputs=[report_task, report_upload_flag])
-    # Build the preview report; the generate buttons are enabled only if that succeeds.
-    analyze_btn.click(fn=submit_report, inputs=[raw_df, scores, filters, report_task], outputs=[
-        html_report, report_df, csv_download_file, html_download_file
-    ]).success(fn=lambda: [gr.Button(interactive=True)] * 2,
-               outputs=[csv_generate, html_generate])
-    # Selecting a task enables the preview button.
-    report_task.select(fn=lambda: gr.Button(interactive=True),
-                       outputs=analyze_btn)
-    def create_csv_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
-        try:
-            now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-            filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
-            df.drop(labels=['Compound', 'Scaffold'], axis=1).to_csv(filename, index=False)
-            return gr.File(filename)
-        except Exception as e:
-            gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
-            return None
-    def create_html_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
-        try:
-            now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-            filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
-            create_html_report(df, filename)
-            return gr.File(filename, visible=True)
-        except Exception as e:
-            gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
-            return None
-    # Whenever the preview changes, make both generate buttons visible again.
-    html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
-    # Generating a report first replaces the button with its download widget, then writes the file.
-    csv_generate.click(
-        lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[csv_generate, csv_download_file],
-    ).then(fn=create_csv_report_file, inputs=[report_df, file_for_report],
-           outputs=csv_download_file, show_progress='full')
-    html_generate.click(
-        lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[html_generate, html_download_file],
-    ).then(fn=create_html_report_file, inputs=[report_df, file_for_report],
-           outputs=html_download_file, show_progress='full')
-    # screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
-    #                       every=5)
-    # identify_waiting.change(fn=check_job_status, inputs=run_state, outputs=[identify_waiting, tabs, file_for_report],
-    #                         every=5)
-    # pair_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
-    #                     every=5)
-    # demo.load(None, None, None, js="() => {document.body.classList.remove('dark')}")
-if __name__ == "__main__":
-    # Configure the event queue of each section with max_size=3, then launch the app.
-    screen_block.queue(max_size=3)
-    identify_block.queue(max_size=3)
-    infer_block.queue(max_size=3)
-    report.queue(max_size=3)
-    # SCHEDULER.add_job(func=file_cleanup(), trigger="interval", seconds=60)
-    # SCHEDULER.start()
-    demo.launch(
-        show_api=False,
-    )

+from email.utils import formatdate, make_msgid
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+import smtplib
+from markdown import markdown
+def send_email(receiver, job_info):
+    email_serv = "smtpdm.aliyun.com" # "ciddr-lab.ac.cn" # "srvsmtp.xjtlu.edu.cn"
+    email_port = 80 # 1025 # 587 # 25
+    email_addr = "[email protected]"
+    email_pass = "ciddrw447JkpB"
+    email_form = """
+Dear user,
+Your DeepSEQreen job is {status}.
+**Job details:**
+ - Job id: {id}
+ - Job type: {type}
+ - Start time: {start_time}
+ - End time: {end_time}
+ - Expiry time: {expiry_time}
+ - Error: {error}
+Please visit the [DeepSEQreen web server](https://www.ciddr-lab.ac.cn/deepseqreen/) to check the job status or retrieve the results.
+Best,
+CIDDR Team
 """
+    server = smtplib.SMTP(email_serv, email_port)
+    # server.starttls()
+    server.login(email_addr, email_pass)
+    msg = MIMEMultipart("alternative")
+    msg["From"] = email_addr
+    msg["To"] = receiver
+    msg["Subject"] = f"DeepSEQreen Job {job_info['status']}: {job_info['id']}"
+    msg["Date"] = formatdate(localtime=True)
+    msg["Message-ID"] = make_msgid()
+    msg.attach(MIMEText(markdown(email_form.format(**job_info)), 'html'))
+    msg.attach(MIMEText(email_form.format(**job_info), 'plain'))
+    server.sendmail(email_addr, receiver, msg.as_string())
+    server.quit()
+send_email('xinran.[email protected]', {'id': 'a1b2c3d', 'type': 'Drug Hit Screening', 'status': 'RUNNING', 'start_time': '2021-10-10 10:00:00', 'end_time': 'TBD', 'expiry_time': 'TBD', 'error': 'TBD'})