Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp | |
| from biomed-multi-alignment.examples.dti_bindingdb_kd.task import DtiBindingdbKdTask | |
| from biomed-multi-alignment.keys import * | |
| from biomed-multi-alignment.model import Mammal | |
# Display names of the two demos; they double as keys into the registries below.
ppi = "Protein-Protein Interaction (PPI)"
dti = "Drug-Target Binding Affinity"

# Task name -> model identifier handed to ``from_pretrained``.
model_paths = {
    ppi: "ibm/biomed.omics.bl.sm.ma-ted-458m",
    dti: "ibm/biomed.omics.bl.sm.ma-ted-458m.dti_bindingdb_pkd",
}

# Eagerly load one model and one tokenizer per task at import time
# (this should probably be lazy).
models = dict()
tokenizer_op = dict()
for task, model_path in model_paths.items():
    if task in models:
        # Already loaded for this task; nothing to do.
        continue
    models[task] = Mammal.from_pretrained(model_path)
    # Put the model in evaluation mode before serving predictions.
    models[task].eval()
    tokenizer_op[task] = ModularTokenizerOp.from_pretrained(model_path)
### PPI:
# Default protein sequences used to pre-fill the PPI demo inputs.
protein_calmodulin = "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK"
protein_calcineurin = "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ"

# Id of the "<1>" token (positive binding) in the PPI tokenizer.
positive_token_id = tokenizer_op[ppi].get_token_id("<1>")
def format_prompt_ppi(prot1, prot2):
    """Return the PPI prompt for two protein sequences.

    The prompt follows the pre-training syntax: a tokenizer/task header with
    a sentinel for the binding-affinity class, one protein entity section per
    input sequence, and a closing ``<EOS>`` token.

    Args:
        prot1: Amino-acid sequence of the first protein.
        prot2: Amino-acid sequence of the second protein.

    Returns:
        The formatted prompt string.
    """
    header = "<@TOKENIZER-TYPE=AA><BINDING_AFFINITY_CLASS><SENTINEL_ID_0>"
    # Both proteins use the same entity markup, so build the sections uniformly.
    entities = "".join(
        "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
        f"<SEQUENCE_NATURAL_START>{sequence}<SEQUENCE_NATURAL_END>"
        for sequence in (prot1, prot2)
    )
    return f"{header}{entities}<EOS>"
def run_prompt(prompt):
    """Tokenize a PPI prompt, run generation and return the output and score.

    Args:
        prompt: Prompt string for the PPI model (see ``format_prompt_ppi``).

    Returns:
        A ``(generated_output, score)`` tuple: the decoded text of the first
        prediction in the batch, and the value read from the model scores at
        the id of the positive ("<1>") token.
    """
    ppi_tokenizer = tokenizer_op[ppi]

    # Resolve the positive-class token id from the PPI tokenizer at call time.
    # The module-level ``positive_token_id`` is reassigned further down the
    # file from the DTI tokenizer, so reading that global here would index the
    # PPI scores with another tokenizer's id.
    ppi_positive_token_id = ppi_tokenizer.get_token_id("<1>")

    # Create the sample and tokenize it in place.
    sample_dict = {ENCODER_INPUTS_STR: prompt}
    sample_dict = ppi_tokenizer(
        sample_dict=sample_dict,
        key_in=ENCODER_INPUTS_STR,
        key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
        key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
    )
    # Convert the token ids and attention mask to tensors before generation.
    sample_dict[ENCODER_INPUTS_TOKENS] = torch.tensor(
        sample_dict[ENCODER_INPUTS_TOKENS]
    )
    sample_dict[ENCODER_INPUTS_ATTENTION_MASK] = torch.tensor(
        sample_dict[ENCODER_INPUTS_ATTENTION_MASK]
    )

    # Generate the prediction for a batch holding this single sample.
    batch_dict = models[ppi].generate(
        [sample_dict],
        output_scores=True,
        return_dict_in_generate=True,
        max_new_tokens=5,
    )

    # Decode the first (only) prediction of the batch.
    generated_output = ppi_tokenizer._tokenizer.decode(batch_dict[CLS_PRED][0])
    # NOTE(review): [0][1] presumably selects the first sample and the second
    # generated position (where the class token is expected) -- confirm.
    score = batch_dict["model.out.scores"][0][1][ppi_positive_token_id].item()
    return generated_output, score
def create_and_run_prompt(protein1, protein2):
    """Format the PPI prompt for two proteins and run the model on it.

    Args:
        protein1: Amino-acid sequence of the first protein.
        protein2: Amino-acid sequence of the second protein.

    Returns:
        A ``(prompt, generated_output, score)`` tuple, in the order expected
        by the PPI demo's output components.
    """
    ppi_prompt = format_prompt_ppi(protein1, protein2)
    generated_output, score = run_prompt(prompt=ppi_prompt)
    return ppi_prompt, generated_output, score
def create_ppi_demo():
    """Build the Protein-Protein Interaction demo as a hidden ``gr.Group``.

    The group holds two protein-sequence inputs (pre-filled with the default
    calmodulin and calcineurin sequences), a run button, and output boxes for
    the prompt, the decoded model output and the PPI score.

    Returns:
        The ``gr.Group`` containing the demo, with ``visible`` set to False.
    """
    markup_text = f"""
# Mammal based Protein-Protein Interaction (PPI) demonstration
Given two protein sequences, estimate if the proteins interact or not.
### Using the model from
```{model_paths[ppi]} ```
"""

    def sequence_box(label, value):
        # Editable three-line textbox pre-filled with a default sequence.
        return gr.Textbox(label=label, interactive=True, lines=3, value=value)

    with gr.Group() as ppi_demo:
        gr.Markdown(markup_text)
        with gr.Row():
            prot1 = sequence_box("Protein 1 sequence", protein_calmodulin)
            prot2 = sequence_box("Protein 2 sequence", protein_calcineurin)
        with gr.Row():
            run_mammal = gr.Button(
                "Run Mammal prompt for Protein-Protein Interaction",
                variant="primary",
            )
        with gr.Row():
            prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
        with gr.Row():
            decoded = gr.Textbox(label="Mammal output")
            # The score box is created right after the decoded-output box so
            # that the component creation order stays the same.
            score_box = gr.Number(label="PPI score")
            run_mammal.click(
                fn=create_and_run_prompt,
                inputs=[prot1, prot2],
                outputs=[prompt_box, decoded, score_box],
            )
        with gr.Row():
            gr.Markdown(
                "```<SENTINEL_ID_0>``` contains the binding affinity class, which is ```<1>``` for interacting and ```<0>``` for non-interacting"
            )
    # Start hidden; the application toggles visibility from its dropdown.
    ppi_demo.visible = False
    return ppi_demo
### DTI:
# Default inputs used to pre-fill the DTI demo: a target protein sequence and
# a drug given as a SMILES string.
target_seq = "NLMKRCTRGFRKLGKCTTLEEEKCKTLYPRGQCTCSDSKMNTHSCDCKSC"
drug_seq = "CC(=O)NCCC1=CNc2c1cc(OC)cc2"

# Id of the "<1>" token in the DTI tokenizer. It is stored under its own name
# so that it does not overwrite the PPI ``positive_token_id`` defined earlier,
# which the PPI scoring code reads. No function in the visible file uses the
# DTI value.
dti_positive_token_id = tokenizer_op[dti].get_token_id("<1>")
def format_prompt_dti(prot, drug):
    """Build the preprocessed DTI sample for a target protein and a drug.

    Args:
        prot: Amino-acid sequence of the target protein.
        drug: Drug given as a SMILES string.

    Returns:
        The sample dict returned by ``DtiBindingdbKdTask.data_preprocessing``.
    """
    # Use the caller's inputs. The previous version read the module-level
    # defaults ``target_seq`` / ``drug_seq`` here, so whatever the user typed
    # into the demo was ignored.
    sample_dict = {"target_seq": prot, "drug_seq": drug}
    sample_dict = DtiBindingdbKdTask.data_preprocessing(
        sample_dict=sample_dict,
        tokenizer_op=tokenizer_op[dti],
        target_sequence_key="target_seq",
        drug_sequence_key="drug_seq",
        # No label normalisation is passed at inference time.
        norm_y_mean=None,
        norm_y_std=None,
        device=models[dti].device,
    )
    return sample_dict
def create_and_run_prompt_dtb(prot, drug):
    """Run the DTI model for a target protein and a drug.

    Args:
        prot: Amino-acid sequence of the target protein.
        drug: Drug given as a SMILES string.

    Returns:
        A ``(encoder_input, output_key, prediction)`` tuple: the encoder input
        stored in the preprocessed sample, the name of the output key, and the
        first processed prediction as a float.
    """
    output_key = "model.out.dti_bindingdb_kd"

    sample_dict = format_prompt_dti(prot, drug)
    raw_batch = models[dti].forward_encoder_only([sample_dict])

    # Post-process the model's output with the fixed normalisation constants.
    processed_batch = DtiBindingdbKdTask.process_model_output(
        raw_batch,
        scalars_preds_processed_key=output_key,
        norm_y_mean=5.79384684128215,
        norm_y_std=1.33808027428196,
    )

    prediction = float(processed_batch[output_key][0])
    return sample_dict["data.query.encoder_input"], output_key, prediction
def create_tdb_demo():
    """Build the Target-Drug binding affinity demo as a hidden ``gr.Group``.

    The group holds a protein-sequence input and a drug (SMILES) input, both
    pre-filled with the module defaults, a run button, and output boxes for
    the prompt, the model output and the DTI score.

    Returns:
        The ``gr.Group`` containing the demo, with ``visible`` set to False.
    """
    markup_text = f"""
# Mammal based Target-Drug binding affinity demonstration
Given a protein sequence and a drug (in SMILES), estimate the binding affinity.
### Using the model from
```{model_paths[dti]} ```
"""

    def sequence_box(label, value):
        # Editable three-line textbox pre-filled with a default sequence.
        return gr.Textbox(label=label, interactive=True, lines=3, value=value)

    with gr.Group() as tdb_demo:
        gr.Markdown(markup_text)
        with gr.Row():
            prot = sequence_box("Protein sequence", target_seq)
            drug = sequence_box("drug sequence (SMILES)", drug_seq)
        with gr.Row():
            run_mammal = gr.Button(
                "Run Mammal prompt for Target Drug Affinity",
                variant="primary",
            )
        with gr.Row():
            prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
        with gr.Row():
            decoded = gr.Textbox(label="Mammal output")
            # The score box is created right after the output box so that the
            # component creation order stays the same.
            score_box = gr.Number(label="DTI score")
            run_mammal.click(
                fn=create_and_run_prompt_dtb,
                inputs=[prot, drug],
                outputs=[prompt_box, decoded, score_box],
            )
    # Start hidden; the application toggles visibility from its dropdown.
    tdb_demo.visible = False
    return tdb_demo
def create_application():
    """Assemble the full application: a demo selector plus both demo groups.

    Both demo groups are created hidden. Changing the dropdown shows the
    group whose name matches the selection and hides the other one.

    Returns:
        The ``gr.Blocks`` application.
    """
    demo_choices = ["select demo", ppi, dti]

    with gr.Blocks() as demo:
        main_dropdown = gr.Dropdown(choices=demo_choices)
        main_dropdown.interactive = True

        ppi_demo = create_ppi_demo()
        dtb_demo = create_tdb_demo()

        def set_ppi_vis(main_text):
            # One visibility update per output group, in the order of the
            # ``outputs`` list below.
            show_ppi = main_text == ppi
            show_dti = main_text == dti
            return gr.Group(visible=show_ppi), gr.Group(visible=show_dti)

        main_dropdown.change(
            set_ppi_vis,
            inputs=main_dropdown,
            outputs=[ppi_demo, dtb_demo],
        )
    return demo
def main():
    """Build the Gradio application and launch it with a shareable link."""
    application = create_application()
    # show_error surfaces errors in the UI; share requests a public link.
    application.launch(show_error=True, share=True)


if __name__ == "__main__":
    main()