Spaces:
Running
Running
File size: 3,945 Bytes
27e2770 b2a3d53 27e2770 79b6488 b2a3d53 79b6488 dfecb5b 5a6f640 27e2770 dfecb5b e9e46e1 dfecb5b 20d73a0 dfecb5b 27e2770 79b6488 b2a3d53 79b6488 1d25e2a b2a3d53 1d25e2a dfecb5b 27e2770 cdbbabd b2a3d53 e9e46e1 b2a3d53 20d73a0 b2a3d53 dfecb5b b2a3d53 dfecb5b b2a3d53 dfecb5b 79b6488 b2a3d53 79b6488 1d25e2a dfecb5b 79b6488 b2a3d53 dfecb5b b2a3d53 79b6488 b2a3d53 dfecb5b b2a3d53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import gradio as gr
from get_index import get_engine
from protein_viz import get_gene_names, get_protein_name, render_html
# Repository identifiers handed to get_engine below.
# NOTE(review): these look like Hugging Face Hub repo ids for the search index
# and the embedding model — confirm against get_index.get_engine.
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Built once at import time; search_and_display reuses this single instance.
engine = get_engine(index_repo, model_repo)
def search_and_display(seq, n_res):
    """Run a sequence search and build both outputs of the search action.

    Args:
        seq: Query sequence, passed unchanged to ``engine.search_by_sequence``.
        n_res: Requested number of results; clamped by ``limit_n_results``
            and converted to ``int`` before the search.

    Returns:
        A ``(label_data, dropdown_update)`` tuple: the mapping produced by
        ``format_search_results`` and the update produced by
        ``update_dropdown_menu``.
    """
    capped = int(limit_n_results(n_res))
    hits = engine.search_by_sequence(seq, n=capped)
    # Build the dropdown update before the label data so that a malformed
    # result row fails at the same point it always has.
    dropdown_update = update_dropdown_menu(hits)
    return format_search_results(hits), dropdown_update
def limit_n_results(n, *, lower=1, upper=20):
    """Clamp a requested result count to the inclusive range [lower, upper].

    Args:
        n: Requested number of results. ``None`` (for example the value a
            cleared numeric input field sends) is treated as a request for
            ``lower``.
        lower: Smallest count allowed; defaults to 1.
        upper: Largest count allowed; defaults to 20.

    Returns:
        ``n`` itself when it already lies within the bounds, otherwise the
        nearest bound.
    """
    if n is None:
        # min()/max() cannot order None against an int; fall back to the
        # lower bound instead of surfacing a TypeError to the caller.
        return lower
    return max(min(n, upper), lower)
def update_dropdown_menu(search_res):
    """Build the dropdown update that lists every search hit as an option.

    Each option is ``"<pdb_name>,<chain_id>"``, with ``",<gene>"`` appended
    when the first entry of the row's ``gene_ids`` is not ``"Unknown"``.
    The first option is preselected and the dropdown is made visible and
    interactive.

    Args:
        search_res: Sequence of result rows, each indexable by ``"pdb_name"``,
            ``"chain_id"`` and ``"gene_ids"``.

    Returns:
        The value returned by ``gr.Dropdown.update`` for those options.
    """

    def _option(hit):
        # Only the first gene id takes part in the option text.
        first_gene = hit["gene_ids"][0]
        parts = [hit["pdb_name"], hit["chain_id"]]
        if first_gene != "Unknown":
            parts.append(first_gene)
        return ",".join(parts)

    options = [_option(hit) for hit in search_res]
    return gr.Dropdown.update(
        choices=options, interactive=True, value=options[0], visible=True
    )
def format_search_results(raw_search_results):
    """Map a display label for each search hit to its score.

    The label is ``"PDB: <pdb_name> | Chain: <chain_id>"``; when the
    comma-joined ``gene_ids`` text is anything other than ``"Unknown"`` it is
    appended as ``" | Gene: <genes>"``. Hits that produce the same label
    collapse into one entry holding the score of the last such hit.

    Args:
        raw_search_results: Iterable of rows indexable by ``"pdb_name"``,
            ``"chain_id"``, ``"score"`` and ``"gene_ids"``.

    Returns:
        A dict from label to score, in first-seen label order.
    """

    def _labelled(hit):
        pdb, chain_id, score = hit["pdb_name"], hit["chain_id"], hit["score"]
        gene_text = ",".join(hit["gene_ids"])
        gene_suffix = f" | Gene: {gene_text}" if gene_text != "Unknown" else ""
        return f"PDB: {pdb} | Chain: {chain_id}" + gene_suffix, score

    return dict(_labelled(hit) for hit in raw_search_results)
def switch_viz(new_choice):
    """Produce the three visualization outputs for a selected dropdown entry.

    Args:
        new_choice: Comma-separated entry of the form
            ``"<pdb_id>,<chain>"`` or ``"<pdb_id>,<chain>,<gene>"``.

    Returns:
        A tuple ``(viewer_html, header_update, body_update)``: the result of
        ``render_html`` for the chain, an update that makes the header
        visible, and an update carrying the PDB title (plus the title-cased
        gene name when a gene is part of the entry).
    """
    fields = new_choice.split(",")
    pdb_id, chain = fields[0], fields[1]
    header_update = gr.Markdown.update(visible=True)

    description = f"""**PDB Title**: {get_protein_name(pdb_id)}"""
    if len(fields) > 2:
        # A third field is present only when the entry carries a gene id.
        gene_name = get_gene_names([fields[2]])[0]
        description += f"""\n\n**Gene Name**: {gene_name.title()}"""

    body_update = gr.Markdown.update(value=description, visible=True)
    viewer_html = render_html(pdb_id=pdb_id, chain=chain)
    return viewer_html, header_update, body_update
# Declarative UI: components are created first, then the events are wired up.
# NOTE(review): the container nesting below was reconstructed from a copy of
# this file that had lost its indentation — confirm the intended layout.
with gr.Blocks() as demo:
    with gr.Column():
        # Page title and a short description of what the app does.
        gr.Markdown(
            """
# Protein Binding Search Engine
This application examines all files uploaded to [PDB](https://www.rcsb.org/)
to find the chains with which a given protein sequence is most likely to bind.
If the results are linked to a specific gene, their IDs will also be displayed.
"""
        )
        with gr.Column():
            with gr.Row():
                # Search form: query sequence, result count and the trigger.
                with gr.Column():
                    seq_input = gr.Textbox(
                        value="KFLIYQMECSTMIFGL", label="Input Sequence"
                    )
                    n_results = gr.Number(5, label="N Results")
                    search_button = gr.Button("Search", variant="primary")
                # Receives the label -> score mapping from search_and_display.
                search_results = gr.Label(num_top_classes=20, label="Search Results")
            # The header, selector and body start hidden; the callbacks
            # (update_dropdown_menu, switch_viz) return updates with
            # visible=True for them.
            viz_header = gr.Markdown("## Visualization", visible=False)
            results_selector = gr.Dropdown(
                choices=[],
                multiselect=False,
                visible=False,
                label="Visualized Search Result",
            )
            viz_body = gr.Markdown("", visible=False)
            # Initial viewer content is rendered with no PDB id or chain.
            # NOTE(review): presumably a placeholder view — confirm in
            # protein_viz.render_html.
            protein_viz = gr.HTML(
                value=render_html(pdb_id=None, chain=None),
                label="Protein Visualization",
            )
            # Example sequences that fill the input textbox.
            gr.Examples(
                ["KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"], inputs=[seq_input]
            )
    # Clicking "Search" fills the results label and the dropdown.
    search_button.click(
        search_and_display,
        inputs=[seq_input, n_results],
        outputs=[search_results, results_selector],
    )
    # A change of the dropdown value re-renders the viewer, header and body.
    results_selector.change(
        switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
    )
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|