roni
showing gene info
b2a3d53
raw
history blame
3.95 kB
import gradio as gr
from get_index import get_engine
from protein_viz import get_gene_names, get_protein_name, render_html
# Repository ids handed to get_engine for the search index and the model.
# NOTE(review): presumably Hugging Face Hub repo ids -- confirm against get_index.get_engine.
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Built once at module import and reused by search_and_display for every query.
engine = get_engine(index_repo, model_repo)
def search_and_display(seq, n_res):
    """Run a sequence search and produce both outputs of the Search button.

    Args:
        seq: Query sequence forwarded to ``engine.search_by_sequence``.
        n_res: Requested number of results; clamped by ``limit_n_results``
            and converted to ``int`` before the search.

    Returns:
        A ``(formatted_results, dropdown_update)`` tuple holding the mapping
        from ``format_search_results`` and the update returned by
        ``update_dropdown_menu``.
    """
    capped_count = int(limit_n_results(n_res))
    hits = engine.search_by_sequence(seq, n=capped_count)
    # The dropdown update is computed before the label mapping.
    dropdown_update = update_dropdown_menu(hits)
    return format_search_results(hits), dropdown_update
def limit_n_results(n):
    """Clamp the requested result count to the inclusive range 1..20.

    Values already inside the range are returned unchanged (no int
    conversion happens here).
    """
    if n > 20:
        return 20
    if n < 1:
        return 1
    return n
def update_dropdown_menu(search_res):
    """Build the dropdown update listing one choice per search result.

    Each choice is ``"<pdb_name>,<chain_id>"``, with ``",<gene>"`` appended
    when the row's first gene id is not ``"Unknown"``. ``switch_viz`` splits
    the selected value on the same comma.

    Args:
        search_res: Iterable of result rows exposing ``pdb_name``,
            ``chain_id`` and ``gene_ids`` keys.

    Returns:
        A ``gr.Dropdown.update`` that makes the dropdown visible and
        interactive and preselects the first choice, or selects nothing when
        the search produced no rows.
    """
    choices = []
    for row in search_res:
        gene_ids = row["gene_ids"]
        # A row with no gene ids is treated like an "Unknown" gene instead of
        # raising IndexError on gene_ids[0].
        gene = gene_ids[0] if gene_ids else "Unknown"
        choice_parts = [row["pdb_name"], row["chain_id"]]
        if gene != "Unknown":
            choice_parts.append(gene)
        choices.append(",".join(choice_parts))
    # An empty search must not crash on choices[0]; leave nothing selected.
    selected = choices[0] if choices else None
    return gr.Dropdown.update(
        choices=choices, interactive=True, value=selected, visible=True
    )
def format_search_results(raw_search_results):
    """Map a display label for each search hit to its score.

    The label is ``"PDB: <pdb_name> | Chain: <chain_id>"``; when the
    comma-joined gene ids are anything other than ``"Unknown"`` the suffix
    ``" | Gene: <genes>"`` is appended.

    Args:
        raw_search_results: Iterable of hits exposing ``pdb_name``,
            ``chain_id``, ``score`` and ``gene_ids`` keys.

    Returns:
        Dict of label -> score, in the order the hits were given.
    """
    labelled_scores = {}
    for hit in raw_search_results:
        pdb_name, chain_id, score = hit["pdb_name"], hit["chain_id"], hit["score"]
        gene_text = ",".join(hit["gene_ids"])
        gene_suffix = "" if gene_text == "Unknown" else f" | Gene: {gene_text}"
        labelled_scores[f"PDB: {pdb_name} | Chain: {chain_id}{gene_suffix}"] = score
    return labelled_scores
def switch_viz(new_choice):
    """Update the visualization widgets for the selected dropdown entry.

    Args:
        new_choice: Dropdown value formatted as ``"<pdb_id>,<chain>"`` or
            ``"<pdb_id>,<chain>,<gene>"``. May be ``None`` or empty when
            nothing is selected.

    Returns:
        ``(html, title_update, description_update)`` for the protein viewer,
        the visualization header and the description markdown.
    """
    if not new_choice:
        # Nothing selected: render the empty viewer and hide the header and
        # description rather than failing on ``None.split``. Two separate
        # update objects are created so the outputs do not share one dict.
        return (
            render_html(pdb_id=None, chain=None),
            gr.Markdown.update(visible=False),
            gr.Markdown.update(visible=False),
        )

    choice_parts = new_choice.split(",")
    pdb_id, chain = choice_parts[0], choice_parts[1]
    title_update = gr.Markdown.update(visible=True)
    protein_name = get_protein_name(pdb_id)
    new_value = f"""**PDB Title**: {protein_name}"""
    # A third comma-separated part is the gene id; look up its name.
    if len(choice_parts) > 2:
        gene = choice_parts[2]
        gene_name = get_gene_names([gene])[0]
        new_value += f"""\n\n**Gene Name**: {gene_name.title()}"""
    description_update = gr.Markdown.update(value=new_value, visible=True)
    return render_html(pdb_id=pdb_id, chain=chain), title_update, description_update
# UI definition: page header, search inputs, results label and protein viewer.
# NOTE(review): indentation is missing in this copy of the file, so the exact
# nesting of the Column/Row containers below cannot be confirmed from here.
with gr.Blocks() as demo:
with gr.Column():
# Page header rendered as Markdown.
gr.Markdown(
"""
# Protein Binding Search Engine
This application examines all files uploaded to [PDB](https://www.rcsb.org/)
to find the chains with which a given protein sequence is most likely to bind.
If the results are linked to a specific gene, their IDs will also be displayed.
"""
)
with gr.Column():
with gr.Row():
with gr.Column():
# Search inputs: query sequence, requested result count, trigger button.
seq_input = gr.Textbox(
value="KFLIYQMECSTMIFGL", label="Input Sequence"
)
n_results = gr.Number(5, label="N Results")
search_button = gr.Button("Search", variant="primary")
# Results label; num_top_classes=20 matches the upper bound in limit_n_results.
search_results = gr.Label(num_top_classes=20, label="Search Results")
# Header, selector and description start hidden; the callbacks return
# updates with visible=True once there is something to show.
viz_header = gr.Markdown("## Visualization", visible=False)
results_selector = gr.Dropdown(
choices=[],
multiselect=False,
visible=False,
label="Visualized Search Result",
)
viz_body = gr.Markdown("", visible=False)
# Viewer is initialised by calling render_html with pdb_id=None and chain=None.
protein_viz = gr.HTML(
value=render_html(pdb_id=None, chain=None),
label="Protein Visualization",
)
# Example sequences wired to the sequence input box.
gr.Examples(
["KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"], inputs=[seq_input]
)
# Clicking Search fills the results label and repopulates the dropdown.
search_button.click(
search_and_display,
inputs=[seq_input, n_results],
outputs=[search_results, results_selector],
)
# A change of the selected result updates the viewer, header and description.
results_selector.change(
switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
)
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
demo.launch()