Spaces:
Running
Running
File size: 3,167 Bytes
27e2770 dfecb5b 27e2770 79b6488 dfecb5b 5a6f640 27e2770 dfecb5b e9e46e1 dfecb5b 20d73a0 dfecb5b 27e2770 79b6488 dfecb5b 27e2770 cdbbabd e9e46e1 20d73a0 cdbbabd dfecb5b 79b6488 dfecb5b 79b6488 dfecb5b 79b6488 dfecb5b 79b6488 dfecb5b 79b6488 dfecb5b db68005 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import gradio as gr
from get_index import get_engine
from protein_viz import render_html
# Identifiers handed to get_engine below; presumably Hugging Face Hub
# repository ids for the search index and the model — TODO confirm against
# get_index.get_engine.
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Built once at import time; search_and_display uses this single module-level
# instance for every search.
engine = get_engine(index_repo, model_repo)
def search_and_display(seq, n_res):
    """Run a sequence search and prepare both UI outputs.

    Args:
        seq: Query sequence, passed unchanged to
            ``engine.search_by_sequence``.
        n_res: Requested number of results; clamped by ``limit_n_results``
            and converted with ``int`` before searching.

    Returns:
        A ``(label_scores, dropdown_update)`` tuple: the mapping built by
        ``format_search_results`` and the update built by
        ``update_dropdown_menu``.
    """
    capped = limit_n_results(n_res)
    hits = engine.search_by_sequence(seq, n=int(capped))
    # The dropdown update is built before the label mapping, in the same
    # order as the two helpers have always been called.
    dropdown_update = update_dropdown_menu(hits)
    label_scores = format_search_results(hits)
    return label_scores, dropdown_update
def limit_n_results(n, lower=1, upper=20):
    """Clamp a requested result count to the inclusive range [lower, upper].

    Args:
        n: Requested number of results. ``None`` (what a cleared numeric
            input is expected to deliver — confirm against the UI library)
            and NaN carry no usable preference and yield ``lower``.
        lower: Smallest count allowed; defaults to 1.
        upper: Largest count allowed; defaults to 20.

    Returns:
        ``n`` itself when it lies within the range, otherwise the nearest
        bound.
    """
    # `n != n` is true only for NaN, which would otherwise slip through
    # min/max unchanged and break the caller's int() conversion.
    if n is None or n != n:
        return lower
    return max(min(n, upper), lower)
def update_dropdown_menu(search_res):
    """Build the dropdown update that lists every search hit.

    Args:
        search_res: Iterable of mappings, each with string ``'pdb_name'``
            and ``'chain_id'`` entries.

    Returns:
        A ``gr.Dropdown.update`` payload whose choices are
        ``"<pdb_name>,<chain_id>"`` strings. The first choice is
        pre-selected; when there are no hits the value is cleared instead.
    """
    choices = [
        ','.join((row['pdb_name'], row['chain_id']))
        for row in search_res
    ]
    # An empty result list has no first element to pre-select; clearing the
    # value avoids the IndexError that choices[0] would raise.
    selected = choices[0] if choices else None
    return gr.Dropdown.update(
        choices=choices, interactive=True, value=selected, visible=True
    )
def format_search_results(raw_search_results):
    """Map each search hit to a display label and its score.

    Args:
        raw_search_results: Iterable of mappings with ``'pdb_name'``,
            ``'chain_id'``, ``'score'`` and ``'gene_ids'`` (an iterable of
            strings) entries.

    Returns:
        A dict from ``"PDB: <name> | Chain: <chain>"`` (with
        ``" | Gene: <ids>"`` appended when gene ids are known) to the hit's
        score. Hits that produce the same label overwrite earlier ones.
    """
    formatted_search_results = {}
    for res in raw_search_results:
        prot = res["pdb_name"]
        chain = res["chain_id"]
        value = res["score"]
        genes = ",".join(res["gene_ids"])
        key = f"PDB: {prot} | Chain: {chain}"
        # Skip the gene suffix for the "Unknown" placeholder and for an
        # empty gene list, which would otherwise render a dangling
        # " | Gene: " with nothing after it.
        if genes and genes != "Unknown":
            key += f" | Gene: {genes}"
        formatted_search_results[key] = value
    return formatted_search_results
def switch_viz(new_choice):
    """Render the visualization for the dropdown's current selection.

    Args:
        new_choice: Selected dropdown value in ``"<pdb_id>,<chain>"`` form,
            or ``None``/empty when nothing is selected.

    Returns:
        A ``(html, title_update)`` tuple: the markup from ``render_html``
        and a ``gr.Markdown.update`` toggling the section title's
        visibility.
    """
    if not new_choice:
        # Nothing selected: fall back to the render_html(None, None) output
        # that the layout uses as its initial value, and keep the title
        # hidden, instead of failing on None.split.
        empty_html = render_html(pdb_id=None, chain=None)
        return empty_html, gr.Markdown.update(visible=False)
    pdb_id, chain = new_choice.split(',')
    title_update = gr.Markdown.update(visible=True)
    return render_html(pdb_id=pdb_id, chain=chain), title_update
# Page layout and event wiring for the app.
# NOTE(review): the nesting of the `with` blocks below is a reconstruction;
# statement order is unchanged, but confirm the container hierarchy against
# the intended layout.
with gr.Blocks() as demo:
    with gr.Column():
        # Introductory text; the literal's lines are kept at column 0 so the
        # string content stays exactly as given.
        gr.Markdown("""
# Protein Binding Search Engine
This application examines all files uploaded to [PDB](https://www.rcsb.org/) to find the chains with which a given protein sequence is most likely to bind.
If the results are linked to a specific gene, their IDs will also be displayed.
""")
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    # Search inputs: query sequence, result count, trigger.
                    seq_input = gr.Textbox(
                        value="KFLIYQMECSTMIFGL",
                        label="Input Sequence"
                    )
                    n_results = gr.Number(5, label="N Results")
                    search_button = gr.Button("Search", variant='primary')
                # num_top_classes=20 matches the upper bound applied by
                # limit_n_results.
                search_results = gr.Label(num_top_classes=20, label="Search Results")
            # Title and selector start hidden; update_dropdown_menu and
            # switch_viz return updates that make them visible.
            viz_header = gr.Markdown("## Visualization", visible=False)
            results_selector = gr.Dropdown(
                choices=[], multiselect=False, visible=False,
                label="Visualized Search Result"
            )
            # Initial value is whatever render_html returns for no selection.
            protein_viz = gr.HTML(
                value=render_html(pdb_id=None, chain=None),
                label="Protein Visualization"
            )
            # Example sequences that fill seq_input.
            gr.Examples([
                "KFLIYQMECSTMIFGL",
                "PHFAMPPIHEDHLE",
                "AEERIISLD"
            ], inputs=[seq_input])
    # Clicking Search fills the score label and repopulates the selector.
    search_button.click(
        search_and_display,
        inputs=[seq_input, n_results],
        outputs=[search_results, results_selector]
    )
    # A change of the selector's value re-renders the visualization and
    # updates the title's visibility.
    results_selector.change(
        switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header]
    )
# Starts the app when this module is executed.
demo.launch()
|