File size: 3,167 Bytes
27e2770
 
 
dfecb5b
27e2770
 
 
 
 
 
79b6488
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfecb5b
 
 
5a6f640
 
27e2770
dfecb5b
e9e46e1
dfecb5b
20d73a0
dfecb5b
 
27e2770
 
79b6488
 
 
 
dfecb5b
 
27e2770
cdbbabd
 
e9e46e1
 
20d73a0
cdbbabd
dfecb5b
 
 
 
79b6488
dfecb5b
 
 
 
 
79b6488
 
 
 
 
dfecb5b
79b6488
dfecb5b
 
79b6488
 
 
 
 
dfecb5b
 
 
79b6488
 
 
 
dfecb5b
 
db68005
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr

from get_index import get_engine
from protein_viz import render_html

# Repository identifiers passed to get_engine for the search index and the
# model (presumably Hugging Face Hub repo ids -- confirm in get_index).
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Built once at import time; search_and_display reuses this module-level engine.
engine = get_engine(index_repo, model_repo)


def search_and_display(seq, n_res):
    """Run a sequence search and prepare both UI outputs for the click handler.

    Args:
        seq: Query sequence text taken from the input textbox.
        n_res: Requested number of results from the number field; it is
            clamped by ``limit_n_results`` and truncated to an ``int``.

    Returns:
        A ``(label_scores, dropdown_update)`` tuple: the mapping produced by
        ``format_search_results`` and the update produced by
        ``update_dropdown_menu``.
    """
    result_count = int(limit_n_results(n_res))
    hits = engine.search_by_sequence(seq, n=result_count)
    # The dropdown update is built before the label mapping, as both consume
    # the same search results.
    dropdown_update = update_dropdown_menu(hits)
    return format_search_results(hits), dropdown_update


def limit_n_results(n):
    """Clamp ``n`` to the inclusive range 1..20.

    Values already inside the range are returned unchanged (a float stays a
    float); the caller is responsible for any conversion to ``int``.
    """
    if n > 20:
        return 20
    if n < 1:
        return 1
    return n


def update_dropdown_menu(search_res):
    """Build the dropdown update that lists every search hit.

    Each hit becomes one choice of the form ``"<pdb_name>,<chain_id>"``.

    Args:
        search_res: Iterable of result rows; every row must provide string
            values under the ``pdb_name`` and ``chain_id`` keys.

    Returns:
        The ``gr.Dropdown.update(...)`` payload. When there is at least one
        hit the dropdown is shown with the first hit pre-selected. When the
        search returned nothing the dropdown is hidden with no selection,
        instead of failing with ``IndexError`` on ``choices[0]``.
    """
    choices = [
        ",".join((row["pdb_name"], row["chain_id"]))
        for row in search_res
    ]
    has_hits = bool(choices)
    return gr.Dropdown.update(
        choices=choices,
        interactive=True,
        # Only pre-select when there is something to select.
        value=choices[0] if has_hits else None,
        visible=has_hits,
    )


def format_search_results(raw_search_results):
    """Map each search hit to a display label and its score.

    The label is ``"PDB: <pdb_name> | Chain: <chain_id>"``; the comma-joined
    ``gene_ids`` are appended as ``" | Gene: <ids>"`` unless the joined text
    is exactly ``"Unknown"``. Hits that produce the same label overwrite
    earlier ones.

    Args:
        raw_search_results: Iterable of rows with ``pdb_name``, ``chain_id``,
            ``score`` and ``gene_ids`` keys.

    Returns:
        A dict from label to score, in first-seen label order.
    """

    def _label_and_score(hit):
        # Return the (label, score) pair for a single hit.
        pdb_name = hit["pdb_name"]
        chain_id = hit["chain_id"]
        score = hit["score"]
        gene_text = ",".join(hit["gene_ids"])
        gene_suffix = "" if gene_text == "Unknown" else f" | Gene: {gene_text}"
        return f"PDB: {pdb_name} | Chain: {chain_id}" + gene_suffix, score

    return dict(map(_label_and_score, raw_search_results))


def switch_viz(new_choice):
    """Render the visualization for the selected dropdown entry.

    Args:
        new_choice: Dropdown value of the form ``"<pdb_id>,<chain>"``, or an
            empty value (``None`` / ``""``) when nothing is selected.

    Returns:
        A ``(html, heading_update)`` tuple: the HTML from ``render_html`` and
        a ``gr.Markdown.update`` that toggles the heading's visibility.

    Raises:
        ValueError: If a non-empty ``new_choice`` does not split into exactly
            two comma-separated parts.
    """
    if not new_choice:
        # Nothing selected: calling .split on None would raise
        # AttributeError, so render with no structure and hide the heading.
        hidden_title = gr.Markdown.update(visible=False)
        return render_html(pdb_id=None, chain=None), hidden_title
    pdb_id, chain = new_choice.split(',')
    title_update = gr.Markdown.update(visible=True)
    return render_html(pdb_id=pdb_id, chain=chain), title_update


# UI definition: components are declared in layout order inside nested
# Column/Row containers, then the event handlers are attached.
with gr.Blocks() as demo:
    with gr.Column():
        # Page title and short description shown at the top of the app.
        gr.Markdown("""
        # Protein Binding Search Engine
        This application examines all files uploaded to [PDB](https://www.rcsb.org/) to find the chains with which a given protein sequence is most likely to bind.
        If the results are linked to a specific gene, their IDs will also be displayed.
        """)
        with gr.Column():
            with gr.Row():
                # Inner column of the row: query inputs and the search button.
                with gr.Column():
                    seq_input = gr.Textbox(
                        value="KFLIYQMECSTMIFGL",
                        label="Input Sequence"
                    )
                    # Requested number of results; search_and_display clamps
                    # it through limit_n_results before searching.
                    n_results = gr.Number(5, label="N Results")
                    search_button = gr.Button("Search", variant='primary')
                # Second element of the row: label -> score mapping. The 20
                # here matches the upper bound used by limit_n_results.
                search_results = gr.Label(num_top_classes=20, label="Search Results")
            # Hidden initially; switch_viz returns an update that shows it.
            viz_header = gr.Markdown("## Visualization", visible=False)
            # Hidden and empty initially; update_dropdown_menu fills in the
            # choices and makes it visible after a search.
            results_selector = gr.Dropdown(
                choices=[], multiselect=False, visible=False,
                label="Visualized Search Result"
            )
            # Initial content is rendered with no structure selected
            # (presumably a placeholder viewer -- confirm in protein_viz).
            protein_viz = gr.HTML(
                value=render_html(pdb_id=None, chain=None),
                label="Protein Visualization"
            )
            # Example sequences offered for the sequence textbox.
            gr.Examples([
                "KFLIYQMECSTMIFGL",
                "PHFAMPPIHEDHLE",
                "AEERIISLD"
            ], inputs=[seq_input])
    # Clicking "Search" runs the search and updates both the results label
    # and the dropdown of selectable results.
    search_button.click(
        search_and_display,
        inputs=[seq_input, n_results],
        outputs=[search_results, results_selector]
    )
    # A change of the dropdown value re-renders the visualization and
    # updates the heading's visibility.
    results_selector.change(
        switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header]
    )

# Starts the app at import time (there is no __main__ guard in this file).
demo.launch()