File size: 4,579 Bytes
27e2770
 
6509a73
1694358
27e2770
 
 
6509a73
 
97ca4c0
 
 
 
 
5e7a3eb
27e2770
1694358
6509a73
79b6488
6509a73
79b6488
 
 
 
 
 
 
5e7a3eb
79b6488
 
 
b2a3d53
 
6509a73
 
 
 
 
 
 
 
 
 
 
 
79b6488
 
dfecb5b
 
1694358
 
dfecb5b
 
27e2770
 
6509a73
1d11011
 
 
1694358
 
3380f3c
1694358
 
1d11011
 
 
79b6488
6509a73
 
 
 
 
 
 
 
 
b2a3d53
6509a73
b2a3d53
6509a73
 
 
dfecb5b
 
27e2770
cdbbabd
97ca4c0
dfecb5b
 
 
6509a73
9608c6c
6509a73
 
6441ca8
6509a73
 
 
 
b2a3d53
5e7a3eb
 
 
79b6488
 
b2a3d53
 
 
 
79b6488
1d25e2a
dfecb5b
79b6488
b2a3d53
 
 
94ce4d7
b092b28
dfecb5b
 
 
6509a73
b2a3d53
79b6488
 
b2a3d53
dfecb5b
 
b2a3d53
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr

from get_index import get_engines
from protein_viz import get_protein_name, render_html

# Repository identifiers handed to get_engines() below.
# NOTE(review): these look like Hugging Face Hub repo ids -- confirm in get_index.
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Mapping of index name -> search engine; built once at import time.
# Each engine is used through its search_by_sequence() method.
engines = get_engines(index_repo, model_repo)
# Index names offered as choices in the "Index" dropdown.
available_indexes = list(engines.keys())
# Markdown text rendered at the top of the page.
app_description = """
# Protein Binding Search Engine
This application enables a quick protein-peptide binding search based on sequences. 
You can use it to search the full [PDB](https://www.rcsb.org/) database or in a specific organism genome.
"""
# Upper bound on the number of results a user may request; also the number
# of entries the results label is configured to display.
max_results = 100


def search_and_display(seq, n_res, index_selection):
    """Run a sequence search and build the two UI outputs for it.

    Args:
        seq: Query sequence forwarded to the engine's ``search_by_sequence``.
        n_res: Requested number of results; clamped by ``limit_n_results``
            and converted to ``int`` before the search.
        index_selection: Key into the module-level ``engines`` mapping.

    Returns:
        A ``(formatted_results, dropdown_update)`` tuple: the label -> score
        mapping from ``format_search_results`` and the dropdown update from
        ``update_dropdown_menu``.
    """
    capped_count = int(limit_n_results(n_res))
    hits = engines[index_selection].search_by_sequence(seq, n=capped_count)
    # Build the dropdown update before formatting, keeping the original
    # order in which the two helpers consume the hits.
    dropdown_update = update_dropdown_menu(hits)
    return format_search_results(hits), dropdown_update


def limit_n_results(n):
    """Clamp ``n`` to the inclusive range ``1..max_results``.

    Args:
        n: Requested number of results.

    Returns:
        ``n`` capped at the module-level ``max_results`` and raised to at
        least 1.
    """
    capped = min(n, max_results)
    return max(capped, 1)


def update_dropdown_menu(search_res):
    """Build the dropdown update listing the hits that can be visualized.

    Each hit carrying both ``"pdb_name"`` and ``"chain_id"`` contributes a
    ``"<pdb_name>.<chain_id>"`` choice; other hits are skipped.

    Args:
        search_res: Iterable of search hits supporting ``in`` and ``[]``
            lookups by key.

    Returns:
        A ``gr.Dropdown.update`` that shows the dropdown with the first
        choice selected, or hides it (value ``None``) when no hit qualifies.
    """
    labels = [
        ".".join([hit["pdb_name"], hit["chain_id"]])
        for hit in search_res
        if "pdb_name" in hit and "chain_id" in hit
    ]
    if not labels:
        # Nothing to visualize: hide the selector and clear its value.
        return gr.Dropdown.update(
            choices=labels, interactive=True, visible=False, value=None
        )
    return gr.Dropdown.update(
        choices=labels, interactive=True, value=labels[0], visible=True
    )


def format_search_results(raw_search_results):
    """Turn raw search hits into a ``{label: score}`` mapping.

    Args:
        raw_search_results: Iterable of hits accepted by
            ``parse_pdb_search_result``.

    Returns:
        A dict keyed by the display label of each hit; when two hits yield
        the same label, the later score replaces the earlier one.
    """
    return dict(map(parse_pdb_search_result, raw_search_results))


def parse_pdb_search_result(raw_result):
    """Convert one raw search hit into a ``(label, score)`` pair.

    Args:
        raw_result: Mapping with the keys ``"pdb_name"``, ``"chain_id"``,
            ``"score"``, ``"genes"`` and ``"organism"``.

    Returns:
        A tuple of the display label and the hit's score. The label is
        ``"PDB: <pdb_name>.<chain_id>"``, extended with the genes and the
        organism when ``"genes"`` is not ``None``.

    Raises:
        KeyError: If any of the five expected keys is missing.
    """
    # Read every field up front, in a fixed order, so a missing key fails
    # regardless of whether the gene information ends up in the label.
    pdb_id, chain_id, score, genes, organism = (
        raw_result[field]
        for field in ("pdb_name", "chain_id", "score", "genes", "organism")
    )
    label = f"PDB: {pdb_id}.{chain_id}"
    if genes is None:
        return label, score
    return f"{label} | Genes: {genes} | Organism: {organism}", score


def switch_viz(new_choice):
    """Produce the visualization outputs for the selected search result.

    Args:
        new_choice: Selected dropdown value of the form
            ``"<pdb_id>.<chain>"``, or ``None`` when nothing is selected.

    Returns:
        A ``(html, title_update, description_update)`` tuple. For ``None``
        the HTML is empty and both Markdown updates hide their component;
        otherwise the HTML comes from ``render_html`` and the description
        shows the title returned by ``get_protein_name``.
    """
    if new_choice is None:
        return (
            "",
            gr.Markdown.update(visible=False),
            gr.Markdown.update(value=None, visible=False),
        )

    # Only the first two dot-separated parts are used.
    parts = new_choice.split(".")
    pdb_id = parts[0]
    chain = parts[1]

    header_update = gr.Markdown.update(visible=True)
    body_update = gr.Markdown.update(
        value=f"**PDB Title**: {get_protein_name(pdb_id)}", visible=True
    )
    viewer_html = render_html(pdb_id=pdb_id, chain=chain)
    return viewer_html, header_update, body_update


# UI definition: components are created in display order inside the Blocks
# context, then wired to the callbacks defined above.
with gr.Blocks() as demo:
    with gr.Column():
        # Page header / description.
        gr.Markdown(app_description)
        with gr.Column():
            with gr.Row():
                # Left column: search inputs and the search button.
                with gr.Column():
                    seq_input = gr.Textbox(value="APTMPPPLPP", label="Input Sequence")
                    n_results = gr.Number(10, label="N Results")
                    # NOTE(review): the default assumes "PDB" is one of
                    # available_indexes -- confirm against get_engines().
                    index_selector = gr.Dropdown(
                        choices=available_indexes,
                        value="PDB",
                        multiselect=False,
                        visible=True,
                        label="Index",
                    )
                    search_button = gr.Button("Search", variant="primary")
                # Right side of the row: label -> score results; sized so
                # that up to max_results entries can be shown.
                search_results = gr.Label(
                    num_top_classes=max_results, label="Search Results"
                )
            # Visualization section. The header, selector and body start
            # hidden; the callbacks return updates that toggle visibility.
            viz_header = gr.Markdown("## Visualization", visible=False)
            results_selector = gr.Dropdown(
                choices=[],
                multiselect=False,
                visible=False,
                label="Visualized Search Result",
            )
            viz_body = gr.Markdown("", visible=False)
            # Initial viewer content is rendered with no structure selected.
            protein_viz = gr.HTML(
                value=render_html(pdb_id=None, chain=None),
                label="Protein Visualization",
            )
            # Example query sequences that fill the sequence textbox.
            gr.Examples(
                ["APTMPPPLPP", "KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"],
                inputs=[seq_input],
            )
    # Clicking "Search" runs the query and refreshes both the results label
    # and the choices of the visualization selector.
    search_button.click(
        search_and_display,
        inputs=[seq_input, n_results, index_selector],
        outputs=[search_results, results_selector],
    )
    # Changing the selected result re-renders the viewer, header and body.
    # NOTE(review): presumably this also fires when search_and_display
    # updates the selector's value -- confirm against the Gradio version used.
    results_selector.change(
        switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
    )

# Start the Gradio server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()