File size: 5,232 Bytes
27e2770
 
1d11011
6509a73
1d11011
27e2770
 
 
6509a73
 
97ca4c0
 
 
 
 
27e2770
 
6509a73
79b6488
6509a73
79b6488
 
 
 
 
 
 
 
 
 
 
b2a3d53
 
6509a73
 
 
 
 
 
 
 
 
 
 
 
79b6488
 
dfecb5b
 
1d11011
 
 
 
 
dfecb5b
 
27e2770
 
1d11011
6509a73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d11011
 
 
3380f3c
 
743e6bd
3380f3c
1d11011
 
 
6509a73
 
 
 
 
 
79b6488
6509a73
 
 
 
 
 
 
 
 
b2a3d53
6509a73
b2a3d53
6509a73
 
 
dfecb5b
 
27e2770
cdbbabd
97ca4c0
dfecb5b
 
 
6509a73
dfecb5b
6509a73
 
4bde526
6509a73
 
 
 
b2a3d53
dfecb5b
79b6488
 
b2a3d53
 
 
 
79b6488
1d25e2a
dfecb5b
79b6488
b2a3d53
 
 
94ce4d7
b092b28
dfecb5b
 
 
6509a73
b2a3d53
79b6488
 
b2a3d53
dfecb5b
 
b2a3d53
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import gradio as gr

from concurrency import execute_multithread
from get_index import get_engines
from protein_viz import get_gene_name, get_protein_name, render_html

# Repository identifiers handed to get_engines (presumably Hugging Face Hub
# repos holding the prebuilt index and the model -- confirm in get_index).
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Built once at import time; search_and_display looks engines up by name.
engines = get_engines(index_repo, model_repo)
# Engine names offered as the choices of the "Index" dropdown below.
available_indexes = list(engines.keys())
# Markdown shown at the top of the page.
app_description = """
# Protein Binding Search Engine
This application enables a quick protein-peptide binding search based on sequences. 
You can use it to search the full [PDB](https://www.rcsb.org/) database or in a specific organism genome.
"""


def search_and_display(seq, n_res, index_selection):
    """Run a sequence search and build the outputs for the result widgets.

    Args:
        seq: Query sequence forwarded to the engine's ``search_by_sequence``.
        n_res: Requested number of results; clamped by ``limit_n_results``
            and converted to ``int`` before searching.
        index_selection: Key into the module-level ``engines`` mapping.

    Returns:
        A ``(formatted_results, dropdown_update)`` pair: the mapping built by
        ``format_search_results`` and the update built by
        ``update_dropdown_menu``.
    """
    result_count = int(limit_n_results(n_res))
    hits = engines[index_selection].search_by_sequence(seq, n=result_count)
    # Build the dropdown update first, then the formatted label mapping.
    dropdown_update = update_dropdown_menu(hits)
    return format_search_results(hits), dropdown_update


def limit_n_results(n):
    """Clamp the requested result count to the range 1..20.

    Args:
        n: Requested number of results.

    Returns:
        ``20`` when ``n`` is greater than 20, ``1`` when ``n`` is less than 1,
        otherwise ``n`` itself, unchanged.
    """
    if 20 < n:
        return 20
    if 1 > n:
        return 1
    return n


def update_dropdown_menu(search_res):
    """Build the update for the "Visualized Search Result" dropdown.

    Each result that carries both ``"pdb_name"`` and ``"chain_id"`` becomes a
    ``"<pdb_name>.<chain_id>"`` choice; results missing either key are skipped.

    Args:
        search_res: Iterable of result rows supporting ``in`` and key lookup.

    Returns:
        A ``gr.Dropdown.update`` value. With at least one choice the dropdown
        is made visible and the first choice is selected; with none it is
        hidden and its value is cleared.
    """
    options = [
        ".".join((hit["pdb_name"], hit["chain_id"]))
        for hit in search_res
        if "pdb_name" in hit and "chain_id" in hit
    ]
    has_options = bool(options)
    return gr.Dropdown.update(
        choices=options,
        interactive=True,
        value=options[0] if has_options else None,
        visible=has_options,
    )


def format_search_results(raw_search_results):
    """Format every raw search result into a ``{label: score}`` mapping.

    Each result is passed to ``format_search_result`` through
    ``execute_multithread``, using one worker per result.

    Args:
        raw_search_results: Sized collection of raw result rows.

    Returns:
        A dict built from the ``(key, value)`` pairs yielded by
        ``execute_multithread``. Empty when there are no results.
    """
    n_results = len(raw_search_results)
    if n_results == 0:
        # Nothing to format. Returning early also avoids requesting a pool
        # with zero workers (presumably execute_multithread wraps a thread
        # pool, which rejects a worker count of 0 -- confirm in concurrency).
        return {}
    return {
        key: value
        for key, value in execute_multithread(
            func=format_search_result,
            inputs=({"raw_result": res} for res in raw_search_results),
            n_workers=n_results,
        )
    }


def format_search_result(raw_result):
    """Turn one raw search result into a ``(label, score)`` pair.

    Results containing a ``"pdb_name"`` key are handled by
    ``parse_pdb_search_result``; all others by ``parse_fasta_search_result``.

    Args:
        raw_result: A single result row supporting ``in`` and key lookup.

    Returns:
        The ``(key, value)`` tuple produced by the selected parser.
    """
    parser = (
        parse_pdb_search_result
        if "pdb_name" in raw_result
        else parse_fasta_search_result
    )
    label, score = parser(raw_result)
    return label, score


def parse_fasta_search_result(raw_result):
    """Build the ``(label, score)`` pair for a FASTA-index result.

    Args:
        raw_result: Result row with ``"description"`` and ``"score"`` entries.

    Returns:
        ``("Gene: <gene>", score)`` where the gene name is extracted from the
        description by ``parse_gene_from_fasta_entry``.
    """
    gene_name = parse_gene_from_fasta_entry(raw_result["description"])
    return f"Gene: {gene_name}", raw_result["score"]


def parse_pdb_search_result(raw_result):
    """Build the ``(label, score)`` pair for a PDB-index result.

    Args:
        raw_result: Result row with ``"pdb_name"``, ``"chain_id"`` and
            ``"score"`` entries.

    Returns:
        ``(label, score)``. The label is ``"PDB: <pdb>.<chain>"``, extended
        with gene and organism when ``get_gene_name`` returns a gene name
        other than ``None``.
    """
    pdb_id = raw_result["pdb_name"]
    chain_id = raw_result["chain_id"]
    score = raw_result["score"]
    gene_name, species = get_gene_name(pdb_id=pdb_id, chain_id=chain_id)
    label = f"PDB: {pdb_id}.{chain_id}"
    if gene_name is None:
        return label, score
    return label + f" | Gene: {gene_name} | Organism: {species}", score


def parse_gene_from_fasta_entry(description):
    """Extract the gene name from a FASTA header's ``GN=`` field.

    Args:
        description: FASTA description line, e.g.
            ``"... OS=Homo sapiens OX=9606 GN=ABC1 PE=1 SV=1"``.

    Returns:
        The text between the first ``GN=`` and the next space, or
        ``"unknown"`` when the description has no ``GN=`` field.
    """
    fields = description.split("GN=")
    if len(fields) < 2:
        # The GN= field is optional in UniProt-style headers; return a
        # placeholder instead of raising IndexError and aborting the search.
        return "unknown"
    return fields[1].split(" ")[0]


def switch_viz(new_choice):
    """Produce the visualization outputs for the selected search result.

    Args:
        new_choice: Dropdown value of the form ``"<pdb_id>.<chain>"``, or
            ``None`` when nothing is selected.

    Returns:
        A ``(html, title_update, description_update)`` tuple. With no
        selection the HTML is empty and both Markdown updates hide their
        component; otherwise the HTML comes from ``render_html`` and the
        description shows the title returned by ``get_protein_name``.
    """
    if new_choice is None:
        # Nothing selected: blank viewer, both Markdown components hidden.
        return (
            "",
            gr.Markdown.update(visible=False),
            gr.Markdown.update(value=None, visible=False),
        )

    parts = new_choice.split(".")
    pdb_id = parts[0]
    chain = parts[1]
    header_update = gr.Markdown.update(visible=True)
    body_text = f"""**PDB Title**: {get_protein_name(pdb_id)}"""
    body_update = gr.Markdown.update(value=body_text, visible=True)
    viewer_html = render_html(pdb_id=pdb_id, chain=chain)
    return viewer_html, header_update, body_update


# Page layout and event wiring for the Gradio app.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown(app_description)
        with gr.Column():
            with gr.Row():
                # Query inputs: sequence, result count, index choice, and the
                # button that triggers the search.
                with gr.Column():
                    seq_input = gr.Textbox(value="APTMPPPLPP", label="Input Sequence")
                    n_results = gr.Number(5, label="N Results")
                    # NOTE(review): the default "Pdb" assumes get_engines
                    # returns an engine under that key -- confirm.
                    index_selector = gr.Dropdown(
                        choices=available_indexes,
                        value="Pdb",
                        multiselect=False,
                        visible=True,
                        label="Index",
                    )
                    search_button = gr.Button("Search", variant="primary")
                # Shows up to 20 entries, the same cap limit_n_results applies.
                search_results = gr.Label(num_top_classes=20, label="Search Results")
            # The visualization widgets below start hidden; results_selector is
            # updated by update_dropdown_menu, viz_header and viz_body by
            # switch_viz.
            viz_header = gr.Markdown("## Visualization", visible=False)
            results_selector = gr.Dropdown(
                choices=[],
                multiselect=False,
                visible=False,
                label="Visualized Search Result",
            )
            viz_body = gr.Markdown("", visible=False)
            # Initial HTML is whatever render_html returns with no structure
            # selected (pdb_id=None, chain=None).
            protein_viz = gr.HTML(
                value=render_html(pdb_id=None, chain=None),
                label="Protein Visualization",
            )
            # Example sequences bound to the sequence input.
            gr.Examples(
                ["APTMPPPLPP", "KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"],
                inputs=[seq_input],
            )
    # Clicking Search runs search_and_display; its two return values go to the
    # results label and the result dropdown, in that order.
    search_button.click(
        search_and_display,
        inputs=[seq_input, n_results, index_selector],
        outputs=[search_results, results_selector],
    )
    # A change of the selected result calls switch_viz, whose three return
    # values go to the viewer HTML, the header, and the body Markdown.
    results_selector.change(
        switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
    )

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()