File size: 3,945 Bytes
27e2770
 
 
b2a3d53
27e2770
 
 
 
 
 
79b6488
 
 
 
 
 
 
 
 
 
 
 
 
b2a3d53
 
 
 
 
 
 
 
 
 
79b6488
 
 
 
 
dfecb5b
 
 
5a6f640
 
27e2770
dfecb5b
e9e46e1
dfecb5b
20d73a0
dfecb5b
 
27e2770
 
79b6488
b2a3d53
 
79b6488
1d25e2a
b2a3d53
 
 
 
 
 
 
 
1d25e2a
dfecb5b
 
27e2770
cdbbabd
b2a3d53
 
e9e46e1
b2a3d53
 
20d73a0
b2a3d53
 
dfecb5b
 
 
 
b2a3d53
dfecb5b
 
b2a3d53
dfecb5b
79b6488
 
b2a3d53
 
 
 
79b6488
1d25e2a
dfecb5b
79b6488
b2a3d53
 
 
 
dfecb5b
 
 
 
b2a3d53
79b6488
 
b2a3d53
dfecb5b
 
b2a3d53
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import gradio as gr

from get_index import get_engine
from protein_viz import get_gene_names, get_protein_name, render_html

# Repository identifiers handed to get_engine below.
# NOTE(review): these look like Hugging Face Hub "<user>/<repo>" ids — confirm in get_index.
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Built once at import time; search_and_display reads this module-level object on every call.
engine = get_engine(index_repo, model_repo)


def search_and_display(seq, n_res):
    """Search the engine for ``seq`` and build both UI payloads.

    Args:
        seq: Query sequence passed to ``engine.search_by_sequence``.
        n_res: Requested number of results; clamped by ``limit_n_results``
            and truncated to an int before the search.

    Returns:
        A ``(formatted_results, dropdown_update)`` pair: the mapping produced
        by ``format_search_results`` and the update produced by
        ``update_dropdown_menu``.
    """
    capped = limit_n_results(n_res)
    hits = engine.search_by_sequence(seq, n=int(capped))
    # The dropdown update is built before the label mapping, as in the
    # original call order.
    dropdown_update = update_dropdown_menu(hits)
    return format_search_results(hits), dropdown_update


def limit_n_results(n):
    """Clamp ``n`` to the inclusive range 1..20.

    Values already inside the range are returned unchanged (including their
    type); values outside it are replaced by the nearest bound.
    """
    if n > 20:
        return 20
    if n < 1:
        return 1
    return n


def update_dropdown_menu(search_res):
    """Build the dropdown update that lists every search hit as a choice.

    Each choice is the comma-joined string ``"<pdb_name>,<chain_id>"``, with
    ``",<gene>"`` appended when the row's first gene id is not the
    ``"Unknown"`` placeholder. ``switch_viz`` splits the selected value on
    commas, so the separator must stay in sync with it.

    Args:
        search_res: Iterable of result rows; each row is indexed by
            ``"pdb_name"``, ``"chain_id"`` and ``"gene_ids"``.

    Returns:
        The value of ``gr.Dropdown.update(...)`` with the new choices. The
        first choice is preselected; when there are no results the selection
        is cleared instead.
    """
    choices = []
    for row in search_res:
        gene = row["gene_ids"][0]
        choice_parts = [row["pdb_name"], row["chain_id"]]
        if gene != "Unknown":
            choice_parts.append(gene)
        choices.append(",".join(choice_parts))

    # An empty result set has no first entry to preselect; indexing
    # choices[0] would raise IndexError, so fall back to no selection.
    selected = choices[0] if choices else None
    return gr.Dropdown.update(
        choices=choices, interactive=True, value=selected, visible=True
    )


def format_search_results(raw_search_results):
    """Map each search hit to a display label and its score.

    The label is ``"PDB: <pdb_name> | Chain: <chain_id>"``; when the
    comma-joined gene ids are anything other than ``"Unknown"`` the suffix
    ``" | Gene: <ids>"`` is appended. Later hits with the same label
    overwrite earlier ones.

    Args:
        raw_search_results: Iterable of rows indexed by ``"pdb_name"``,
            ``"chain_id"``, ``"score"`` and ``"gene_ids"``.

    Returns:
        A dict of label -> score, in the order the hits were given.
    """
    labelled_scores = {}
    for hit in raw_search_results:
        pdb_name, chain_id, score = hit["pdb_name"], hit["chain_id"], hit["score"]
        gene_list = ",".join(hit["gene_ids"])
        gene_suffix = "" if gene_list == "Unknown" else f" | Gene: {gene_list}"
        labelled_scores[f"PDB: {pdb_name} | Chain: {chain_id}" + gene_suffix] = score
    return labelled_scores


def switch_viz(new_choice):
    """Render the visualization and description for the selected result.

    Args:
        new_choice: Dropdown value of the form ``"<pdb_id>,<chain>"`` or
            ``"<pdb_id>,<chain>,<gene>"`` (as built by
            ``update_dropdown_menu``). May be ``None`` or empty when nothing
            is selected.

    Returns:
        A 3-tuple of: the HTML produced by ``render_html``, an update for the
        visualization header, and an update for the description markdown.
    """
    if not new_choice:
        # No selection (e.g. the dropdown was cleared): calling .split on it
        # would raise. Show the same placeholder the layout starts with and
        # hide the header and description.
        return (
            render_html(pdb_id=None, chain=None),
            gr.Markdown.update(visible=False),
            gr.Markdown.update(visible=False),
        )

    choice_parts = new_choice.split(",")
    pdb_id, chain = choice_parts[0], choice_parts[1]
    title_update = gr.Markdown.update(visible=True)
    protein_name = get_protein_name(pdb_id)

    new_value = f"""**PDB Title**: {protein_name}"""
    # A third part is present only when the choice carried a gene id.
    if len(choice_parts) > 2:
        gene = choice_parts[2]
        gene_name = get_gene_names([gene])[0]
        new_value += f"""\n\n**Gene Name**: {gene_name.title()}"""

    description_update = gr.Markdown.update(value=new_value, visible=True)
    return render_html(pdb_id=pdb_id, chain=chain), title_update, description_update


# Build the Gradio UI and wire its events to the callbacks defined above.
with gr.Blocks() as demo:
    with gr.Column():
        # Page title and short description of the app.
        gr.Markdown(
            """
        # Protein Binding Search Engine
        This application examines all files uploaded to [PDB](https://www.rcsb.org/) 
        to find the chains with which a given protein sequence is most likely to bind.
        If the results are linked to a specific gene, their IDs will also be displayed.
        """
        )
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    # Search form: query sequence, requested result count, submit button.
                    seq_input = gr.Textbox(
                        value="KFLIYQMECSTMIFGL", label="Input Sequence"
                    )
                    n_results = gr.Number(5, label="N Results")
                    search_button = gr.Button("Search", variant="primary")
                # num_top_classes=20 equals the upper bound applied by limit_n_results.
                search_results = gr.Label(num_top_classes=20, label="Search Results")
            # The header, selector and description start hidden; the callbacks
            # wired below return updates with visible=True for them.
            viz_header = gr.Markdown("## Visualization", visible=False)
            results_selector = gr.Dropdown(
                choices=[],
                multiselect=False,
                visible=False,
                label="Visualized Search Result",
            )
            viz_body = gr.Markdown("", visible=False)
            # Initial viewer content, rendered with no PDB id or chain selected.
            protein_viz = gr.HTML(
                value=render_html(pdb_id=None, chain=None),
                label="Protein Visualization",
            )
            # Example sequences offered as inputs for the sequence textbox.
            gr.Examples(
                ["KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"], inputs=[seq_input]
            )
    # Clicking "Search" fills the results label and the result selector.
    search_button.click(
        search_and_display,
        inputs=[seq_input, n_results],
        outputs=[search_results, results_selector],
    )
    # A change of the selector value re-renders the viewer, header and description.
    results_selector.change(
        switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
    )

# Launch the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()