File size: 2,453 Bytes
27e2770
 
 
dfecb5b
27e2770
 
 
 
 
 
dfecb5b
 
 
5a6f640
 
27e2770
dfecb5b
e9e46e1
dfecb5b
20d73a0
dfecb5b
 
27e2770
 
 
 
 
 
dfecb5b
 
 
 
 
 
 
 
 
 
 
 
 
 
27e2770
cdbbabd
 
e9e46e1
 
20d73a0
cdbbabd
dfecb5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a6f640
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr

from get_index import get_engine
from protein_viz import render_html

# Repository identifiers handed to get_engine below.
# NOTE(review): these look like Hugging Face Hub repo ids - confirm in get_index.
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Built once at import time; search_and_display reuses it for every query.
engine = get_engine(index_repo, model_repo)


def format_search_results(raw_search_results):
    """Map raw search hits to ``{display label: score}``.

    Every hit is read through its ``pdb_name``, ``chain_id``, ``score`` and
    ``gene_ids`` keys. The label has the form
    ``"PDB: <pdb> | Chain: <chain>"`` and gets ``" | Gene: <ids>"`` appended
    (ids comma-joined) when gene ids are present.

    Args:
        raw_search_results: Iterable of mappings exposing the four keys
            above; ``gene_ids`` must be an iterable of strings.

    Returns:
        A dict from label to score, in input order. Hits that produce the
        same label collapse into one entry and the later score wins.
    """
    formatted_search_results = {}
    for res in raw_search_results:
        prot = res["pdb_name"]
        chain = res["chain_id"]
        value = res["score"]
        genes = ",".join(res["gene_ids"])
        key = f"PDB: {prot} | Chain: {chain}"
        # Skip the gene suffix when there is nothing to show: an empty
        # gene list would otherwise leave a dangling " | Gene: " on the
        # label. The literal "Unknown" is excluded as before
        # (presumably the upstream marker for "no gene" - TODO confirm).
        if genes and genes != "Unknown":
            key += f" | Gene: {genes}"
        formatted_search_results[key] = value
    return formatted_search_results


def limit_n_results(n):
    """Clamp the requested result count to the inclusive range 1..20.

    Values above 20 become 20, values below 1 become 1, and anything in
    between is returned unchanged (so a float input stays a float).
    """
    if n > 20:
        return 20
    if n < 1:
        return 1
    return n


def update_html(search_res):
    """Render the visualization HTML for the first search hit.

    Args:
        search_res: Sequence of hits. Only the first element is used, and
            it is read through its ``pdb_name`` and ``chain_id`` keys.

    Returns:
        Whatever ``render_html`` returns for the first hit, or an empty
        string when ``search_res`` contains no hits.
    """
    # Without this guard an empty result set raises IndexError on [0].
    # An explicit length check is used instead of truthiness so that
    # array-like result containers are handled as well.
    if len(search_res) == 0:
        return ""
    top_hit = search_res[0]
    return render_html(pdb_id=top_hit["pdb_name"], chain=top_hit["chain_id"])


def search_and_display(seq, n_res):
    """Run a sequence search and build both outputs for the UI.

    The requested count is clamped with ``limit_n_results`` and cast to
    ``int`` before being passed to the engine as ``n``.

    Returns:
        A ``(label_scores, html)`` tuple: the output of
        ``format_search_results`` and the output of ``update_html`` for
        the same hits.
    """
    hits = engine.search_by_sequence(seq, n=int(limit_n_results(n_res)))
    label_scores = format_search_results(hits)
    return label_scores, update_html(hits)


# UI definition: a header, an input column next to the results label, and the
# protein visualization below them. Components are created in layout order.
with gr.Blocks() as demo:
    with gr.Column():
        # Page title and short description shown above the controls.
        gr.Markdown("""
        # Protein Binding Search Engine
        This application examines all files uploaded to [PDB](https://www.rcsb.org/) to find the chains with which a given protein sequence is most likely to bind.
        If the results are linked to a specific gene, their IDs will also be displayed.
        """)
        with gr.Column():
            with gr.Row():
                # Left side of the row: query inputs and the search trigger.
                with gr.Column():
                    seq_input = gr.Textbox(
                        placeholder="KFLIYQMECSTMIFGL",
                        label="Input Sequence"
                    )
                    # Defaults to 5; search_and_display clamps the value
                    # through limit_n_results before querying.
                    n_results = gr.Number(5, label="N Results")
                    search_button = gr.Button("Search", variant='primary')
                # Right side of the row: label -> score output. 20 matches
                # the upper bound enforced by limit_n_results.
                search_results = gr.Label(num_top_classes=20, label="Search Results")
            # Initial view is rendered for PDB entry 2POR, chain A; it is
            # replaced by the first hit after each search.
            protein_viz = gr.HTML(
                value=render_html(pdb_id="2POR", chain='A'),
                label="Protein Visualization"
            )
            # Example sequence wired to the sequence textbox.
            gr.Examples(["KFLIYQMECSTMIFGL"], inputs=[seq_input])
    # Clicking "Search" runs search_and_display and routes its two return
    # values to the results label and the visualization, in that order.
    search_button.click(
        search_and_display,
        inputs=[seq_input, n_results],
        outputs=[search_results, protein_viz]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()