Spaces:
Running
Running
roni
committed on
Commit
·
dfecb5b
1
Parent(s):
20d73a0
visualizing search results
Browse files- app.py +44 -18
- get_index.py +1 -1
- protein_viz.py +21 -0
app.py
CHANGED
@@ -1,32 +1,45 @@
|
|
1 |
import gradio as gr
|
2 |
|
3 |
from get_index import get_engine
|
|
|
4 |
|
5 |
index_repo = "ronig/siamese_protein_index"
|
6 |
model_repo = "ronig/protein_search_engine"
|
7 |
engine = get_engine(index_repo, model_repo)
|
8 |
|
9 |
|
10 |
-
def
|
11 |
-
|
12 |
-
|
13 |
-
outputs = {}
|
14 |
-
for res in search_results:
|
15 |
prot = res["pdb_name"]
|
16 |
chain = res["chain_id"]
|
17 |
value = res["score"]
|
18 |
-
genes =
|
19 |
key = f"PDB: {prot} | Chain: {chain}"
|
20 |
-
if genes !=
|
21 |
key += f" | Gene: {genes}"
|
22 |
-
|
23 |
-
return
|
24 |
|
25 |
|
26 |
def limit_n_results(n, lower=1, upper=20):
    """Clamp the requested number of results to ``[lower, upper]``.

    Args:
        n: Requested result count. ``None`` (e.g. an emptied numeric input
            field) is treated as a request for the minimum.
        lower: Smallest value that may be returned. Defaults to 1.
        upper: Largest value that may be returned. Defaults to 20.

    Returns:
        ``n`` itself when it already lies within the bounds, otherwise the
        nearest bound. The value is not converted to ``int`` here.
    """
    if n is None:
        # Comparing None with numbers raises TypeError; fall back to the floor.
        return lower
    return max(min(n, upper), lower)
|
28 |
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
with gr.Blocks() as demo:
|
31 |
with gr.Column():
|
32 |
gr.Markdown("""
|
@@ -34,13 +47,26 @@ with gr.Blocks() as demo:
|
|
34 |
This application examines all files uploaded to [PDB](https://www.rcsb.org/) to find the chains with which a given protein sequence is most likely to bind.
|
35 |
If the results are linked to a specific gene, their IDs will also be displayed.
|
36 |
""")
|
37 |
-
with gr.
|
38 |
-
with gr.
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
from get_index import get_engine
|
4 |
+
from protein_viz import render_html
|
5 |
|
6 |
index_repo = "ronig/siamese_protein_index"
|
7 |
model_repo = "ronig/protein_search_engine"
|
8 |
engine = get_engine(index_repo, model_repo)
|
9 |
|
10 |
|
11 |
+
def format_search_results(raw_search_results):
    """Convert raw search hits into a ``{label: score}`` mapping.

    Each hit is a mapping with ``pdb_name``, ``chain_id``, ``score`` and
    ``gene_ids`` entries. The label has the form
    ``"PDB: <pdb_name> | Chain: <chain_id>"``; a ``" | Gene: <ids>"`` suffix
    is appended only when gene IDs are present and are not ``"Unknown"``.

    Args:
        raw_search_results: Iterable of hit mappings.

    Returns:
        Dict mapping each label to the hit's score. If two hits produce the
        same label, the later one wins.
    """
    formatted_search_results = {}
    for res in raw_search_results:
        key = f"PDB: {res['pdb_name']} | Chain: {res['chain_id']}"
        value = res["score"]
        gene_ids = res["gene_ids"]
        if isinstance(gene_ids, str):
            # A bare string must not be joined character by character.
            genes = gene_ids
        else:
            genes = ",".join(str(gene) for gene in gene_ids or ())
        # Skip the suffix for missing IDs instead of emitting "| Gene: ".
        if genes and genes != "Unknown":
            key += f" | Gene: {genes}"
        formatted_search_results[key] = value
    return formatted_search_results
|
23 |
|
24 |
|
25 |
def limit_n_results(n, lower=1, upper=20):
    """Clamp the requested number of results to ``[lower, upper]``.

    Args:
        n: Requested result count. ``None`` (e.g. an emptied numeric input
            field) is treated as a request for the minimum.
        lower: Smallest value that may be returned. Defaults to 1.
        upper: Largest value that may be returned. Defaults to 20.

    Returns:
        ``n`` itself when it already lies within the bounds, otherwise the
        nearest bound. The value is not converted to ``int`` here.
    """
    if n is None:
        # Comparing None with numbers raises TypeError; fall back to the floor.
        return lower
    return max(min(n, upper), lower)
|
27 |
|
28 |
|
29 |
+
def update_html(search_res):
    """Render the visualization HTML for the first search hit.

    Args:
        search_res: Sequence of hit mappings; the first entry's ``pdb_name``
            and ``chain_id`` are passed to ``render_html``.

    Returns:
        The HTML produced by ``render_html`` for the first hit, or an empty
        string when there are no hits to visualize.
    """
    if search_res is None or len(search_res) == 0:
        # Nothing to show; indexing [0] would raise IndexError.
        return ""
    first_entry = search_res[0]
    return render_html(
        pdb_id=first_entry["pdb_name"],
        chain=first_entry["chain_id"],
    )
|
33 |
+
|
34 |
+
|
35 |
+
def search_and_display(seq, n_res):
    """Run a sequence search and build both UI outputs.

    Args:
        seq: Query sequence forwarded to ``engine.search_by_sequence``.
        n_res: Requested number of results; clamped via ``limit_n_results``
            and converted to ``int`` before the search.

    Returns:
        A 2-tuple ``(formatted_results, html)``: the output of
        ``format_search_results`` and of ``update_html`` for the same hits.
    """
    capped_count = int(limit_n_results(n_res))
    hits = engine.search_by_sequence(seq, n=capped_count)
    return format_search_results(hits), update_html(hits)
|
41 |
+
|
42 |
+
|
43 |
with gr.Blocks() as demo:
|
44 |
with gr.Column():
|
45 |
gr.Markdown("""
|
|
|
47 |
This application examines all files uploaded to [PDB](https://www.rcsb.org/) to find the chains with which a given protein sequence is most likely to bind.
|
48 |
If the results are linked to a specific gene, their IDs will also be displayed.
|
49 |
""")
|
50 |
+
with gr.Column():
|
51 |
+
with gr.Row():
|
52 |
+
with gr.Column():
|
53 |
+
seq_input = gr.Textbox(
|
54 |
+
placeholder="KFLIYQMECSTMIFGL",
|
55 |
+
label="Input Sequence"
|
56 |
+
)
|
57 |
+
n_results = gr.Number(5, label="N Results")
|
58 |
+
search_button = gr.Button("Search", variant='primary')
|
59 |
+
search_results = gr.Label(num_top_classes=20, label="Search Results")
|
60 |
+
protein_viz = gr.HTML(
|
61 |
+
value=render_html(pdb_id="2POR", chain='A'),
|
62 |
+
label="Protein Visualization"
|
63 |
+
)
|
64 |
+
gr.Examples(["KFLIYQMECSTMIFGL"], inputs=[seq_input])
|
65 |
+
search_button.click(
|
66 |
+
search_and_display,
|
67 |
+
inputs=[seq_input, n_results],
|
68 |
+
outputs=[search_results, protein_viz]
|
69 |
+
)
|
70 |
+
|
71 |
+
if __name__ == "__main__":
|
72 |
demo.launch()
|
get_index.py
CHANGED
@@ -19,5 +19,5 @@ def get_engine(index_repo: str, model_repo: str):
|
|
19 |
)
|
20 |
sys.path.append(str(local_arch_path))
|
21 |
from protein_index import ProteinSearchEngine # pylint: disable=import-error,import-outside-toplevel
|
22 |
-
|
23 |
return ProteinSearchEngine(data_path=index_path)
|
|
|
19 |
)
|
20 |
sys.path.append(str(local_arch_path))
|
21 |
from protein_index import ProteinSearchEngine # pylint: disable=import-error,import-outside-toplevel
|
22 |
+
index_path = Path("/data/indexes/pdb")
|
23 |
return ProteinSearchEngine(data_path=index_path)
|
protein_viz.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def render_html(pdb_id, chain):
    """Build an ``<iframe>`` snippet embedding a 3Dmol.js viewer for one chain.

    Args:
        pdb_id: PDB identifier placed in the viewer's ``data-pdb`` attribute.
        chain: Chain identifier used in the ``data-select1`` selection.

    Returns:
        HTML string holding a single ``<iframe>`` whose ``srcdoc`` is a small
        standalone page that loads the 3Dmol.js scripts and one viewer
        ``<div>``.
    """
    # Local import keeps the module free of extra top-level dependencies.
    from html import escape

    # Escape interpolated values so they cannot terminate their attributes.
    safe_pdb_id = escape(str(pdb_id), quote=True)
    safe_chain = escape(str(chain), quote=True)
    document = f"""<html>
<head>
<script src="https://3Dmol.org/build/3Dmol-min.js"></script>
<script src="https://3Dmol.org/build/3Dmol.ui-min.js"></script>
</head>
<body>
<div style="height: 400px; position: relative;" class="viewer_3Dmoljs"
     data-pdb="{safe_pdb_id}"
     data-select1="chain:{safe_chain}"
     data-style1="cartoon:color=spectrum"></div>
</body>
</html>"""
    # The whole page is escaped again because it lives inside an attribute;
    # the browser decodes it before parsing the inner document.
    srcdoc = escape(document, quote=True)
    return f"""
<iframe style="width: 100%; height: 480px" name="protein-vis"
frameborder="0" srcdoc="{srcdoc}"></iframe>
"""
|