roni committed on
Commit
dfecb5b
·
1 Parent(s): 20d73a0

visualizing search results

Browse files
Files changed (3) hide show
  1. app.py +44 -18
  2. get_index.py +1 -1
  3. protein_viz.py +21 -0
app.py CHANGED
@@ -1,32 +1,45 @@
1
  import gradio as gr
2
 
3
  from get_index import get_engine
 
4
 
5
  index_repo = "ronig/siamese_protein_index"
6
  model_repo = "ronig/protein_search_engine"
7
  engine = get_engine(index_repo, model_repo)
8
 
9
 
10
- def search(seq, n_res):
11
- n_res = int(limit_n_results(n_res))
12
- search_results = engine.search_by_sequence(seq, n=n_res)
13
- outputs = {}
14
- for res in search_results:
15
  prot = res["pdb_name"]
16
  chain = res["chain_id"]
17
  value = res["score"]
18
- genes = ','.join(res["gene_ids"])
19
  key = f"PDB: {prot} | Chain: {chain}"
20
- if genes != 'Unknown':
21
  key += f" | Gene: {genes}"
22
- outputs[key] = value
23
- return outputs
24
 
25
 
26
  def limit_n_results(n):
27
  return max(min(n, 20), 1)
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  with gr.Blocks() as demo:
31
  with gr.Column():
32
  gr.Markdown("""
@@ -34,13 +47,26 @@ with gr.Blocks() as demo:
34
  This application examines all files uploaded to [PDB](https://www.rcsb.org/) to find the chains with which a given protein sequence is most likely to bind.
35
  If the results are linked to a specific gene, their IDs will also be displayed.
36
  """)
37
- with gr.Row():
38
- with gr.Column():
39
- seq_input = gr.Textbox("KFLIYQMECSTMIFGL", label="Input Sequence")
40
- n_results = gr.Number(5, label="N Results")
41
- search_button = gr.Button("Search")
42
- output = gr.Label(num_top_classes=20, label="Search Results")
43
- search_button.click(search, inputs=[seq_input, n_results], outputs=output)
44
-
45
- if __name__ == '__main__':
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  demo.launch()
 
1
  import gradio as gr
2
 
3
  from get_index import get_engine
4
+ from protein_viz import render_html
5
 
6
  index_repo = "ronig/siamese_protein_index"
7
  model_repo = "ronig/protein_search_engine"
8
  engine = get_engine(index_repo, model_repo)
9
 
10
 
11
+ def format_search_results(raw_search_results):
12
+ formatted_search_results = {}
13
+ for res in raw_search_results:
 
 
14
  prot = res["pdb_name"]
15
  chain = res["chain_id"]
16
  value = res["score"]
17
+ genes = ",".join(res["gene_ids"])
18
  key = f"PDB: {prot} | Chain: {chain}"
19
+ if genes != "Unknown":
20
  key += f" | Gene: {genes}"
21
+ formatted_search_results[key] = value
22
+ return formatted_search_results
23
 
24
 
25
  def limit_n_results(n):
26
  return max(min(n, 20), 1)
27
 
28
 
def update_html(search_res):
    """Render the protein viewer HTML for the first search hit.

    Args:
        search_res: Sequence of search-result mappings; the first entry's
            ``"pdb_name"`` and ``"chain_id"`` values are passed to
            ``render_html``.

    Returns:
        The HTML string produced by ``render_html`` for the first entry, or
        an empty string when ``search_res`` is empty.
    """
    # Guard against an empty result set: indexing [0] would raise IndexError
    # and take the whole search callback down with it.
    if not search_res:
        return ""
    first_entry = search_res[0]
    return render_html(
        pdb_id=first_entry["pdb_name"],
        chain=first_entry["chain_id"],
    )
33
+
34
+
35
+ def search_and_display(seq, n_res):
36
+ n_res = int(limit_n_results(n_res))
37
+ search_res = engine.search_by_sequence(seq, n=n_res)
38
+ formatted_search_results = format_search_results(search_res)
39
+ new_html = update_html(search_res)
40
+ return formatted_search_results, new_html
41
+
42
+
43
  with gr.Blocks() as demo:
44
  with gr.Column():
45
  gr.Markdown("""
 
47
  This application examines all files uploaded to [PDB](https://www.rcsb.org/) to find the chains with which a given protein sequence is most likely to bind.
48
  If the results are linked to a specific gene, their IDs will also be displayed.
49
  """)
50
+ with gr.Column():
51
+ with gr.Row():
52
+ with gr.Column():
53
+ seq_input = gr.Textbox(
54
+ placeholder="KFLIYQMECSTMIFGL",
55
+ label="Input Sequence"
56
+ )
57
+ n_results = gr.Number(5, label="N Results")
58
+ search_button = gr.Button("Search", variant='primary')
59
+ search_results = gr.Label(num_top_classes=20, label="Search Results")
60
+ protein_viz = gr.HTML(
61
+ value=render_html(pdb_id="2POR", chain='A'),
62
+ label="Protein Visualization"
63
+ )
64
+ gr.Examples(["KFLIYQMECSTMIFGL"], inputs=[seq_input])
65
+ search_button.click(
66
+ search_and_display,
67
+ inputs=[seq_input, n_results],
68
+ outputs=[search_results, protein_viz]
69
+ )
70
+
71
+ if __name__ == "__main__":
72
  demo.launch()
get_index.py CHANGED
@@ -19,5 +19,5 @@ def get_engine(index_repo: str, model_repo: str):
19
  )
20
  sys.path.append(str(local_arch_path))
21
  from protein_index import ProteinSearchEngine # pylint: disable=import-error,import-outside-toplevel
22
-
23
  return ProteinSearchEngine(data_path=index_path)
 
19
  )
20
  sys.path.append(str(local_arch_path))
21
  from protein_index import ProteinSearchEngine # pylint: disable=import-error,import-outside-toplevel
22
+ index_path = Path("/data/indexes/pdb")
23
  return ProteinSearchEngine(data_path=index_path)
protein_viz.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def render_html(pdb_id, chain):
    """Build an iframe snippet that shows one PDB chain in a 3Dmol.js viewer.

    Args:
        pdb_id: Identifier placed in the viewer's ``data-pdb`` attribute.
        chain: Chain identifier used in the ``data-select1`` selection.

    Returns:
        A string containing an ``<iframe>`` whose ``srcdoc`` attribute holds
        a small HTML page that loads the 3Dmol.js scripts and declares a
        single viewer ``<div>``.
    """
    # Local import keeps this block self-contained; the module has no
    # top-level import section.
    from html import escape

    # Escape interpolated values so they cannot terminate the attribute
    # they are written into.
    safe_pdb_id = escape(str(pdb_id), quote=True)
    safe_chain = escape(str(chain), quote=True)

    # Scripts belong in <head> (not <header>), and <div> must be closed
    # explicitly because HTML has no self-closing <div/>.
    document = f"""<html>
<head>
<script src="https://3Dmol.org/build/3Dmol-min.js"></script>
<script src="https://3Dmol.org/build/3Dmol.ui-min.js"></script>
</head>
<body>
<div style="height: 400px; position: relative;" class="viewer_3Dmoljs"
data-pdb="{safe_pdb_id}"
data-select1="chain:{safe_chain}"
data-style1="cartoon:color=spectrum"
></div>
</body>
</html>
"""

    # The whole document is escaped once more for use as an attribute value;
    # the browser decodes the entities before parsing srcdoc as HTML.
    iframe = f"""
<iframe style="width: 100%; height: 480px" name="protein-vis"
frameborder="0" srcdoc="{escape(document, quote=True)}"></iframe>
"""
    return iframe