doragera committed on
Commit
46a04b7
·
1 Parent(s): b80a35a

Generative drug screening Blueprint

Browse files
examples/Generative drug screening.lynxkite.json CHANGED
The diff for this file is too large to render. See raw diff
 
lynxkite-bio/src/lynxkite_bio/nims.py CHANGED
@@ -1,32 +1,37 @@
1
  """Wrappers for BioNeMo NIMs."""
2
 
3
- from enum import Enum
4
  from lynxkite_graph_analytics import Bundle
5
  from lynxkite.core import ops
6
  import joblib
 
 
7
  import os
8
 
9
- NIM_URLS = os.environ.get("NIM_URLS", "http://localhost:8000").split(",")
10
 
11
  mem = joblib.Memory(".joblib-cache")
12
  ENV = "LynxKite Graph Analytics"
13
  op = ops.op_registration(ENV)
14
 
 
15
 
16
- class MSASearchTypes(Enum):
17
- ALPHAFOLD2 = "ALPHAFOLD2"
18
- ESM2 = "ESM2"
19
 
20
-
21
- class AlignmentFormats(Enum):
22
- FASTA = "fasta"
23
- A3M = "a3m"
24
- STOCKHOLM = "stockholm"
25
- CLUSTAL = "clustal"
26
- PDB = "pdb"
27
- PIR = "pir"
28
- MSF = "msf"
29
- TSV = "tsv"
 
 
 
 
 
 
 
30
 
31
 
32
  @op("MSA-search")
@@ -38,14 +43,25 @@ def msa_search(
38
  protein_column: str,
39
  e_value: float = 0.0001,
40
  iterations: int = 1,
41
- search_type: MSASearchTypes = MSASearchTypes.ALPHAFOLD2,
42
- output_alignment_formats: list[AlignmentFormats] = [
43
- AlignmentFormats.FASTA,
44
- AlignmentFormats.A3M,
45
- ],
46
- databases: str = '["Uniref30_2302", "colabfold_envdb_202108", "PDB70_220313"]',
47
  ):
48
  bundle = bundle.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  return bundle
50
 
51
 
@@ -58,11 +74,24 @@ def query_openfold2(
58
  protein_column: str,
59
  alignment_table: str,
60
  alignment_column: str,
61
- use_templates: bool = False,
62
- relaxed_prediction: bool = False,
63
- databases: str = '["Uniref30_2302", "colabfold_envdb_202108", "PDB70_220313"]',
64
  ):
65
  bundle = bundle.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  return bundle
67
 
68
 
@@ -113,6 +142,20 @@ def query_genmol(
113
  scoring: str = "QED",
114
  ):
115
  bundle = bundle.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  return bundle
117
 
118
 
@@ -126,8 +169,35 @@ def query_diffdock(
126
  protein_column: str,
127
  ligand_table: str,
128
  ligand_column: str,
 
129
  num_poses=10,
130
  time_divisions=20,
131
  num_steps=18,
132
  ):
133
- return proteins
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Wrappers for BioNeMo NIMs."""
2
 
 
3
  from lynxkite_graph_analytics import Bundle
4
  from lynxkite.core import ops
5
  import joblib
6
+ import requests
7
+ import pandas as pd
8
  import os
9
 
 
10
 
11
  mem = joblib.Memory(".joblib-cache")
12
  ENV = "LynxKite Graph Analytics"
13
  op = ops.op_registration(ENV)
14
 
15
+ key = os.getenv("NVCF_RUN_KEY")
16
 
 
 
 
17
 
18
def query_bionemo_nim(
    url: str,
    payload: dict,
    timeout: tuple = (10.0, 600.0),
):
    """POST ``payload`` as JSON to a BioNeMo NIM endpoint and return the decoded reply.

    Args:
        url: Full URL of the NIM endpoint to call.
        payload: JSON-serializable request body.
        timeout: ``(connect, read)`` timeout in seconds handed to ``requests``.
            The read timeout is deliberately larger than the 500 s sent in the
            ``NVCF-POLL-SECONDS`` header so the client does not give up before
            the server does.

    Returns:
        The response body parsed from JSON.

    Raises:
        ValueError: If the request fails for any reason ``requests`` reports
            (connection error, timeout, non-2xx status, undecodable body). The
            original ``requests`` exception is attached as ``__cause__``.
    """
    headers = {
        # NOTE(review): presumably the number of seconds the service holds the
        # connection open before answering -- confirm against the NVCF docs.
        "NVCF-POLL-SECONDS": "500",
        "Content-Type": "application/json",
    }
    # Only attach credentials when the module-level key is actually set;
    # otherwise the header would carry the literal string "Bearer None".
    if key:
        headers["Authorization"] = f"Bearer {key}"
    try:
        print(f"Sending request to {url}")
        # Without an explicit timeout a stalled connection would block forever.
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
        print(f"Received response from {url}", response.status_code)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        # Chain the cause so the underlying HTTP/connection error stays visible.
        raise ValueError(f"Query failed: {e}") from e
35
 
36
 
37
  @op("MSA-search")
 
43
  protein_column: str,
44
  e_value: float = 0.0001,
45
  iterations: int = 1,
46
+ search_type: str = "alphafold2",
47
+ output_alignment_formats: str = "a3m",
48
+ databases: str = "Uniref30_2302,colabfold_envdb_202108",
 
 
 
49
  ):
50
  bundle = bundle.copy()
51
+ response = query_bionemo_nim(
52
+ url="https://health.api.nvidia.com/v1/biology/colabfold/msa-search/predict",
53
+ payload={
54
+ "sequence": bundle.dfs[protein_table][protein_column].iloc[0],
55
+ "e_value": e_value,
56
+ "iterations": iterations,
57
+ "search_type": search_type,
58
+ "output_alignment_formats": [
59
+ format for format in output_alignment_formats.split(",")
60
+ ],
61
+ "databases": [db for db in databases.split(",")],
62
+ },
63
+ )
64
+ bundle.dfs[protein_table]["alignments"] = [response["alignments"]]
65
  return bundle
66
 
67
 
 
74
  protein_column: str,
75
  alignment_table: str,
76
  alignment_column: str,
77
+ selected_models: str = "1,2",
78
+ relax_prediction: bool = False,
 
79
  ):
80
  bundle = bundle.copy()
81
+ protein = bundle.dfs[protein_table][protein_column].iloc[0]
82
+ alignments = bundle.dfs[alignment_table][alignment_column].iloc[0]
83
+ selected_models = [int(model) for model in selected_models.split(",")]
84
+ response = query_bionemo_nim(
85
+ url="https://health.api.nvidia.com/v1/biology/openfold/openfold2/predict-structure-from-msa-and-template",
86
+ payload={
87
+ "sequence": protein,
88
+ "alignments": alignments,
89
+ "selected_models": selected_models,
90
+ "relax_prediction": relax_prediction,
91
+ },
92
+ )
93
+ folded_protein = response["structures_in_ranked_order"].pop(0)["structure"]
94
+ bundle.dfs[protein_table]["folded_protein"] = folded_protein
95
  return bundle
96
 
97
 
 
142
  scoring: str = "QED",
143
  ):
144
  bundle = bundle.copy()
145
+
146
+ response = query_bionemo_nim(
147
+ url="https://health.api.nvidia.com/v1/biology/nvidia/genmol/generate",
148
+ payload={
149
+ "smiles": bundle.dfs[molecule_table][molecule_column].iloc[0],
150
+ "num_molecules": num_molecules,
151
+ "temperature": temperature,
152
+ "noise": noise,
153
+ "step_size": step_size,
154
+ "scoring": scoring,
155
+ },
156
+ )
157
+ generated_ligands = "\n".join([v["smiles"] for v in response["molecules"]])
158
+ bundle.dfs[molecule_table]["ligands"] = generated_ligands
159
  return bundle
160
 
161
 
 
169
  protein_column: str,
170
  ligand_table: str,
171
  ligand_column: str,
172
+ ligand_file_type: str = "txt",
173
  num_poses=10,
174
  time_divisions=20,
175
  num_steps=18,
176
  ):
177
+ response = query_bionemo_nim(
178
+ url="https://health.api.nvidia.com/v1/biology/mit/diffdock",
179
+ payload={
180
+ "protein": proteins.dfs[protein_table][protein_column].iloc[0],
181
+ "ligand": ligands.dfs[ligand_table][ligand_column].iloc[0],
182
+ "ligand_file_type": ligand_file_type,
183
+ "num_poses": num_poses,
184
+ "time_divisions": time_divisions,
185
+ "num_steps": num_steps,
186
+ },
187
+ )
188
+ bundle = Bundle()
189
+ bundle.dfs["diffdock_table"] = pd.DataFrame()
190
+ bundle.dfs["diffdock_table"]["protein"] = [response["protein"]] * len(
191
+ response["status"]
192
+ )
193
+ bundle.dfs["diffdock_table"]["ligand"] = [response["ligand"]] * len(
194
+ response["status"]
195
+ )
196
+ bundle.dfs["diffdock_table"]["trajectory"] = response["trajectory"]
197
+ bundle.dfs["diffdock_table"]["ligand_positions"] = response["ligand_positions"]
198
+ bundle.dfs["diffdock_table"]["position_confidence"] = response[
199
+ "position_confidence"
200
+ ]
201
+ bundle.dfs["diffdock_table"]["status"] = response["status"]
202
+
203
+ return bundle