mgbam committed on
Commit
d69e21a
·
verified ·
1 Parent(s): 9b0c279

Update genesis/api_clients/chembl_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/chembl_api.py +75 -40
genesis/api_clients/chembl_api.py CHANGED
@@ -3,69 +3,104 @@ import os
3
  import requests
4
  from typing import Dict, List, Optional
5
 
6
- CHEMBL_BASE = "https://www.ebi.ac.uk/chembl/api/data"
7
- CHEMBL_API_KEY = os.getenv("CHEMBL_API_KEY") # If you have API key access, else can be None (public API works without auth)
8
 
9
- def search_molecule(query: str, max_results: int = 10) -> List[Dict]:
 
 
 
10
  """
11
- Search ChEMBL for a molecule by name, synonym, or ChEMBL ID.
12
  """
13
- params = {"format": "json", "limit": max_results, "molecule_synonyms__icontains": query}
14
- r = requests.get(f"{CHEMBL_BASE}/molecule", params=params)
 
 
 
15
  r.raise_for_status()
16
- data = r.json()
17
- return data.get("molecules", [])
18
 
 
 
 
 
 
 
 
 
 
19
 
20
  def get_molecule_details(chembl_id: str) -> Dict:
21
  """
22
- Retrieve full details for a specific molecule (properties, SMILES, InChI).
23
  """
24
- r = requests.get(f"{CHEMBL_BASE}/molecule/{chembl_id}.json")
25
- r.raise_for_status()
26
- return r.json()
27
 
28
-
29
- def get_target_details(target_chembl_id: str) -> Dict:
30
  """
31
- Retrieve details about a protein target.
32
  """
33
- r = requests.get(f"{CHEMBL_BASE}/target/{target_chembl_id}.json")
34
- r.raise_for_status()
35
- return r.json()
36
-
37
 
38
- def get_bioactivities(chembl_id: str, max_results: int = 20) -> List[Dict]:
 
 
 
39
  """
40
- Retrieve bioactivity assays for a given molecule.
41
  """
42
- params = {"format": "json", "limit": max_results}
43
- r = requests.get(f"{CHEMBL_BASE}/activity?molecule_chembl_id={chembl_id}", params=params)
44
- r.raise_for_status()
45
- return r.json().get("activities", [])
46
 
47
-
48
- def get_mechanism_of_action(chembl_id: str) -> List[Dict]:
49
  """
50
- Retrieve mechanism of action data for a drug.
51
  """
52
- r = requests.get(f"{CHEMBL_BASE}/mechanism.json?molecule_chembl_id={chembl_id}")
53
- r.raise_for_status()
54
- return r.json().get("mechanisms", [])
55
 
 
 
 
 
 
 
 
 
 
56
 
57
- def get_clinical_trials(chembl_id: str) -> List[Dict]:
58
  """
59
- Retrieve clinical trial data linked to a drug.
60
  """
61
- r = requests.get(f"{CHEMBL_BASE}/clinical_trial.json?molecule_chembl_id={chembl_id}")
62
- r.raise_for_status()
63
- return r.json().get("clinical_trials", [])
64
 
 
 
 
 
 
 
 
 
 
65
 
66
- def get_structural_image(chembl_id: str, size: str = "500") -> Optional[str]:
 
 
 
67
  """
68
- Retrieve PNG image of molecule from ChEMBL's structure rendering service.
69
- Returns image URL.
70
  """
71
- return f"https://www.ebi.ac.uk/chembl/api/utils/image/{chembl_id}?dimensions={size}"
 
 
 
 
 
 
 
 
 
 
 
3
  import requests
4
  from typing import Dict, List, Optional
5
 
6
# Root URL of the ChEMBL REST data API. Endpoint paths are appended to it
# directly, so the trailing slash is part of the value.
CHEMBL_BASE = "https://www.ebi.ac.uk/chembl/api/data/"
# Optional API key taken from the environment; None when the variable is unset.
CHEMBL_API_KEY = os.environ.get("CHEMBL_API_KEY")
8
 
9
+ # -------------------------
10
+ # Core Request Helper
11
+ # -------------------------
12
def chembl_request(endpoint: str, params: Optional[Dict] = None, timeout: float = 30.0) -> Dict:
    """
    Send a GET request to a ChEMBL data endpoint and return the decoded JSON.

    Args:
        endpoint: Path appended to ``CHEMBL_BASE`` (e.g. ``"molecule/search"``).
            A leading slash is tolerated.
        params: Query-string parameters; ``None`` sends none.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # CHEMBL_BASE ends with "/", so strip a leading slash to avoid "//" in the URL.
    url = f"{CHEMBL_BASE}{endpoint.lstrip('/')}"
    headers = {"Accept": "application/json"}
    if CHEMBL_API_KEY:
        headers["Authorization"] = f"Bearer {CHEMBL_API_KEY}"
    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    r = requests.get(url, headers=headers, params=params, timeout=timeout)
    r.raise_for_status()
    return r.json()
 
23
 
24
+ # -------------------------
25
+ # Search Molecules
26
+ # -------------------------
27
def search_molecule(query: str, max_results: int = 10) -> List[Dict]:
    """
    Run a molecule search against ChEMBL.

    Args:
        query: Search text, sent as the ``q`` parameter.
        max_results: Sent as the ``limit`` parameter.

    Returns:
        The ``"molecules"`` list of the response, or ``[]`` if that key is absent.
    """
    search_params = {"q": query, "limit": max_results}
    response = chembl_request("molecule/search", search_params)
    return response.get("molecules", [])
33
 
34
  def get_molecule_details(chembl_id: str) -> Dict:
35
  """
36
+ Get detailed molecule info by ChEMBL ID.
37
  """
38
+ return chembl_request(f"molecule/{chembl_id}", {})
 
 
39
 
40
def get_molecule_image(chembl_id: str) -> str:
    """
    Build the URL of the SVG structure image for a molecule.

    No request is made here; the URL is only assembled from ``chembl_id``.
    """
    svg_url = f"https://www.ebi.ac.uk/chembl/api/data/image/{chembl_id}.svg"
    return svg_url
 
 
 
45
 
46
+ # -------------------------
47
+ # Targets
48
+ # -------------------------
49
def search_target(query: str, max_results: int = 10) -> List[Dict]:
    """
    Run a biological-target search against ChEMBL.

    Args:
        query: Search text, sent as the ``q`` parameter.
        max_results: Sent as the ``limit`` parameter.

    Returns:
        The ``"targets"`` list of the response, or ``[]`` if that key is absent.
    """
    search_params = {"q": query, "limit": max_results}
    response = chembl_request("target/search", search_params)
    return response.get("targets", [])
 
 
55
 
56
def get_target_details(chembl_id: str) -> Dict:
    """
    Fetch the ChEMBL record of a single target.

    Args:
        chembl_id: Identifier placed in the ``target/<id>`` endpoint path.

    Returns:
        The decoded JSON response as returned by ``chembl_request``.
    """
    endpoint = f"target/{chembl_id}"
    no_params: Dict = {}
    return chembl_request(endpoint, no_params)
 
 
61
 
62
+ # -------------------------
63
+ # Bioactivity
64
+ # -------------------------
65
def get_bioactivity_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List bioactivity records filtered by molecule.

    Args:
        chembl_id: Sent as the ``molecule_chembl_id`` filter.
        max_results: Sent as the ``limit`` parameter.

    Returns:
        The ``"activities"`` list of the response, or ``[]`` if that key is absent.
    """
    filters = {"molecule_chembl_id": chembl_id, "limit": max_results}
    response = chembl_request("activity", filters)
    return response.get("activities", [])
71
 
72
def get_bioactivity_for_target(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List bioactivity records filtered by biological target.

    Args:
        chembl_id: Sent as the ``target_chembl_id`` filter.
        max_results: Sent as the ``limit`` parameter.

    Returns:
        The ``"activities"`` list of the response, or ``[]`` if that key is absent.
    """
    filters = {"target_chembl_id": chembl_id, "limit": max_results}
    response = chembl_request("activity", filters)
    return response.get("activities", [])
 
78
 
79
+ # -------------------------
80
+ # Assays
81
+ # -------------------------
82
def get_assays_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List assay records filtered by molecule.

    Args:
        chembl_id: Sent as the ``molecule_chembl_id`` filter.
        max_results: Sent as the ``limit`` parameter.

    Returns:
        The ``"assays"`` list of the response, or ``[]`` if that key is absent.
    """
    # NOTE(review): confirm that the "assay" resource accepts a
    # molecule_chembl_id filter; this function only forwards it.
    filters = {"molecule_chembl_id": chembl_id, "limit": max_results}
    response = chembl_request("assay", filters)
    return response.get("assays", [])
88
 
89
+ # -------------------------
90
+ # Integration Helpers
91
+ # -------------------------
92
def molecule_to_gene_links(chembl_id: str) -> List[Dict]:
    """
    Link the targets found in a molecule's bioactivity records to target metadata.

    Each activity returned by ``get_bioactivity_for_molecule`` that names a
    target yields one entry built from that target's record.

    Args:
        chembl_id: ChEMBL ID of the molecule.

    Returns:
        One dict per activity that has a target, with keys ``"target_id"``,
        ``"gene_symbol"`` and ``"organism"``. ``"gene_symbol"`` carries the
        ``accession`` field of the target's first component, or ``""`` when
        the target has no components.
    """
    # NOTE(review): the value stored under "gene_symbol" is the component's
    # "accession" field - confirm this is what the NCBI integration expects.
    activities = get_bioactivity_for_molecule(chembl_id)
    details_by_target: Dict[str, Dict] = {}
    gene_links = []
    for activity in activities:
        target_id = activity.get("target_chembl_id")
        if not target_id:
            # Key missing or null: there is no target record to look up.
            continue
        # Several activities can share a target; fetch each record only once.
        if target_id not in details_by_target:
            details_by_target[target_id] = get_target_details(target_id)
        target_info = details_by_target[target_id]
        # If "target_components" is present but empty or null, indexing [0]
        # on it directly would raise, so fall back to a single empty dict.
        components = target_info.get("target_components") or [{}]
        gene_links.append({
            "target_id": target_id,
            "gene_symbol": components[0].get("accession", ""),
            "organism": target_info.get("organism", ""),
        })
    return gene_links