# mgbam's picture
# Update genesis/api_clients/chembl_api.py
# 21f575e verified
# genesis/api_clients/chembl_api.py
import os
import requests
from typing import Dict, List, Optional
# Root URL that every ChEMBL endpoint path in this module is appended to.
CHEMBL_BASE = "https://www.ebi.ac.uk/chembl/api/data/"
# Optional API key read from the environment; None when the variable is unset.
CHEMBL_API_KEY = os.environ.get("CHEMBL_API_KEY")
# -------------------------
# Core Request Helper
# -------------------------
def chembl_request(endpoint: str, params: Optional[Dict] = None, timeout: float = 30.0) -> Dict:
    """
    Send a GET request to a ChEMBL endpoint and return the decoded JSON body.

    Args:
        endpoint: Path appended verbatim to ``CHEMBL_BASE`` (e.g. ``"molecule/search"``).
        params: Query-string parameters; ``None`` is treated as no parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = f"{CHEMBL_BASE}{endpoint}"
    headers = {"Accept": "application/json"}
    # The bearer token is only attached when a key was configured.
    if CHEMBL_API_KEY:
        headers["Authorization"] = f"Bearer {CHEMBL_API_KEY}"
    response = requests.get(url, headers=headers, params=params or {}, timeout=timeout)
    response.raise_for_status()
    return response.json()
# -------------------------
# Search Molecules
# -------------------------
def search_molecule(query: str, max_results: int = 10) -> List[Dict]:
    """
    Query the ``molecule/search`` endpoint with a free-text term.

    Args:
        query: Search text, sent as the ``q`` parameter.
        max_results: Value sent as the ``limit`` parameter.

    Returns:
        The ``"molecules"`` list from the response, or ``[]`` if that key is absent.
    """
    search_params = {"q": query, "limit": max_results}
    payload = chembl_request("molecule/search", search_params)
    return payload.get("molecules", [])
def get_molecule_details(chembl_id: str) -> Dict:
    """
    Fetch the full record of one molecule.

    Args:
        chembl_id: Identifier inserted into the ``molecule/<id>`` endpoint path.

    Returns:
        The decoded JSON response, unmodified.
    """
    endpoint = f"molecule/{chembl_id}"
    return chembl_request(endpoint, {})
def get_molecule_image(chembl_id: str) -> str:
    """
    Build the URL of the SVG structure image for a molecule.

    No request is made; the caller receives only the URL string.

    Args:
        chembl_id: Identifier inserted into the image URL.

    Returns:
        The image URL ending in ``<chembl_id>.svg``.
    """
    svg_url = f"https://www.ebi.ac.uk/chembl/api/data/image/{chembl_id}.svg"
    return svg_url
# -------------------------
# Targets
# -------------------------
def search_target(query: str, max_results: int = 10) -> List[Dict]:
    """
    Query the ``target/search`` endpoint with a free-text term.

    Args:
        query: Search text, sent as the ``q`` parameter.
        max_results: Value sent as the ``limit`` parameter.

    Returns:
        The ``"targets"`` list from the response, or ``[]`` if that key is absent.
    """
    search_params = {"q": query, "limit": max_results}
    payload = chembl_request("target/search", search_params)
    return payload.get("targets", [])
def get_target_details(chembl_id: str) -> Dict:
    """
    Fetch the full record of one target.

    Args:
        chembl_id: Identifier inserted into the ``target/<id>`` endpoint path.

    Returns:
        The decoded JSON response, unmodified.
    """
    endpoint = f"target/{chembl_id}"
    return chembl_request(endpoint, {})
# -------------------------
# Bioactivity
# -------------------------
def get_bioactivity_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List activity records filtered by molecule.

    Args:
        chembl_id: Value sent as the ``molecule_chembl_id`` filter.
        max_results: Value sent as the ``limit`` parameter.

    Returns:
        The ``"activities"`` list from the response, or ``[]`` if that key is absent.
    """
    filters = {"molecule_chembl_id": chembl_id, "limit": max_results}
    payload = chembl_request("activity", filters)
    return payload.get("activities", [])
def get_bioactivity_for_target(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List activity records filtered by target.

    Args:
        chembl_id: Value sent as the ``target_chembl_id`` filter.
        max_results: Value sent as the ``limit`` parameter.

    Returns:
        The ``"activities"`` list from the response, or ``[]`` if that key is absent.
    """
    filters = {"target_chembl_id": chembl_id, "limit": max_results}
    payload = chembl_request("activity", filters)
    return payload.get("activities", [])
# -------------------------
# Assays
# -------------------------
def get_assays_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List assay records, passing the molecule ID as a filter.

    NOTE(review): confirm that the ``assay`` resource accepts a
    ``molecule_chembl_id`` filter; this function sends it as-is.

    Args:
        chembl_id: Value sent as the ``molecule_chembl_id`` parameter.
        max_results: Value sent as the ``limit`` parameter.

    Returns:
        The ``"assays"`` list from the response, or ``[]`` if that key is absent.
    """
    filters = {"molecule_chembl_id": chembl_id, "limit": max_results}
    payload = chembl_request("assay", filters)
    return payload.get("assays", [])
# -------------------------
# Integration Helpers
# -------------------------
def molecule_to_gene_links(chembl_id: str) -> List[Dict]:
    """
    Crosslink a molecule's activity targets to component accessions.

    For every activity record of the molecule that carries a target ID, the
    target record is looked up and one link entry is emitted. Activities
    without a usable target ID are skipped.

    Args:
        chembl_id: Molecule identifier passed to ``get_bioactivity_for_molecule``.

    Returns:
        One dict per activity with keys ``"target_id"``, ``"gene_symbol"``
        and ``"organism"``. ``"gene_symbol"`` holds the ``accession`` field of
        the target's first component (empty string when the target has no
        components); ``"organism"`` defaults to an empty string.
    """
    activities = get_bioactivity_for_molecule(chembl_id)
    # Several activities commonly reference the same target; fetch each once.
    details_by_target: Dict[str, Dict] = {}
    gene_links = []
    for activity in activities:
        target_id = activity.get("target_chembl_id")
        if not target_id:
            # Missing or null target ID: nothing to look up.
            continue
        if target_id not in details_by_target:
            details_by_target[target_id] = get_target_details(target_id)
        target_info = details_by_target[target_id]
        # The key may be present but empty or null, so indexing [0] directly
        # on the .get() result is unsafe.
        components = target_info.get("target_components") or []
        accession = components[0].get("accession", "") if components else ""
        gene_links.append({
            "target_id": target_id,
            "gene_symbol": accession,
            "organism": target_info.get("organism", ""),
        })
    return gene_links
# -------------------------
# Alias for Pipeline
# -------------------------
def get_molecule_data(query: str) -> Dict:
    """
    High-level pipeline entry point: resolve a query to a single molecule.

    Searches for the molecule, takes the first hit, and bundles its detail
    record, structure-image URL and bioactivity records.

    Args:
        query: Search text passed to ``search_molecule``.

    Returns:
        A dict with keys ``"chembl_id"``, ``"details"``, ``"image_url"`` and
        ``"bioactivity"``, or ``{}`` when the search yields no hit or the
        first hit carries no ``molecule_chembl_id``.
    """
    results = search_molecule(query, max_results=1)
    if not results:
        return {}
    chembl_id = results[0].get("molecule_chembl_id")
    if not chembl_id:
        # Without an ID the follow-up calls would request "molecule/None".
        return {}
    return {
        "chembl_id": chembl_id,
        "details": get_molecule_details(chembl_id),
        "image_url": get_molecule_image(chembl_id),
        "bioactivity": get_bioactivity_for_molecule(chembl_id),
    }