# genesis/api_clients/chembl_api.py
"""Thin client for the ChEMBL REST API: molecules, targets, bioactivity, assays."""

import os
from typing import Dict, List, Optional

import requests

CHEMBL_BASE = "https://www.ebi.ac.uk/chembl/api/data/"
CHEMBL_API_KEY = os.getenv("CHEMBL_API_KEY")  # Optional if you have API key

# Default per-request timeout in seconds. Without one, requests waits
# indefinitely on a stalled connection.
CHEMBL_TIMEOUT = 30


# -------------------------
# Core Request Helper
# -------------------------
def chembl_request(
    endpoint: str,
    params: Optional[Dict] = None,
    timeout: float = CHEMBL_TIMEOUT,
) -> Dict:
    """
    Issue a GET request against the ChEMBL API and return the decoded JSON.

    Args:
        endpoint: Path appended verbatim to ``CHEMBL_BASE`` (e.g. ``"activity"``).
        params: Query-string parameters; ``None`` or ``{}`` sends none.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"{CHEMBL_BASE}{endpoint}"
    headers = {"Accept": "application/json"}
    if CHEMBL_API_KEY:
        headers["Authorization"] = f"Bearer {CHEMBL_API_KEY}"
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()


# -------------------------
# Search Molecules
# -------------------------
def search_molecule(query: str, max_results: int = 10) -> List[Dict]:
    """
    Search for molecules in ChEMBL by name or ChEMBL ID.

    Returns the ``"molecules"`` list of the response, or ``[]`` if absent.
    """
    data = chembl_request("molecule/search", {"q": query, "limit": max_results})
    return data.get("molecules", [])


def get_molecule_details(chembl_id: str) -> Dict:
    """
    Get detailed molecule info by ChEMBL ID.
    """
    return chembl_request(f"molecule/{chembl_id}")


def get_molecule_image(chembl_id: str) -> str:
    """
    Build the URL of the SVG image of a molecule structure.

    No request is made; only the URL string is returned.
    """
    return f"{CHEMBL_BASE}image/{chembl_id}.svg"


# -------------------------
# Targets
# -------------------------
def search_target(query: str, max_results: int = 10) -> List[Dict]:
    """
    Search for biological targets in ChEMBL.

    Returns the ``"targets"`` list of the response, or ``[]`` if absent.
    """
    data = chembl_request("target/search", {"q": query, "limit": max_results})
    return data.get("targets", [])


def get_target_details(chembl_id: str) -> Dict:
    """
    Get detailed target info from ChEMBL.
    """
    return chembl_request(f"target/{chembl_id}")


# -------------------------
# Bioactivity
# -------------------------
def get_bioactivity_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    Get bioactivity data for a molecule.

    Returns the ``"activities"`` list of the response, or ``[]`` if absent.
    """
    data = chembl_request(
        "activity", {"molecule_chembl_id": chembl_id, "limit": max_results}
    )
    return data.get("activities", [])


def get_bioactivity_for_target(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    Get bioactivity data for a biological target.

    Returns the ``"activities"`` list of the response, or ``[]`` if absent.
    """
    data = chembl_request(
        "activity", {"target_chembl_id": chembl_id, "limit": max_results}
    )
    return data.get("activities", [])


# -------------------------
# Assays
# -------------------------
def get_assays_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    Get assay data for a molecule.

    Returns the ``"assays"`` list of the response, or ``[]`` if absent.
    """
    # NOTE(review): confirm the assay endpoint honours a molecule_chembl_id
    # filter; if it does not, these results are not molecule-specific.
    data = chembl_request(
        "assay", {"molecule_chembl_id": chembl_id, "limit": max_results}
    )
    return data.get("assays", [])


# -------------------------
# Integration Helpers
# -------------------------
def molecule_to_gene_links(chembl_id: str) -> List[Dict]:
    """
    Crosslink molecule targets to gene symbols for integration with NCBI.

    One entry is produced per bioactivity record that carries a target ID,
    so a target measured several times appears several times.

    Returns:
        A list of dicts with keys ``"target_id"``, ``"gene_symbol"`` and
        ``"organism"``. ``"gene_symbol"`` holds the ``accession`` field of the
        target's first component (presumably a protein accession rather than
        a gene symbol; verify against consumers), or ``""`` when the target
        has no components.
    """
    activities = get_bioactivity_for_molecule(chembl_id)
    # Many activity rows point at the same target; fetch each target once.
    details_by_target: Dict[str, Dict] = {}
    gene_links = []
    for activity in activities:
        target_id = activity.get("target_chembl_id")
        if not target_id:
            # A missing or null ID would otherwise trigger a "target/None" request.
            continue
        if target_id not in details_by_target:
            details_by_target[target_id] = get_target_details(target_id)
        target_info = details_by_target[target_id]
        # The component list may be absent, null, or empty.
        components = target_info.get("target_components") or []
        accession = components[0].get("accession", "") if components else ""
        gene_links.append({
            "target_id": target_id,
            "gene_symbol": accession,
            "organism": target_info.get("organism", ""),
        })
    return gene_links


# -------------------------
# Alias for Pipeline
# -------------------------
def get_molecule_data(query: str) -> Dict:
    """
    High-level function for the pipeline:
    Searches for molecule and returns first hit with details, image, and bioactivity.

    Returns:
        ``{}`` when the search has no hits or the first hit carries no
        ``molecule_chembl_id``; otherwise a dict with keys ``"chembl_id"``,
        ``"details"``, ``"image_url"`` and ``"bioactivity"``.
    """
    results = search_molecule(query, max_results=1)
    if not results:
        return {}
    chembl_id = results[0].get("molecule_chembl_id")
    if not chembl_id:
        # Without an ID the follow-up calls would query "molecule/None".
        return {}
    return {
        "chembl_id": chembl_id,
        "details": get_molecule_details(chembl_id),
        "image_url": get_molecule_image(chembl_id),
        "bioactivity": get_bioactivity_for_molecule(chembl_id),
    }