Spaces:
Running
Running
# genesis/api_clients/chembl_api.py | |
import os | |
import requests | |
from typing import Dict, List, Optional | |
# Root URL of the ChEMBL REST API; endpoint paths are appended to it directly,
# so the trailing slash is significant.
CHEMBL_BASE = "https://www.ebi.ac.uk/chembl/api/data/"
# Optional token read from the environment; None when the variable is unset.
CHEMBL_API_KEY = os.environ.get("CHEMBL_API_KEY")
# ------------------------- | |
# Core Request Helper | |
# ------------------------- | |
def chembl_request(endpoint: str, params: Optional[Dict] = None, timeout: float = 30.0) -> Dict:
    """
    Send a GET request to the ChEMBL API and return the decoded JSON body.

    Args:
        endpoint: Path appended to CHEMBL_BASE, e.g. "molecule/search".
        params: Query-string parameters; None or an empty dict sends none.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within `timeout`.
    """
    url = f"{CHEMBL_BASE}{endpoint}"
    headers = {"Accept": "application/json"}
    # The bearer token is only attached when an API key is configured.
    if CHEMBL_API_KEY:
        headers["Authorization"] = f"Bearer {CHEMBL_API_KEY}"
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
# ------------------------- | |
# Search Molecules | |
# ------------------------- | |
def search_molecule(query: str, max_results: int = 10) -> List[Dict]:
    """
    Run a text search against the ChEMBL "molecule/search" endpoint.

    Args:
        query: Search text, sent as the "q" parameter.
        max_results: Value sent as the "limit" parameter.

    Returns:
        The list stored under "molecules" in the response, or an empty list
        when that key is absent.
    """
    search_params = {"q": query, "limit": max_results}
    response = chembl_request("molecule/search", search_params)
    return response.get("molecules", [])
def get_molecule_details(chembl_id: str) -> Dict:
    """
    Fetch the full record of one molecule.

    Args:
        chembl_id: Identifier interpolated into the "molecule/<id>" endpoint.

    Returns:
        The decoded JSON response, unmodified.
    """
    endpoint = f"molecule/{chembl_id}"
    return chembl_request(endpoint, {})
def get_molecule_image(chembl_id: str) -> str:
    """
    Build the URL of the SVG structure image for a molecule.

    No request is made; only the URL string is returned.
    """
    url_template = "https://www.ebi.ac.uk/chembl/api/data/image/{}.svg"
    return url_template.format(chembl_id)
# ------------------------- | |
# Targets | |
# ------------------------- | |
def search_target(query: str, max_results: int = 10) -> List[Dict]:
    """
    Run a text search against the ChEMBL "target/search" endpoint.

    Args:
        query: Search text, sent as the "q" parameter.
        max_results: Value sent as the "limit" parameter.

    Returns:
        The list stored under "targets" in the response, or an empty list
        when that key is absent.
    """
    search_params = {"q": query, "limit": max_results}
    response = chembl_request("target/search", search_params)
    return response.get("targets", [])
def get_target_details(chembl_id: str) -> Dict:
    """
    Fetch the full record of one target.

    Args:
        chembl_id: Identifier interpolated into the "target/<id>" endpoint.

    Returns:
        The decoded JSON response, unmodified.
    """
    endpoint = f"target/{chembl_id}"
    return chembl_request(endpoint, {})
# ------------------------- | |
# Bioactivity | |
# ------------------------- | |
def get_bioactivity_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List activity records filtered by molecule.

    Args:
        chembl_id: Value sent as the "molecule_chembl_id" filter.
        max_results: Value sent as the "limit" parameter.

    Returns:
        The list stored under "activities" in the response, or an empty list
        when that key is absent.
    """
    filters = {"molecule_chembl_id": chembl_id, "limit": max_results}
    response = chembl_request("activity", filters)
    return response.get("activities", [])
def get_bioactivity_for_target(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List activity records filtered by target.

    Args:
        chembl_id: Value sent as the "target_chembl_id" filter.
        max_results: Value sent as the "limit" parameter.

    Returns:
        The list stored under "activities" in the response, or an empty list
        when that key is absent.
    """
    filters = {"target_chembl_id": chembl_id, "limit": max_results}
    response = chembl_request("activity", filters)
    return response.get("activities", [])
# ------------------------- | |
# Assays | |
# ------------------------- | |
def get_assays_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    List assay records filtered by molecule.

    Args:
        chembl_id: Value sent as the "molecule_chembl_id" filter.
        max_results: Value sent as the "limit" parameter.

    Returns:
        The list stored under "assays" in the response, or an empty list
        when that key is absent.
    """
    # NOTE(review): confirm the "assay" resource accepts a
    # "molecule_chembl_id" filter; molecule/assay links may only be exposed
    # through the "activity" resource.
    filters = {"molecule_chembl_id": chembl_id, "limit": max_results}
    response = chembl_request("assay", filters)
    return response.get("assays", [])
# ------------------------- | |
# Integration Helpers | |
# ------------------------- | |
def molecule_to_gene_links(chembl_id: str) -> List[Dict]:
    """
    Map a molecule's bioactivity records to their targets for cross-linking.

    One entry is produced per activity record that names a target, in the
    order the activities are returned.

    Args:
        chembl_id: Molecule identifier passed to get_bioactivity_for_molecule.

    Returns:
        A list of dicts with keys:
            "target_id": the activity's "target_chembl_id".
            "gene_symbol": the "accession" of the target's first component
                (note: this is the accession field, not a gene name), or ""
                when the target has no components.
            "organism": the target's "organism", or "" when absent.
    """
    activities = get_bioactivity_for_molecule(chembl_id)
    # Many activities share a target; fetch each target record only once.
    target_cache: Dict[str, Dict] = {}
    gene_links = []
    for activity in activities:
        target_id = activity.get("target_chembl_id")
        # Skip records with a missing or null target instead of requesting
        # "target/None".
        if not target_id:
            continue
        if target_id not in target_cache:
            target_cache[target_id] = get_target_details(target_id)
        target_info = target_cache[target_id]
        # "target_components" may be present but empty (or null); fall back
        # to a single empty component so the [0] lookup cannot fail.
        components = target_info.get("target_components") or [{}]
        gene_links.append({
            "target_id": target_id,
            "gene_symbol": components[0].get("accession", ""),
            "organism": target_info.get("organism", ""),
        })
    return gene_links
# ------------------------- | |
# Alias for Pipeline | |
# ------------------------- | |
def get_molecule_data(query: str) -> Dict:
    """
    High-level pipeline entry point: look up a molecule and bundle its data.

    Searches for `query`, takes the first hit, and collects its details,
    structure image URL, and bioactivity records.

    Args:
        query: Search text passed to search_molecule.

    Returns:
        A dict with keys "chembl_id", "details", "image_url" and
        "bioactivity", or an empty dict when the search has no hits or the
        first hit carries no "molecule_chembl_id".
    """
    hits = search_molecule(query, max_results=1)
    if not hits:
        return {}
    chembl_id = hits[0].get("molecule_chembl_id")
    # Without an ID the follow-up calls would request "molecule/None";
    # treat it the same as "no hit".
    if not chembl_id:
        return {}
    return {
        "chembl_id": chembl_id,
        "details": get_molecule_details(chembl_id),
        "image_url": get_molecule_image(chembl_id),
        "bioactivity": get_bioactivity_for_molecule(chembl_id),
    }