File size: 4,231 Bytes
792fe00
d68c20d
53ae019
d68c20d
5bfa97f
d69e21a
 
5bfa97f
d69e21a
 
 
 
5bfa97f
d69e21a
5bfa97f
d69e21a
 
 
 
 
5bfa97f
d69e21a
792fe00
d69e21a
 
 
 
 
 
 
 
 
792fe00
53ae019
5404a41
d69e21a
5404a41
d69e21a
5404a41
d69e21a
5404a41
d69e21a
5404a41
d69e21a
5404a41
d69e21a
 
 
 
5404a41
d69e21a
5404a41
d69e21a
 
6fa7402
d69e21a
6fa7402
d69e21a
27cd148
d69e21a
27cd148
d69e21a
 
 
 
 
 
 
 
 
27cd148
d69e21a
27cd148
d69e21a
27cd148
d69e21a
 
27cd148
d69e21a
 
 
 
 
 
 
 
 
27cd148
d69e21a
 
 
 
27cd148
d69e21a
27cd148
d69e21a
 
 
 
 
 
 
 
 
 
 
21f575e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# genesis/api_clients/chembl_api.py
import os
import requests
from typing import Dict, List, Optional

# Root URL that every endpoint path in this module is appended to (note the trailing slash).
CHEMBL_BASE = "https://www.ebi.ac.uk/chembl/api/data/"
# Optional API key read from the environment; None when the variable is unset,
# in which case chembl_request sends no Authorization header.
CHEMBL_API_KEY = os.environ.get("CHEMBL_API_KEY")

# -------------------------
# Core Request Helper
# -------------------------
def chembl_request(endpoint: str, params: Optional[Dict] = None, timeout: float = 30.0) -> Dict:
    """
    Perform a GET request against the ChEMBL API and return the decoded JSON body.

    Args:
        endpoint: Path appended verbatim to CHEMBL_BASE (e.g. "molecule/search").
        params: Query-string parameters; None or an empty dict sends none.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within ``timeout``.
    """
    url = f"{CHEMBL_BASE}{endpoint}"
    headers = {"Accept": "application/json"}
    # Only attach credentials when a key was configured in the environment.
    if CHEMBL_API_KEY:
        headers["Authorization"] = f"Bearer {CHEMBL_API_KEY}"
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()

# -------------------------
# Search Molecules
# -------------------------
def search_molecule(query: str, max_results: int = 10) -> List[Dict]:
    """
    Query the "molecule/search" endpoint and return the matching molecules.

    ``query`` is sent as the ``q`` parameter and ``max_results`` as ``limit``.
    Returns the response's "molecules" list, or an empty list when that key
    is absent.
    """
    search_params = {"q": query, "limit": max_results}
    payload = chembl_request("molecule/search", search_params)
    return payload.get("molecules", [])

def get_molecule_details(chembl_id: str) -> Dict:
    """
    Fetch the full record of one molecule from "molecule/<chembl_id>".

    Returns the decoded JSON response unchanged.
    """
    endpoint = f"molecule/{chembl_id}"
    return chembl_request(endpoint, {})

def get_molecule_image(chembl_id: str) -> str:
    """
    Build the URL of the SVG structure image for a molecule.

    Only the URL string is constructed; no request is made.
    """
    image_root = "https://www.ebi.ac.uk/chembl/api/data/image/"
    return f"{image_root}{chembl_id}.svg"

# -------------------------
# Targets
# -------------------------
def search_target(query: str, max_results: int = 10) -> List[Dict]:
    """
    Query the "target/search" endpoint and return the matching targets.

    ``query`` is sent as the ``q`` parameter and ``max_results`` as ``limit``.
    Returns the response's "targets" list, or an empty list when that key
    is absent.
    """
    search_params = {"q": query, "limit": max_results}
    payload = chembl_request("target/search", search_params)
    return payload.get("targets", [])

def get_target_details(chembl_id: str) -> Dict:
    """
    Fetch the full record of one target from "target/<chembl_id>".

    Returns the decoded JSON response unchanged.
    """
    endpoint = f"target/{chembl_id}"
    return chembl_request(endpoint, {})

# -------------------------
# Bioactivity
# -------------------------
def get_bioactivity_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    Return activity records filtered by molecule.

    Calls the "activity" endpoint with ``molecule_chembl_id`` set to
    ``chembl_id`` and ``limit`` set to ``max_results``. Returns the response's
    "activities" list, or an empty list when that key is absent.
    """
    filters = {"molecule_chembl_id": chembl_id, "limit": max_results}
    payload = chembl_request("activity", filters)
    return payload.get("activities", [])

def get_bioactivity_for_target(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    Return activity records filtered by biological target.

    Calls the "activity" endpoint with ``target_chembl_id`` set to
    ``chembl_id`` and ``limit`` set to ``max_results``. Returns the response's
    "activities" list, or an empty list when that key is absent.
    """
    filters = {"target_chembl_id": chembl_id, "limit": max_results}
    payload = chembl_request("activity", filters)
    return payload.get("activities", [])

# -------------------------
# Assays
# -------------------------
def get_assays_for_molecule(chembl_id: str, max_results: int = 20) -> List[Dict]:
    """
    Return assay records requested for a molecule.

    Calls the "assay" endpoint with ``molecule_chembl_id`` set to ``chembl_id``
    and ``limit`` set to ``max_results``. Returns the response's "assays" list,
    or an empty list when that key is absent.
    """
    # NOTE(review): confirm the "assay" resource actually accepts a
    # molecule_chembl_id filter; this block only forwards it.
    filters = {"molecule_chembl_id": chembl_id, "limit": max_results}
    payload = chembl_request("assay", filters)
    return payload.get("assays", [])

# -------------------------
# Integration Helpers
# -------------------------
def molecule_to_gene_links(chembl_id: str) -> List[Dict]:
    """
    Crosslink a molecule's activity targets to identifiers for integration with NCBI.

    Fetches the molecule's activity records, looks up every referenced target,
    and emits one link per activity that carries a "target_chembl_id".

    Args:
        chembl_id: ChEMBL ID of the molecule.

    Returns:
        A list of dicts with keys:
          - "target_id": the activity's target ChEMBL ID,
          - "gene_symbol": the "accession" of the target's first component
            (despite the key name, this is the accession field, not a gene
            symbol), or "" when the target has no components,
          - "organism": the target's organism, or "" when absent.

    Raises:
        Whatever the underlying request helpers raise (e.g. HTTP errors).
    """
    activities = get_bioactivity_for_molecule(chembl_id)
    # Many activities point at the same target; fetch each target only once.
    target_cache: Dict[str, Dict] = {}
    gene_links = []
    for activity in activities:
        if "target_chembl_id" not in activity:
            continue
        target_id = activity["target_chembl_id"]
        if target_id not in target_cache:
            target_cache[target_id] = get_target_details(target_id)
        target_info = target_cache[target_id]
        # "target_components" may be missing, null, or an empty list; indexing
        # [0] on an empty list would raise IndexError, so fall back to [{}].
        components = target_info.get("target_components") or [{}]
        gene_links.append({
            "target_id": target_id,
            "gene_symbol": components[0].get("accession", ""),
            "organism": target_info.get("organism", ""),
        })
    return gene_links

# -------------------------
# Pipeline Entry Point
# -------------------------
def get_molecule_data(query: str) -> Dict:
    """
    High-level function for the pipeline: resolve a query to its first
    molecule hit and gather that molecule's data.

    Args:
        query: Search text passed to the molecule search.

    Returns:
        An empty dict when the search yields no hit, or when the first hit
        carries no "molecule_chembl_id". Otherwise a dict with keys:
          - "chembl_id": ID of the first hit,
          - "details": the molecule's detail record,
          - "image_url": URL of the molecule's SVG image,
          - "bioactivity": the molecule's activity records.
    """
    hits = search_molecule(query, max_results=1)
    if not hits:
        return {}

    chembl_id = hits[0].get("molecule_chembl_id")
    # Without an ID the follow-up lookups would request ".../molecule/None";
    # treat such a hit like "nothing found".
    if not chembl_id:
        return {}

    return {
        "chembl_id": chembl_id,
        "details": get_molecule_details(chembl_id),
        "image_url": get_molecule_image(chembl_id),
        "bioactivity": get_bioactivity_for_molecule(chembl_id),
    }