Spaces:
Sleeping
Sleeping
Update genesis/api_clients/bioportal_api.py
Browse files
genesis/api_clients/bioportal_api.py
CHANGED
@@ -3,87 +3,101 @@ import os
|
|
3 |
import requests
|
4 |
from typing import Dict, List, Optional
|
5 |
|
6 |
-
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
|
7 |
BIOPORTAL_BASE = "https://data.bioontology.org"
|
8 |
-
|
9 |
-
if not BIOPORTAL_API_KEY:
|
10 |
-
raise EnvironmentError("Missing BIOPORTAL_API_KEY in environment variables.")
|
11 |
|
12 |
# -------------------------
|
13 |
-
#
|
14 |
# -------------------------
|
15 |
-
def
|
16 |
-
"""Attach BioPortal API key."""
|
17 |
-
params["apikey"] = BIOPORTAL_API_KEY
|
18 |
-
return params
|
19 |
-
|
20 |
-
def search_term(term: str, ontologies: Optional[List[str]] = None, max_results: int = 10) -> List[Dict]:
|
21 |
"""
|
22 |
-
|
23 |
-
`ontologies` is optional list like ["GO", "CHEBI", "NCBITAXON"]
|
24 |
"""
|
25 |
-
|
26 |
-
"
|
27 |
-
|
28 |
-
}
|
29 |
-
|
30 |
-
params["ontologies"] = ",".join(ontologies)
|
31 |
-
|
32 |
-
r = requests.get(f"{BIOPORTAL_BASE}/search", params=params)
|
33 |
r.raise_for_status()
|
34 |
-
|
35 |
-
|
36 |
-
results = []
|
37 |
-
for collection in data.get("collection", []):
|
38 |
-
results.append({
|
39 |
-
"prefLabel": collection.get("prefLabel"),
|
40 |
-
"ontology": collection.get("links", {}).get("ontology"),
|
41 |
-
"definition": collection.get("definition"),
|
42 |
-
"cui": collection.get("cui"),
|
43 |
-
"uri": collection.get("@id")
|
44 |
-
})
|
45 |
-
return results
|
46 |
|
47 |
-
|
|
|
|
|
|
|
48 |
"""
|
49 |
-
|
|
|
50 |
"""
|
51 |
-
params =
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
55 |
|
56 |
-
|
|
|
|
|
|
|
57 |
"""
|
58 |
-
|
59 |
-
Useful for linking to UMLS, NCBI, ChEMBL.
|
60 |
"""
|
61 |
-
|
62 |
-
r = requests.get(f"{term_uri}/mappings", params=params)
|
63 |
-
r.raise_for_status()
|
64 |
-
data = r.json()
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
return
|
75 |
|
76 |
-
def
|
77 |
"""
|
78 |
-
|
79 |
"""
|
80 |
-
|
81 |
-
|
82 |
-
for
|
83 |
-
mappings =
|
84 |
-
|
85 |
-
"term":
|
86 |
-
"
|
87 |
"mappings": mappings
|
88 |
})
|
89 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import requests
|
4 |
from typing import Dict, List, Optional
|
5 |
|
|
|
6 |
BIOPORTAL_BASE = "https://data.bioontology.org"
|
7 |
+
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # Saved in Hugging Face / .env
|
|
|
|
|
8 |
|
9 |
# -------------------------
|
10 |
+
# Core Request Helper
|
11 |
# -------------------------
|
12 |
+
def bioportal_request(endpoint: str, params: Dict, *, timeout: float = 30.0) -> Dict:
    """
    Send an authenticated GET request to the BioPortal REST API.

    Args:
        endpoint: Path appended verbatim to ``BIOPORTAL_BASE`` (e.g. ``"/search"``).
        params: Query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If ``BIOPORTAL_API_KEY`` is empty or unset.
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Checked per call (not at import time) so the module can still be
    # imported in environments where the key has not been configured.
    if not BIOPORTAL_API_KEY:
        raise ValueError("BIOPORTAL_API_KEY not set in environment variables")

    headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
    r = requests.get(
        f"{BIOPORTAL_BASE}{endpoint}",
        headers=headers,
        params=params,
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
+
# -------------------------
|
25 |
+
# Search Ontology Terms
|
26 |
+
# -------------------------
|
27 |
+
def search_terms(query: str, ontology: Optional[str] = None, max_results: int = 10) -> List[Dict]:
    """
    Search ontology terms across all ontologies or within a specific ontology.

    Args:
        query: Free-text search string, sent as the ``q`` parameter.
        ontology: Optional ontology acronym (e.g., MESH, GO, SNOMEDCT). When
            empty or ``None`` the search is not restricted.
        max_results: Requested page size, sent as the ``pagesize`` parameter.

    Returns:
        The ``"collection"`` list from the response, or an empty list when the
        response has no such key.
    """
    params = {"q": query, "pagesize": max_results}
    if ontology:
        # The BioPortal search endpoint filters on the plural "ontologies"
        # parameter (comma-separated acronyms); a singular "ontology" key is
        # not a recognised filter, so the restriction would not be applied.
        params["ontologies"] = ontology
    data = bioportal_request("/search", params)
    return data.get("collection", [])
|
37 |
|
38 |
+
# -------------------------
|
39 |
+
# Get Term Details
|
40 |
+
# -------------------------
|
41 |
+
def get_term_details(ontology: str, term_id: str) -> Dict:
    """
    Retrieve details for a specific ontology term.

    Args:
        ontology: Ontology acronym placed in the request path.
        term_id: Identifier of the class. It may be given raw (typically a
            full URI) or already percent-encoded; both forms produce the same
            request.

    Returns:
        The decoded JSON response from ``bioportal_request``.
    """
    from urllib.parse import quote, unquote

    # Class ids are usually full URIs; their ":" and "/" characters must be
    # percent-encoded to travel as a single path segment. Decoding first keeps
    # callers that already pass an encoded id from being double-encoded.
    encoded_id = quote(unquote(term_id), safe="")
    return bioportal_request(f"/ontologies/{ontology}/classes/{encoded_id}", {})
|
|
|
|
|
|
|
46 |
|
47 |
+
# -------------------------
|
48 |
+
# Mapping & Crosslinks
|
49 |
+
# -------------------------
|
50 |
+
def get_term_mappings(term_id: str) -> List[Dict]:
    """
    Fetch the mappings that link an ontology term to other ontologies.

    Args:
        term_id: Identifier inserted verbatim after ``/mappings/`` in the
            request path.

    Returns:
        The ``"collection"`` list from the response, or an empty list when the
        response has no such key.
    """
    # NOTE(review): term_id is placed in the path as-is; confirm against the
    # BioPortal API that this endpoint accepts the ids callers pass here.
    endpoint = f"/mappings/{term_id}"
    response = bioportal_request(endpoint, {})
    return response.get("collection", [])
|
56 |
|
57 |
+
def search_and_map(query: str, ontology: Optional[str] = None) -> List[Dict]:
    """
    Run a term search and attach the mappings of every hit.

    Args:
        query: Search string passed to ``search_terms``.
        ontology: Optional ontology acronym passed to ``search_terms``.

    Returns:
        One dict per search hit with the keys ``"term"`` (the hit's
        ``prefLabel`` or ``""``), ``"ontology"`` (the hit's
        ``links.ontology`` or ``""``) and ``"mappings"`` (the result of
        ``get_term_mappings`` for the hit's ``"@id"``).
    """
    results: List[Dict] = []
    for hit in search_terms(query, ontology):
        # One mapping lookup per hit, performed before the entry is built.
        crosslinks = get_term_mappings(hit["@id"])
        label = hit.get("prefLabel", "")
        source_ontology = hit.get("links", {}).get("ontology", "")
        results.append(
            {
                "term": label,
                "ontology": source_ontology,
                "mappings": crosslinks,
            }
        )
    return results
|
71 |
+
|
72 |
+
# -------------------------
|
73 |
+
# Integration Helpers
|
74 |
+
# -------------------------
|
75 |
+
def ontology_to_entities(query: str) -> Dict:
    """
    Link an ontology query to drugs, genes and publications.

    The same ``query`` string is sent to the ontology search and to the
    ChEMBL, NCBI and PubMed clients.

    Args:
        query: Term to look up in every source.

    Returns:
        A dict with the keys ``"ontology_term"``, ``"drugs"``, ``"genes"`` and
        ``"publications"``. ``"ontology_id"`` (the ``"@id"`` of the first
        search hit, or ``""``) is added only when the ontology search returned
        at least one hit.
    """
    # Imported inside the function to avoid import cycles between the clients.
    from genesis.api_clients import chembl_api, pubmed_api, ncbi_api

    # Ontology search first, then each cross-domain lookup in turn.
    hits = search_terms(query)
    top_hit_id = hits[0].get("@id", "") if hits else None

    drugs = chembl_api.search_molecule(query)
    genes = ncbi_api.search_gene(query)
    publications = pubmed_api.search_pubmed(query)

    linked = {
        "ontology_term": query,
        "drugs": drugs,
        "genes": genes,
        "publications": publications,
    }
    if hits:
        linked["ontology_id"] = top_hit_id
    return linked
|