Spaces:
Sleeping
Sleeping
Update genesis/api_clients/bioportal_api.py
Browse files
genesis/api_clients/bioportal_api.py
CHANGED
@@ -1,97 +1,73 @@
|
|
1 |
# genesis/api_clients/bioportal_api.py
|
2 |
-
import requests
|
3 |
-
from typing import Dict, List, Optional
|
4 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
10 |
"""
|
11 |
Search BioPortal for ontology terms.
|
12 |
-
|
13 |
-
exact_match: if True, only return exact matches
|
14 |
"""
|
15 |
-
if not BIOPORTAL_API_KEY:
|
16 |
-
raise ValueError("BIOPORTAL_API_KEY not set in environment.")
|
17 |
-
|
18 |
params = {
|
19 |
-
"
|
20 |
-
"
|
21 |
-
"pagesize": max_results,
|
22 |
-
"require_exact_match": "true" if exact_match else "false"
|
23 |
}
|
24 |
-
|
25 |
if ontologies:
|
26 |
params["ontologies"] = ",".join(ontologies)
|
27 |
-
|
28 |
-
r = requests.get(f"{
|
29 |
r.raise_for_status()
|
30 |
data = r.json()
|
31 |
-
|
32 |
results = []
|
33 |
for item in data.get("collection", []):
|
34 |
results.append({
|
35 |
-
"prefLabel": item.get("prefLabel"
|
36 |
-
"definition":
|
37 |
"synonyms": item.get("synonym", []),
|
38 |
"ontology": item.get("links", {}).get("ontology"),
|
39 |
-
"
|
40 |
"cui": item.get("cui", []),
|
41 |
"semanticType": item.get("semanticType", []),
|
42 |
-
"parents": item.get("links", {}).get("parents"),
|
43 |
-
"children": item.get("links", {}).get("children")
|
44 |
})
|
45 |
-
|
46 |
return results
|
47 |
|
48 |
|
49 |
-
def
|
50 |
"""
|
51 |
-
|
52 |
"""
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
params = {
|
57 |
-
"apikey": BIOPORTAL_API_KEY,
|
58 |
-
"q": prefix,
|
59 |
-
"pagesize": max_results
|
60 |
-
}
|
61 |
-
|
62 |
-
if ontologies:
|
63 |
-
params["ontologies"] = ",".join(ontologies)
|
64 |
-
|
65 |
-
r = requests.get(f"{BIOPORTAL_BASE_URL}/search", params=params)
|
66 |
r.raise_for_status()
|
67 |
-
|
68 |
-
|
69 |
-
return [item.get("prefLabel", "") for item in data.get("collection", []) if item.get("prefLabel")]
|
70 |
|
71 |
|
72 |
-
def
|
73 |
"""
|
74 |
-
|
75 |
"""
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
r = requests.get(f"{BIOPORTAL_BASE_URL}/ontologies/{ontology_acronym}/classes/{term_id}", params={"apikey": BIOPORTAL_API_KEY})
|
80 |
r.raise_for_status()
|
81 |
return r.json()
|
82 |
|
83 |
|
84 |
-
def
|
85 |
"""
|
86 |
-
|
|
|
87 |
"""
|
88 |
-
|
89 |
-
|
90 |
-
for m in mappings:
|
91 |
-
if "cui" in m and m["cui"]:
|
92 |
-
umls_concepts.append({
|
93 |
-
"term": m["prefLabel"],
|
94 |
-
"cui": m["cui"],
|
95 |
-
"semanticType": m["semanticType"]
|
96 |
-
})
|
97 |
-
return umls_concepts
|
|
|
1 |
# genesis/api_clients/bioportal_api.py
|
|
|
|
|
2 |
import os
|
3 |
+
import requests
|
4 |
+
from typing import List, Dict, Optional
|
5 |
+
|
6 |
+
# BioPortal API
# Root URL that the endpoint paths used in this module are appended to.
BIOPORTAL_BASE = "https://data.bioontology.org"
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # You must add this to Hugging Face secrets or .env

# Checked at import time: importing this module raises when the variable is
# unset or empty, so a missing key surfaces before any request is attempted.
if not BIOPORTAL_API_KEY:
    raise EnvironmentError("BIOPORTAL_API_KEY is missing. Add it to your environment variables or Hugging Face secrets.")

# Authorization header carrying the API key; passed as `headers=` on the
# requests issued by this module.
HEADERS = {
    "Authorization": f"apikey token={BIOPORTAL_API_KEY}"
}
|
16 |
+
|
17 |
+
def search_terms(
    query: str,
    ontologies: Optional[List[str]] = None,
    max_results: int = 20,
    timeout: float = 30.0,
) -> List[Dict]:
    """
    Search BioPortal for ontology terms.

    Optionally limit search to specific ontologies like 'GO', 'CHEBI', 'DOID', etc.

    Args:
        query: Search text, sent as the ``q`` query parameter.
        ontologies: Ontology acronyms to restrict the search to. They are
            comma-joined into the ``ontologies`` parameter; ``None`` or an
            empty list leaves the parameter out entirely.
        max_results: Value sent as the ``pagesize`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        One dict per entry of the response's ``collection`` list, with the
        keys ``prefLabel``, ``definition``, ``synonyms``, ``ontology``,
        ``uri``, ``cui`` and ``semanticType``. An absent ``collection``
        yields an empty list.

    Raises:
        requests.HTTPError: If the response status is an error
            (raised by ``raise_for_status``).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = {
        "q": query,
        "pagesize": max_results,
    }
    if ontologies:
        params["ontologies"] = ",".join(ontologies)

    r = requests.get(
        f"{BIOPORTAL_BASE}/search",
        headers=HEADERS,
        params=params,
        timeout=timeout,
    )
    r.raise_for_status()
    data = r.json()

    # Flatten each hit to the fields this module exposes; list-valued fields
    # default to [] so callers can iterate without a None check.
    return [
        {
            "prefLabel": item.get("prefLabel"),
            "definition": item.get("definition", []),
            "synonyms": item.get("synonym", []),
            "ontology": item.get("links", {}).get("ontology"),
            "uri": item.get("@id"),
            "cui": item.get("cui", []),
            "semanticType": item.get("semanticType", []),
        }
        for item in data.get("collection", [])
    ]
|
45 |
|
46 |
|
47 |
+
def get_term_details(
    term_uri: str,
    ontology_acronym: Optional[str] = None,
    timeout: float = 30.0,
) -> Dict:
    """
    Fetch full details for a given ontology term URI.

    Args:
        term_uri: Identifier of the term; it is percent-encoded (including
            ``/`` and ``:``) before being placed in the URL path.
        ontology_acronym: Acronym of the ontology that owns the term (for
            example ``'GO'``). When given, the request targets
            ``/ontologies/{acronym}/classes/{encoded term_uri}``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response status is an error
            (raised by ``raise_for_status``).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    encoded_uri = requests.utils.quote(term_uri, safe="")

    if ontology_acronym:
        encoded_acronym = requests.utils.quote(ontology_acronym, safe="")
        url = f"{BIOPORTAL_BASE}/ontologies/{encoded_acronym}/classes/{encoded_uri}"
    else:
        # NOTE(review): this legacy URL puts the term URI where BioPortal's
        # documented routes expect an ontology acronym. It is kept so callers
        # that pass only `term_uri` behave exactly as before — confirm whether
        # it ever resolves, and prefer passing `ontology_acronym`.
        url = f"{BIOPORTAL_BASE}/ontologies/{encoded_uri}"

    r = requests.get(url, headers=HEADERS, timeout=timeout)
    r.raise_for_status()
    return r.json()
|
|
|
|
|
55 |
|
56 |
|
57 |
+
def find_related_terms(term_uri: str, timeout: float = 30.0) -> List[Dict]:
    """
    Find related terms for a given ontology concept.

    Args:
        term_uri: Identifier of the concept; it is percent-encoded (including
            ``/`` and ``:``) before being placed in the URL path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response, returned as-is.

    Raises:
        requests.HTTPError: If the response status is an error
            (raised by ``raise_for_status``).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    encoded_uri = requests.utils.quote(term_uri, safe="")
    # NOTE(review): the path is /ontologies/{encoded term URI}/related; verify
    # against the BioPortal API reference that this route exists and that the
    # first segment should not be an ontology acronym.
    url = f"{BIOPORTAL_BASE}/ontologies/{encoded_uri}/related"
    r = requests.get(url, headers=HEADERS, timeout=timeout)
    r.raise_for_status()
    return r.json()
|
65 |
|
66 |
|
67 |
+
def map_to_ontology(entity_name: str, preferred_ontologies: Optional[List[str]] = None) -> Optional[Dict]:
    """
    Resolve a free-text biomedical entity name to a single ontology concept.

    Runs `search_terms` on the name, restricted to `preferred_ontologies`
    when provided, asking for at most one result. Intended for linking
    entities from sources such as PubMed, ChEMBL or clinical trials to
    ontology terms.

    Returns:
        The first search hit, or None when the search returns nothing.
    """
    candidates = search_terms(
        entity_name,
        ontologies=preferred_ontologies,
        max_results=1,
    )
    if not candidates:
        return None
    return candidates[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|