# mgbam's picture
# Update genesis/api_clients/bioportal_api.py
# 7bfdf8c verified
# raw
# history blame
# 2.53 kB
# genesis/api_clients/bioportal_api.py
import os
import requests
from typing import List, Dict, Optional
# BioPortal REST API configuration
BIOPORTAL_BASE = "https://data.bioontology.org"

# The API key is read from the process environment; you must add it to
# Hugging Face secrets or .env before this module is imported.
BIOPORTAL_API_KEY = os.environ.get("BIOPORTAL_API_KEY")

# Fail fast at import time when the key is absent or empty.
if not BIOPORTAL_API_KEY:
    raise EnvironmentError(
        "BIOPORTAL_API_KEY is missing. Add it to your environment variables or Hugging Face secrets."
    )

# Authorization header sent with every BioPortal request in this module.
HEADERS = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
def search_terms(
    query: str,
    ontologies: Optional[List[str]] = None,
    max_results: int = 20,
    *,
    timeout: float = 30.0,
) -> List[Dict]:
    """
    Search BioPortal for ontology terms.

    Optionally limit search to specific ontologies like 'GO', 'CHEBI', 'DOID', etc.

    Args:
        query: Free-text search string, sent as the ``q`` parameter.
        ontologies: Ontology acronyms to restrict the search to; joined with
            commas into the ``ontologies`` parameter. ``None`` or an empty
            list leaves the search unrestricted.
        max_results: Value sent as the ``pagesize`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        One dict per item of the response's ``collection``, with the keys
        ``prefLabel``, ``definition``, ``synonyms``, ``ontology``, ``uri``,
        ``cui`` and ``semanticType``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {"q": query, "pagesize": max_results}
    if ontologies:
        params["ontologies"] = ",".join(ontologies)

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    r = requests.get(
        f"{BIOPORTAL_BASE}/search",
        headers=HEADERS,
        params=params,
        timeout=timeout,
    )
    r.raise_for_status()
    data = r.json()

    # `or []` / `or {}` guard against a JSON null, which `.get(key, default)`
    # would pass through unchanged and then fail on.
    return [
        {
            "prefLabel": item.get("prefLabel"),
            "definition": item.get("definition", []),
            "synonyms": item.get("synonym", []),
            "ontology": (item.get("links") or {}).get("ontology"),
            "uri": item.get("@id"),
            "cui": item.get("cui", []),
            "semanticType": item.get("semanticType", []),
        }
        for item in (data.get("collection") or [])
    ]
def get_term_details(term_uri: str, *, timeout: float = 30.0) -> Dict:
    """
    Fetch full details for a given ontology term URI.

    Args:
        term_uri: Identifier of the term; it is percent-encoded in full
            (including ``/`` and ``:``) before being placed in the URL path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # safe="" so that the slashes inside the URI are escaped too and it stays
    # a single path segment.
    encoded_uri = requests.utils.quote(term_uri, safe="")
    # NOTE(review): BioPortal's class lookup appears to be
    # /ontologies/{acronym}/classes/{encoded class id}; this path only carries
    # the encoded URI -- confirm against the BioPortal API documentation.
    # A timeout is passed because requests otherwise waits indefinitely.
    r = requests.get(
        f"{BIOPORTAL_BASE}/ontologies/{encoded_uri}",
        headers=HEADERS,
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()
def find_related_terms(term_uri: str, *, timeout: float = 30.0) -> List[Dict]:
    """
    Find related terms for a given ontology concept.

    Args:
        term_uri: Identifier of the concept; it is percent-encoded in full
            (including ``/`` and ``:``) before being placed in the URL path.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response, returned as-is.
        NOTE(review): the annotation says list of dicts, but the shape is
        whatever the server sends -- confirm.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # safe="" so that the slashes inside the URI are escaped too and it stays
    # a single path segment.
    encoded_uri = requests.utils.quote(term_uri, safe="")
    # NOTE(review): verify that BioPortal exposes a ".../related" endpoint at
    # this path; it is kept exactly as the original wrote it.
    # A timeout is passed because requests otherwise waits indefinitely.
    r = requests.get(
        f"{BIOPORTAL_BASE}/ontologies/{encoded_uri}/related",
        headers=HEADERS,
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()
def map_to_ontology(entity_name: str, preferred_ontologies: Optional[List[str]] = None) -> Optional[Dict]:
    """
    Resolve a free-text biomedical entity name to a single ontology concept.

    Runs a one-result search for the name (optionally restricted to the given
    ontologies) and hands back that hit, or None when the search is empty.
    Useful for linking PubMed entities, ChEMBL molecules, clinical trials to
    ontology terms.
    """
    hits = search_terms(entity_name, ontologies=preferred_ontologies, max_results=1)
    if not hits:
        return None
    return hits[0]