# Page header captured with the file (not code): mgbam's picture
# Commit: "Update genesis/api_clients/bioportal_api.py"
# Revision 99929f8 (verified)
# genesis/api_clients/bioportal_api.py
import os
import requests
from typing import List, Dict
# API key is read from the environment (Hugging Face secrets or a .env file);
# it is None when the variable is not set.
BIOPORTAL_API_KEY = os.environ.get("BIOPORTAL_API_KEY")
# Root URL that every endpoint path in this module is appended to.
BIOPORTAL_BASE = "https://data.bioontology.org"
# -------------------------
# Search Ontology Terms
# -------------------------
def search_ontology(term: str, max_results: int = 5, timeout: float = 10.0) -> List[Dict]:
    """
    Search BioPortal for matching ontology terms across multiple vocabularies.

    Args:
        term: Free-text query, sent as the ``q`` parameter of ``/search``.
        max_results: Page size requested from the API (``pagesize``).
        timeout: Seconds to wait for BioPortal before giving up, so a stalled
            connection cannot block the caller indefinitely.

    Returns:
        One dict per hit with the keys ``prefLabel``, ``ontology``,
        ``definition`` (first definition, or ``""`` when there is none),
        ``iri``, ``synonyms`` and ``cui``. ``synonyms`` and ``cui`` are
        always lists, never ``None``.

    Raises:
        ValueError: If ``BIOPORTAL_API_KEY`` is not set.
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    if not BIOPORTAL_API_KEY:
        raise ValueError("BioPortal API key not set in environment variables.")

    params = {
        "q": term,
        "pagesize": max_results,
        "apikey": BIOPORTAL_API_KEY,
    }
    r = requests.get(f"{BIOPORTAL_BASE}/search", params=params, timeout=timeout)
    r.raise_for_status()

    # ``x.get(key) or default`` is used instead of ``x.get(key, default)``
    # because the latter passes an explicit JSON null straight through.
    results = r.json().get("collection") or []
    terms = []
    for item in results:
        definitions = item.get("definition") or [""]
        terms.append({
            "prefLabel": item.get("prefLabel"),
            "ontology": (item.get("links") or {}).get("ontology"),
            "definition": definitions[0],
            "iri": item.get("@id"),
            "synonyms": item.get("synonym") or [],
            "cui": item.get("cui") or [],  # UMLS Concept Unique Identifiers if available
        })
    return terms
# -------------------------
# Expand Query with Related Ontology Terms
# -------------------------
def expand_with_bioportal(term: str, max_results: int = 5) -> List[str]:
    """
    Expand a biomedical term into related ontology labels.
    This is the function called by pipeline.py

    Collects the preferred label and the synonyms of every hit returned by
    ``search_ontology``, drops labels that match ``term`` ignoring case, and
    removes duplicates while keeping the order in which they were first seen.

    Args:
        term: The term to expand.
        max_results: Number of search hits to request.

    Returns:
        Unique labels in first-seen order. Best-effort: if the lookup fails
        for any reason the error is printed and ``[]`` is returned.
    """
    try:
        results = search_ontology(term, max_results)
        needle = term.lower()
        expanded = []
        for res in results:
            # ``or []`` keeps a null synonyms value from aborting the whole
            # expansion through the except branch below.
            candidates = [res.get("prefLabel"), *(res.get("synonyms") or [])]
            for label in candidates:
                # The query term itself is not an expansion, whether it shows
                # up as a preferred label or as a synonym.
                if isinstance(label, str) and label and label.lower() != needle:
                    expanded.append(label)
        # dict.fromkeys de-duplicates while preserving order; a set would
        # return the labels in a different order on every run.
        return list(dict.fromkeys(expanded))
    except Exception as e:
        print(f"[BioPortal] Expansion failed for '{term}': {e}")
        return []
# -------------------------
# Get Term Details
# -------------------------
def get_term_details(ontology_acronym: str, term_id: str, timeout: float = 10.0) -> Dict:
    """
    Get full details for a specific ontology term.

    Args:
        ontology_acronym: Ontology acronym used in the URL path.
        term_id: Class identifier. May be a raw IRI (as found under the
            ``iri`` key of ``search_ontology`` results), an already
            percent-encoded IRI, or a short id.
        timeout: Seconds to wait for BioPortal before giving up.

    Returns:
        The decoded JSON body of the class resource.

    Raises:
        ValueError: If ``BIOPORTAL_API_KEY`` is not set.
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    # Function-scope import so the module's import block stays untouched.
    from urllib.parse import quote

    if not BIOPORTAL_API_KEY:
        raise ValueError("BioPortal API key not set in environment variables.")

    # A raw IRI contains ':' and '/', and often '#'. Left unescaped, the
    # slashes become extra path segments and everything after '#' is treated
    # as a URL fragment and never sent. An id that is already percent-encoded
    # contains none of these characters and is passed through unchanged, so
    # it is not double-encoded.
    if any(ch in term_id for ch in ":/#?"):
        term_id = quote(term_id, safe="")

    params = {"apikey": BIOPORTAL_API_KEY}
    r = requests.get(
        f"{BIOPORTAL_BASE}/ontologies/{ontology_acronym}/classes/{term_id}",
        params=params,
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()
# -------------------------
# Map Term to UMLS CUI
# -------------------------
def map_to_umls(term: str) -> List[str]:
    """
    Map a given term to UMLS Concept Unique Identifiers (CUIs).

    Runs ``search_ontology`` with its default result limit and returns the
    distinct values found under the ``cui`` key of the hits. The order of
    the returned list is not defined.
    """
    unique_cuis = set()
    for hit in search_ontology(term):
        hit_cuis = hit.get("cui")
        if not hit_cuis:
            # Hit carries no CUI information.
            continue
        unique_cuis.update(hit_cuis)
    return list(unique_cuis)
# -------------------------
# Cross-Domain Semantic Context
# -------------------------
def ontology_context(term: str) -> Dict:
    """
    Bundle ontology matches, their UMLS CUIs and PubMed literature for a term.

    Returns a dict with three keys:
        ``ontology_terms``: the hits from ``search_ontology(term)``.
        ``umls_cuis``: distinct values gathered from the hits' ``cui`` key.
        ``literature``: the result of ``pubmed_api.search_pubmed_literature(term)``.
    """
    # Imported inside the function rather than at module level (lazy import).
    from genesis.api_clients import pubmed_api

    hits = search_ontology(term)

    unique_cuis = set()
    for hit in hits:
        hit_cuis = hit.get("cui")
        if hit_cuis:
            unique_cuis.update(hit_cuis)

    context = {
        "ontology_terms": hits,
        "umls_cuis": list(unique_cuis),
    }
    # PubMed is queried last, after the ontology data has been assembled.
    context["literature"] = pubmed_api.search_pubmed_literature(term)
    return context