# genesis/api_clients/bioportal_api.py
"""Small client for the BioPortal REST API: term search and term lookup."""
import os
from typing import Dict, List, Optional

import requests

# BioPortal API
BIOPORTAL_BASE = "https://data.bioontology.org"
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")  # You must add this to Hugging Face secrets or .env

if not BIOPORTAL_API_KEY:
    raise EnvironmentError("BIOPORTAL_API_KEY is missing. Add it to your environment variables or Hugging Face secrets.")

HEADERS = {
    "Authorization": f"apikey token={BIOPORTAL_API_KEY}"
}

# Seconds to wait for BioPortal before giving up. `requests` applies no
# timeout by default, so without this a stalled connection blocks forever.
REQUEST_TIMEOUT = 30


def _get_json(path: str, params: Optional[Dict] = None):
    """GET ``BIOPORTAL_BASE + path`` with the API-key header and decode the JSON body.

    Raises:
        requests.HTTPError: if the response status is 4xx/5xx.
        requests.Timeout: if no response arrives within ``REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(
        f"{BIOPORTAL_BASE}{path}",
        headers=HEADERS,
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()


def search_terms(query: str, ontologies: Optional[List[str]] = None, max_results: int = 20) -> List[Dict]:
    """
    Search BioPortal for ontology terms.
    Optionally limit search to specific ontologies like 'GO', 'CHEBI', 'DOID', etc.

    Args:
        query: Free-text search string, sent as the ``q`` parameter.
        ontologies: Ontology acronyms to restrict the search to; sent
            comma-joined as the ``ontologies`` parameter. Omitted when
            ``None`` or empty.
        max_results: Sent as the ``pagesize`` parameter.

    Returns:
        One dict per entry of the response's ``collection`` list, with the
        keys ``prefLabel``, ``definition``, ``synonyms``, ``ontology``,
        ``uri``, ``cui`` and ``semanticType``. Empty list when the response
        has no ``collection``.

    Raises:
        requests.HTTPError: on a 4xx/5xx response.
        requests.Timeout: if the request exceeds ``REQUEST_TIMEOUT``.
    """
    params = {
        "q": query,
        "pagesize": max_results
    }
    if ontologies:
        params["ontologies"] = ",".join(ontologies)

    data = _get_json("/search", params=params)

    return [
        {
            "prefLabel": item.get("prefLabel"),
            "definition": item.get("definition", []),
            "synonyms": item.get("synonym", []),
            # `or {}` guards against an explicit `"links": null` in the payload.
            "ontology": (item.get("links") or {}).get("ontology"),
            "uri": item.get("@id"),
            "cui": item.get("cui", []),
            "semanticType": item.get("semanticType", []),
        }
        for item in data.get("collection", [])
    ]


def get_term_details(term_uri: str) -> Dict:
    """
    Fetch full details for a given ontology term URI.

    The URI is fully percent-encoded (no safe characters) and appended to
    ``/ontologies/``; the decoded JSON response is returned unchanged.

    Raises:
        requests.HTTPError: on a 4xx/5xx response.
        requests.Timeout: if the request exceeds ``REQUEST_TIMEOUT``.
    """
    # NOTE(review): BioPortal's documented class route appears to be
    # /ontologies/{ACRONYM}/classes/{encoded class id}; this path places the
    # term URI where the acronym is expected -- confirm against the API docs.
    encoded_uri = requests.utils.quote(term_uri, safe="")
    return _get_json(f"/ontologies/{encoded_uri}")


def find_related_terms(term_uri: str) -> List[Dict]:
    """
    Find related terms for a given ontology concept.

    The URI is fully percent-encoded and requested at
    ``/ontologies/{encoded_uri}/related``; the decoded JSON response is
    returned unchanged.

    Raises:
        requests.HTTPError: on a 4xx/5xx response.
        requests.Timeout: if the request exceeds ``REQUEST_TIMEOUT``.
    """
    # NOTE(review): a `/related` sub-route could not be confirmed from this
    # file; verify the endpoint and the response shape against the API docs.
    encoded_uri = requests.utils.quote(term_uri, safe="")
    return _get_json(f"/ontologies/{encoded_uri}/related")


def map_to_ontology(entity_name: str, preferred_ontologies: Optional[List[str]] = None) -> Optional[Dict]:
    """
    Try to map a free-text biomedical entity name to an ontology concept.
    Useful for linking PubMed entities, ChEMBL molecules, clinical trials to ontology terms.

    Args:
        entity_name: Text passed to ``search_terms`` as the query.
        preferred_ontologies: Optional ontology acronyms to restrict the search to.

    Returns:
        The first result of ``search_terms`` (requested with
        ``max_results=1``), or ``None`` when the search returns nothing.
    """
    matches = search_terms(entity_name, ontologies=preferred_ontologies, max_results=1)
    return matches[0] if matches else None