# genesis/ontology.py
"""
Ontology Expansion & Merging for GENESIS-AI
Uses UMLS & BioPortal to find related terms, synonyms, and hierarchies.
"""
import logging
import os
from typing import Dict, List, Optional, Union
from urllib.parse import quote_plus

import requests

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

UMLS_API_KEY = os.getenv("UMLS_API_KEY")
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")

_BIOPORTAL_SEARCH_URL = "https://data.bioontology.org/search"
_UMLS_SEARCH_URL = "https://uts-ws.nlm.nih.gov/rest/search/current"
_REQUEST_TIMEOUT = 20  # seconds, applied to every outbound HTTP call


def _scrub_secret(message: str, secret: Optional[str]) -> str:
    """Return *message* with every occurrence of *secret* masked as ``***``.

    Both the raw and the URL-encoded form of the secret are masked, because
    the text of an HTTP error can embed the full request URL (query string
    included).
    """
    if not secret:
        return message
    for form in (secret, quote_plus(secret)):
        message = message.replace(form, "***")
    return message


# -------------------------
# BioPortal Expansion
# -------------------------
def expand_with_bioportal(term: str) -> Union[List[str], Dict]:
    """Expand term using BioPortal ontology search.

    Args:
        term: Free-text term to search for.

    Returns:
        On success, the unique ``prefLabel`` values found in the response's
        ``collection`` entries, in first-seen order. On any failure, a dict
        of the form ``{"error": "<message>"}``; this function never raises.
    """
    try:
        # Let requests build the query string so spaces, '&', '#', etc. in
        # the term are percent-encoded instead of corrupting the URL.
        r = requests.get(
            _BIOPORTAL_SEARCH_URL,
            params={"q": term, "apikey": BIOPORTAL_API_KEY},
            timeout=_REQUEST_TIMEOUT,
        )
        r.raise_for_status()
        data = r.json()
        labels = [
            res["prefLabel"]
            for res in data.get("collection", [])
            if "prefLabel" in res
        ]
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is stable from run to run.
        return list(dict.fromkeys(labels))
    except Exception as e:
        # Deliberate catch-all: callers rely on the error-dict contract.
        # The API key is masked so it never reaches logs or callers.
        message = _scrub_secret(str(e), BIOPORTAL_API_KEY)
        logger.error("[Ontology] BioPortal expansion failed: %s", message)
        return {"error": message}


# -------------------------
# UMLS Expansion
# -------------------------
def expand_with_umls(term: str) -> Union[List[str], Dict]:
    """Expand term using UMLS API.

    Args:
        term: Free-text term to search for.

    Returns:
        On success, the ``name`` of every entry under ``result.results`` in
        the response (duplicates are kept). On any failure, a dict of the
        form ``{"error": "<message>"}``; this function never raises.
    """
    try:
        # Query parameters are passed separately so the term is URL-encoded.
        r = requests.get(
            _UMLS_SEARCH_URL,
            params={"string": term, "apiKey": UMLS_API_KEY},
            timeout=_REQUEST_TIMEOUT,
        )
        r.raise_for_status()
        data = r.json()
        return [
            res["name"]
            for res in data.get("result", {}).get("results", [])
            if "name" in res
        ]
    except Exception as e:
        # Deliberate catch-all: callers rely on the error-dict contract.
        # The API key is masked so it never reaches logs or callers.
        message = _scrub_secret(str(e), UMLS_API_KEY)
        logger.error("[Ontology] UMLS expansion failed: %s", message)
        return {"error": message}


# -------------------------
# Merge Ontology Terms
# -------------------------
def merge_ontology_terms(
    query_term: str,
    umls_terms: Union[List[str], Dict, None] = None,
    bioportal_terms: Union[List[str], Dict, None] = None,
) -> List[str]:
    """
    Merge the original query term with expanded terms from UMLS and BioPortal.

    Terms are stripped of surrounding whitespace; non-string entries and
    entries that are empty after stripping are dropped. Inputs that are not
    lists (``None`` or an error dict from an expand_* call) are ignored.

    Args:
        query_term: Original search term.
        umls_terms: List or error dict from expand_with_umls().
        bioportal_terms: List or error dict from expand_with_bioportal().

    Returns:
        List of unique merged terms (strings), in a deterministic order:
        the query term first, then UMLS terms, then BioPortal terms.
    """
    logger.info("[Ontology] Merging terms for query: %s", query_term)

    candidates: list = [query_term]
    for source in (umls_terms, bioportal_terms):
        if isinstance(source, list):
            candidates.extend(source)

    # A dict is used as an insertion-ordered set.
    merged: Dict[str, None] = {}
    for candidate in candidates:
        if not isinstance(candidate, str):
            continue
        # Strip before the emptiness check so whitespace-only entries do not
        # end up in the result as "".
        cleaned = candidate.strip()
        if cleaned:
            merged.setdefault(cleaned, None)

    merged_list = list(merged)
    logger.info("[Ontology] Merged %d unique terms.", len(merged_list))
    return merged_list


__all__ = [
    "expand_with_bioportal",
    "expand_with_umls",
    "merge_ontology_terms",
]