Spaces:
Sleeping
Sleeping
File size: 3,454 Bytes
5b6700c 325ee75 7bfdf8c 6409f04 7bfdf8c 6409f04 7bfdf8c ea8c68b 6409f04 ea8c68b 6409f04 5755f5b 6409f04 67ef408 79df878 6409f04 f96a85c 5b6700c 6409f04 325ee75 99929f8 79df878 6409f04 325ee75 6409f04 325ee75 6409f04 ea8c68b 79df878 6409f04 79df878 6409f04 79df878 6409f04 ea8c68b 6409f04 79df878 6409f04 79df878 6409f04 79df878 6409f04 79df878 6409f04 79df878 6409f04 79df878 6409f04 99929f8 6409f04 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# genesis/api_clients/bioportal_api.py
import os
import requests
from typing import List, Dict
# BioPortal API key, read from the environment once at import time.
# os.getenv returns None when the variable is unset.
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # Set in Hugging Face secrets or .env
# Base URL that the endpoint paths used in this module are appended to.
BIOPORTAL_BASE = "https://data.bioontology.org"
# -------------------------
# Search Ontology Terms
# -------------------------
def search_ontology(term: str, max_results: int = 5) -> List[Dict]:
    """
    Search BioPortal for matching ontology terms across multiple vocabularies.

    Args:
        term: Query text, sent as the ``q`` parameter.
        max_results: Page size requested from the API (``pagesize``).

    Returns:
        One dict per hit with the keys ``prefLabel``, ``ontology``,
        ``definition``, ``iri``, ``synonyms`` and ``cui``. ``synonyms`` and
        ``cui`` are always lists; ``definition`` is a string ("" when absent).

    Raises:
        ValueError: If BIOPORTAL_API_KEY is not set.
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    if not BIOPORTAL_API_KEY:
        raise ValueError("BioPortal API key not set in environment variables.")

    params = {
        "q": term,
        "pagesize": max_results,
        "apikey": BIOPORTAL_API_KEY,
    }
    # Without a timeout, requests waits forever on a stalled connection.
    r = requests.get(f"{BIOPORTAL_BASE}/search", params=params, timeout=30)
    r.raise_for_status()

    # `or` fallbacks also cover keys that are present but JSON null, which
    # dict.get's default argument does not.
    results = r.json().get("collection") or []
    terms = []
    for item in results:
        definition = item.get("definition") or ""
        if isinstance(definition, (list, tuple)):
            # Keep only the first definition when several are returned.
            definition = definition[0]
        terms.append({
            "prefLabel": item.get("prefLabel"),
            "ontology": (item.get("links") or {}).get("ontology"),
            "definition": definition,
            "iri": item.get("@id"),
            "synonyms": item.get("synonym") or [],
            "cui": item.get("cui") or [],  # UMLS Concept Unique Identifiers if available
        })
    return terms
# -------------------------
# Expand Query with Related Ontology Terms
# -------------------------
def expand_with_bioportal(term: str, max_results: int = 5) -> List[str]:
    """
    Expand a biomedical term into related ontology labels.
    This is the function called by pipeline.py

    Collects the preferred label of every search hit that differs from
    ``term`` (case-insensitively) together with each hit's synonyms, and
    returns them de-duplicated in first-seen order, so the same search
    results always produce the same list.

    Best-effort: any failure is printed and an empty list is returned
    instead of raising.
    """
    try:
        results = search_ontology(term, max_results)
        wanted = term.lower()
        expanded = []
        for res in results:
            label = res["prefLabel"]
            if label and label.lower() != wanted:
                expanded.append(label)
            # NOTE(review): synonyms are gathered for every hit, including the
            # exact match — confirm against the intended nesting.
            # A missing or None synonym list contributes nothing.
            expanded.extend(res.get("synonyms") or [])
        # dict.fromkeys removes duplicates while keeping first-seen order;
        # list(set(...)) ordering changes between interpreter runs.
        return list(dict.fromkeys(expanded))
    except Exception as e:
        print(f"[BioPortal] Expansion failed for '{term}': {e}")
        return []
# -------------------------
# Get Term Details
# -------------------------
def get_term_details(ontology_acronym: str, term_id: str) -> Dict:
    """
    Get full details for a specific ontology term.

    Args:
        ontology_acronym: Ontology acronym placed in the URL path.
        term_id: Class identifier, either raw (for example the ``iri`` value
            returned by search_ontology) or already percent-encoded.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    from urllib.parse import quote  # Local import: only needed here

    params = {
        "apikey": BIOPORTAL_API_KEY,
    }
    # An IRI used as a path segment must have its ":" and "/" percent-encoded,
    # otherwise they are read as URL structure. "%" stays in the safe set so an
    # id the caller already encoded is not encoded a second time.
    encoded_id = quote(term_id, safe="%")
    url = f"{BIOPORTAL_BASE}/ontologies/{ontology_acronym}/classes/{encoded_id}"
    # Without a timeout, requests waits forever on a stalled connection.
    r = requests.get(url, params=params, timeout=30)
    r.raise_for_status()
    return r.json()
# -------------------------
# Map Term to UMLS CUI
# -------------------------
def map_to_umls(term: str) -> List[str]:
    """
    Look up a term via search_ontology and return the distinct UMLS CUIs
    attached to its hits. The order of the returned list is unspecified.
    """
    unique_cuis = set()
    for hit in search_ontology(term):
        hit_cuis = hit.get("cui")
        if hit_cuis:
            unique_cuis.update(hit_cuis)
    return list(unique_cuis)
# -------------------------
# Cross-Domain Semantic Context
# -------------------------
def ontology_context(term: str) -> Dict:
    """
    Bundle three views of a term into one dict: the ontology search hits
    ("ontology_terms"), the distinct UMLS CUIs found on those hits
    ("umls_cuis"), and the result of the PubMed literature search
    ("literature").
    """
    # Imported at call time rather than at module load.
    from genesis.api_clients import pubmed_api

    hits = search_ontology(term)
    collected = [cui for hit in hits if hit.get("cui") for cui in hit["cui"]]
    context = {
        "ontology_terms": hits,
        "umls_cuis": list(set(collected)),
    }
    # The literature lookup runs last, after the ontology data is assembled.
    context["literature"] = pubmed_api.search_pubmed_literature(term)
    return context
|