File size: 2,492 Bytes
5b6700c
67ef408
d24e0a6
5755f5b
5b6700c
d24e0a6
5755f5b
67ef408
c37fc6d
d24e0a6
5755f5b
d24e0a6
 
 
5755f5b
 
d24e0a6
5755f5b
d24e0a6
 
 
 
 
 
67ef408
 
d24e0a6
 
67ef408
d24e0a6
 
5755f5b
d24e0a6
 
 
 
c37fc6d
 
d24e0a6
c37fc6d
d24e0a6
c37fc6d
d24e0a6
 
 
 
 
5b6700c
5755f5b
d24e0a6
c37fc6d
d24e0a6
c37fc6d
d24e0a6
 
 
 
 
 
 
5755f5b
 
d24e0a6
5755f5b
d24e0a6
5755f5b
d24e0a6
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# genesis/api_clients/bioportal_api.py
import requests
import os
from urllib.parse import quote

# API key is read from the process environment (store it in Hugging Face secrets).
BIOPORTAL_API_KEY = os.environ.get("BIOPORTAL_API_KEY")
# Root of the BioPortal REST API; endpoint paths are appended to this.
BIOPORTAL_BASE_URL = "https://data.bioontology.org"

# Fail at import time when the key is missing or empty, before any request is built.
if not BIOPORTAL_API_KEY:
    raise ValueError("BIOPORTAL_API_KEY not found in environment variables.")

# Authorization header passed to the HTTP requests issued by this module.
HEADERS = {"Authorization": "apikey token=" + BIOPORTAL_API_KEY}


def search_concept(term: str, ontology: str = None, max_results: int = 10, *, timeout: float = 30.0):
    """
    Search for a biomedical concept across BioPortal ontologies.

    Args:
        term (str): The search term (e.g., "BRCA1", "glioblastoma").
        ontology (str): Optional ontology acronym (e.g., "NCIT", "SNOMEDCT").
            When omitted or empty, no ontology filter is sent.
        max_results (int): Maximum number of results (sent as ``pagesize``).
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        list: The ``collection`` entry of the JSON response, or an empty
        list when the response carries no such key.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = {
        "q": term,
        "pagesize": max_results,
    }
    if ontology:
        # The search endpoint filters on ``ontologies`` (comma-separated
        # acronyms); a parameter named ``ontology`` is not recognised, so
        # sending it would leave the search unrestricted.
        params["ontologies"] = ontology

    url = f"{BIOPORTAL_BASE_URL}/search"
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json().get("collection", [])


def get_concept_details(ontology_acronym: str, concept_id: str, *, timeout: float = 30.0):
    """
    Fetch detailed information about a concept given ontology acronym and concept ID.

    Args:
        ontology_acronym (str): Acronym of the ontology that holds the concept.
        concept_id (str): Identifier of the concept; it is percent-encoded
            in full (including ``/`` and ``:``) before being placed in the URL.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Both values become single path segments, so escape every reserved
    # character; otherwise a stray "/" or "?" would change the request path.
    encoded_ontology = quote(ontology_acronym, safe="")
    encoded_id = quote(concept_id, safe="")
    url = f"{BIOPORTAL_BASE_URL}/ontologies/{encoded_ontology}/classes/{encoded_id}"
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    return response.json()


def get_ontologies(timeout: float = 30.0):
    """
    List all available ontologies in BioPortal.

    Args:
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        list[dict]: One dict per ontology with the keys ``acronym``,
        ``name`` and ``description``; a key is ``None`` when the
        corresponding field is absent from the response item.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = f"{BIOPORTAL_BASE_URL}/ontologies"
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    # NOTE(review): "description" may not be part of the ontology objects this
    # endpoint returns (it could live on the submission instead) -- confirm.
    return [
        {"acronym": o.get("acronym"), "name": o.get("name"), "description": o.get("description")}
        for o in response.json()
    ]


def map_term_across_ontologies(term: str, ontologies: list = None):
    """
    Look up a term in several ontologies and collect the top hit from each.

    Args:
        term (str): The term to search for.
        ontologies (list): Ontology acronyms to query. When omitted or
            empty, every acronym returned by ``get_ontologies()`` is used.

    Returns:
        dict: Maps each ontology acronym that produced at least one search
        result to a dict with the keys ``label``, ``uri`` and ``synonyms``
        taken from the first result. Ontologies without results are left out.
    """
    if ontologies:
        acronyms = ontologies
    else:
        acronyms = [entry["acronym"] for entry in get_ontologies()]

    found = {}
    # One search call is made per ontology, asking for a single result.
    for acronym in acronyms:
        hits = search_concept(term, ontology=acronym, max_results=1)
        if not hits:
            continue
        top_hit = hits[0]
        found[acronym] = {
            "label": top_hit.get("prefLabel"),
            "uri": top_hit.get("@id"),
            "synonyms": top_hit.get("synonym", []),
        }
    return found