File size: 2,534 Bytes
5b6700c
325ee75
7bfdf8c
 
 
 
 
 
5b6700c
7bfdf8c
 
67ef408
7bfdf8c
 
 
 
 
5755f5b
325ee75
7bfdf8c
67ef408
 
7bfdf8c
 
67ef408
f96a85c
 
7bfdf8c
 
f96a85c
 
7bfdf8c
f96a85c
325ee75
f96a85c
7bfdf8c
 
325ee75
 
7bfdf8c
325ee75
 
f96a85c
 
5b6700c
5755f5b
7bfdf8c
c37fc6d
7bfdf8c
c37fc6d
7bfdf8c
 
f96a85c
7bfdf8c
325ee75
 
7bfdf8c
325ee75
7bfdf8c
325ee75
7bfdf8c
 
325ee75
 
5755f5b
 
7bfdf8c
5755f5b
7bfdf8c
 
5755f5b
7bfdf8c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# genesis/api_clients/bioportal_api.py
import os
import requests
from typing import List, Dict, Optional

# BioPortal REST API: base URL and credentials.
BIOPORTAL_BASE = "https://data.bioontology.org"
# The key comes from the environment; add it to Hugging Face secrets or a .env file.
BIOPORTAL_API_KEY = os.environ.get("BIOPORTAL_API_KEY")

# Refuse to import when the key is unset or empty.
if BIOPORTAL_API_KEY is None or BIOPORTAL_API_KEY == "":
    raise EnvironmentError("BIOPORTAL_API_KEY is missing. Add it to your environment variables or Hugging Face secrets.")

# Authorization header carrying the API key.
HEADERS = dict(Authorization="apikey token=" + BIOPORTAL_API_KEY)

def search_terms(
    query: str,
    ontologies: Optional[List[str]] = None,
    max_results: int = 20,
    *,
    timeout: float = 30.0,
) -> List[Dict]:
    """
    Search BioPortal for ontology terms.

    Optionally limit search to specific ontologies like 'GO', 'CHEBI', 'DOID', etc.

    Args:
        query: Search text, sent as the ``q`` query parameter.
        ontologies: Ontology identifiers to restrict the search to. They are
            joined with commas into a single ``ontologies`` parameter. ``None``
            or an empty list omits the parameter.
        max_results: Sent as the ``pagesize`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        One dict per entry of the response's ``collection``, with the keys
        ``prefLabel``, ``definition``, ``synonyms``, ``ontology``, ``uri``,
        ``cui`` and ``semanticType``. Fields absent from an entry come back as
        ``None`` (``prefLabel``, ``ontology``, ``uri``) or ``[]`` (the rest).

    Raises:
        requests.HTTPError: If the response carries an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {
        "q": query,
        "pagesize": max_results,
    }
    if ontologies:
        params["ontologies"] = ",".join(ontologies)

    response = requests.get(
        f"{BIOPORTAL_BASE}/search",
        headers=HEADERS,
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    payload = response.json()

    # `or` fallbacks also cover keys that are present but JSON null, which
    # a plain `.get(key, default)` would pass through as None.
    return [
        {
            "prefLabel": item.get("prefLabel"),
            "definition": item.get("definition", []),
            "synonyms": item.get("synonym", []),
            "ontology": (item.get("links") or {}).get("ontology"),
            "uri": item.get("@id"),
            "cui": item.get("cui", []),
            "semanticType": item.get("semanticType", []),
        }
        for item in (payload.get("collection") or [])
    ]


def get_term_details(term_uri: str, *, timeout: float = 30.0) -> Dict:
    """
    Fetch full details for a given ontology term URI.

    Args:
        term_uri: Term URI; it is percent-encoded in full (no safe characters)
            before being placed in the request path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response carries an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # NOTE(review): BioPortal's class endpoint appears to be
    # /ontologies/{acronym}/classes/{encoded class IRI}; here the encoded term
    # URI sits where the ontology acronym normally goes — confirm this route
    # actually resolves before relying on it.
    encoded_uri = requests.utils.quote(term_uri, safe="")
    response = requests.get(
        f"{BIOPORTAL_BASE}/ontologies/{encoded_uri}",
        headers=HEADERS,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


def find_related_terms(term_uri: str, *, timeout: float = 30.0) -> List[Dict]:
    """
    Find related terms for a given ontology concept.

    Args:
        term_uri: Term URI; it is percent-encoded in full (no safe characters)
            before being placed in the request path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        The decoded JSON body of the response, returned as-is (its shape is not
        validated here).

    Raises:
        requests.HTTPError: If the response carries an error status code.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # NOTE(review): confirm that BioPortal exposes a `/related` route under
    # /ontologies/{...}; the class-level routes appear to live under
    # /ontologies/{acronym}/classes/{encoded class IRI}/... instead.
    encoded_uri = requests.utils.quote(term_uri, safe="")
    response = requests.get(
        f"{BIOPORTAL_BASE}/ontologies/{encoded_uri}/related",
        headers=HEADERS,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


def map_to_ontology(entity_name: str, preferred_ontologies: Optional[List[str]] = None) -> Optional[Dict]:
    """
    Map a free-text biomedical entity name to an ontology concept.

    Runs a single-result search for ``entity_name`` (restricted to
    ``preferred_ontologies`` when given) and returns that hit, or ``None``
    when the search comes back empty. Handy for linking PubMed entities,
    ChEMBL molecules or clinical trials to ontology terms.
    """
    hits = search_terms(entity_name, ontologies=preferred_ontologies, max_results=1)
    if not hits:
        return None
    return hits[0]