mgbam committed on
Commit
79df878
·
verified ·
1 Parent(s): d69e21a

Update genesis/api_clients/bioportal_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/bioportal_api.py +79 -65
genesis/api_clients/bioportal_api.py CHANGED
@@ -3,87 +3,101 @@ import os
3
  import requests
4
  from typing import Dict, List, Optional
5
 
6
- BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
7
  BIOPORTAL_BASE = "https://data.bioontology.org"
8
-
9
- if not BIOPORTAL_API_KEY:
10
- raise EnvironmentError("Missing BIOPORTAL_API_KEY in environment variables.")
11
 
12
  # -------------------------
13
- # CORE HELPERS
14
  # -------------------------
15
- def _auth_params(params: Dict) -> Dict:
16
- """Attach BioPortal API key."""
17
- params["apikey"] = BIOPORTAL_API_KEY
18
- return params
19
-
20
- def search_term(term: str, ontologies: Optional[List[str]] = None, max_results: int = 10) -> List[Dict]:
21
  """
22
- Search BioPortal for ontology terms.
23
- `ontologies` is optional list like ["GO", "CHEBI", "NCBITAXON"]
24
  """
25
- params = _auth_params({
26
- "q": term,
27
- "pagesize": max_results
28
- })
29
- if ontologies:
30
- params["ontologies"] = ",".join(ontologies)
31
-
32
- r = requests.get(f"{BIOPORTAL_BASE}/search", params=params)
33
  r.raise_for_status()
34
- data = r.json()
35
-
36
- results = []
37
- for collection in data.get("collection", []):
38
- results.append({
39
- "prefLabel": collection.get("prefLabel"),
40
- "ontology": collection.get("links", {}).get("ontology"),
41
- "definition": collection.get("definition"),
42
- "cui": collection.get("cui"),
43
- "uri": collection.get("@id")
44
- })
45
- return results
46
 
47
- def get_term_details(term_uri: str) -> Dict:
 
 
 
48
  """
49
- Get detailed metadata for a term given its URI.
 
50
  """
51
- params = _auth_params({})
52
- r = requests.get(term_uri, params=params)
53
- r.raise_for_status()
54
- return r.json()
 
55
 
56
- def get_mappings(term_uri: str) -> List[Dict]:
 
 
 
57
  """
58
- Get cross-ontology mappings for a term.
59
- Useful for linking to UMLS, NCBI, ChEMBL.
60
  """
61
- params = _auth_params({})
62
- r = requests.get(f"{term_uri}/mappings", params=params)
63
- r.raise_for_status()
64
- data = r.json()
65
 
66
- mappings = []
67
- for m in data:
68
- mappings.append({
69
- "source": m.get("classes", [{}])[0].get("links", {}).get("ontology"),
70
- "target": m.get("classes", [{}])[-1].get("links", {}).get("ontology"),
71
- "target_uri": m.get("classes", [{}])[-1].get("@id"),
72
- "target_label": m.get("classes", [{}])[-1].get("prefLabel")
73
- })
74
- return mappings
75
 
76
- def find_related_terms(term: str, ontologies: Optional[List[str]] = None) -> List[Dict]:
77
  """
78
- Find related ontology terms using BioPortal search + mappings.
79
  """
80
- results = search_term(term, ontologies=ontologies)
81
- related = []
82
- for r in results:
83
- mappings = get_mappings(r["uri"])
84
- related.append({
85
- "term": r["prefLabel"],
86
- "uri": r["uri"],
87
  "mappings": mappings
88
  })
89
- return related
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import requests
4
  from typing import Dict, List, Optional
5
 
 
6
  BIOPORTAL_BASE = "https://data.bioontology.org"
7
+ BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # Saved in Hugging Face / .env
 
 
8
 
9
  # -------------------------
10
+ # Core Request Helper
11
  # -------------------------
12
def bioportal_request(endpoint: str, params: Dict, timeout: float = 30.0) -> Dict:
    """
    Send an authenticated GET request to the BioPortal REST API.

    Args:
        endpoint: Path appended verbatim to BIOPORTAL_BASE (e.g. "/search").
        params: Query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If BIOPORTAL_API_KEY is empty or unset.
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # The key is checked per call rather than at import time, so importing
    # this module does not fail when the key is missing.
    if not BIOPORTAL_API_KEY:
        raise ValueError("BIOPORTAL_API_KEY not set in environment variables")

    headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    r = requests.get(
        f"{BIOPORTAL_BASE}{endpoint}",
        headers=headers,
        params=params,
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # -------------------------
25
+ # Search Ontology Terms
26
+ # -------------------------
27
def search_terms(query: str, ontology: Optional[str] = None, max_results: int = 10) -> List[Dict]:
    """
    Search ontology terms across all ontologies or within specific ones.

    Args:
        query: Free-text search string, sent as the ``q`` parameter.
        ontology: Optional ontology acronym (e.g., MESH, GO, SNOMEDCT).
            A comma-separated string of acronyms is passed through as-is.
        max_results: Page size requested from the API.

    Returns:
        The ``collection`` list from the search response, or an empty list
        when the response has no such key.
    """
    params = {"q": query, "pagesize": max_results}
    if ontology:
        # The search endpoint's filter parameter is "ontologies" (plural);
        # a singular "ontology" key is not a documented filter.
        params["ontologies"] = ontology
    data = bioportal_request("/search", params)
    return data.get("collection", [])
37
 
38
+ # -------------------------
39
+ # Get Term Details
40
+ # -------------------------
41
def get_term_details(ontology: str, term_id: str) -> Dict:
    """
    Retrieve details for a specific ontology term.

    Args:
        ontology: Ontology acronym used in the request path.
        term_id: Class identifier. It may be given raw or already
            percent-encoded; both forms produce the same request path.

    Returns:
        The decoded JSON document for the class.
    """
    from urllib.parse import quote, unquote

    # Class IDs are typically full IRIs containing "/", ":" and "#". Left
    # unencoded they would be read as extra path segments. Decoding first
    # keeps an already-encoded ID from being double-encoded.
    encoded_id = quote(unquote(term_id), safe="")
    return bioportal_request(f"/ontologies/{ontology}/classes/{encoded_id}", {})
 
 
 
46
 
47
+ # -------------------------
48
+ # Mapping & Crosslinks
49
+ # -------------------------
50
def get_term_mappings(term_id: str, ontology: Optional[str] = None) -> List[Dict]:
    """
    Retrieve mappings for an ontology term to other ontologies.

    Args:
        term_id: Identifier placed in the request path. It may be given raw
            or already percent-encoded.
        ontology: Optional ontology acronym. When given, the class-scoped
            endpoint ``/ontologies/{ontology}/classes/{id}/mappings`` is
            queried; otherwise the original ``/mappings/{id}`` path is used.

    Returns:
        A list of mapping records. A JSON array response is returned as-is;
        for a JSON object, its ``collection`` list (or an empty list).
    """
    from urllib.parse import quote, unquote

    # IDs are typically IRIs; unencoded slashes would split the path.
    encoded_id = quote(unquote(term_id), safe="")
    if ontology:
        endpoint = f"/ontologies/{ontology}/classes/{encoded_id}/mappings"
    else:
        endpoint = f"/mappings/{encoded_id}"

    data = bioportal_request(endpoint, {})
    # NOTE(review): the class-scoped endpoint appears to return a bare JSON
    # array rather than a paged object; confirm against the API docs.
    if isinstance(data, list):
        return data
    return data.get("collection", [])
56
 
57
def search_and_map(query: str, ontology: Optional[str] = None) -> List[Dict]:
    """
    Search for a term and retrieve mappings for every hit.

    Args:
        query: Free-text search string passed to ``search_terms``.
        ontology: Optional ontology acronym passed to ``search_terms``.

    Returns:
        One dict per search hit with the keys ``term`` (the hit's
        ``prefLabel``), ``ontology`` (the hit's ``links.ontology`` value)
        and ``mappings`` (the result of ``get_term_mappings``). A hit
        without an ``@id`` gets an empty ``mappings`` list.
    """
    mapped_results = []
    for t in search_terms(query, ontology):
        # Read "@id" defensively like the other fields, so one malformed
        # hit does not abort the whole search with a KeyError.
        term_id = t.get("@id")
        mappings = get_term_mappings(term_id) if term_id else []
        mapped_results.append({
            "term": t.get("prefLabel", ""),
            "ontology": t.get("links", {}).get("ontology", ""),
            "mappings": mappings
        })
    return mapped_results
71
+
72
+ # -------------------------
73
+ # Integration Helpers
74
+ # -------------------------
75
def ontology_to_entities(query: str) -> Dict:
    """
    Map ontology term → related drugs (ChEMBL) + genes (NCBI) + literature (PubMed).
    This is where cross-domain linking happens.

    Args:
        query: Free-text term used for the ontology search and passed
            unchanged to each of the three other clients.

    Returns:
        A dict with the keys ``ontology_term``, ``ontology_id``, ``drugs``,
        ``genes`` and ``publications``. ``ontology_id`` is the ``@id`` of the
        first search hit, or an empty string when there is none.
    """
    from genesis.api_clients import chembl_api, pubmed_api, ncbi_api  # Lazy import to avoid cycles

    mapped_data = {
        "ontology_term": query,
        # Always present, so callers see the same keys whether or not the
        # ontology search finds anything.
        "ontology_id": "",
        "drugs": [],
        "genes": [],
        "publications": []
    }

    # Ontology search: only the first hit is used, so request one result.
    terms = search_terms(query, max_results=1)
    if terms:
        mapped_data["ontology_id"] = terms[0].get("@id", "")

    # Link to ChEMBL molecules
    mapped_data["drugs"] = chembl_api.search_molecule(query)

    # Link to NCBI genes
    mapped_data["genes"] = ncbi_api.search_gene(query)

    # Link to PubMed literature
    mapped_data["publications"] = pubmed_api.search_pubmed(query)

    return mapped_data