mgbam committed on
Commit
6409f04
·
verified ·
1 Parent(s): eea1b53

Update genesis/api_clients/bioportal_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/bioportal_api.py +57 -72
genesis/api_clients/bioportal_api.py CHANGED
@@ -1,103 +1,88 @@
1
  # genesis/api_clients/bioportal_api.py
2
  import os
3
  import requests
4
- from typing import Dict, List, Optional
5
 
 
6
  BIOPORTAL_BASE = "https://data.bioontology.org"
7
- BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # Saved in Hugging Face / .env
8
 
9
  # -------------------------
10
- # Core Request Helper
11
  # -------------------------
12
def bioportal_request(endpoint: str, params: Dict) -> Dict:
    """
    Send an authenticated GET request to the BioPortal REST API.

    Args:
        endpoint: Path appended to ``BIOPORTAL_BASE`` (e.g. ``"/search"``).
        params: Query-string parameters for the request.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If ``BIOPORTAL_API_KEY`` is not set.
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    if not BIOPORTAL_API_KEY:
        raise ValueError("BIOPORTAL_API_KEY not set in environment variables")

    headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    r = requests.get(
        f"{BIOPORTAL_BASE}{endpoint}",
        headers=headers,
        params=params,
        timeout=30,
    )
    r.raise_for_status()
    return r.json()
23
 
24
- # -------------------------
25
- # Search Ontology Terms
26
- # -------------------------
27
def search_terms(query: str, ontology: Optional[str] = None, max_results: int = 10) -> List[Dict]:
    """
    Look up ontology terms matching ``query`` via the ``/search`` endpoint.

    Args:
        query: Free-text search string.
        ontology: Optional ontology acronym (e.g., MESH, GO, SNOMEDCT) used
            to restrict the search; omitted from the request when falsy.
        max_results: Page size requested from the API.

    Returns:
        The ``collection`` list of the response, or ``[]`` when absent.
    """
    # Only send the ontology filter when the caller actually supplied one.
    ontology_filter = {"ontology": ontology} if ontology else {}
    payload = bioportal_request(
        "/search",
        {"q": query, "pagesize": max_results, **ontology_filter},
    )
    return payload.get("collection", [])
37
 
38
  # -------------------------
39
  # Get Term Details
40
  # -------------------------
41
def get_term_details(ontology: str, term_id: str) -> Dict:
    """
    Retrieve details for a specific ontology term.

    Args:
        ontology: Ontology acronym used in the request path.
        term_id: Identifier of the class. Full IRIs are accepted, either raw
            or already percent-encoded.

    Returns:
        The decoded JSON document returned by ``bioportal_request``.
    """
    from urllib.parse import quote, unquote  # Local import keeps the block self-contained

    # A class id is usually a full IRI ("http://..."); its ":" and "/" must be
    # percent-encoded to stay inside a single path segment. Unquoting first
    # keeps ids that callers already encoded from being double-encoded.
    encoded_id = quote(unquote(term_id), safe="")
    return bioportal_request(f"/ontologies/{ontology}/classes/{encoded_id}", {})
 
 
 
 
 
46
 
47
  # -------------------------
48
- # Mapping & Crosslinks
49
  # -------------------------
50
def get_term_mappings(term_id: str) -> List[Dict]:
    """
    Retrieve mappings for an ontology term to other ontologies.

    Args:
        term_id: Identifier placed after ``/mappings/`` in the request path.
            Full IRIs are accepted, either raw or already percent-encoded.

    Returns:
        The ``collection`` list of the response, or ``[]`` when absent.
    """
    from urllib.parse import quote, unquote  # Local import keeps the block self-contained

    # search_and_map passes a term's "@id" here, which is a full IRI; encode it
    # so its ":" and "/" do not split the URL path. Unquoting first avoids
    # double-encoding ids that were already encoded by the caller.
    encoded_id = quote(unquote(term_id), safe="")
    # NOTE(review): confirm "/mappings/{id}" is the intended endpoint for
    # per-class mappings; the BioPortal docs should be checked.
    data = bioportal_request(f"/mappings/{encoded_id}", {})
    return data.get("collection", [])
56
-
57
def search_and_map(query: str, ontology: Optional[str] = None) -> List[Dict]:
    """
    Search for terms and attach each hit's mappings.

    Args:
        query: Free-text search string forwarded to ``search_terms``.
        ontology: Optional ontology acronym forwarded to ``search_terms``.

    Returns:
        One dict per search hit with the keys ``term`` (preferred label),
        ``ontology`` (ontology link) and ``mappings``.

    Raises:
        KeyError: If a search hit has no ``"@id"`` entry.
    """
    def _with_mappings(hit: Dict) -> Dict:
        # One mappings request is issued per hit, keyed by the hit's "@id".
        crosslinks = get_term_mappings(hit["@id"])
        return {
            "term": hit.get("prefLabel", ""),
            "ontology": hit.get("links", {}).get("ontology", ""),
            "mappings": crosslinks,
        }

    return [_with_mappings(hit) for hit in search_terms(query, ontology)]
71
 
72
  # -------------------------
73
- # Integration Helpers
74
  # -------------------------
75
def ontology_to_entities(query: str) -> Dict:
    """
    Link an ontology query to drugs (ChEMBL), genes (NCBI) and literature (PubMed).

    Args:
        query: Term sent unchanged to the ontology search and to each of the
            ChEMBL, NCBI and PubMed clients.

    Returns:
        A dict with ``ontology_term``, ``drugs``, ``genes`` and
        ``publications``; ``ontology_id`` is added (from the first search
        hit's ``"@id"``) only when the ontology search returned results.
    """
    from genesis.api_clients import chembl_api, pubmed_api, ncbi_api  # Lazy import to avoid cycles

    # Ontology lookup runs first; only the top hit contributes an id.
    hits = search_terms(query)
    top_id = hits[0].get("@id", "") if hits else None

    # Cross-domain lookups, in the order ChEMBL -> NCBI -> PubMed.
    molecules = chembl_api.search_molecule(query)
    genes = ncbi_api.search_gene(query)
    papers = pubmed_api.search_pubmed(query)

    linked = {
        "ontology_term": query,
        "drugs": molecules,
        "genes": genes,
        "publications": papers,
    }
    if hits:
        linked["ontology_id"] = top_id
    return linked
 
1
  # genesis/api_clients/bioportal_api.py
2
  import os
3
  import requests
4
+ from typing import List, Dict
5
 
6
+ BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # Set in Hugging Face secrets or .env
7
  BIOPORTAL_BASE = "https://data.bioontology.org"
 
8
 
9
  # -------------------------
10
+ # Search Ontology Terms
11
  # -------------------------
12
def search_ontology(term: str, max_results: int = 5) -> List[Dict]:
    """
    Search BioPortal for matching ontology terms across multiple vocabularies.

    Args:
        term: Free-text search string.
        max_results: Page size requested from the API.

    Returns:
        One dict per hit with the keys ``prefLabel``, ``ontology``,
        ``definition`` (first definition or ``""``), ``iri``, ``synonyms``
        and ``cui``.

    Raises:
        ValueError: If ``BIOPORTAL_API_KEY`` is not set.
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    if not BIOPORTAL_API_KEY:
        raise ValueError("BioPortal API key not set in environment variables.")

    # Send the key in the Authorization header rather than the query string:
    # the request URL is embedded in HTTPError messages and commonly logged,
    # so a query-string key would leak there.
    response = requests.get(
        f"{BIOPORTAL_BASE}/search",
        params={"q": term, "pagesize": max_results},
        headers={"Authorization": f"apikey token={BIOPORTAL_API_KEY}"},
        timeout=30,  # requests never times out unless told to
    )
    response.raise_for_status()

    def _first_definition(item: Dict) -> str:
        # "definition" may be missing or empty; fall back to an empty string.
        definitions = item.get("definition")
        return definitions[0] if definitions else ""

    return [
        {
            "prefLabel": item.get("prefLabel"),
            "ontology": item.get("links", {}).get("ontology"),
            "definition": _first_definition(item),
            "iri": item.get("@id"),
            "synonyms": item.get("synonym", []),
            "cui": item.get("cui", []),  # UMLS Concept Unique Identifiers if available
        }
        for item in response.json().get("collection", [])
    ]
40
 
41
  # -------------------------
42
  # Get Term Details
43
  # -------------------------
44
def get_term_details(ontology_acronym: str, term_id: str) -> Dict:
    """
    Get full details for a specific ontology term.

    Args:
        ontology_acronym: Ontology acronym used in the request path.
        term_id: Identifier of the class. Full IRIs are accepted, either raw
            or already percent-encoded.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If ``BIOPORTAL_API_KEY`` is not set.
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    from urllib.parse import quote, unquote  # Local import keeps the block self-contained

    # Fail fast with the same error as search_ontology instead of sending an
    # unauthenticated request.
    if not BIOPORTAL_API_KEY:
        raise ValueError("BioPortal API key not set in environment variables.")

    # A class id is usually a full IRI ("http://..."); its ":" and "/" must be
    # percent-encoded to stay inside a single path segment. Unquoting first
    # keeps ids that callers already encoded from being double-encoded.
    encoded_id = quote(unquote(term_id), safe="")

    response = requests.get(
        f"{BIOPORTAL_BASE}/ontologies/{ontology_acronym}/classes/{encoded_id}",
        # Header auth keeps the key out of the URL, which appears in
        # HTTPError messages and logs.
        headers={"Authorization": f"apikey token={BIOPORTAL_API_KEY}"},
        timeout=30,  # requests never times out unless told to
    )
    response.raise_for_status()
    return response.json()
54
 
55
  # -------------------------
56
+ # Map Term to UMLS CUI
57
  # -------------------------
58
def map_to_umls(term: str) -> List[str]:
    """
    Collect the UMLS Concept Unique Identifiers (CUIs) found for ``term``.

    Args:
        term: Free-text search string forwarded to ``search_ontology``.

    Returns:
        The de-duplicated CUIs gathered from every search hit; the order of
        the returned list is not defined.
    """
    # Flatten the "cui" lists of all hits, skipping hits without any.
    collected = [
        cui
        for hit in search_ontology(term)
        if hit.get("cui")
        for cui in hit["cui"]
    ]
    return list(set(collected))
 
 
 
 
68
 
69
  # -------------------------
70
+ # Cross-Domain Semantic Context
71
  # -------------------------
72
def ontology_context(term: str) -> Dict:
    """
    Bundle ontology hits, their UMLS CUIs and PubMed literature for ``term``.

    Args:
        term: Free-text string sent to both the ontology search and PubMed.

    Returns:
        A dict with ``ontology_terms`` (the ``search_ontology`` results),
        ``umls_cuis`` (de-duplicated CUIs from those results, in no defined
        order) and ``literature`` (the ``pubmed_api.search_pubmed`` result).
    """
    from genesis.api_clients import pubmed_api  # Lazy import

    matches = search_ontology(term)

    # Flatten the "cui" lists of all hits, skipping hits without any.
    collected = [
        cui
        for match in matches
        if match.get("cui")
        for cui in match["cui"]
    ]

    context = {
        "ontology_terms": matches,
        "umls_cuis": list(set(collected)),
    }
    # PubMed is queried last, after the ontology data has been assembled.
    context["literature"] = pubmed_api.search_pubmed(term)
    return context