mgbam committed on
Commit
7bfdf8c
·
verified ·
1 Parent(s): a5bfe49

Update genesis/api_clients/bioportal_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/bioportal_api.py +36 -60
genesis/api_clients/bioportal_api.py CHANGED
@@ -1,97 +1,73 @@
1
  # genesis/api_clients/bioportal_api.py
2
- import requests
3
- from typing import Dict, List, Optional
4
  import os
 
 
 
 
 
 
5
 
6
- BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # Stored in Hugging Face secrets
7
- BIOPORTAL_BASE_URL = "https://data.bioontology.org"
8
 
9
- def bioportal_search(term: str, ontologies: Optional[List[str]] = None, exact_match: bool = False, max_results: int = 20) -> List[Dict]:
 
 
 
 
10
  """
11
  Search BioPortal for ontology terms.
12
- ontologies: list of ontology acronyms to restrict search (e.g., ["MESH", "SNOMEDCT", "GO"])
13
- exact_match: if True, only return exact matches
14
  """
15
- if not BIOPORTAL_API_KEY:
16
- raise ValueError("BIOPORTAL_API_KEY not set in environment.")
17
-
18
  params = {
19
- "apikey": BIOPORTAL_API_KEY,
20
- "q": term,
21
- "pagesize": max_results,
22
- "require_exact_match": "true" if exact_match else "false"
23
  }
24
-
25
  if ontologies:
26
  params["ontologies"] = ",".join(ontologies)
27
-
28
- r = requests.get(f"{BIOPORTAL_BASE_URL}/search", params=params)
29
  r.raise_for_status()
30
  data = r.json()
31
-
32
  results = []
33
  for item in data.get("collection", []):
34
  results.append({
35
- "prefLabel": item.get("prefLabel", ""),
36
- "definition": (item.get("definition") or ["No definition"])[0],
37
  "synonyms": item.get("synonym", []),
38
  "ontology": item.get("links", {}).get("ontology"),
39
- "iri": item.get("@id"),
40
  "cui": item.get("cui", []),
41
  "semanticType": item.get("semanticType", []),
42
- "parents": item.get("links", {}).get("parents"),
43
- "children": item.get("links", {}).get("children")
44
  })
45
-
46
  return results
47
 
48
 
49
- def bioportal_autocomplete(prefix: str, ontologies: Optional[List[str]] = None, max_results: int = 10) -> List[str]:
50
  """
51
- Autocomplete search for ontology terms by prefix.
52
  """
53
- if not BIOPORTAL_API_KEY:
54
- raise ValueError("BIOPORTAL_API_KEY not set in environment.")
55
-
56
- params = {
57
- "apikey": BIOPORTAL_API_KEY,
58
- "q": prefix,
59
- "pagesize": max_results
60
- }
61
-
62
- if ontologies:
63
- params["ontologies"] = ",".join(ontologies)
64
-
65
- r = requests.get(f"{BIOPORTAL_BASE_URL}/search", params=params)
66
  r.raise_for_status()
67
- data = r.json()
68
-
69
- return [item.get("prefLabel", "") for item in data.get("collection", []) if item.get("prefLabel")]
70
 
71
 
72
- def get_term_details(ontology_acronym: str, term_id: str) -> Dict:
73
  """
74
- Retrieve full details for a specific term in an ontology.
75
  """
76
- if not BIOPORTAL_API_KEY:
77
- raise ValueError("BIOPORTAL_API_KEY not set in environment.")
78
-
79
- r = requests.get(f"{BIOPORTAL_BASE_URL}/ontologies/{ontology_acronym}/classes/{term_id}", params={"apikey": BIOPORTAL_API_KEY})
80
  r.raise_for_status()
81
  return r.json()
82
 
83
 
84
- def map_to_umls(term: str) -> List[Dict]:
85
  """
86
- Map a biomedical term to UMLS concepts via BioPortal mappings.
 
87
  """
88
- mappings = bioportal_search(term)
89
- umls_concepts = []
90
- for m in mappings:
91
- if "cui" in m and m["cui"]:
92
- umls_concepts.append({
93
- "term": m["prefLabel"],
94
- "cui": m["cui"],
95
- "semanticType": m["semanticType"]
96
- })
97
- return umls_concepts
 
1
  # genesis/api_clients/bioportal_api.py
 
 
2
  import os
3
+ import requests
4
+ from typing import List, Dict, Optional
5
+
6
+ # BioPortal API
7
+ BIOPORTAL_BASE = "https://data.bioontology.org"
8
+ BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # You must add this to Hugging Face secrets or .env
9
 
10
+ if not BIOPORTAL_API_KEY:
11
+ raise EnvironmentError("BIOPORTAL_API_KEY is missing. Add it to your environment variables or Hugging Face secrets.")
12
 
13
+ HEADERS = {
14
+ "Authorization": f"apikey token={BIOPORTAL_API_KEY}"
15
+ }
16
+
17
+ def search_terms(query: str, ontologies: Optional[List[str]] = None, max_results: int = 20) -> List[Dict]:
18
  """
19
  Search BioPortal for ontology terms.
20
+ Optionally limit search to specific ontologies like 'GO', 'CHEBI', 'DOID', etc.
 
21
  """
 
 
 
22
  params = {
23
+ "q": query,
24
+ "pagesize": max_results
 
 
25
  }
 
26
  if ontologies:
27
  params["ontologies"] = ",".join(ontologies)
28
+
29
+ r = requests.get(f"{BIOPORTAL_BASE}/search", headers=HEADERS, params=params)
30
  r.raise_for_status()
31
  data = r.json()
32
+
33
  results = []
34
  for item in data.get("collection", []):
35
  results.append({
36
+ "prefLabel": item.get("prefLabel"),
37
+ "definition": item.get("definition", []),
38
  "synonyms": item.get("synonym", []),
39
  "ontology": item.get("links", {}).get("ontology"),
40
+ "uri": item.get("@id"),
41
  "cui": item.get("cui", []),
42
  "semanticType": item.get("semanticType", []),
 
 
43
  })
 
44
  return results
45
 
46
 
47
+ def get_term_details(term_uri: str) -> Dict:
48
  """
49
+ Fetch full details for a given ontology term URI.
50
  """
51
+ encoded_uri = requests.utils.quote(term_uri, safe="")
52
+ r = requests.get(f"{BIOPORTAL_BASE}/ontologies/{encoded_uri}", headers=HEADERS)
 
 
 
 
 
 
 
 
 
 
 
53
  r.raise_for_status()
54
+ return r.json()
 
 
55
 
56
 
57
+ def find_related_terms(term_uri: str) -> List[Dict]:
58
  """
59
+ Find related terms for a given ontology concept.
60
  """
61
+ encoded_uri = requests.utils.quote(term_uri, safe="")
62
+ r = requests.get(f"{BIOPORTAL_BASE}/ontologies/{encoded_uri}/related", headers=HEADERS)
 
 
63
  r.raise_for_status()
64
  return r.json()
65
 
66
 
67
+ def map_to_ontology(entity_name: str, preferred_ontologies: Optional[List[str]] = None) -> Optional[Dict]:
68
  """
69
+ Try to map a free-text biomedical entity name to an ontology concept.
70
+ Useful for linking PubMed entities, ChEMBL molecules, clinical trials to ontology terms.
71
  """
72
+ matches = search_terms(entity_name, ontologies=preferred_ontologies, max_results=1)
73
+ return matches[0] if matches else None