mgbam committed on
Commit
f96a85c
·
verified ·
1 Parent(s): 9fbaf8f

Update genesis/api_clients/bioportal_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/bioportal_api.py +48 -57
genesis/api_clients/bioportal_api.py CHANGED
@@ -1,78 +1,69 @@
1
  # genesis/api_clients/bioportal_api.py
2
  import requests
3
- import os
4
- from urllib.parse import quote
5
 
6
- BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY") # Store this in Hugging Face secrets
7
  BIOPORTAL_BASE_URL = "https://data.bioontology.org"
 
8
 
9
- if not BIOPORTAL_API_KEY:
10
- raise ValueError("BIOPORTAL_API_KEY not found in environment variables.")
11
-
12
- HEADERS = {
13
- "Authorization": f"apikey token={BIOPORTAL_API_KEY}"
14
- }
15
-
16
-
17
- def search_concept(term: str, ontology: str = None, max_results: int = 10):
18
  """
19
- Search for a biomedical concept across BioPortal ontologies.
20
-
21
- Args:
22
- term (str): The search term (e.g., "BRCA1", "glioblastoma").
23
- ontology (str): Optional ontology acronym (e.g., "NCIT", "SNOMEDCT").
24
- max_results (int): Maximum number of results.
25
  """
 
26
  params = {
27
  "q": term,
28
  "pagesize": max_results
29
  }
30
- if ontology:
31
- params["ontology"] = ontology
32
-
33
- url = f"{BIOPORTAL_BASE_URL}/search"
34
- response = requests.get(url, headers=HEADERS, params=params)
35
- response.raise_for_status()
36
- return response.json().get("collection", [])
37
 
 
 
 
38
 
39
- def get_concept_details(ontology_acronym: str, concept_id: str):
40
- """
41
- Fetch detailed information about a concept given ontology acronym and concept ID.
42
- """
43
- encoded_id = quote(concept_id, safe="")
44
- url = f"{BIOPORTAL_BASE_URL}/ontologies/{ontology_acronym}/classes/{encoded_id}"
45
- response = requests.get(url, headers=HEADERS)
46
- response.raise_for_status()
47
- return response.json()
 
 
48
 
49
 
50
- def get_ontologies():
51
  """
52
- List all available ontologies in BioPortal.
53
  """
54
- url = f"{BIOPORTAL_BASE_URL}/ontologies"
55
- response = requests.get(url, headers=HEADERS)
56
- response.raise_for_status()
57
- return [
58
- {"acronym": o.get("acronym"), "name": o.get("name"), "description": o.get("description")}
59
- for o in response.json()
60
- ]
 
 
 
 
 
 
61
 
62
 
63
- def map_term_across_ontologies(term: str, ontologies: list = None):
64
  """
65
- Map a term across multiple ontologies to unify synonyms and cross-references.
 
66
  """
67
- mappings = {}
68
- target_ontologies = ontologies or [o["acronym"] for o in get_ontologies()]
69
-
70
- for ont in target_ontologies:
71
- results = search_concept(term, ontology=ont, max_results=1)
72
- if results:
73
- mappings[ont] = {
74
- "label": results[0].get("prefLabel"),
75
- "uri": results[0].get("@id"),
76
- "synonyms": results[0].get("synonym", []),
77
- }
78
- return mappings
 
1
  # genesis/api_clients/bioportal_api.py
2
import os
from typing import List, Dict, Optional

import requests
 
4
 
 
5
# Root of the BioPortal REST API; endpoint paths are appended to it.
BIOPORTAL_BASE_URL = "https://data.bioontology.org"

# Read the key from the process environment instead of hard-coding None,
# which made every request send "apikey token=None".
# NOTE(review): assumes the deployment (Hugging Face secret or a loaded local
# .env) exports a variable named BIOPORTAL_API_KEY -- confirm the name.
# Stays None when the variable is unset; callers may still assign this
# module attribute before making requests.
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
7
 
8
def search_term(
    term: str,
    ontologies: Optional[List[str]] = None,
    max_results: int = 20,
    *,
    timeout: float = 30.0,
) -> List[Dict]:
    """
    Search BioPortal for a term across one or more ontologies.

    When ``ontologies`` is omitted or empty, no ``ontologies`` filter is sent,
    so BioPortal is expected to search all available ontologies.

    Args:
        term (str): The text to search for; sent as the ``q`` parameter.
        ontologies (Optional[List[str]]): Ontology identifiers to restrict the
            search to. They are comma-joined into the ``ontologies``
            parameter. A single bare string is accepted and treated as a
            one-element list.
        max_results (int): Sent as the ``pagesize`` parameter.
        timeout (float): Seconds to wait for the HTTP request before
            ``requests`` gives up. Keyword-only.

    Returns:
        List[Dict]: One dict per entry of the response's ``collection``, with
        the keys ``pref_label``, ``definition``, ``synonyms``, ``ontology``,
        ``iri`` and ``score``.

    Raises:
        requests.HTTPError: If the response status is an error
            (via ``raise_for_status``).
    """
    headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
    params = {
        "q": term,
        "pagesize": max_results,
    }
    if ontologies:
        # A bare string would otherwise be joined character by character
        # ("NCIT" -> "N,C,I,T").
        if isinstance(ontologies, str):
            ontologies = [ontologies]
        params["ontologies"] = ",".join(ontologies)

    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.get(
        f"{BIOPORTAL_BASE_URL}/search",
        headers=headers,
        params=params,
        timeout=timeout,
    )
    r.raise_for_status()
    data = r.json()

    results = []
    for result in data.get("collection") or []:
        # `or` also covers a present-but-empty/None value, which a plain
        # .get(key, default)[0] would crash on.
        definitions = result.get("definition") or ["No definition found"]
        links = result.get("links") or {}
        results.append({
            "pref_label": result.get("prefLabel"),
            "definition": definitions[0],
            "synonyms": result.get("synonym") or [],
            "ontology": links.get("ontology"),
            "iri": result.get("@id"),
            "score": result.get("score", 0),
        })
    return results
36
 
37
 
38
def get_concept_details(
    ontology_acronym: str,
    concept_id: str,
    *,
    timeout: float = 30.0,
) -> Dict:
    """
    Fetch detailed metadata for a specific ontology concept.

    Args:
        ontology_acronym (str): Ontology segment of the request path
            (``/ontologies/<ontology_acronym>/classes/...``).
        concept_id (str): Identifier of the concept. Pass it raw
            (un-encoded); it is percent-encoded here before being placed in
            the URL path.
        timeout (float): Seconds to wait for the HTTP request before
            ``requests`` gives up. Keyword-only.

    Returns:
        Dict: A dict with the keys ``pref_label``, ``definition``,
        ``synonyms``, ``properties`` and ``relations`` (the response's
        ``links`` object).

    Raises:
        requests.HTTPError: If the response status is an error
            (via ``raise_for_status``).
    """
    # Imported locally so this function does not rely on a module-level import.
    from urllib.parse import quote

    # Concept IDs are usually full IRIs; their "/", ":" and "#" characters
    # must be escaped or they are parsed as URL structure. safe="" makes
    # quote() escape "/" as well.
    encoded_id = quote(concept_id, safe="")

    headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
    r = requests.get(
        f"{BIOPORTAL_BASE_URL}/ontologies/{ontology_acronym}/classes/{encoded_id}",
        headers=headers,
        timeout=timeout,  # avoid hanging forever on a stalled connection
    )
    r.raise_for_status()
    data = r.json()

    # `or` also covers a present-but-empty/None value, which a plain
    # .get(key, default)[0] would crash on.
    definitions = data.get("definition") or ["No definition found"]

    return {
        "pref_label": data.get("prefLabel"),
        "definition": definitions[0],
        "synonyms": data.get("synonym") or [],
        "properties": data.get("properties") or {},
        "relations": data.get("links") or {},
    }
55
 
56
 
57
def map_to_ontologies(term: str, ontologies: List[str]) -> Dict[str, List[str]]:
    """
    Map a given term to equivalent terms in multiple ontologies.

    Runs a single ``search_term`` query restricted to ``ontologies`` and
    groups the preferred labels of the hits by the ontology they came from.

    Args:
        term (str): The text to search for.
        ontologies (List[str]): Ontology identifiers passed through to
            ``search_term``.

    Returns:
        Dict[str, List[str]]: Maps each hit's ``"ontology"`` value (as
        reported by ``search_term``) to the list of ``"pref_label"`` values
        found for it, in result order. Hits without an ontology value are
        grouped under ``"Unknown"``. A label may be ``None`` when the search
        result carried no preferred label.
    """
    mapping: Dict[str, List[str]] = {}
    for res in search_term(term, ontologies=ontologies):
        # search_term always sets the "ontology" key, possibly to None, so a
        # .get() default would never apply; `or` handles the None value too.
        onto_name = res.get("ontology") or "Unknown"
        mapping.setdefault(onto_name, []).append(res.get("pref_label"))
    return mapping