mgbam committed on
Commit
8e4dca9
·
verified ·
1 Parent(s): ddd033a

Update genesis/ontology.py

Browse files
Files changed (1) hide show
  1. genesis/ontology.py +72 -8
genesis/ontology.py CHANGED
@@ -1,39 +1,59 @@
1
  # genesis/ontology.py
2
  """
3
- Ontology Expansion for GENESIS-AI
4
- Uses UMLS & BioPortal to find related terms, synonyms, and hierarchies.
 
5
  """
6
 
7
  import os
8
  import requests
 
 
 
 
9
 
10
  UMLS_API_KEY = os.getenv("UMLS_API_KEY")
11
  BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
12
 
13
- def expand_with_bioportal(term):
 
 
 
 
14
  """Expand term using BioPortal ontology search."""
15
  try:
16
  url = f"https://data.bioontology.org/search?q={term}&apikey={BIOPORTAL_API_KEY}"
17
- r = requests.get(url)
18
  r.raise_for_status()
19
  data = r.json()
20
  return list({res["prefLabel"] for res in data.get("collection", []) if "prefLabel" in res})
21
  except Exception as e:
 
22
  return {"error": str(e)}
23
 
24
- def expand_with_umls(term):
 
 
 
 
25
  """Expand term using UMLS API."""
26
  try:
27
  url = f"https://uts-ws.nlm.nih.gov/rest/search/current?string={term}&apiKey={UMLS_API_KEY}"
28
- r = requests.get(url)
29
  r.raise_for_status()
30
  data = r.json()
31
  return [res["name"] for res in data.get("result", {}).get("results", []) if "name" in res]
32
  except Exception as e:
 
33
  return {"error": str(e)}
34
 
35
- def expand_terms_with_ontology(term):
 
 
 
 
36
  """Combine BioPortal and UMLS expansions."""
 
37
  bioportal_terms = expand_with_bioportal(term)
38
  umls_terms = expand_with_umls(term)
39
  results = set()
@@ -41,4 +61,48 @@ def expand_terms_with_ontology(term):
41
  results.update(bioportal_terms)
42
  if isinstance(umls_terms, list):
43
  results.update(umls_terms)
44
- return list(results) if results else [term]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # genesis/ontology.py
2
  """
3
+ Ontology Expansion & Merging for GENESIS-AI
4
+ Uses UMLS & BioPortal to find related terms, synonyms, and hierarchies,
5
+ and provides merging utilities for deduplication.
6
  """
7
 
8
  import os
9
  import requests
10
+ import logging
11
+ from typing import List, Dict, Union
12
+
13
+ logging.basicConfig(level=logging.INFO)
14
 
15
  UMLS_API_KEY = os.getenv("UMLS_API_KEY")
16
  BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
17
 
18
+
19
+ # -------------------------
20
+ # BioPortal Expansion
21
+ # -------------------------
22
def expand_with_bioportal(term: str) -> Union[List[str], Dict]:
    """Expand a term into preferred labels using the BioPortal search API.

    Args:
        term: Free-text term to look up.

    Returns:
        The unique ``prefLabel`` values found in the response ``collection``,
        in the order the API returned them. On any failure a dict of the form
        ``{"error": "<message>"}`` is returned instead of raising.
    """
    try:
        # Let requests build the query string so that spaces, "&" or "#" in
        # the term cannot truncate the query or inject extra parameters.
        r = requests.get(
            "https://data.bioontology.org/search",
            params={"q": term, "apikey": BIOPORTAL_API_KEY},
            timeout=20,
        )
        r.raise_for_status()
        data = r.json()
        labels = (
            res["prefLabel"]
            for res in data.get("collection", [])
            if "prefLabel" in res
        )
        # dict.fromkeys de-duplicates while keeping the API's ordering, so the
        # result is deterministic (a set would shuffle between runs).
        return list(dict.fromkeys(labels))
    except Exception as e:
        # Best-effort boundary: callers expect an error dict, never a raise.
        # requests embeds the full URL (including the API key) in its error
        # messages, so scrub the key before it reaches logs or callers.
        message = str(e)
        if BIOPORTAL_API_KEY:
            message = message.replace(BIOPORTAL_API_KEY, "***")
        logging.error("[Ontology] BioPortal expansion failed: %s", message)
        return {"error": message}
33
 
34
+
35
+ # -------------------------
36
+ # UMLS Expansion
37
+ # -------------------------
38
def expand_with_umls(term: str) -> Union[List[str], Dict]:
    """Expand a term into concept names using the UMLS (UTS) search API.

    Args:
        term: Free-text term to look up.

    Returns:
        The ``name`` of every entry under ``result.results`` in the response,
        in API order. On any failure a dict of the form
        ``{"error": "<message>"}`` is returned instead of raising.
    """
    try:
        # Let requests build the query string so that spaces, "&" or "#" in
        # the term cannot truncate the query or inject extra parameters.
        r = requests.get(
            "https://uts-ws.nlm.nih.gov/rest/search/current",
            params={"string": term, "apiKey": UMLS_API_KEY},
            timeout=20,
        )
        r.raise_for_status()
        data = r.json()
        results = data.get("result", {}).get("results", [])
        # NOTE(review): UMLS is documented to signal an empty result page with
        # a placeholder entry (ui "NONE", name "NO RESULTS"); it is filtered
        # out here so it is not reported as a real expansion term.
        return [
            res["name"]
            for res in results
            if "name" in res and res.get("ui") != "NONE"
        ]
    except Exception as e:
        # Best-effort boundary: callers expect an error dict, never a raise.
        # requests embeds the full URL (including the API key) in its error
        # messages, so scrub the key before it reaches logs or callers.
        message = str(e)
        if UMLS_API_KEY:
            message = message.replace(UMLS_API_KEY, "***")
        logging.error("[Ontology] UMLS expansion failed: %s", message)
        return {"error": message}
49
 
50
+
51
+ # -------------------------
52
+ # Combined Expansion
53
+ # -------------------------
54
def expand_terms_with_ontology(term: str) -> List[str]:
    """Combine BioPortal and UMLS expansions into one de-duplicated list.

    Args:
        term: Free-text term to expand.

    Returns:
        The unique terms returned by ``expand_with_bioportal`` followed by
        those from ``expand_with_umls``, in first-seen order. If neither
        source yields anything, ``[term]`` is returned.
    """
    logging.info("[Ontology] Expanding term: %s", term)
    bioportal_terms = expand_with_bioportal(term)
    umls_terms = expand_with_umls(term)

    # A dict is used as an ordered set so the output order is deterministic
    # (BioPortal results first, then UMLS) instead of varying between runs.
    merged: Dict[str, None] = {}
    for source_terms in (bioportal_terms, umls_terms):
        # A failed lookup returns an {"error": ...} dict; only real result
        # lists may contribute terms.
        if isinstance(source_terms, list):
            merged.update(dict.fromkeys(source_terms))

    final_terms = list(merged) if merged else [term]
    logging.info("[Ontology] Found %d expanded terms.", len(final_terms))
    return final_terms
67
+
68
+
69
+ # -------------------------
70
+ # Merge Ontology Terms
71
+ # -------------------------
72
def merge_ontology_terms(ontology_lists: List[List[Dict]]) -> List[Dict]:
    """Merge ontology term lists from multiple sources and remove duplicates.

    Each term is keyed by the first truthy value among its ``"id"``,
    ``"iri"`` and ``"label"`` entries; the first term seen for a key wins and
    later duplicates are dropped. Entries that are not dicts, and terms with
    no usable key, are skipped.

    Args:
        ontology_lists: A list where each element is a list of ontology term
            dicts, e.g. ``{"id": "GO:0008150", "label": "biological_process"}``.
            ``None`` (for the whole argument or for an individual source
            list) is treated as empty.

    Returns:
        Unique terms in first-seen order, each normalised to a dict with the
        keys ``"id"``, ``"label"`` (default ``""``) and ``"source"``
        (default ``"unknown"``).
    """
    logging.info("[Ontology] Merging ontology term lists...")
    merged_terms: Dict[str, Dict] = {}

    for source_list in ontology_lists or ():
        # A source that produced nothing may be passed as None; skip it
        # rather than failing the whole merge.
        if not source_list:
            continue
        for term in source_list:
            if not isinstance(term, dict):
                continue
            term_id = term.get("id") or term.get("iri") or term.get("label")
            if not term_id or term_id in merged_terms:
                continue
            merged_terms[term_id] = {
                "id": term_id,
                "label": term.get("label", ""),
                "source": term.get("source", "unknown"),
            }

    merged_list = list(merged_terms.values())
    logging.info("[Ontology] Merged total %d unique terms.", len(merged_list))
    return merged_list
101
+
102
+
103
+ __all__ = [
104
+ "expand_with_bioportal",
105
+ "expand_with_umls",
106
+ "expand_terms_with_ontology",
107
+ "merge_ontology_terms"
108
+ ]