Spaces:
Sleeping
Sleeping
Update genesis/ontology.py
Browse files- genesis/ontology.py +72 -8
genesis/ontology.py
CHANGED
@@ -1,39 +1,59 @@
|
|
1 |
# genesis/ontology.py
|
2 |
"""
|
3 |
-
Ontology Expansion for GENESIS-AI
|
4 |
-
Uses UMLS & BioPortal to find related terms, synonyms, and hierarchies
|
|
|
5 |
"""
|
6 |
|
7 |
import os
|
8 |
import requests
|
|
|
|
|
|
|
|
|
9 |
|
10 |
UMLS_API_KEY = os.getenv("UMLS_API_KEY")
|
11 |
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
14 |
"""Expand term using BioPortal ontology search."""
|
15 |
try:
|
16 |
url = f"https://data.bioontology.org/search?q={term}&apikey={BIOPORTAL_API_KEY}"
|
17 |
-
r = requests.get(url)
|
18 |
r.raise_for_status()
|
19 |
data = r.json()
|
20 |
return list({res["prefLabel"] for res in data.get("collection", []) if "prefLabel" in res})
|
21 |
except Exception as e:
|
|
|
22 |
return {"error": str(e)}
|
23 |
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
"""Expand term using UMLS API."""
|
26 |
try:
|
27 |
url = f"https://uts-ws.nlm.nih.gov/rest/search/current?string={term}&apiKey={UMLS_API_KEY}"
|
28 |
-
r = requests.get(url)
|
29 |
r.raise_for_status()
|
30 |
data = r.json()
|
31 |
return [res["name"] for res in data.get("result", {}).get("results", []) if "name" in res]
|
32 |
except Exception as e:
|
|
|
33 |
return {"error": str(e)}
|
34 |
|
35 |
-
|
|
|
|
|
|
|
|
|
36 |
"""Combine BioPortal and UMLS expansions."""
|
|
|
37 |
bioportal_terms = expand_with_bioportal(term)
|
38 |
umls_terms = expand_with_umls(term)
|
39 |
results = set()
|
@@ -41,4 +61,48 @@ def expand_terms_with_ontology(term):
|
|
41 |
results.update(bioportal_terms)
|
42 |
if isinstance(umls_terms, list):
|
43 |
results.update(umls_terms)
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# genesis/ontology.py
|
2 |
"""
|
3 |
+
Ontology Expansion & Merging for GENESIS-AI
|
4 |
+
Uses UMLS & BioPortal to find related terms, synonyms, and hierarchies,
|
5 |
+
and provides merging utilities for deduplication.
|
6 |
"""
|
7 |
|
8 |
import os
|
9 |
import requests
|
10 |
+
import logging
|
11 |
+
from typing import List, Dict, Union
|
12 |
+
|
13 |
+
logging.basicConfig(level=logging.INFO)
|
14 |
|
15 |
UMLS_API_KEY = os.getenv("UMLS_API_KEY")
|
16 |
BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")
|
17 |
|
18 |
+
|
19 |
+
# -------------------------
|
20 |
+
# BioPortal Expansion
|
21 |
+
# -------------------------
|
22 |
+
def expand_with_bioportal(term: str) -> Union[List[str], Dict]:
    """Expand a term using the BioPortal ontology search endpoint.

    Args:
        term: Free-text term to search for.

    Returns:
        The distinct ``prefLabel`` values found in the response's
        ``collection``, in first-seen order. On any failure the exception is
        logged and ``{"error": "<message>"}`` is returned instead of raising.
    """
    try:
        r = requests.get(
            "https://data.bioontology.org/search",
            # Let requests build the query string so that terms containing
            # spaces, "&", "#" or non-ASCII characters are percent-encoded.
            params={"q": term},
            # The key is sent as a header rather than in the URL: exception
            # messages embed the request URL, and they are logged and returned.
            headers={"Authorization": f"apikey token={BIOPORTAL_API_KEY}"},
            timeout=20,
        )
        r.raise_for_status()
        data = r.json()
        labels = (
            res["prefLabel"]
            for res in data.get("collection", [])
            if "prefLabel" in res
        )
        # dict.fromkeys removes duplicates while keeping a stable order.
        return list(dict.fromkeys(labels))
    except Exception as e:
        # Deliberately broad: callers rely on an error dict, never an exception.
        logging.error(f"[Ontology] BioPortal expansion failed: {e}")
        return {"error": str(e)}
|
33 |
|
34 |
+
|
35 |
+
# -------------------------
|
36 |
+
# UMLS Expansion
|
37 |
+
# -------------------------
|
38 |
+
def expand_with_umls(term: str) -> Union[List[str], Dict]:
    """Expand a term using the UMLS (UTS) search endpoint.

    Args:
        term: Free-text term to search for.

    Returns:
        The ``name`` of every entry under ``result.results`` in the response.
        On any failure the exception is logged and
        ``{"error": "<message>"}`` is returned instead of raising.
    """
    try:
        r = requests.get(
            "https://uts-ws.nlm.nih.gov/rest/search/current",
            # Let requests build the query string so that terms containing
            # spaces, "&", "#" or non-ASCII characters are percent-encoded.
            params={"string": term, "apiKey": UMLS_API_KEY},
            timeout=20,
        )
        r.raise_for_status()
        data = r.json()
        return [
            res["name"]
            for res in data.get("result", {}).get("results", [])
            # NOTE(review): an empty result page is presumably signalled by a
            # placeholder entry with ui == "NONE"; confirm against the API.
            if "name" in res and res.get("ui") != "NONE"
        ]
    except Exception as e:
        # Deliberately broad: callers rely on an error dict, never an exception.
        message = str(e)
        if UMLS_API_KEY:
            # The key is part of the request URL, which exception text can
            # include; mask it before it reaches logs or callers.
            message = message.replace(UMLS_API_KEY, "***")
        logging.error(f"[Ontology] UMLS expansion failed: {message}")
        return {"error": message}
|
49 |
|
50 |
+
|
51 |
+
# -------------------------
|
52 |
+
# Combined Expansion
|
53 |
+
# -------------------------
|
54 |
+
def expand_terms_with_ontology(term: str) -> List[str]:
    """Combine the BioPortal and UMLS expansions of a term.

    Args:
        term: Free-text term to expand.

    Returns:
        The distinct expanded terms from both sources, BioPortal results
        first, in first-seen order. If neither source yields anything, a
        single-element list holding the original ``term``.
    """
    logging.info(f"[Ontology] Expanding term: {term}")
    bioportal_terms = expand_with_bioportal(term)
    umls_terms = expand_with_umls(term)

    combined = []
    for source_terms in (bioportal_terms, umls_terms):
        # A failed lookup returns an {"error": ...} dict; only real result
        # lists may contribute, otherwise "error" itself would become a term.
        if isinstance(source_terms, list):
            combined.extend(source_terms)

    # dict.fromkeys removes duplicates while keeping a stable order.
    final_terms = list(dict.fromkeys(combined)) or [term]
    logging.info(f"[Ontology] Found {len(final_terms)} expanded terms.")
    return final_terms
|
67 |
+
|
68 |
+
|
69 |
+
# -------------------------
|
70 |
+
# Merge Ontology Terms
|
71 |
+
# -------------------------
|
72 |
+
def merge_ontology_terms(ontology_lists: List[List[Dict]]) -> List[Dict]:
    """Merge ontology term lists from multiple sources and remove duplicates.

    Args:
        ontology_lists: A list where each element is a list of ontology term
            dicts, e.g. ``{"id": "GO:0008150", "label": "biological_process"}``.
            ``None`` (for the whole argument or for a single source) is
            treated as an empty list, and non-dict entries are ignored.

    Returns:
        One ``{"id", "label", "source"}`` dict per unique term, in first-seen
        order. The key used for uniqueness is the first truthy value among
        ``id``, ``iri`` and ``label``; terms with none of these are dropped.
        When a key repeats, the first occurrence wins.
    """
    logging.info("[Ontology] Merging ontology term lists...")
    merged_terms = {}

    for source_list in ontology_lists or []:
        if source_list is None:
            # A source that produced nothing should not abort the merge.
            continue
        for term in source_list:
            if not isinstance(term, dict):
                continue
            term_id = term.get("id") or term.get("iri") or term.get("label")
            if not term_id or term_id in merged_terms:
                continue
            merged_terms[term_id] = {
                "id": term_id,
                "label": term.get("label", ""),
                "source": term.get("source", "unknown"),
            }

    merged_list = list(merged_terms.values())
    logging.info(f"[Ontology] Merged total {len(merged_list)} unique terms.")
    return merged_list
|
101 |
+
|
102 |
+
|
103 |
+
# Names exported by ``from genesis.ontology import *``.
__all__ = [
    "expand_with_bioportal",
    "expand_with_umls",
    "expand_terms_with_ontology",
    "merge_ontology_terms",
]
|