mgbam committed on
Commit
78f5084
·
verified ·
1 Parent(s): 05b255c

Update genesis/ontology.py

Browse files
Files changed (1) hide show
  1. genesis/ontology.py +23 -43
genesis/ontology.py CHANGED
@@ -1,8 +1,7 @@
1
  # genesis/ontology.py
2
  """
3
  Ontology Expansion & Merging for GENESIS-AI
4
- Uses UMLS & BioPortal to find related terms, synonyms, and hierarchies,
5
- and provides merging utilities for deduplication.
6
  """
7
 
8
  import os
@@ -48,61 +47,42 @@ def expand_with_umls(term: str) -> Union[List[str], Dict]:
48
  return {"error": str(e)}
49
 
50
 
51
- # -------------------------
52
- # Combined Expansion
53
- # -------------------------
54
- def expand_terms_with_ontology(term: str) -> List[str]:
55
- """Combine BioPortal and UMLS expansions."""
56
- logging.info(f"[Ontology] Expanding term: {term}")
57
- bioportal_terms = expand_with_bioportal(term)
58
- umls_terms = expand_with_umls(term)
59
- results = set()
60
- if isinstance(bioportal_terms, list):
61
- results.update(bioportal_terms)
62
- if isinstance(umls_terms, list):
63
- results.update(umls_terms)
64
- final_terms = list(results) if results else [term]
65
- logging.info(f"[Ontology] Found {len(final_terms)} expanded terms.")
66
- return final_terms
67
-
68
-
69
  # -------------------------
70
  # Merge Ontology Terms
71
  # -------------------------
72
- def merge_ontology_terms(ontology_lists: List[List[Dict]]) -> List[Dict]:
 
 
73
  """
74
- Merge ontology term lists from multiple sources and remove duplicates.
75
 
76
  Args:
77
- ontology_lists: A list where each element is a list of ontology term dicts.
78
- Example term: {"id": "GO:0008150", "label": "biological_process"}
 
79
 
80
  Returns:
81
- Merged list of unique ontology terms.
82
  """
83
- logging.info("[Ontology] Merging ontology term lists...")
84
- merged_terms = {}
85
-
86
- for source_list in ontology_lists:
87
- for term in source_list:
88
- if not isinstance(term, dict):
89
- continue
90
- term_id = term.get("id") or term.get("iri") or term.get("label")
91
- if term_id and term_id not in merged_terms:
92
- merged_terms[term_id] = {
93
- "id": term_id,
94
- "label": term.get("label", ""),
95
- "source": term.get("source", "unknown")
96
- }
97
-
98
- merged_list = list(merged_terms.values())
99
- logging.info(f"[Ontology] Merged total {len(merged_list)} unique terms.")
100
  return merged_list
101
 
102
 
103
  __all__ = [
104
  "expand_with_bioportal",
105
  "expand_with_umls",
106
- "expand_terms_with_ontology",
107
  "merge_ontology_terms"
108
  ]
 
1
  # genesis/ontology.py
2
  """
3
  Ontology Expansion & Merging for GENESIS-AI
4
+ Uses UMLS & BioPortal to find related terms, synonyms, and hierarchies.
 
5
  """
6
 
7
  import os
 
47
  return {"error": str(e)}
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # -------------------------
51
  # Merge Ontology Terms
52
  # -------------------------
53
+ def merge_ontology_terms(query_term: str,
54
+ umls_terms: Union[List[str], Dict, None] = None,
55
+ bioportal_terms: Union[List[str], Dict, None] = None) -> List[str]:
56
  """
57
+ Merge the original query term with expanded terms from UMLS and BioPortal.
58
 
59
  Args:
60
+ query_term: Original search term.
61
+ umls_terms: List or error dict from expand_with_umls().
62
+ bioportal_terms: List or error dict from expand_with_bioportal().
63
 
64
  Returns:
65
+ List of unique merged terms (strings).
66
  """
67
+ logging.info(f"[Ontology] Merging terms for query: {query_term}")
68
+ results = set()
69
+
70
+ if query_term:
71
+ results.add(query_term.strip())
72
+
73
+ if isinstance(umls_terms, list):
74
+ results.update(t.strip() for t in umls_terms if t and isinstance(t, str))
75
+
76
+ if isinstance(bioportal_terms, list):
77
+ results.update(t.strip() for t in bioportal_terms if t and isinstance(t, str))
78
+
79
+ merged_list = list(results)
80
+ logging.info(f"[Ontology] Merged {len(merged_list)} unique terms.")
 
 
 
81
  return merged_list
82
 
83
 
84
  __all__ = [
85
  "expand_with_bioportal",
86
  "expand_with_umls",
 
87
  "merge_ontology_terms"
88
  ]