AD2000X committed on
Commit
611ee07
·
verified ·
1 Parent(s): d26b1f1

Update src/ontology_manager.py

Browse files
Files changed (1) hide show
  1. src/ontology_manager.py +448 -439
src/ontology_manager.py CHANGED
@@ -1,440 +1,449 @@
1
- # src/ontology_manager.py
2
-
3
- import json
4
- import networkx as nx
5
- from typing import Dict, List, Any, Optional, Union, Set
6
-
7
class OntologyManager:
    """
    Load an ontology description from JSON and expose it as a queryable graph.

    Classes and instances become nodes of a ``networkx.MultiDiGraph``;
    ``subClassOf``, ``instanceOf`` and instance-level relationships become
    edges whose ``type`` attribute names the relation.
    """

    def __init__(self, ontology_path: str):
        """
        Read the ontology file and build the graph from it.

        Args:
            ontology_path: Path to the JSON file containing the ontology model
        """
        self.ontology_path = ontology_path
        self.ontology_data = self._load_ontology()
        self.graph = self._build_graph()

    def _load_ontology(self) -> Dict:
        """Parse and return the JSON document stored at ``self.ontology_path``."""
        with open(self.ontology_path, 'r') as handle:
            parsed = json.load(handle)
        return parsed

    def _build_graph(self) -> "nx.MultiDiGraph":
        """
        Assemble the directed multigraph described by ``self.ontology_data``.

        Also stores ``self.relationship_info``, a mapping from relationship
        name to its schema definition.
        """
        data = self.ontology_data
        graph = nx.MultiDiGraph()

        # One node per class, plus an edge to its parent when it declares one.
        for name, spec in data["classes"].items():
            graph.add_node(
                name,
                type="class",
                description=spec.get("description", ""),
                properties=spec.get("properties", []),
            )
            if "subClassOf" in spec:
                graph.add_edge(name, spec["subClassOf"], type="subClassOf")

        # Schema-level relationship definitions, indexed by name.
        by_name = {}
        for definition in data["relationships"]:
            by_name[definition["name"]] = definition
        self.relationship_info = by_name

        # One node per instance, linked to its class and to related instances.
        for entry in data["instances"]:
            node_id = entry["id"]
            class_name = entry["type"]
            graph.add_node(
                node_id,
                type="instance",
                class_type=class_name,
                properties=entry.get("properties", {}),
            )
            graph.add_edge(node_id, class_name, type="instanceOf")

            for link in entry.get("relationships", []):
                graph.add_edge(node_id, link["target"], type=link["type"])

        return graph

    def get_classes(self) -> List[str]:
        """Return the names of every class declared in the ontology."""
        return [*self.ontology_data["classes"].keys()]

    def get_class_hierarchy(self) -> Dict[str, List[str]]:
        """Return a mapping from each class name to its direct subclasses."""
        tree = {name: [] for name in self.get_classes()}

        for name, spec in self.ontology_data["classes"].items():
            if "subClassOf" not in spec:
                continue
            parent = spec["subClassOf"]
            # Parents that are not declared classes are silently ignored.
            if parent in tree:
                tree[parent].append(name)

        return tree

    def get_instances_of_class(self, class_name: str, include_subclasses: bool = True) -> List[str]:
        """
        Collect the IDs of instances belonging to a class.

        Args:
            class_name: The name of the class
            include_subclasses: Whether to include instances of subclasses

        Returns:
            A list of instance IDs
        """
        def belongs_to(attrs, wanted):
            return attrs.get("type") == "instance" and attrs.get("class_type") == wanted

        if not include_subclasses:
            # Direct instances only.
            return [
                node for node, attrs in self.graph.nodes(data=True)
                if belongs_to(attrs, class_name)
            ]

        # The class itself plus every transitive subclass.
        wanted_classes = set(self._get_all_subclasses(class_name))
        wanted_classes.add(class_name)

        return [
            node
            for wanted in wanted_classes
            for node, attrs in self.graph.nodes(data=True)
            if belongs_to(attrs, wanted)
        ]

    def _get_all_subclasses(self, class_name: str) -> List[str]:
        """Return all direct and indirect subclasses of ``class_name``, depth first."""
        children = [
            src for src, dst, attrs in self.graph.edges(data=True)
            if dst == class_name and attrs.get("type") == "subClassOf"
        ]

        collected = []
        for child in children:
            collected += [child, *self._get_all_subclasses(child)]
        return collected

    def get_relationships(self, entity_id: str, relationship_type: Optional[str] = None) -> List[Dict]:
        """
        List the non-structural relationships touching an entity.

        ``instanceOf`` and ``subClassOf`` edges are never reported.

        Args:
            entity_id: The ID of the entity
            relationship_type: Optional relationship type to filter by

        Returns:
            A list of dictionaries containing relationship information;
            outgoing relationships come first, then incoming ones
        """
        structural = ("instanceOf", "subClassOf")
        found = []

        # Edges leaving the entity.
        for _, dst, attrs in self.graph.out_edges(entity_id, data=True):
            kind = attrs.get("type")
            if kind in structural:
                continue
            if relationship_type is not None and kind != relationship_type:
                continue
            found.append({"type": kind, "target": dst, "direction": "outgoing"})

        # Edges arriving at the entity.
        for src, _, attrs in self.graph.in_edges(entity_id, data=True):
            kind = attrs.get("type")
            if kind in structural:
                continue
            if relationship_type is not None and kind != relationship_type:
                continue
            found.append({"type": kind, "source": src, "direction": "incoming"})

        return found

    def find_paths(self, source_id: str, target_id: str, max_length: int = 3) -> List[List[Dict]]:
        """
        Enumerate simple paths between two entities.

        Args:
            source_id: Starting entity ID
            target_id: Target entity ID
            max_length: Maximum path length, passed to networkx as ``cutoff``

        Returns:
            A list of paths, where each path is a list of relationship dictionaries
        """
        described = []

        node_paths = nx.all_simple_paths(self.graph, source_id, target_id, cutoff=max_length)
        for nodes in node_paths:
            hops = []
            for start, end in zip(nodes, nodes[1:]):
                # A multigraph may hold several parallel edges; report each one.
                parallel = self.graph.get_edge_data(start, end)
                for attrs in (parallel or {}).values():
                    hops.append({
                        "source": start,
                        "target": end,
                        "type": attrs.get("type", "unknown"),
                    })
            described.append(hops)

        return described

    def get_entity_info(self, entity_id: str) -> Dict:
        """
        Describe a single class or instance.

        Args:
            entity_id: The ID of the entity

        Returns:
            A dictionary with entity information; empty when the entity is
            not a node of the graph
        """
        if entity_id not in self.graph:
            return {}

        attrs = self.graph.nodes[entity_id]
        kind = attrs.get("type")

        if kind == "class":
            return {
                "id": entity_id,
                "type": kind,
                "description": attrs.get("description", ""),
                "properties": attrs.get("properties", []),
                "subclasses": self._get_all_subclasses(entity_id),
                "instances": self.get_instances_of_class(entity_id),
            }

        if kind == "instance":
            owner = attrs.get("class_type")
            owner_spec = self.ontology_data["classes"].get(owner, {})
            return {
                "id": entity_id,
                "type": kind,
                "class": owner,
                "class_description": owner_spec.get("description", ""),
                "properties": attrs.get("properties", {}),
                "relationships": self.get_relationships(entity_id),
            }

        # Neither a class nor an instance: hand back the raw node attributes.
        return attrs

    def get_text_representation(self) -> str:
        """
        Render the whole ontology as plain text suitable for embedding.

        Returns:
            A string containing the textual representation of the ontology
        """
        data = self.ontology_data
        chunks = []

        # Class definitions
        for class_id, spec in data["classes"].items():
            parts = [
                f"Class: {class_id}\n",
                f"Description: {spec.get('description', '')}\n",
            ]
            if "subClassOf" in spec:
                parts.append(f"{class_id} is a subclass of {spec['subClassOf']}.\n")
            if "properties" in spec:
                parts.append(f"{class_id} has properties: {', '.join(spec['properties'])}.\n")
            chunks.append("".join(parts))

        # Relationship definitions
        for rel in data["relationships"]:
            parts = [
                f"Relationship: {rel['name']}\n",
                f"Domain: {rel['domain']}, Range: {rel['range']}\n",
                f"Description: {rel.get('description', '')}\n",
                f"Cardinality: {rel.get('cardinality', 'many-to-many')}\n",
            ]
            if "inverse" in rel:
                parts.append(f"The inverse relationship is {rel['inverse']}.\n")
            chunks.append("".join(parts))

        # Rules
        for rule in data.get("rules", []):
            chunks.append(
                f"Rule: {rule.get('id', '')}\n"
                + f"Description: {rule.get('description', '')}\n"
            )

        # Instance data
        for entry in data["instances"]:
            parts = [f"Instance: {entry['id']}\n", f"Type: {entry['type']}\n"]

            if "properties" in entry:
                rendered = [
                    f"{key}: {', '.join(str(v) for v in value)}"
                    if isinstance(value, list)
                    else f"{key}: {value}"
                    for key, value in entry["properties"].items()
                ]
                if rendered:
                    parts.append("Properties:\n- " + "\n- ".join(rendered) + "\n")

            if "relationships" in entry:
                links = [f"{link['type']} {link['target']}" for link in entry["relationships"]]
                if links:
                    parts.append("Relationships:\n- " + "\n- ".join(links) + "\n")

            chunks.append("".join(parts))

        return "\n\n".join(chunks)

    def query_by_relationship(self, source_type: str, relationship: str, target_type: str) -> List[Dict]:
        """
        Find instance pairs connected by a given relationship.

        Args:
            source_type: Type of the source entity
            relationship: Type of relationship
            target_type: Type of the target entity

        Returns:
            A list of matching relationship dictionaries
        """
        matches = []

        for source_id in self.get_instances_of_class(source_type):
            for rel in self.get_relationships(source_id, relationship):
                if rel["direction"] != "outgoing" or "target" not in rel:
                    continue

                target_id = rel["target"]
                target_attrs = self.graph.nodes[target_id]

                # The target must be an instance whose class is exactly target_type.
                if target_attrs.get("type") != "instance":
                    continue
                if target_attrs.get("class_type") != target_type:
                    continue

                matches.append({
                    "source": source_id,
                    "source_properties": self.graph.nodes[source_id].get("properties", {}),
                    "relationship": relationship,
                    "target": target_id,
                    "target_properties": target_attrs.get("properties", {}),
                })

        return matches

    def get_semantic_context(self, query: str) -> List[str]:
        """
        Retrieve ontology context relevant to a free-text query.

        Class and relationship names are matched as case-insensitive
        substrings of the query. When nothing matches, a general summary of
        the ontology is returned instead.

        Args:
            query: The query string to analyze

        Returns:
            A list of text chunks providing relevant ontological context
        """
        # Deliberately simple: plain substring matching, no entity recognition.
        needle = query.lower()
        chunks = []

        # Classes mentioned in the query
        for class_id in self.get_classes():
            if class_id.lower() not in needle:
                continue

            spec = self.ontology_data["classes"][class_id]
            parts = [f"Class {class_id}: {spec.get('description', '')}\n"]
            if "subClassOf" in spec:
                parent = spec["subClassOf"]
                parts.append(f"{class_id} is a subclass of {parent}.\n")
            if "properties" in spec:
                parts.append(f"{class_id} has properties: {', '.join(spec['properties'])}.\n")
            chunks.append("".join(parts))

            # Up to three direct instances as examples
            samples = self.get_instances_of_class(class_id, include_subclasses=False)[:3]
            if samples:
                lines = [f"Examples of {class_id}:\n"]
                for inst_id in samples:
                    props = self.graph.nodes[inst_id].get("properties", {})
                    if "name" in props:
                        lines.append(f"- {inst_id} ({props['name']})\n")
                    else:
                        lines.append(f"- {inst_id}\n")
                chunks.append("".join(lines))

        # Relationships mentioned in the query
        for rel in self.ontology_data["relationships"]:
            if rel["name"].lower() not in needle:
                continue

            parts = [
                f"Relationship {rel['name']}: {rel.get('description', '')}\n",
                f"This relationship connects {rel['domain']} to {rel['range']}.\n",
            ]

            examples = self.query_by_relationship(rel['domain'], rel['name'], rel['range'])[:3]
            if examples:
                parts.append("Examples:\n")
                for ex in examples:
                    source_name = ex["source_properties"].get("name", ex["source"])
                    target_name = ex["target_properties"].get("name", ex["target"])
                    parts.append(f"- {source_name} {rel['name']} {target_name}\n")

            chunks.append("".join(parts))

        # Nothing specific found: fall back to a general overview
        if not chunks:
            top_classes = [
                name for name, spec in self.ontology_data["classes"].items()
                if "subClassOf" not in spec or spec["subClassOf"] == "Entity"
            ]
            if top_classes:
                lines = ["Main classes in the ontology:\n"]
                for name in top_classes:
                    desc = self.ontology_data["classes"][name].get("description", "")
                    lines.append(f"- {name}: {desc}\n")
                chunks.append("".join(lines))

            if self.ontology_data["relationships"]:
                lines = ["Key relationships in the ontology:\n"]
                for rel in self.ontology_data["relationships"][:5]:  # first five only
                    lines.append(f"- {rel['name']}: {rel.get('description', '')}\n")
                chunks.append("".join(lines))

        return chunks
 
1
+ # src/ontology_manager.py
2
+
3
+ import json
4
+ import networkx as nx
5
+ from typing import Dict, List, Any, Optional, Union, Set
6
+
7
class OntologyManager:
    """
    Manages the ontology model and provides methods for querying and navigating
    the ontological structure.

    The ontology is read from a JSON document with a required ``"classes"``
    mapping and optional ``"relationships"``, ``"instances"`` and ``"rules"``
    lists. Classes and instances become nodes of a ``networkx.MultiDiGraph``;
    ``subClassOf``, ``instanceOf`` and instance-level relationships become
    edges whose ``type`` attribute names the relation.
    """

    def __init__(self, ontology_path: str):
        """
        Initialize the ontology manager with a path to the ontology JSON file.

        Args:
            ontology_path: Path to the JSON file containing the ontology model
        """
        self.ontology_path = ontology_path
        self.ontology_data = self._load_ontology()
        self.graph = self._build_graph()

    def _load_ontology(self) -> Dict:
        """
        Load the ontology from the JSON file.

        The file is decoded as UTF-8 explicitly so that loading does not
        depend on the platform's default encoding.
        """
        with open(self.ontology_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _build_graph(self) -> "nx.MultiDiGraph":
        """
        Build and return the ontology graph from the JSON data.

        The graph is created locally and returned: ``__init__`` assigns the
        result to ``self.graph``, so ``self.graph`` does not exist yet while
        this method runs. As a side effect, ``self.relationship_info`` is set
        to a mapping from relationship name to its schema definition.

        Returns:
            The populated ``networkx.MultiDiGraph``
        """
        data = self.ontology_data
        graph = nx.MultiDiGraph()

        # Add classes and their subclass edges (child -> parent).
        for class_id, class_data in data["classes"].items():
            graph.add_node(
                class_id,
                type="class",
                description=class_data.get("description", ""),
                properties=class_data.get("properties", []),
            )
            if "subClassOf" in class_data:
                graph.add_edge(class_id, class_data["subClassOf"], type="subClassOf")

        # Schema-level relationships are metadata only; they add no edges.
        self.relationship_info = {
            rel["name"]: rel
            for rel in data.get("relationships", [])
            if "name" in rel
        }

        # Add instances, their class membership and their relationships.
        for instance in data.get("instances", []):
            instance_id = instance["id"]
            class_type = instance["type"]

            graph.add_node(
                instance_id,
                type="instance",
                class_type=class_type,
                properties=instance.get("properties", {}),
            )
            graph.add_edge(instance_id, class_type, type="instanceOf")

            for rel in instance.get("relationships", []):
                target = rel.get("target")
                rel_type = rel.get("type")
                # Incomplete relationship entries are skipped, not fatal.
                if target and rel_type:
                    graph.add_edge(instance_id, target, type=rel_type)

        return graph

    def get_classes(self) -> List[str]:
        """Return a list of all class names in the ontology."""
        return list(self.ontology_data["classes"])

    def get_class_hierarchy(self) -> Dict[str, List[str]]:
        """Return a dictionary mapping each class to its direct subclasses."""
        hierarchy = {class_id: [] for class_id in self.get_classes()}

        for class_id, class_data in self.ontology_data["classes"].items():
            if "subClassOf" not in class_data:
                continue
            parent = class_data["subClassOf"]
            # Parents that are not declared classes are ignored.
            if parent in hierarchy:
                hierarchy[parent].append(class_id)

        return hierarchy

    def get_instances_of_class(self, class_name: str, include_subclasses: bool = True) -> List[str]:
        """
        Get all instances of a given class.

        Args:
            class_name: The name of the class
            include_subclasses: Whether to include instances of subclasses

        Returns:
            A list of instance IDs, in graph node order
        """
        wanted = {class_name}
        if include_subclasses:
            wanted.update(self._get_all_subclasses(class_name))

        # A single pass over the nodes keeps the result order stable,
        # independent of set iteration order.
        return [
            node for node, attr in self.graph.nodes(data=True)
            if attr.get("type") == "instance" and attr.get("class_type") in wanted
        ]

    def _get_all_subclasses(self, class_name: str) -> List[str]:
        """
        Get all direct and indirect subclasses of a given class.

        The traversal is depth first, iterative, and remembers visited
        classes, so a cyclic ``subClassOf`` chain terminates and no class is
        listed twice.

        Args:
            class_name: The class whose descendants are wanted

        Returns:
            Subclass names in depth-first order, excluding ``class_name``
        """
        # Index parent -> children once rather than rescanning every edge
        # at each level of the hierarchy.
        children_of = {}
        for src, dst, data in self.graph.edges(data=True):
            if data.get("type") == "subClassOf":
                children_of.setdefault(dst, []).append(src)

        ordered = []
        seen = {class_name}
        stack = list(reversed(children_of.get(class_name, [])))
        while stack:
            current = stack.pop()
            if current in seen:
                continue
            seen.add(current)
            ordered.append(current)
            stack.extend(reversed(children_of.get(current, [])))

        return ordered

    def get_relationships(self, entity_id: str, relationship_type: Optional[str] = None) -> List[Dict]:
        """
        Get all relationships for a given entity, optionally filtered by type.

        ``instanceOf`` and ``subClassOf`` edges are never reported.

        Args:
            entity_id: The ID of the entity
            relationship_type: Optional relationship type to filter by

        Returns:
            A list of dictionaries containing relationship information,
            outgoing first and then incoming; empty for an unknown entity
        """
        # Unknown entities have no relationships, mirroring get_entity_info.
        if entity_id not in self.graph:
            return []

        structural = ("instanceOf", "subClassOf")
        relationships = []

        # Outgoing edges
        for _, target, data in self.graph.out_edges(entity_id, data=True):
            rel_type = data.get("type")
            if rel_type in structural:
                continue
            if relationship_type is None or rel_type == relationship_type:
                relationships.append({
                    "type": rel_type,
                    "target": target,
                    "direction": "outgoing",
                })

        # Incoming edges
        for source, _, data in self.graph.in_edges(entity_id, data=True):
            rel_type = data.get("type")
            if rel_type in structural:
                continue
            if relationship_type is None or rel_type == relationship_type:
                relationships.append({
                    "type": rel_type,
                    "source": source,
                    "direction": "incoming",
                })

        return relationships

    def find_paths(self, source_id: str, target_id: str, max_length: int = 3) -> List[List[Dict]]:
        """
        Find all paths between two entities up to a maximum length.

        Args:
            source_id: Starting entity ID
            target_id: Target entity ID
            max_length: Maximum path length, passed to networkx as ``cutoff``

        Returns:
            A list of paths, where each path is a list of relationship
            dictionaries; empty when either entity is not in the graph
        """
        # Treat unknown endpoints as "no path", mirroring get_entity_info.
        if source_id not in self.graph or target_id not in self.graph:
            return []

        paths = []
        simple_paths = nx.all_simple_paths(self.graph, source_id, target_id, cutoff=max_length)

        for path in simple_paths:
            path_with_edges = []
            for source, target in zip(path, path[1:]):
                # A multigraph may hold several parallel edges; report each one.
                edges = self.graph.get_edge_data(source, target) or {}
                for data in edges.values():
                    path_with_edges.append({
                        "source": source,
                        "target": target,
                        "type": data.get("type", "unknown"),
                    })
            paths.append(path_with_edges)

        return paths

    def get_entity_info(self, entity_id: str) -> Dict:
        """
        Get detailed information about an entity.

        Args:
            entity_id: The ID of the entity

        Returns:
            A dictionary with entity information; empty when the entity is
            not a node of the graph
        """
        if entity_id not in self.graph:
            return {}

        node_data = self.graph.nodes[entity_id]
        entity_type = node_data.get("type")

        if entity_type == "instance":
            class_type = node_data.get("class_type")
            class_info = self.ontology_data["classes"].get(class_type, {})
            return {
                "id": entity_id,
                "type": entity_type,
                "class": class_type,
                "class_description": class_info.get("description", ""),
                "properties": node_data.get("properties", {}),
                "relationships": self.get_relationships(entity_id),
            }

        if entity_type == "class":
            return {
                "id": entity_id,
                "type": entity_type,
                "description": node_data.get("description", ""),
                "properties": node_data.get("properties", []),
                "subclasses": self._get_all_subclasses(entity_id),
                "instances": self.get_instances_of_class(entity_id),
            }

        # Neither a class nor an instance: return the raw node attributes.
        return node_data

    def get_text_representation(self) -> str:
        """
        Generate a text representation of the ontology for embedding.

        Missing ``"relationships"``, ``"rules"`` and ``"instances"`` sections
        are treated as empty, matching what ``_build_graph`` accepts.

        Returns:
            A string containing the textual representation of the ontology
        """
        data = self.ontology_data
        text_chunks = []

        # Class definitions
        for class_id, class_data in data["classes"].items():
            parts = [
                f"Class: {class_id}\n",
                f"Description: {class_data.get('description', '')}\n",
            ]
            if "subClassOf" in class_data:
                parts.append(f"{class_id} is a subclass of {class_data['subClassOf']}.\n")
            if "properties" in class_data:
                props = ", ".join(str(p) for p in class_data["properties"])
                parts.append(f"{class_id} has properties: {props}.\n")
            text_chunks.append("".join(parts))

        # Relationship definitions
        for rel in data.get("relationships", []):
            parts = [
                f"Relationship: {rel['name']}\n",
                f"Domain: {rel['domain']}, Range: {rel['range']}\n",
                f"Description: {rel.get('description', '')}\n",
                f"Cardinality: {rel.get('cardinality', 'many-to-many')}\n",
            ]
            if "inverse" in rel:
                parts.append(f"The inverse relationship is {rel['inverse']}.\n")
            text_chunks.append("".join(parts))

        # Rules
        for rule in data.get("rules", []):
            text_chunks.append(
                f"Rule: {rule.get('id', '')}\n"
                f"Description: {rule.get('description', '')}\n"
            )

        # Instance data
        for instance in data.get("instances", []):
            parts = [f"Instance: {instance['id']}\n", f"Type: {instance['type']}\n"]

            if "properties" in instance:
                props = [
                    f"{key}: {', '.join(str(v) for v in value)}"
                    if isinstance(value, list)
                    else f"{key}: {value}"
                    for key, value in instance["properties"].items()
                ]
                if props:
                    parts.append("Properties:\n- " + "\n- ".join(props) + "\n")

            if "relationships" in instance:
                rels = [f"{rel['type']} {rel['target']}" for rel in instance["relationships"]]
                if rels:
                    parts.append("Relationships:\n- " + "\n- ".join(rels) + "\n")

            text_chunks.append("".join(parts))

        return "\n\n".join(text_chunks)

    def query_by_relationship(self, source_type: str, relationship: str, target_type: str) -> List[Dict]:
        """
        Query for instances connected by a specific relationship.

        Source instances include those of subclasses of ``source_type``;
        targets must be instances whose class is exactly ``target_type``.

        Args:
            source_type: Type of the source entity
            relationship: Type of relationship
            target_type: Type of the target entity

        Returns:
            A list of matching relationship dictionaries
        """
        results = []

        for source_id in self.get_instances_of_class(source_type):
            for rel in self.get_relationships(source_id, relationship):
                if rel["direction"] != "outgoing" or "target" not in rel:
                    continue

                target_id = rel["target"]
                target_data = self.graph.nodes[target_id]

                if (target_data.get("type") == "instance"
                        and target_data.get("class_type") == target_type):
                    results.append({
                        "source": source_id,
                        "source_properties": self.graph.nodes[source_id].get("properties", {}),
                        "relationship": relationship,
                        "target": target_id,
                        "target_properties": target_data.get("properties", {}),
                    })

        return results

    def get_semantic_context(self, query: str) -> List[str]:
        """
        Retrieve relevant semantic context from the ontology based on a query.

        Class and relationship names are matched as case-insensitive
        substrings of the query. When nothing matches, a general summary of
        the ontology is returned instead. A missing ``"relationships"``
        section is treated as empty, matching what ``_build_graph`` accepts.

        Args:
            query: The query string to analyze

        Returns:
            A list of text chunks providing relevant ontological context
        """
        # This is a simple implementation - a more sophisticated one would use
        # entity recognition and semantic parsing.
        query_lower = query.lower()
        classes = self.ontology_data["classes"]
        relationships = self.ontology_data.get("relationships", [])
        context_chunks = []

        # Check for class mentions
        for class_id in self.get_classes():
            if class_id.lower() not in query_lower:
                continue

            class_data = classes[class_id]
            parts = [f"Class {class_id}: {class_data.get('description', '')}\n"]
            if "subClassOf" in class_data:
                parts.append(f"{class_id} is a subclass of {class_data['subClassOf']}.\n")
            if "properties" in class_data:
                props = ", ".join(str(p) for p in class_data["properties"])
                parts.append(f"{class_id} has properties: {props}.\n")
            context_chunks.append("".join(parts))

            # Also add up to three direct instances as examples
            instances = self.get_instances_of_class(class_id, include_subclasses=False)[:3]
            if instances:
                lines = [f"Examples of {class_id}:\n"]
                for inst_id in instances:
                    props = self.graph.nodes[inst_id].get("properties", {})
                    if "name" in props:
                        lines.append(f"- {inst_id} ({props['name']})\n")
                    else:
                        lines.append(f"- {inst_id}\n")
                context_chunks.append("".join(lines))

        # Check for relationship mentions
        for rel in relationships:
            if rel["name"].lower() not in query_lower:
                continue

            parts = [
                f"Relationship {rel['name']}: {rel.get('description', '')}\n",
                f"This relationship connects {rel['domain']} to {rel['range']}.\n",
            ]

            examples = self.query_by_relationship(rel["domain"], rel["name"], rel["range"])[:3]
            if examples:
                parts.append("Examples:\n")
                for ex in examples:
                    source_name = ex["source_properties"].get("name", ex["source"])
                    target_name = ex["target_properties"].get("name", ex["target"])
                    parts.append(f"- {source_name} {rel['name']} {target_name}\n")

            context_chunks.append("".join(parts))

        # If we found nothing specific, add general ontology info
        if not context_chunks:
            top_classes = [
                c for c, data in classes.items()
                if "subClassOf" not in data or data["subClassOf"] == "Entity"
            ]
            if top_classes:
                lines = ["Main classes in the ontology:\n"]
                for cls in top_classes:
                    lines.append(f"- {cls}: {classes[cls].get('description', '')}\n")
                context_chunks.append("".join(lines))

            if relationships:
                lines = ["Key relationships in the ontology:\n"]
                for rel in relationships[:5]:  # first five relationships only
                    lines.append(f"- {rel['name']}: {rel.get('description', '')}\n")
                context_chunks.append("".join(lines))

        return context_chunks