""" Causal Reasoning System for Cybersecurity Events Understanding cause-effect relationships in security incidents and attack chains """ import json import numpy as np import pandas as pd from typing import Dict, List, Optional, Any, Tuple, Set from dataclasses import dataclass, asdict from datetime import datetime, timedelta from collections import defaultdict, deque import networkx as nx from enum import Enum import logging class CausalRelationType(Enum): """Types of causal relationships in cybersecurity events""" DIRECT_CAUSE = "direct_cause" INDIRECT_CAUSE = "indirect_cause" ENABLING_CONDITION = "enabling_condition" TEMPORAL_SEQUENCE = "temporal_sequence" CORRELATED = "correlated" SPURIOUS = "spurious" class ConfidenceLevel(Enum): """Confidence levels for causal inferences""" HIGH = "high" # > 0.8 MEDIUM = "medium" # 0.5 - 0.8 LOW = "low" # 0.2 - 0.5 UNCERTAIN = "uncertain" # < 0.2 @dataclass class SecurityEvent: """Represents a security event with temporal and contextual information""" event_id: str timestamp: datetime event_type: str source: str target: str severity: str attributes: Dict[str, Any] context: Dict[str, Any] @dataclass class CausalHypothesis: """A hypothesis about causal relationship between events""" hypothesis_id: str cause_event_id: str effect_event_id: str relationship_type: CausalRelationType confidence_score: float confidence_level: ConfidenceLevel evidence: Dict[str, Any] temporal_gap: float # seconds mechanism: str created_at: str @dataclass class CausalChain: """A chain of causally linked security events""" chain_id: str events: List[SecurityEvent] causal_links: List[CausalHypothesis] root_cause: str final_effect: str attack_pattern: str mitigation_points: List[str] created_at: str class CausalInferenceEngine: """Advanced causal inference engine for cybersecurity events""" def __init__(self, max_temporal_gap: int = 3600): self.max_temporal_gap = max_temporal_gap # Maximum gap in seconds for causal consideration self.events = [] self.causal_graph = nx.DiGraph() self.causal_patterns = self._load_causal_patterns() self.logger = logging.getLogger(__name__) # Causal inference models self.temporal_models = self._initialize_temporal_models() self.correlation_threshold = 0.7 self.causation_threshold = 0.6 def _load_causal_patterns(self) -> Dict[str, Any]: """Load known causal patterns in cybersecurity""" return { "attack_chains": { "lateral_movement": { "pattern": ["reconnaissance", "initial_access", "persistence", "privilege_escalation", "lateral_movement"], "temporal_constraints": [300, 1800, 900, 600], # Max seconds between stages "confidence_boost": 0.2 }, "data_exfiltration": { "pattern": ["initial_access", "discovery", "collection", "exfiltration"], "temporal_constraints": [1800, 3600, 1200], "confidence_boost": 0.3 }, "ransomware": { "pattern": ["initial_access", "persistence", "privilege_escalation", "lateral_movement", "encryption"], "temporal_constraints": [600, 1200, 900, 300], "confidence_boost": 0.25 } }, "vulnerability_exploitation": { "pattern": ["vulnerability_scan", "exploit_attempt", "successful_exploitation"], "temporal_constraints": [300, 60], "confidence_boost": 0.4 }, "insider_threat": { "pattern": ["anomalous_access", "data_access", "data_transfer"], "temporal_constraints": [1800, 900], "confidence_boost": 0.15 } } def _initialize_temporal_models(self) -> Dict[str, Any]: """Initialize temporal causal inference models""" return { "granger_causality": { "window_size": 10, "max_lag": 5, "significance_level": 0.05 }, "transfer_entropy": { "k_history": 3, "embedding_dim": 2, "threshold": 0.1 }, "ccm": { # Convergent Cross Mapping "embedding_dim": 3, "tau": 1, "library_size_range": (10, 100) } } def add_event(self, event: SecurityEvent) -> None: """Add a security event to the analysis""" self.events.append(event) self.causal_graph.add_node(event.event_id, event=event) # Update causal relationships self._update_causal_relationships(event) def _update_causal_relationships(self, new_event: SecurityEvent) -> None: """Update causal relationships when a new event is added""" # Look for potential causal relationships with recent events recent_events = [ e for e in self.events if abs((new_event.timestamp - e.timestamp).total_seconds()) <= self.max_temporal_gap and e.event_id != new_event.event_id ] for event in recent_events: hypothesis = self._generate_causal_hypothesis(event, new_event) if hypothesis and hypothesis.confidence_score >= 0.2: self._add_causal_edge(hypothesis) def _generate_causal_hypothesis(self, cause_event: SecurityEvent, effect_event: SecurityEvent) -> Optional[CausalHypothesis]: """Generate a causal hypothesis between two events""" # Check temporal order if cause_event.timestamp >= effect_event.timestamp: return None temporal_gap = (effect_event.timestamp - cause_event.timestamp).total_seconds() if temporal_gap > self.max_temporal_gap: return None # Calculate confidence score based on multiple factors confidence_factors = { "temporal_proximity": self._calculate_temporal_confidence(temporal_gap), "semantic_similarity": self._calculate_semantic_similarity(cause_event, effect_event), "pattern_match": self._calculate_pattern_match(cause_event, effect_event), "contextual_similarity": self._calculate_contextual_similarity(cause_event, effect_event), "causal_mechanism": self._identify_causal_mechanism(cause_event, effect_event) } # Weighted confidence score weights = { "temporal_proximity": 0.2, "semantic_similarity": 0.25, "pattern_match": 0.3, "contextual_similarity": 0.15, "causal_mechanism": 0.1 } confidence_score = sum( confidence_factors[factor] * weights[factor] for factor in confidence_factors ) # Determine relationship type relationship_type = self._determine_relationship_type( cause_event, effect_event, confidence_factors ) # Determine confidence level if confidence_score > 0.8: confidence_level = ConfidenceLevel.HIGH elif confidence_score > 0.5: confidence_level = ConfidenceLevel.MEDIUM elif confidence_score > 0.2: confidence_level = ConfidenceLevel.LOW else: confidence_level = ConfidenceLevel.UNCERTAIN # Generate mechanism explanation mechanism = self._generate_mechanism_explanation(cause_event, effect_event, confidence_factors) hypothesis = CausalHypothesis( hypothesis_id=f"hyp_{cause_event.event_id}_{effect_event.event_id}", cause_event_id=cause_event.event_id, effect_event_id=effect_event.event_id, relationship_type=relationship_type, confidence_score=confidence_score, confidence_level=confidence_level, evidence=confidence_factors, temporal_gap=temporal_gap, mechanism=mechanism, created_at=datetime.now().isoformat() ) return hypothesis def _calculate_temporal_confidence(self, temporal_gap: float) -> float: """Calculate confidence based on temporal proximity""" # Exponential decay function return np.exp(-temporal_gap / 600) # 600 seconds half-life def _calculate_semantic_similarity(self, event1: SecurityEvent, event2: SecurityEvent) -> float: """Calculate semantic similarity between events""" # Simple keyword-based similarity (in production, use embeddings) keywords1 = set(event1.event_type.lower().split() + list(event1.attributes.get('keywords', []))) keywords2 = set(event2.event_type.lower().split() + list(event2.attributes.get('keywords', []))) if not keywords1 or not keywords2: return 0.0 intersection = len(keywords1 & keywords2) union = len(keywords1 | keywords2) return intersection / union if union > 0 else 0.0 def _calculate_pattern_match(self, cause_event: SecurityEvent, effect_event: SecurityEvent) -> float: """Calculate how well events match known causal patterns""" max_match = 0.0 for pattern_name, pattern_info in self.causal_patterns.items(): if isinstance(pattern_info, dict) and 'pattern' in pattern_info: pattern = pattern_info['pattern'] confidence_boost = pattern_info.get('confidence_boost', 0.1) # Check if events match consecutive steps in pattern try: cause_idx = pattern.index(cause_event.event_type.lower()) effect_idx = pattern.index(effect_event.event_type.lower()) if effect_idx == cause_idx + 1: match_score = 0.8 + confidence_boost max_match = max(max_match, match_score) elif effect_idx > cause_idx: # Non-consecutive but in sequence gap_penalty = (effect_idx - cause_idx - 1) * 0.1 match_score = max(0.3, 0.6 - gap_penalty + confidence_boost) max_match = max(max_match, match_score) except ValueError: continue return min(1.0, max_match) def _calculate_contextual_similarity(self, event1: SecurityEvent, event2: SecurityEvent) -> float: """Calculate contextual similarity (same host, network, user, etc.)""" context_matches = 0 total_contexts = 0 contexts_to_check = ['source', 'target', 'user', 'host', 'network', 'process'] for context in contexts_to_check: val1 = getattr(event1, context, None) or event1.context.get(context) val2 = getattr(event2, context, None) or event2.context.get(context) if val1 is not None and val2 is not None: total_contexts += 1 if val1 == val2: context_matches += 1 return context_matches / total_contexts if total_contexts > 0 else 0.0 def _identify_causal_mechanism(self, cause_event: SecurityEvent, effect_event: SecurityEvent) -> float: """Identify potential causal mechanisms""" mechanisms = { "exploitation": ["exploit", "vulnerability", "compromise"], "lateral_movement": ["login", "access", "connection"], "persistence": ["install", "create", "modify"], "exfiltration": ["copy", "transfer", "download"] } cause_type = cause_event.event_type.lower() effect_type = effect_event.event_type.lower() for mechanism, keywords in mechanisms.items(): if any(kw in cause_type for kw in keywords) and any(kw in effect_type for kw in keywords): return 0.7 return 0.3 def _determine_relationship_type(self, cause_event: SecurityEvent, effect_event: SecurityEvent, confidence_factors: Dict[str, float]) -> CausalRelationType: """Determine the type of causal relationship""" if confidence_factors["pattern_match"] > 0.7: return CausalRelationType.DIRECT_CAUSE elif confidence_factors["temporal_proximity"] > 0.8 and confidence_factors["contextual_similarity"] > 0.6: return CausalRelationType.DIRECT_CAUSE elif confidence_factors["semantic_similarity"] > 0.5: return CausalRelationType.INDIRECT_CAUSE elif confidence_factors["temporal_proximity"] > 0.5: return CausalRelationType.TEMPORAL_SEQUENCE else: return CausalRelationType.CORRELATED def _generate_mechanism_explanation(self, cause_event: SecurityEvent, effect_event: SecurityEvent, confidence_factors: Dict[str, float]) -> str: """Generate human-readable explanation of causal mechanism""" cause_type = cause_event.event_type effect_type = effect_event.event_type temporal_gap = (effect_event.timestamp - cause_event.timestamp).total_seconds() if confidence_factors["pattern_match"] > 0.7: return f"'{cause_type}' directly enabled '{effect_type}' as part of a known attack pattern" elif confidence_factors["contextual_similarity"] > 0.6: return f"'{cause_type}' on same system/user likely caused '{effect_type}' ({temporal_gap:.0f}s later)" elif temporal_gap < 60: return f"'{cause_type}' immediately preceded '{effect_type}' ({temporal_gap:.0f}s gap)" else: return f"'{cause_type}' may have contributed to conditions enabling '{effect_type}'" def _add_causal_edge(self, hypothesis: CausalHypothesis) -> None: """Add a causal edge to the graph""" self.causal_graph.add_edge( hypothesis.cause_event_id, hypothesis.effect_event_id, hypothesis=hypothesis, weight=hypothesis.confidence_score ) def identify_attack_chains(self, min_confidence: float = 0.4) -> List[CausalChain]: """Identify causal attack chains from the event graph""" chains = [] # Find all paths in the causal graph for root_node in self.causal_graph.nodes(): # Check if this could be a root cause (few incoming edges) if self.causal_graph.in_degree(root_node) <= 1: paths = self._find_causal_paths_from_root(root_node, min_confidence) for path in paths: if len(path) >= 2: # At least 2 events for a chain chain = self._create_causal_chain(path) if chain: chains.append(chain) return chains def _find_causal_paths_from_root(self, root_node: str, min_confidence: float) -> List[List[str]]: """Find all causal paths starting from a root node""" paths = [] def dfs(node: str, current_path: List[str], visited: Set[str]): if node in visited: return visited.add(node) current_path.append(node) # Get successors with sufficient confidence successors = [] for successor in self.causal_graph.successors(node): edge_data = self.causal_graph.get_edge_data(node, successor) if edge_data and edge_data.get('weight', 0) >= min_confidence: successors.append(successor) if not successors: # End of path if len(current_path) >= 2: paths.append(current_path.copy()) else: for successor in successors: dfs(successor, current_path, visited.copy()) current_path.pop() dfs(root_node, [], set()) return paths def _create_causal_chain(self, event_path: List[str]) -> Optional[CausalChain]: """Create a causal chain from a path of events""" events = [] causal_links = [] # Get events for path event_dict = {event.event_id: event for event in self.events} for event_id in event_path: if event_id in event_dict: events.append(event_dict[event_id]) if len(events) < 2: return None # Get causal links for i in range(len(event_path) - 1): edge_data = self.causal_graph.get_edge_data(event_path[i], event_path[i + 1]) if edge_data and 'hypothesis' in edge_data: causal_links.append(edge_data['hypothesis']) # Identify attack pattern attack_pattern = self._identify_attack_pattern(events) # Identify mitigation points mitigation_points = self._identify_mitigation_points(events, causal_links) chain = CausalChain( chain_id=f"chain_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}", events=events, causal_links=causal_links, root_cause=events[0].event_id, final_effect=events[-1].event_id, attack_pattern=attack_pattern, mitigation_points=mitigation_points, created_at=datetime.now().isoformat() ) return chain def _identify_attack_pattern(self, events: List[SecurityEvent]) -> str: """Identify the attack pattern represented by event sequence""" event_types = [event.event_type.lower() for event in events] # Check against known patterns for pattern_name, pattern_info in self.causal_patterns.items(): if isinstance(pattern_info, dict) and 'pattern' in pattern_info: pattern = pattern_info['pattern'] # Check if event sequence matches pattern if self._sequence_matches_pattern(event_types, pattern): return pattern_name # Generic classification based on final effect final_event = events[-1].event_type.lower() if 'exfiltration' in final_event or 'download' in final_event: return "data_exfiltration" elif 'encryption' in final_event or 'ransomware' in final_event: return "ransomware" elif 'privilege' in final_event or 'escalation' in final_event: return "privilege_escalation" else: return "unknown_attack" def _sequence_matches_pattern(self, event_types: List[str], pattern: List[str]) -> bool: """Check if event sequence matches a known pattern""" if len(event_types) > len(pattern): return False pattern_index = 0 for event_type in event_types: while pattern_index < len(pattern): if event_type in pattern[pattern_index] or pattern[pattern_index] in event_type: pattern_index += 1 break pattern_index += 1 else: return False return True def _identify_mitigation_points(self, events: List[SecurityEvent], causal_links: List[CausalHypothesis]) -> List[str]: """Identify points where the attack chain could have been disrupted""" mitigation_points = [] for i, event in enumerate(events[:-1]): # Exclude final event event_type = event.event_type.lower() # Common mitigation points if 'reconnaissance' in event_type or 'scan' in event_type: mitigation_points.append(f"Detect and block reconnaissance at event {event.event_id}") elif 'initial_access' in event_type or 'exploit' in event_type: mitigation_points.append(f"Prevent initial access at event {event.event_id}") elif 'persistence' in event_type: mitigation_points.append(f"Detect persistence mechanisms at event {event.event_id}") elif 'lateral_movement' in event_type: mitigation_points.append(f"Segment network to prevent lateral movement at event {event.event_id}") # Check causal link strength if i < len(causal_links): link = causal_links[i] if link.confidence_score < 0.7: # Weak causal link mitigation_points.append(f"Strengthen monitoring between events {link.cause_event_id} and {link.effect_event_id}") return mitigation_points def analyze_root_causes(self, min_events: int = 3) -> List[Dict[str, Any]]: """Analyze root causes of security incidents""" root_causes = [] # Find nodes with high out-degree (many effects) and low in-degree (few causes) for node in self.causal_graph.nodes(): in_degree = self.causal_graph.in_degree(node) out_degree = self.causal_graph.out_degree(node) if in_degree <= 1 and out_degree >= min_events - 1: # Get all events caused by this root cause reachable_events = list(nx.descendants(self.causal_graph, node)) if len(reachable_events) >= min_events - 1: event = next((e for e in self.events if e.event_id == node), None) if event: root_causes.append({ "root_cause_event": event, "affected_events": len(reachable_events), "causal_impact_score": out_degree / len(self.events), "downstream_events": [ next((e for e in self.events if e.event_id == eid), None) for eid in reachable_events[:10] # Top 10 ] }) # Sort by impact score root_causes.sort(key=lambda x: x["causal_impact_score"], reverse=True) return root_causes def get_causal_explanations(self, event_id: str) -> Dict[str, Any]: """Get causal explanations for a specific event""" if event_id not in self.causal_graph.nodes(): return {"error": "Event not found"} # Get direct causes predecessors = list(self.causal_graph.predecessors(event_id)) direct_causes = [] for pred in predecessors: edge_data = self.causal_graph.get_edge_data(pred, event_id) if edge_data and 'hypothesis' in edge_data: hypothesis = edge_data['hypothesis'] cause_event = next((e for e in self.events if e.event_id == pred), None) if cause_event: direct_causes.append({ "cause_event": cause_event, "mechanism": hypothesis.mechanism, "confidence": hypothesis.confidence_score, "relationship_type": hypothesis.relationship_type.value }) # Get effects successors = list(self.causal_graph.successors(event_id)) effects = [] for succ in successors: edge_data = self.causal_graph.get_edge_data(event_id, succ) if edge_data and 'hypothesis' in edge_data: hypothesis = edge_data['hypothesis'] effect_event = next((e for e in self.events if e.event_id == succ), None) if effect_event: effects.append({ "effect_event": effect_event, "mechanism": hypothesis.mechanism, "confidence": hypothesis.confidence_score, "relationship_type": hypothesis.relationship_type.value }) # Get indirect causes (ancestors) ancestors = list(nx.ancestors(self.causal_graph, event_id))[:5] # Limit to 5 indirect_causes = [ next((e for e in self.events if e.event_id == aid), None) for aid in ancestors if aid not in predecessors ] return { "target_event": next((e for e in self.events if e.event_id == event_id), None), "direct_causes": direct_causes, "effects": effects, "indirect_causes": [c for c in indirect_causes if c is not None], "causal_chain_length": len(list(nx.ancestors(self.causal_graph, event_id))) + 1 } # Example usage and testing if __name__ == "__main__": print("šŸ” Causal Reasoning System Testing:") print("=" * 50) # Initialize causal inference engine causal_engine = CausalInferenceEngine() # Create sample security events base_time = datetime.now() events = [ SecurityEvent( event_id="evt_001", timestamp=base_time, event_type="vulnerability_scan", source="192.168.1.100", target="192.168.1.50", severity="low", attributes={"scanner": "nmap", "ports": "22,80,443"}, context={"network": "internal", "user": "attacker"} ), SecurityEvent( event_id="evt_002", timestamp=base_time + timedelta(minutes=5), event_type="exploit_attempt", source="192.168.1.100", target="192.168.1.50", severity="medium", attributes={"exploit": "ssh_brute_force", "port": "22"}, context={"network": "internal", "user": "attacker"} ), SecurityEvent( event_id="evt_003", timestamp=base_time + timedelta(minutes=8), event_type="successful_exploitation", source="192.168.1.100", target="192.168.1.50", severity="high", attributes={"method": "credential_brute_force", "service": "ssh"}, context={"network": "internal", "user": "attacker", "compromised_account": "admin"} ), SecurityEvent( event_id="evt_004", timestamp=base_time + timedelta(minutes=15), event_type="privilege_escalation", source="192.168.1.50", target="192.168.1.50", severity="high", attributes={"method": "sudo_exploit", "user": "admin"}, context={"network": "internal", "user": "attacker", "host": "server1"} ), SecurityEvent( event_id="evt_005", timestamp=base_time + timedelta(minutes=25), event_type="lateral_movement", source="192.168.1.50", target="192.168.1.75", severity="high", attributes={"method": "ssh_key", "protocol": "ssh"}, context={"network": "internal", "user": "attacker", "host": "server2"} ), SecurityEvent( event_id="evt_006", timestamp=base_time + timedelta(minutes=30), event_type="data_exfiltration", source="192.168.1.75", target="external_server", severity="critical", attributes={"method": "scp", "data_volume": "500MB"}, context={"network": "external", "user": "attacker", "data_type": "sensitive"} ) ] # Add events to causal engine print("\nšŸ“Š Adding security events...") for event in events: causal_engine.add_event(event) print(f" Added: {event.event_type} at {event.timestamp.strftime('%H:%M:%S')}") # Identify attack chains print("\nšŸ”— Identifying causal attack chains...") attack_chains = causal_engine.identify_attack_chains(min_confidence=0.3) for i, chain in enumerate(attack_chains, 1): print(f"\n Chain {i}: {chain.attack_pattern}") print(f" Events: {len(chain.events)}") print(f" Root Cause: {chain.root_cause}") print(f" Final Effect: {chain.final_effect}") print(f" Causal Links: {len(chain.causal_links)}") for link in chain.causal_links[:3]: # Show first 3 links print(f" {link.cause_event_id} → {link.effect_event_id} " f"(confidence: {link.confidence_score:.3f}, {link.relationship_type.value})") if chain.mitigation_points: print(f" Mitigation Points:") for point in chain.mitigation_points[:2]: # Show first 2 print(f" - {point}") # Analyze root causes print("\nšŸŽÆ Root Cause Analysis:") root_causes = causal_engine.analyze_root_causes() for cause in root_causes[:3]: # Show top 3 event = cause["root_cause_event"] print(f" Root Cause: {event.event_type}") print(f" Impact Score: {cause['causal_impact_score']:.3f}") print(f" Affected Events: {cause['affected_events']}") # Get causal explanations for specific event print("\nšŸ’” Causal Explanation for Final Event:") explanation = causal_engine.get_causal_explanations("evt_006") print(f" Target Event: {explanation['target_event'].event_type}") print(f" Causal Chain Length: {explanation['causal_chain_length']}") if explanation['direct_causes']: print(f" Direct Causes:") for cause in explanation['direct_causes'][:2]: print(f" - {cause['cause_event'].event_type} " f"(confidence: {cause['confidence']:.3f})") print(f" Mechanism: {cause['mechanism']}") print("\nāœ… Causal Reasoning System implemented and tested")