# ankigen_core/agents/feature_flags.py
# Feature flags for gradual agent migration rollout
import os
from typing import Dict, Any, Optional
from dataclasses import dataclass
from enum import Enum
from ankigen_core.logging import logger
class AgentMode(Enum):
    """Agent system operation modes."""

    LEGACY = "legacy"  # Use original LLM interface
    AGENT_ONLY = "agent_only"  # Use agents for everything
    HYBRID = "hybrid"  # Mix agents and legacy based on flags
    A_B_TEST = "a_b_test"  # Random selection for A/B testing


@dataclass
class AgentFeatureFlags:
    """Feature flags for controlling agent system rollout.

    Defaults are deliberately conservative: LEGACY mode with every agent
    disabled, so an unconfigured deployment behaves exactly as it did
    before the agent system existed.
    """

    # Main mode control
    mode: AgentMode = AgentMode.LEGACY

    # Generation agents
    enable_subject_expert_agent: bool = False
    enable_pedagogical_agent: bool = False
    enable_content_structuring_agent: bool = False
    enable_generation_coordinator: bool = False

    # Judge agents
    enable_content_accuracy_judge: bool = False
    enable_pedagogical_judge: bool = False
    enable_clarity_judge: bool = False
    enable_technical_judge: bool = False
    enable_completeness_judge: bool = False
    enable_judge_coordinator: bool = False

    # Enhancement agents
    enable_revision_agent: bool = False
    enable_enhancement_agent: bool = False

    # Workflow features
    enable_multi_agent_generation: bool = False
    enable_parallel_judging: bool = False
    enable_agent_handoffs: bool = False
    enable_agent_tracing: bool = True

    # A/B testing
    ab_test_ratio: float = 0.5  # Fraction of traffic in the agent (A) group
    ab_test_user_hash: Optional[str] = None  # Stable key for consistent bucketing

    # Performance
    agent_timeout: float = 30.0  # Seconds before an agent call is abandoned
    max_agent_retries: int = 3
    enable_agent_caching: bool = True

    # Quality thresholds
    min_judge_consensus: float = 0.6  # Minimum agreement between judges
    max_revision_iterations: int = 3

    @classmethod
    def from_env(cls) -> "AgentFeatureFlags":
        """Load feature flags from environment variables.

        The mode string is matched case-insensitively against AgentMode
        values; an unrecognized value logs a warning and falls back to
        LEGACY instead of letting a typo in ANKIGEN_AGENT_MODE crash
        startup.

        Raises:
            ValueError: if one of the numeric variables (ratio, timeout,
                retries, consensus, iterations) cannot be parsed.
        """
        mode_raw = os.getenv("ANKIGEN_AGENT_MODE", "legacy").strip().lower()
        try:
            mode = AgentMode(mode_raw)
        except ValueError:
            logger.warning(
                "Unknown ANKIGEN_AGENT_MODE %r; falling back to legacy mode",
                mode_raw,
            )
            mode = AgentMode.LEGACY

        return cls(
            mode=mode,
            # Generation agents
            enable_subject_expert_agent=_env_bool("ANKIGEN_ENABLE_SUBJECT_EXPERT"),
            enable_pedagogical_agent=_env_bool("ANKIGEN_ENABLE_PEDAGOGICAL_AGENT"),
            enable_content_structuring_agent=_env_bool("ANKIGEN_ENABLE_CONTENT_STRUCTURING"),
            enable_generation_coordinator=_env_bool("ANKIGEN_ENABLE_GENERATION_COORDINATOR"),
            # Judge agents
            enable_content_accuracy_judge=_env_bool("ANKIGEN_ENABLE_CONTENT_JUDGE"),
            enable_pedagogical_judge=_env_bool("ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE"),
            enable_clarity_judge=_env_bool("ANKIGEN_ENABLE_CLARITY_JUDGE"),
            enable_technical_judge=_env_bool("ANKIGEN_ENABLE_TECHNICAL_JUDGE"),
            enable_completeness_judge=_env_bool("ANKIGEN_ENABLE_COMPLETENESS_JUDGE"),
            enable_judge_coordinator=_env_bool("ANKIGEN_ENABLE_JUDGE_COORDINATOR"),
            # Enhancement agents
            enable_revision_agent=_env_bool("ANKIGEN_ENABLE_REVISION_AGENT"),
            enable_enhancement_agent=_env_bool("ANKIGEN_ENABLE_ENHANCEMENT_AGENT"),
            # Workflow features
            enable_multi_agent_generation=_env_bool("ANKIGEN_ENABLE_MULTI_AGENT_GEN"),
            enable_parallel_judging=_env_bool("ANKIGEN_ENABLE_PARALLEL_JUDGING"),
            enable_agent_handoffs=_env_bool("ANKIGEN_ENABLE_AGENT_HANDOFFS"),
            enable_agent_tracing=_env_bool("ANKIGEN_ENABLE_AGENT_TRACING", default=True),
            # A/B testing
            ab_test_ratio=float(os.getenv("ANKIGEN_AB_TEST_RATIO", "0.5")),
            ab_test_user_hash=os.getenv("ANKIGEN_AB_TEST_USER_HASH"),
            # Performance
            agent_timeout=float(os.getenv("ANKIGEN_AGENT_TIMEOUT", "30.0")),
            max_agent_retries=int(os.getenv("ANKIGEN_MAX_AGENT_RETRIES", "3")),
            enable_agent_caching=_env_bool("ANKIGEN_ENABLE_AGENT_CACHING", default=True),
            # Quality thresholds
            min_judge_consensus=float(os.getenv("ANKIGEN_MIN_JUDGE_CONSENSUS", "0.6")),
            max_revision_iterations=int(os.getenv("ANKIGEN_MAX_REVISION_ITERATIONS", "3")),
        )

    def should_use_agents(self) -> bool:
        """Determine whether agents should be used under the current mode."""
        if self.mode == AgentMode.LEGACY:
            return False
        elif self.mode == AgentMode.AGENT_ONLY:
            return True
        elif self.mode == AgentMode.HYBRID:
            # Use agents if any generation or judge agent is enabled.
            # NOTE(review): the coordinator / revision / enhancement flags do
            # not trigger agent use on their own here -- confirm intended.
            return (
                self.enable_subject_expert_agent
                or self.enable_pedagogical_agent
                or self.enable_content_structuring_agent
                or any(
                    [
                        self.enable_content_accuracy_judge,
                        self.enable_pedagogical_judge,
                        self.enable_clarity_judge,
                        self.enable_technical_judge,
                        self.enable_completeness_judge,
                    ]
                )
            )
        elif self.mode == AgentMode.A_B_TEST:
            if self.ab_test_user_hash:
                # Consistent hash-based bucketing: the same user hash always
                # lands in the same group. md5 is acceptable here -- it is
                # used only for bucketing, not for security.
                import hashlib

                hash_value = int(
                    hashlib.md5(self.ab_test_user_hash.encode()).hexdigest(), 16
                )
                return (hash_value % 100) < (self.ab_test_ratio * 100)
            else:
                # Use random selection (note: not session-consistent)
                import random

                return random.random() < self.ab_test_ratio
        return False

    def get_enabled_agents(self) -> Dict[str, bool]:
        """Return a mapping of agent name -> whether that agent is enabled."""
        return {
            "subject_expert": self.enable_subject_expert_agent,
            "pedagogical": self.enable_pedagogical_agent,
            "content_structuring": self.enable_content_structuring_agent,
            "generation_coordinator": self.enable_generation_coordinator,
            "content_accuracy_judge": self.enable_content_accuracy_judge,
            "pedagogical_judge": self.enable_pedagogical_judge,
            "clarity_judge": self.enable_clarity_judge,
            "technical_judge": self.enable_technical_judge,
            "completeness_judge": self.enable_completeness_judge,
            "judge_coordinator": self.enable_judge_coordinator,
            "revision_agent": self.enable_revision_agent,
            "enhancement_agent": self.enable_enhancement_agent,
        }

    def to_dict(self) -> Dict[str, Any]:
        """Convert the full flag set to a dictionary for logging/debugging."""
        return {
            "mode": self.mode.value,
            "enabled_agents": self.get_enabled_agents(),
            "workflow_features": {
                "multi_agent_generation": self.enable_multi_agent_generation,
                "parallel_judging": self.enable_parallel_judging,
                "agent_handoffs": self.enable_agent_handoffs,
                "agent_tracing": self.enable_agent_tracing,
            },
            "ab_test_ratio": self.ab_test_ratio,
            "performance_config": {
                "timeout": self.agent_timeout,
                "max_retries": self.max_agent_retries,
                "caching": self.enable_agent_caching,
            },
            "quality_thresholds": {
                "min_judge_consensus": self.min_judge_consensus,
                "max_revision_iterations": self.max_revision_iterations,
            }
        }
def _env_bool(env_var: str, default: bool = False) -> bool:
    """Parse a boolean environment variable.

    Accepts "true", "1", "yes", "on", "enabled" (case-insensitive, with
    surrounding whitespace ignored) as True; any other value is False.
    When the variable is unset, *default* is returned.

    Fix: strip whitespace before matching -- a shell export like
    ANKIGEN_ENABLE_X="true " previously parsed as False.
    """
    value = os.getenv(env_var, str(default)).strip().lower()
    return value in ("true", "1", "yes", "on", "enabled")
# Process-wide singleton; tests and special deployments may override it
# via set_feature_flags()/reset_feature_flags().
_global_flags: Optional[AgentFeatureFlags] = None


def get_feature_flags() -> AgentFeatureFlags:
    """Return the global feature flags, loading from the environment on first use."""
    global _global_flags
    if _global_flags is not None:
        return _global_flags
    loaded = AgentFeatureFlags.from_env()
    logger.info(f"Loaded agent feature flags: {loaded.mode.value}")
    logger.debug(f"Feature flags config: {loaded.to_dict()}")
    _global_flags = loaded
    return loaded
def set_feature_flags(flags: AgentFeatureFlags):
    """Install *flags* as the global feature-flag instance.

    Intended for testing and runtime reconfiguration.
    """
    global _global_flags
    _global_flags = flags
    logger.info(f"Updated agent feature flags: {flags.mode.value}")
def reset_feature_flags():
    """Discard the cached flags so the next get_feature_flags() reloads from the environment."""
    global _global_flags
    _global_flags = None