# Feature flags for gradual agent migration rollout
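#
# Typical usage (illustrative sketch; the environment variable names match the
# ones read by AgentFeatureFlags.from_env below, while the import path is an
# assumption and should point at wherever this module actually lives):
#
#   export ANKIGEN_AGENT_MODE=hybrid
#   export ANKIGEN_ENABLE_SUBJECT_EXPERT=true
#   export ANKIGEN_ENABLE_CLARITY_JUDGE=true
#
#   from ankigen_core.agents.feature_flags import get_feature_flags
#
#   flags = get_feature_flags()
#   if flags.should_use_agents():
#       ...  # route generation through the agent pipeline
#   else:
#       ...  # fall back to the legacy LLM interface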

import hashlib
import os
import random
from typing import Dict, Any, Optional
from dataclasses import dataclass
from enum import Enum

from ankigen_core.logging import logger


class AgentMode(Enum):
    """Agent system operation modes"""
    LEGACY = "legacy"  # Use original LLM interface
    AGENT_ONLY = "agent_only"  # Use agents for everything
    HYBRID = "hybrid"  # Mix agents and legacy based on flags
    A_B_TEST = "a_b_test"  # Hash-based or random selection for A/B testing


@dataclass
class AgentFeatureFlags:
    """Feature flags for controlling agent system rollout"""
    
    # Main mode controls
    mode: AgentMode = AgentMode.LEGACY
    
    # Generation agents
    enable_subject_expert_agent: bool = False
    enable_pedagogical_agent: bool = False
    enable_content_structuring_agent: bool = False
    enable_generation_coordinator: bool = False
    
    # Judge agents
    enable_content_accuracy_judge: bool = False
    enable_pedagogical_judge: bool = False
    enable_clarity_judge: bool = False
    enable_technical_judge: bool = False
    enable_completeness_judge: bool = False
    enable_judge_coordinator: bool = False
    
    # Enhancement agents
    enable_revision_agent: bool = False
    enable_enhancement_agent: bool = False
    
    # Workflow features
    enable_multi_agent_generation: bool = False
    enable_parallel_judging: bool = False
    enable_agent_handoffs: bool = False
    enable_agent_tracing: bool = True
    
    # A/B testing
    ab_test_ratio: float = 0.5  # Fraction (0.0-1.0) of traffic routed to agents
    ab_test_user_hash: Optional[str] = None
    
    # Performance
    agent_timeout: float = 30.0
    max_agent_retries: int = 3
    enable_agent_caching: bool = True
    
    # Quality thresholds
    min_judge_consensus: float = 0.6  # Minimum agreement between judges
    max_revision_iterations: int = 3
    
    @classmethod
    def from_env(cls) -> "AgentFeatureFlags":
        """Load feature flags from environment variables"""
        return cls(
            mode=AgentMode(os.getenv("ANKIGEN_AGENT_MODE", "legacy").lower()),
            
            # Generation agents
            enable_subject_expert_agent=_env_bool("ANKIGEN_ENABLE_SUBJECT_EXPERT"),
            enable_pedagogical_agent=_env_bool("ANKIGEN_ENABLE_PEDAGOGICAL_AGENT"),
            enable_content_structuring_agent=_env_bool("ANKIGEN_ENABLE_CONTENT_STRUCTURING"),
            enable_generation_coordinator=_env_bool("ANKIGEN_ENABLE_GENERATION_COORDINATOR"),
            
            # Judge agents
            enable_content_accuracy_judge=_env_bool("ANKIGEN_ENABLE_CONTENT_JUDGE"),
            enable_pedagogical_judge=_env_bool("ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE"),
            enable_clarity_judge=_env_bool("ANKIGEN_ENABLE_CLARITY_JUDGE"),
            enable_technical_judge=_env_bool("ANKIGEN_ENABLE_TECHNICAL_JUDGE"),
            enable_completeness_judge=_env_bool("ANKIGEN_ENABLE_COMPLETENESS_JUDGE"),
            enable_judge_coordinator=_env_bool("ANKIGEN_ENABLE_JUDGE_COORDINATOR"),
            
            # Enhancement agents
            enable_revision_agent=_env_bool("ANKIGEN_ENABLE_REVISION_AGENT"),
            enable_enhancement_agent=_env_bool("ANKIGEN_ENABLE_ENHANCEMENT_AGENT"),
            
            # Workflow features
            enable_multi_agent_generation=_env_bool("ANKIGEN_ENABLE_MULTI_AGENT_GEN"),
            enable_parallel_judging=_env_bool("ANKIGEN_ENABLE_PARALLEL_JUDGING"),
            enable_agent_handoffs=_env_bool("ANKIGEN_ENABLE_AGENT_HANDOFFS"),
            enable_agent_tracing=_env_bool("ANKIGEN_ENABLE_AGENT_TRACING", default=True),
            
            # A/B testing
            ab_test_ratio=float(os.getenv("ANKIGEN_AB_TEST_RATIO", "0.5")),
            ab_test_user_hash=os.getenv("ANKIGEN_AB_TEST_USER_HASH"),
            
            # Performance
            agent_timeout=float(os.getenv("ANKIGEN_AGENT_TIMEOUT", "30.0")),
            max_agent_retries=int(os.getenv("ANKIGEN_MAX_AGENT_RETRIES", "3")),
            enable_agent_caching=_env_bool("ANKIGEN_ENABLE_AGENT_CACHING", default=True),
            
            # Quality thresholds
            min_judge_consensus=float(os.getenv("ANKIGEN_MIN_JUDGE_CONSENSUS", "0.6")),
            max_revision_iterations=int(os.getenv("ANKIGEN_MAX_REVISION_ITERATIONS", "3")),
        )
    
    def should_use_agents(self) -> bool:
        """Determine if agents should be used based on current mode"""
        if self.mode == AgentMode.LEGACY:
            return False
        elif self.mode == AgentMode.AGENT_ONLY:
            return True
        elif self.mode == AgentMode.HYBRID:
            # Use agents if any generation or judge agent is explicitly enabled
            return (
                self.enable_subject_expert_agent or
                self.enable_pedagogical_agent or
                self.enable_content_structuring_agent or
                any([
                    self.enable_content_accuracy_judge,
                    self.enable_pedagogical_judge,
                    self.enable_clarity_judge,
                    self.enable_technical_judge,
                    self.enable_completeness_judge,
                ])
            )
        elif self.mode == AgentMode.A_B_TEST:
            # Use hash-based or random selection for A/B testing
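            # With ab_test_ratio=0.5, for example, user hashes whose md5 bucket
            # (hash_value % 100) lands in 0-49 take the agent path; the same
            # hash always lands in the same bucket, so assignment is sticky.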
            if self.ab_test_user_hash:
                # Use consistent hash-based selection
                hash_value = int(hashlib.md5(self.ab_test_user_hash.encode()).hexdigest(), 16)
                return (hash_value % 100) < (self.ab_test_ratio * 100)
            else:
                # Use random selection (note: not session-consistent)
                return random.random() < self.ab_test_ratio
        
        return False
    
    def get_enabled_agents(self) -> Dict[str, bool]:
        """Get a dictionary of all enabled agents"""
        return {
            "subject_expert": self.enable_subject_expert_agent,
            "pedagogical": self.enable_pedagogical_agent,
            "content_structuring": self.enable_content_structuring_agent,
            "generation_coordinator": self.enable_generation_coordinator,
            "content_accuracy_judge": self.enable_content_accuracy_judge,
            "pedagogical_judge": self.enable_pedagogical_judge,
            "clarity_judge": self.enable_clarity_judge,
            "technical_judge": self.enable_technical_judge,
            "completeness_judge": self.enable_completeness_judge,
            "judge_coordinator": self.enable_judge_coordinator,
            "revision_agent": self.enable_revision_agent,
            "enhancement_agent": self.enable_enhancement_agent,
        }
    
    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for logging/debugging"""
        return {
            "mode": self.mode.value,
            "enabled_agents": self.get_enabled_agents(),
            "workflow_features": {
                "multi_agent_generation": self.enable_multi_agent_generation,
                "parallel_judging": self.enable_parallel_judging,
                "agent_handoffs": self.enable_agent_handoffs,
                "agent_tracing": self.enable_agent_tracing,
            },
            "ab_test_ratio": self.ab_test_ratio,
            "performance_config": {
                "timeout": self.agent_timeout,
                "max_retries": self.max_agent_retries,
                "caching": self.enable_agent_caching,
            },
            "quality_thresholds": {
                "min_judge_consensus": self.min_judge_consensus,
                "max_revision_iterations": self.max_revision_iterations,
            }
        }


def _env_bool(env_var: str, default: bool = False) -> bool:
    """Helper to parse boolean environment variables"""
    value = os.getenv(env_var, str(default)).lower()
    return value in ("true", "1", "yes", "on", "enabled")


# Global instance - can be overridden in tests or specific deployments
_global_flags: Optional[AgentFeatureFlags] = None


def get_feature_flags() -> AgentFeatureFlags:
    """Get the global feature flags instance"""
    global _global_flags
    if _global_flags is None:
        _global_flags = AgentFeatureFlags.from_env()
        logger.info(f"Loaded agent feature flags: {_global_flags.mode.value}")
        logger.debug(f"Feature flags config: {_global_flags.to_dict()}")
    return _global_flags


def set_feature_flags(flags: AgentFeatureFlags) -> None:
    """Set global feature flags (for testing or runtime reconfiguration)"""
    global _global_flags
    _global_flags = flags
    logger.info(f"Updated agent feature flags: {flags.mode.value}")


def reset_feature_flags() -> None:
    """Clear the cached flags so the next get_feature_flags() reloads from the environment"""
    global _global_flags
    _global_flags = None
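

# Minimal smoke-test sketch (illustrative, not part of the public API): run this
# module directly to print the flags resolved from the current environment and
# whether the agent path would be taken.
if __name__ == "__main__":
    import json

    flags = get_feature_flags()
    print(json.dumps(flags.to_dict(), indent=2))
    print(f"should_use_agents: {flags.should_use_agents()}")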