# ankigen_core/agents/feature_flags.py
# Feature flags for gradual agent migration rollout
import os
from typing import Dict, Any, Optional
from dataclasses import dataclass
from enum import Enum
from ankigen_core.logging import logger
class AgentMode(Enum):
    """Agent system operation modes."""

    LEGACY = "legacy"  # Use original LLM interface
    AGENT_ONLY = "agent_only"  # Use agents for everything
    HYBRID = "hybrid"  # Mix agents and legacy based on flags
    A_B_TEST = "a_b_test"  # Random selection for A/B testing


@dataclass
class AgentFeatureFlags:
    """Feature flags for controlling agent system rollout.

    Defaults are deliberately conservative: LEGACY mode with every agent
    disabled, so an unconfigured deployment behaves exactly as it did
    before the agent system existed.
    """

    # Main mode control
    mode: AgentMode = AgentMode.LEGACY

    # Generation agents
    enable_subject_expert_agent: bool = False
    enable_pedagogical_agent: bool = False
    enable_content_structuring_agent: bool = False
    enable_generation_coordinator: bool = False

    # Judge agents
    enable_content_accuracy_judge: bool = False
    enable_pedagogical_judge: bool = False
    enable_clarity_judge: bool = False
    enable_technical_judge: bool = False
    enable_completeness_judge: bool = False
    enable_judge_coordinator: bool = False

    # Enhancement agents
    enable_revision_agent: bool = False
    enable_enhancement_agent: bool = False

    # Workflow features
    enable_multi_agent_generation: bool = False
    enable_parallel_judging: bool = False
    enable_agent_handoffs: bool = False
    enable_agent_tracing: bool = True

    # A/B testing
    ab_test_ratio: float = 0.5  # Fraction of traffic in the agent (A) group
    ab_test_user_hash: Optional[str] = None  # Stable key for consistent bucketing

    # Performance
    agent_timeout: float = 30.0  # Seconds before an agent call is abandoned
    max_agent_retries: int = 3
    enable_agent_caching: bool = True

    # Quality thresholds
    min_judge_consensus: float = 0.6  # Minimum agreement between judges
    max_revision_iterations: int = 3

    @classmethod
    def from_env(cls) -> "AgentFeatureFlags":
        """Load feature flags from environment variables.

        The mode string is matched case-insensitively against AgentMode
        values; an unrecognized value logs a warning and falls back to
        LEGACY instead of letting a typo in ANKIGEN_AGENT_MODE crash
        startup.

        Raises:
            ValueError: if one of the numeric variables (ratio, timeout,
                retries, consensus, iterations) cannot be parsed.
        """
        mode_raw = os.getenv("ANKIGEN_AGENT_MODE", "legacy").strip().lower()
        try:
            mode = AgentMode(mode_raw)
        except ValueError:
            logger.warning(
                "Unknown ANKIGEN_AGENT_MODE %r; falling back to legacy mode",
                mode_raw,
            )
            mode = AgentMode.LEGACY

        return cls(
            mode=mode,
            # Generation agents
            enable_subject_expert_agent=_env_bool("ANKIGEN_ENABLE_SUBJECT_EXPERT"),
            enable_pedagogical_agent=_env_bool("ANKIGEN_ENABLE_PEDAGOGICAL_AGENT"),
            enable_content_structuring_agent=_env_bool("ANKIGEN_ENABLE_CONTENT_STRUCTURING"),
            enable_generation_coordinator=_env_bool("ANKIGEN_ENABLE_GENERATION_COORDINATOR"),
            # Judge agents
            enable_content_accuracy_judge=_env_bool("ANKIGEN_ENABLE_CONTENT_JUDGE"),
            enable_pedagogical_judge=_env_bool("ANKIGEN_ENABLE_PEDAGOGICAL_JUDGE"),
            enable_clarity_judge=_env_bool("ANKIGEN_ENABLE_CLARITY_JUDGE"),
            enable_technical_judge=_env_bool("ANKIGEN_ENABLE_TECHNICAL_JUDGE"),
            enable_completeness_judge=_env_bool("ANKIGEN_ENABLE_COMPLETENESS_JUDGE"),
            enable_judge_coordinator=_env_bool("ANKIGEN_ENABLE_JUDGE_COORDINATOR"),
            # Enhancement agents
            enable_revision_agent=_env_bool("ANKIGEN_ENABLE_REVISION_AGENT"),
            enable_enhancement_agent=_env_bool("ANKIGEN_ENABLE_ENHANCEMENT_AGENT"),
            # Workflow features
            enable_multi_agent_generation=_env_bool("ANKIGEN_ENABLE_MULTI_AGENT_GEN"),
            enable_parallel_judging=_env_bool("ANKIGEN_ENABLE_PARALLEL_JUDGING"),
            enable_agent_handoffs=_env_bool("ANKIGEN_ENABLE_AGENT_HANDOFFS"),
            enable_agent_tracing=_env_bool("ANKIGEN_ENABLE_AGENT_TRACING", default=True),
            # A/B testing
            ab_test_ratio=float(os.getenv("ANKIGEN_AB_TEST_RATIO", "0.5")),
            ab_test_user_hash=os.getenv("ANKIGEN_AB_TEST_USER_HASH"),
            # Performance
            agent_timeout=float(os.getenv("ANKIGEN_AGENT_TIMEOUT", "30.0")),
            max_agent_retries=int(os.getenv("ANKIGEN_MAX_AGENT_RETRIES", "3")),
            enable_agent_caching=_env_bool("ANKIGEN_ENABLE_AGENT_CACHING", default=True),
            # Quality thresholds
            min_judge_consensus=float(os.getenv("ANKIGEN_MIN_JUDGE_CONSENSUS", "0.6")),
            max_revision_iterations=int(os.getenv("ANKIGEN_MAX_REVISION_ITERATIONS", "3")),
        )

    def should_use_agents(self) -> bool:
        """Determine whether agents should be used under the current mode."""
        if self.mode == AgentMode.LEGACY:
            return False
        elif self.mode == AgentMode.AGENT_ONLY:
            return True
        elif self.mode == AgentMode.HYBRID:
            # Use agents if any generation or judge agent is enabled.
            # NOTE(review): the coordinator / revision / enhancement flags do
            # not trigger agent use on their own here -- confirm intended.
            return (
                self.enable_subject_expert_agent
                or self.enable_pedagogical_agent
                or self.enable_content_structuring_agent
                or any(
                    [
                        self.enable_content_accuracy_judge,
                        self.enable_pedagogical_judge,
                        self.enable_clarity_judge,
                        self.enable_technical_judge,
                        self.enable_completeness_judge,
                    ]
                )
            )
        elif self.mode == AgentMode.A_B_TEST:
            if self.ab_test_user_hash:
                # Consistent hash-based bucketing: the same user hash always
                # lands in the same group. md5 is acceptable here -- it is
                # used only for bucketing, not for security.
                import hashlib

                hash_value = int(
                    hashlib.md5(self.ab_test_user_hash.encode()).hexdigest(), 16
                )
                return (hash_value % 100) < (self.ab_test_ratio * 100)
            else:
                # Use random selection (note: not session-consistent)
                import random

                return random.random() < self.ab_test_ratio
        return False

    def get_enabled_agents(self) -> Dict[str, bool]:
        """Return a mapping of agent name -> whether that agent is enabled."""
        return {
            "subject_expert": self.enable_subject_expert_agent,
            "pedagogical": self.enable_pedagogical_agent,
            "content_structuring": self.enable_content_structuring_agent,
            "generation_coordinator": self.enable_generation_coordinator,
            "content_accuracy_judge": self.enable_content_accuracy_judge,
            "pedagogical_judge": self.enable_pedagogical_judge,
            "clarity_judge": self.enable_clarity_judge,
            "technical_judge": self.enable_technical_judge,
            "completeness_judge": self.enable_completeness_judge,
            "judge_coordinator": self.enable_judge_coordinator,
            "revision_agent": self.enable_revision_agent,
            "enhancement_agent": self.enable_enhancement_agent,
        }

    def to_dict(self) -> Dict[str, Any]:
        """Convert the full flag set to a dictionary for logging/debugging."""
        return {
            "mode": self.mode.value,
            "enabled_agents": self.get_enabled_agents(),
            "workflow_features": {
                "multi_agent_generation": self.enable_multi_agent_generation,
                "parallel_judging": self.enable_parallel_judging,
                "agent_handoffs": self.enable_agent_handoffs,
                "agent_tracing": self.enable_agent_tracing,
            },
            "ab_test_ratio": self.ab_test_ratio,
            "performance_config": {
                "timeout": self.agent_timeout,
                "max_retries": self.max_agent_retries,
                "caching": self.enable_agent_caching,
            },
            "quality_thresholds": {
                "min_judge_consensus": self.min_judge_consensus,
                "max_revision_iterations": self.max_revision_iterations,
            }
        }
def _env_bool(env_var: str, default: bool = False) -> bool:
    """Parse a boolean environment variable.

    Accepts "true", "1", "yes", "on", "enabled" (case-insensitive, with
    surrounding whitespace ignored) as True; any other value is False.
    When the variable is unset, *default* is returned.

    Fix: strip whitespace before matching -- a shell export like
    ANKIGEN_ENABLE_X="true " previously parsed as False.
    """
    value = os.getenv(env_var, str(default)).strip().lower()
    return value in ("true", "1", "yes", "on", "enabled")
# Process-wide singleton; tests and special deployments may override it
# via set_feature_flags()/reset_feature_flags().
_global_flags: Optional[AgentFeatureFlags] = None


def get_feature_flags() -> AgentFeatureFlags:
    """Return the global feature flags, loading from the environment on first use."""
    global _global_flags
    if _global_flags is not None:
        return _global_flags
    loaded = AgentFeatureFlags.from_env()
    logger.info(f"Loaded agent feature flags: {loaded.mode.value}")
    logger.debug(f"Feature flags config: {loaded.to_dict()}")
    _global_flags = loaded
    return loaded
def set_feature_flags(flags: AgentFeatureFlags):
    """Install *flags* as the global feature-flag instance.

    Intended for testing and runtime reconfiguration.
    """
    global _global_flags
    _global_flags = flags
    logger.info(f"Updated agent feature flags: {flags.mode.value}")
def reset_feature_flags():
    """Discard the cached flags so the next get_feature_flags() reloads from the environment."""
    global _global_flags
    _global_flags = None