import os
from dataclasses import dataclass
from unittest.mock import Mock, patch

import pytest

from ankigen_core.agents.feature_flags import (
    AgentMode,
    AgentFeatureFlags,
    _env_bool,
    get_feature_flags,
    set_feature_flags,
    reset_feature_flags,
)


def test_agent_mode_values():
    """Test AgentMode enum values"""
    assert AgentMode.LEGACY.value == "legacy"
    assert AgentMode.AGENT_ONLY.value == "agent_only"
    assert AgentMode.HYBRID.value == "hybrid"
    assert AgentMode.A_B_TEST.value == "a_b_test"


def test_agent_feature_flags_defaults():
    """Test AgentFeatureFlags with default values"""
    flags = AgentFeatureFlags()

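    # Mode and generation agents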
    assert flags.mode == AgentMode.LEGACY
    assert flags.enable_subject_expert_agent is False
    assert flags.enable_pedagogical_agent is False
    assert flags.enable_content_structuring_agent is False
    assert flags.enable_generation_coordinator is False

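    # Judge agents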
    assert flags.enable_content_accuracy_judge is False
    assert flags.enable_pedagogical_judge is False
    assert flags.enable_clarity_judge is False
    assert flags.enable_technical_judge is False
    assert flags.enable_completeness_judge is False
    assert flags.enable_judge_coordinator is False

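    # Revision and enhancement agents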
    assert flags.enable_revision_agent is False
    assert flags.enable_enhancement_agent is False

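    # Workflow features (only tracing is enabled by default)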
    assert flags.enable_multi_agent_generation is False
    assert flags.enable_parallel_judging is False
    assert flags.enable_agent_handoffs is False
    assert flags.enable_agent_tracing is True

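    # A/B testing defaults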
    assert flags.ab_test_ratio == 0.5
    assert flags.ab_test_user_hash is None

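    # Performance settings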
    assert flags.agent_timeout == 30.0
    assert flags.max_agent_retries == 3
    assert flags.enable_agent_caching is True

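    # Quality thresholds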
    assert flags.min_judge_consensus == 0.6
    assert flags.max_revision_iterations == 3


def test_agent_feature_flags_custom_values():
    """Test AgentFeatureFlags with custom values"""
    flags = AgentFeatureFlags(
        mode=AgentMode.AGENT_ONLY,
        enable_subject_expert_agent=True,
        enable_pedagogical_agent=True,
        enable_content_accuracy_judge=True,
        enable_multi_agent_generation=True,
        ab_test_ratio=0.7,
        agent_timeout=60.0,
        max_agent_retries=5,
        min_judge_consensus=0.8
    )

    assert flags.mode == AgentMode.AGENT_ONLY
    assert flags.enable_subject_expert_agent is True
    assert flags.enable_pedagogical_agent is True
    assert flags.enable_content_accuracy_judge is True
    assert flags.enable_multi_agent_generation is True
    assert flags.ab_test_ratio == 0.7
    assert flags.agent_timeout == 60.0
    assert flags.max_agent_retries == 5
    assert flags.min_judge_consensus == 0.8


@patch.dict(os.environ, {
    'ANKIGEN_AGENT_MODE': 'agent_only',
    'ANKIGEN_ENABLE_SUBJECT_EXPERT': 'true',
    'ANKIGEN_ENABLE_PEDAGOGICAL_AGENT': '1',
    'ANKIGEN_ENABLE_CONTENT_JUDGE': 'yes',
    'ANKIGEN_ENABLE_MULTI_AGENT_GEN': 'on',
    'ANKIGEN_AB_TEST_RATIO': '0.3',
    'ANKIGEN_AGENT_TIMEOUT': '45.0',
    'ANKIGEN_MAX_AGENT_RETRIES': '5',
    'ANKIGEN_MIN_JUDGE_CONSENSUS': '0.7'
}, clear=False)
def test_agent_feature_flags_from_env():
    """Test loading AgentFeatureFlags from environment variables"""
    flags = AgentFeatureFlags.from_env()

    assert flags.mode == AgentMode.AGENT_ONLY
    assert flags.enable_subject_expert_agent is True
    assert flags.enable_pedagogical_agent is True
    assert flags.enable_content_accuracy_judge is True
    assert flags.enable_multi_agent_generation is True
    assert flags.ab_test_ratio == 0.3
    assert flags.agent_timeout == 45.0
    assert flags.max_agent_retries == 5
    assert flags.min_judge_consensus == 0.7


@patch.dict(os.environ, {}, clear=True)
def test_agent_feature_flags_from_env_defaults():
    """Test loading AgentFeatureFlags from environment with defaults"""
    flags = AgentFeatureFlags.from_env()

    assert flags.mode == AgentMode.LEGACY
    assert flags.enable_subject_expert_agent is False
    assert flags.ab_test_ratio == 0.5
    assert flags.agent_timeout == 30.0
    assert flags.max_agent_retries == 3


def test_should_use_agents_legacy_mode():
    """Test should_use_agents() in LEGACY mode"""
    flags = AgentFeatureFlags(mode=AgentMode.LEGACY)
    assert flags.should_use_agents() is False


def test_should_use_agents_agent_only_mode():
    """Test should_use_agents() in AGENT_ONLY mode"""
    flags = AgentFeatureFlags(mode=AgentMode.AGENT_ONLY)
    assert flags.should_use_agents() is True


def test_should_use_agents_hybrid_mode_no_agents():
    """Test should_use_agents() in HYBRID mode with no agents enabled"""
    flags = AgentFeatureFlags(mode=AgentMode.HYBRID)
    assert flags.should_use_agents() is False


def test_should_use_agents_hybrid_mode_with_generation_agent():
    """Test should_use_agents() in HYBRID mode with generation agent enabled"""
    flags = AgentFeatureFlags(
        mode=AgentMode.HYBRID,
        enable_subject_expert_agent=True
    )
    assert flags.should_use_agents() is True


def test_should_use_agents_hybrid_mode_with_judge_agent():
    """Test should_use_agents() in HYBRID mode with judge agent enabled"""
    flags = AgentFeatureFlags(
        mode=AgentMode.HYBRID,
        enable_content_accuracy_judge=True
    )
    assert flags.should_use_agents() is True


def test_should_use_agents_ab_test_mode_with_hash():
    """Test should_use_agents() in A_B_TEST mode with user hash"""
    flags = AgentFeatureFlags(
        mode=AgentMode.A_B_TEST,
        ab_test_ratio=0.5,
        ab_test_user_hash="test_user_1"
    )

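    # Mirror the MD5-based bucketing the implementation is expected to apply:
    # a user lands in the agent group when (hash % 100) < ab_test_ratio * 100.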
    import hashlib
    hash_value = int(hashlib.md5("test_user_1".encode()).hexdigest(), 16)
    expected_result = (hash_value % 100) < 50

    assert flags.should_use_agents() == expected_result


def test_should_use_agents_ab_test_mode_without_hash():
    """Test should_use_agents() in A_B_TEST mode without user hash (random)"""
    flags = AgentFeatureFlags(
        mode=AgentMode.A_B_TEST,
        ab_test_ratio=0.5
    )

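    # Without a user hash the A/B decision falls back to random.random(),
    # so patch it to make both outcomes deterministic.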
    with patch('random.random') as mock_random:
        mock_random.return_value = 0.3
        assert flags.should_use_agents() is True

        mock_random.return_value = 0.7
        assert flags.should_use_agents() is False


def test_get_enabled_agents():
    """Test get_enabled_agents() method"""
    flags = AgentFeatureFlags(
        enable_subject_expert_agent=True,
        enable_pedagogical_agent=False,
        enable_content_accuracy_judge=True,
        enable_revision_agent=True
    )

    enabled = flags.get_enabled_agents()

    assert enabled["subject_expert"] is True
    assert enabled["pedagogical"] is False
    assert enabled["content_accuracy_judge"] is True
    assert enabled["revision_agent"] is True
    assert enabled["enhancement_agent"] is False


def test_to_dict():
    """Test to_dict() method"""
    flags = AgentFeatureFlags(
        mode=AgentMode.HYBRID,
        enable_subject_expert_agent=True,
        enable_multi_agent_generation=True,
        enable_agent_tracing=False,
        ab_test_ratio=0.3,
        agent_timeout=45.0,
        max_agent_retries=5,
        min_judge_consensus=0.7,
        max_revision_iterations=2
    )

    result = flags.to_dict()

    assert result["mode"] == "hybrid"
    assert result["enabled_agents"]["subject_expert"] is True
    assert result["workflow_features"]["multi_agent_generation"] is True
    assert result["workflow_features"]["agent_tracing"] is False
    assert result["ab_test_ratio"] == 0.3
    assert result["performance_config"]["timeout"] == 45.0
    assert result["performance_config"]["max_retries"] == 5
    assert result["quality_thresholds"]["min_judge_consensus"] == 0.7
    assert result["quality_thresholds"]["max_revision_iterations"] == 2


def test_env_bool_true_values():
    """Test _env_bool() with various true values"""
    true_values = ["true", "True", "TRUE", "1", "yes", "Yes", "YES", "on", "On", "ON", "enabled", "ENABLED"]

    for value in true_values:
        with patch.dict(os.environ, {'TEST_VAR': value}):
            assert _env_bool('TEST_VAR') is True


def test_env_bool_false_values():
    """Test _env_bool() with various false values"""
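    # Anything outside the recognized truthy set (including arbitrary strings
    # such as "random") should be treated as False.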
    false_values = ["false", "False", "FALSE", "0", "no", "No", "NO", "off", "Off", "OFF", "disabled", "DISABLED", "random"]

    for value in false_values:
        with patch.dict(os.environ, {'TEST_VAR': value}):
            assert _env_bool('TEST_VAR') is False


def test_env_bool_default_true():
    """Test _env_bool() with default=True"""
    with patch.dict(os.environ, {}, clear=True):
        assert _env_bool('NON_EXISTENT_VAR', default=True) is True


def test_env_bool_default_false():
    """Test _env_bool() with default=False"""
    with patch.dict(os.environ, {}, clear=True):
        assert _env_bool('NON_EXISTENT_VAR', default=False) is False


def test_get_feature_flags_first_call():
    """Test get_feature_flags() on first call"""
    reset_feature_flags()

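    # With the cached flags cleared, the first call should load from the environment.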
    with patch('ankigen_core.agents.feature_flags.AgentFeatureFlags.from_env') as mock_from_env:
        mock_flags = AgentFeatureFlags(mode=AgentMode.AGENT_ONLY)
        mock_from_env.return_value = mock_flags

        flags = get_feature_flags()

        assert flags == mock_flags
        mock_from_env.assert_called_once()


def test_get_feature_flags_subsequent_calls():
    """Test get_feature_flags() on subsequent calls (should use cached value)"""
    test_flags = AgentFeatureFlags(mode=AgentMode.HYBRID)
    set_feature_flags(test_flags)

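    # Explicitly set flags should be served from the cache without re-reading the environment.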
    with patch('ankigen_core.agents.feature_flags.AgentFeatureFlags.from_env') as mock_from_env:
        flags1 = get_feature_flags()
        flags2 = get_feature_flags()

        assert flags1 == test_flags
        assert flags2 == test_flags

        mock_from_env.assert_not_called()


def test_set_feature_flags():
    """Test set_feature_flags() function"""
    test_flags = AgentFeatureFlags(
        mode=AgentMode.AGENT_ONLY,
        enable_subject_expert_agent=True
    )

    set_feature_flags(test_flags)

    retrieved_flags = get_feature_flags()
    assert retrieved_flags == test_flags
    assert retrieved_flags.mode == AgentMode.AGENT_ONLY
    assert retrieved_flags.enable_subject_expert_agent is True


def test_reset_feature_flags():
    """Test reset_feature_flags() function"""
    test_flags = AgentFeatureFlags(mode=AgentMode.AGENT_ONLY)
    set_feature_flags(test_flags)

    assert get_feature_flags() == test_flags

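    # After a reset, the next get_feature_flags() call should reload from the environment.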
    reset_feature_flags()

    with patch('ankigen_core.agents.feature_flags.AgentFeatureFlags.from_env') as mock_from_env:
        mock_flags = AgentFeatureFlags(mode=AgentMode.HYBRID)
        mock_from_env.return_value = mock_flags

        flags = get_feature_flags()

        assert flags == mock_flags
        mock_from_env.assert_called_once()


def test_feature_flags_production_config():
    """Test typical production configuration"""
    flags = AgentFeatureFlags(
        mode=AgentMode.HYBRID,
        enable_subject_expert_agent=True,
        enable_pedagogical_agent=True,
        enable_content_accuracy_judge=True,
        enable_judge_coordinator=True,
        enable_multi_agent_generation=True,
        enable_parallel_judging=True,
        agent_timeout=60.0,
        max_agent_retries=3,
        min_judge_consensus=0.7
    )

    assert flags.should_use_agents() is True
    enabled = flags.get_enabled_agents()
    assert enabled["subject_expert"] is True
    assert enabled["pedagogical"] is True
    assert enabled["content_accuracy_judge"] is True


def test_feature_flags_development_config():
    """Test typical development configuration"""
    flags = AgentFeatureFlags(
        mode=AgentMode.AGENT_ONLY,
        enable_subject_expert_agent=True,
        enable_pedagogical_agent=True,
        enable_content_accuracy_judge=True,
        enable_pedagogical_judge=True,
        enable_revision_agent=True,
        enable_multi_agent_generation=True,
        enable_agent_tracing=True,
        agent_timeout=30.0,
        max_agent_retries=2
    )

    assert flags.should_use_agents() is True
    config_dict = flags.to_dict()
    assert config_dict["mode"] == "agent_only"
    assert config_dict["workflow_features"]["agent_tracing"] is True


def test_feature_flags_ab_test_consistency():
    """Test A/B test consistency with same user hash"""
    flags = AgentFeatureFlags(
        mode=AgentMode.A_B_TEST,
        ab_test_ratio=0.5,
        ab_test_user_hash="consistent_user"
    )

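    # The same user hash must always land in the same A/B bucket.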
    result1 = flags.should_use_agents()
    result2 = flags.should_use_agents()
    result3 = flags.should_use_agents()

    assert result1 == result2 == result3