Spaces:

brickfrog
/

ankigen

Running

App Files Files Community

ankigen / tests /unit /test_card_generator.py

brickfrog

Upload folder using huggingface_hub

07fe6c3 verified about 1 month ago

raw

history blame

31.2 kB

	# Tests for ankigen_core/card_generator.py
	import pytest
	from unittest.mock import patch, MagicMock, ANY
	import pandas as pd

	# Assuming Pydantic models, ResponseCache etc. are needed
	from ankigen_core.models import Card, CardFront, CardBack, AnkiCardData
	from ankigen_core.utils import ResponseCache
	from ankigen_core.llm_interface import OpenAIClientManager # Needed for type hints

	# Module to test
	from ankigen_core import card_generator
	from ankigen_core.card_generator import (
	get_dataframe_columns,
	) # Import for use in error returns

	# --- Constants Tests (Optional but good practice) ---


	def test_constants_exist_and_have_expected_type():
	    """Check that the module's model and mode constants are non-empty lists."""
	    models = card_generator.AVAILABLE_MODELS
	    modes = card_generator.GENERATION_MODES

	    # Type checks first, then emptiness checks, for both constants.
	    assert isinstance(models, list)
	    assert isinstance(modes, list)
	    assert len(models) > 0
	    assert len(modes) > 0


	# --- generate_cards_batch Tests ---


	@pytest.fixture
	def mock_openai_client_fixture():  # Distinct name: avoids clashing with the llm_interface tests' fixture
	    """Return a plain MagicMock that stands in for an OpenAI client."""
	    client_double = MagicMock()
	    return client_double


	@pytest.fixture
	def mock_response_cache_fixture():
	    """Return a MagicMock spec'd on ResponseCache whose get() returns None."""
	    cache_double = MagicMock(spec=ResponseCache)
	    # A None lookup result models a cache miss, so callers proceed to the LLM call.
	    cache_double.get.return_value = None
	    return cache_double


	@patch("ankigen_core.card_generator.structured_output_completion")
	async def test_generate_cards_batch_success(
	    mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
	):
	    """Happy path for generate_cards_batch.

	    The mocked structured_output_completion returns two card payloads; the
	    test checks they come back as Card objects and that the single completion
	    call received the client, cache, model and prompts without any cloze
	    instruction.
	    """
	    basic_payload = {
	        "card_type": "basic",
	        "front": {"question": "Q1"},
	        "back": {"answer": "A1", "explanation": "E1", "example": "Ex1"},
	        "metadata": {"difficulty": "beginner"},
	    }
	    cloze_payload = {
	        "card_type": "cloze",
	        "front": {"question": "{{c1::Q2}}"},
	        "back": {"answer": "A2_full", "explanation": "E2", "example": "Ex2"},
	        "metadata": {"difficulty": "intermediate"},
	    }
	    mock_soc.return_value = {"cards": [basic_payload, cloze_payload]}

	    request = {
	        "openai_client": mock_openai_client_fixture,
	        "cache": mock_response_cache_fixture,
	        "model": "gpt-test",
	        "topic": "Test Topic",
	        "num_cards": 2,
	        "system_prompt": "System prompt",
	        "generate_cloze": False,
	    }
	    generated = await card_generator.generate_cards_batch(**request)

	    # Returned cards mirror the mocked payload.
	    assert len(generated) == 2
	    first_card = generated[0]
	    second_card = generated[1]
	    assert isinstance(first_card, Card)
	    assert first_card.card_type == "basic"
	    assert first_card.front.question == "Q1"
	    assert second_card.card_type == "cloze"
	    assert second_card.front.question == "{{c1::Q2}}"
	    assert second_card.metadata["difficulty"] == "intermediate"

	    # Exactly one completion call, made with the expected keyword arguments.
	    mock_soc.assert_called_once()
	    sent_kwargs = mock_soc.call_args.kwargs
	    assert sent_kwargs["openai_client"] == request["openai_client"]
	    assert sent_kwargs["cache"] == request["cache"]
	    assert sent_kwargs["model"] == request["model"]
	    assert sent_kwargs["system_prompt"] == request["system_prompt"]
	    user_prompt = sent_kwargs["user_prompt"]
	    assert request["topic"] in user_prompt
	    assert str(request["num_cards"]) in user_prompt
	    # generate_cloze is False, so the cloze instruction must be absent.
	    assert "generate Cloze deletion cards" not in user_prompt


	@patch("ankigen_core.card_generator.structured_output_completion")
	async def test_generate_cards_batch_cloze_prompt(
	    mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
	):
	    """With generate_cloze=True the user prompt must carry the cloze instructions."""
	    # An empty card list is enough; only the outgoing prompt is inspected.
	    mock_soc.return_value = {"cards": []}

	    await card_generator.generate_cards_batch(
	        openai_client=mock_openai_client_fixture,
	        cache=mock_response_cache_fixture,
	        model="gpt-test",
	        topic="Cloze Topic",
	        num_cards=1,
	        system_prompt="System",
	        generate_cloze=True,
	    )

	    mock_soc.assert_called_once()
	    user_prompt = mock_soc.call_args.kwargs["user_prompt"]
	    assert "generate Cloze deletion cards" in user_prompt
	    # Match the instruction wording rather than the JSON example syntax.
	    assert "Format the question field using Anki's cloze syntax" in user_prompt


	@patch("ankigen_core.card_generator.structured_output_completion")
	async def test_generate_cards_batch_api_error(
	    mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
	):
	    """A ValueError raised by the completion call must propagate to the caller."""
	    error_message = "API Error"
	    # Make the mocked completion helper fail when it is invoked.
	    mock_soc.side_effect = ValueError(error_message)

	    batch_kwargs = dict(
	        openai_client=mock_openai_client_fixture,
	        cache=mock_response_cache_fixture,
	        model="gpt-test",
	        topic="Error Topic",
	        num_cards=1,
	        system_prompt="System",
	        generate_cloze=False,
	    )
	    with pytest.raises(ValueError, match=error_message):
	        await card_generator.generate_cards_batch(**batch_kwargs)


	@patch("ankigen_core.card_generator.structured_output_completion")
	async def test_generate_cards_batch_invalid_response(
	    mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
	):
	    """A response lacking the 'cards' key must surface as a ValueError."""
	    # The response dict has no 'cards' entry.
	    mock_soc.return_value = {"wrong_key": []}

	    batch_kwargs = dict(
	        openai_client=mock_openai_client_fixture,
	        cache=mock_response_cache_fixture,
	        model="gpt-test",
	        topic="Invalid Response Topic",
	        num_cards=1,
	        system_prompt="System",
	        generate_cloze=False,
	    )
	    with pytest.raises(ValueError, match="Failed to generate cards"):
	        await card_generator.generate_cards_batch(**batch_kwargs)


	# --- orchestrate_card_generation Tests ---


	@pytest.fixture
	def mock_client_manager_fixture():
	    """Return ``(manager, client)``: a mocked OpenAIClientManager and its client.

	    ``manager.get_client()`` hands back ``client``; ``initialize_client``
	    returns None, so initialization succeeds unless a test overrides it.
	    """
	    client_double = MagicMock()
	    manager_double = MagicMock(spec=OpenAIClientManager)
	    manager_double.get_client.return_value = client_double
	    manager_double.initialize_client.return_value = None
	    return manager_double, client_double


	def base_orchestrator_args(api_key="valid_key", **kwargs):
	    """Build the default keyword arguments for orchestrate_card_generation.

	    ``api_key`` fills ``api_key_input``; any extra keyword overrides or
	    extends the defaults. A fresh dict is returned on every call.
	    """
	    defaults = dict(
	        api_key_input=api_key,
	        subject="Subject",
	        generation_mode="subject",  # default mode
	        source_text="Source text",
	        url_input="http://example.com",
	        model_name="gpt-test",
	        topic_number=1,
	        cards_per_topic=5,  # the subject-mode test expects this as num_cards
	        preference_prompt="Pref prompt",
	        generate_cloze=False,
	        use_llm_judge=False,
	    )
	    # Caller-supplied values win over the defaults.
	    return {**defaults, **kwargs}


	@patch("ankigen_core.card_generator.structured_output_completion")
	@patch("ankigen_core.card_generator.generate_cards_batch")
	async def test_orchestrate_subject_mode(
	    mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """'subject' mode: one topic lookup, then one batch call for that topic.

	    Checks client initialization, the topic-generation prompt, the arguments
	    forwarded to generate_cards_batch, and the returned dataframe, status
	    HTML and card count.
	    """
	    mgr, llm_client = mock_client_manager_fixture
	    response_cache = mock_response_cache_fixture
	    call_kwargs = base_orchestrator_args(generation_mode="subject")

	    # The topic-listing completion call yields a single topic.
	    mock_soc.return_value = {
	        "topics": [
	            {"name": "Topic 1", "difficulty": "beginner", "description": "Desc 1"}
	        ]
	    }
	    # The per-topic batch generation yields a single card.
	    generated_card = Card(
	        front=CardFront(question="Q1"),
	        back=CardBack(answer="A1", explanation="E1", example="Ex1"),
	    )
	    mock_gcb.return_value = [generated_card]

	    # Keep Gradio notifications out of the test run.
	    with patch("gradio.Info"), patch("gradio.Warning"):
	        outcome = await card_generator.orchestrate_card_generation(
	            client_manager=mgr, cache=response_cache, **call_kwargs
	        )
	    df_result, status, count = outcome

	    mgr.initialize_client.assert_called_once_with(call_kwargs["api_key_input"])
	    mgr.get_client.assert_called_once()

	    # Topic-generation call.
	    mock_soc.assert_called_once()
	    topic_call = mock_soc.call_args.kwargs
	    assert topic_call["openai_client"] == llm_client
	    assert "Generate the top" in topic_call["user_prompt"]
	    assert call_kwargs["subject"] in topic_call["user_prompt"]

	    # Card-generation call for the topic named in the mocked response.
	    mock_gcb.assert_called_once_with(
	        openai_client=llm_client,
	        cache=response_cache,
	        model=call_kwargs["model_name"],
	        topic="Topic 1",
	        num_cards=call_kwargs["cards_per_topic"],
	        system_prompt=ANY,  # built inside the orchestrator
	        generate_cloze=call_kwargs["generate_cloze"],
	    )

	    assert count == 1
	    assert isinstance(df_result, pd.DataFrame)
	    assert len(df_result) == 1
	    assert df_result.iloc[0]["Question"] == "Q1"
	    # The status is an HTML snippet; match on substrings so whitespace
	    # differences in the markup do not break the test.
	    assert "Generation complete!" in status
	    assert "Total cards generated: 1" in status
	    assert "<div" in status


	@patch("ankigen_core.card_generator.judge_cards")
	@patch("ankigen_core.card_generator.structured_output_completion")
	@patch("ankigen_core.card_generator.generate_cards_batch")
	async def test_orchestrate_subject_mode_with_judge(
	    mock_gcb,
	    mock_soc,
	    mock_judge,
	    mock_client_manager_fixture,
	    mock_response_cache_fixture,
	):
	    """With use_llm_judge=True the generated cards are passed to judge_cards."""
	    mgr, llm_client = mock_client_manager_fixture
	    response_cache = mock_response_cache_fixture
	    call_kwargs = base_orchestrator_args(
	        generation_mode="subject", use_llm_judge=True
	    )

	    only_card = Card(
	        front=CardFront(question="Q1"),
	        back=CardBack(answer="A1", explanation="E1", example="Ex1"),
	    )
	    mock_soc.return_value = {
	        "topics": [{"name": "T1", "difficulty": "d", "description": "d"}]
	    }
	    mock_gcb.return_value = [only_card]
	    mock_judge.return_value = [only_card]

	    with patch("gradio.Info"), patch("gradio.Warning"):
	        await card_generator.orchestrate_card_generation(
	            client_manager=mgr, cache=response_cache, **call_kwargs
	        )

	    # The judge is called positionally with client, cache, model and cards.
	    mock_judge.assert_called_once_with(
	        llm_client, response_cache, call_kwargs["model_name"], [only_card]
	    )


	@patch("ankigen_core.card_generator.structured_output_completion")
	@patch("ankigen_core.card_generator.generate_cards_batch")
	async def test_orchestrate_text_mode(
	    mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """'text' mode: the supplied source text must appear in the user prompt."""
	    mgr, _ = mock_client_manager_fixture
	    call_kwargs = base_orchestrator_args(generation_mode="text")
	    mock_soc.return_value = {"cards": []}

	    await card_generator.orchestrate_card_generation(
	        client_manager=mgr, cache=mock_response_cache_fixture, **call_kwargs
	    )

	    mock_soc.assert_called_once()
	    user_prompt = mock_soc.call_args.kwargs["user_prompt"]
	    assert call_kwargs["source_text"] in user_prompt


	@patch("ankigen_core.card_generator.fetch_webpage_text")
	@patch("ankigen_core.card_generator.structured_output_completion")
	async def test_orchestrate_web_mode(
	    mock_soc, mock_fetch, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """'web' mode: the URL is fetched and its text is placed in the user prompt."""
	    mgr, _ = mock_client_manager_fixture
	    call_kwargs = base_orchestrator_args(generation_mode="web")

	    fetched_text = "This is the fetched web page text."
	    mock_fetch.return_value = fetched_text
	    # The completion call succeeds but yields no cards.
	    mock_soc.return_value = {"cards": []}

	    # Keep Gradio notifications out of the test run.
	    with patch("gradio.Info"), patch("gradio.Warning"):
	        await card_generator.orchestrate_card_generation(
	            client_manager=mgr, cache=mock_response_cache_fixture, **call_kwargs
	        )

	    mock_fetch.assert_called_once_with(call_kwargs["url_input"])
	    mock_soc.assert_called_once()
	    assert fetched_text in mock_soc.call_args.kwargs["user_prompt"]


	@patch("ankigen_core.card_generator.fetch_webpage_text")
	@patch("ankigen_core.card_generator.gr.Error")  # gr.Error as referenced by card_generator
	async def test_orchestrate_web_mode_fetch_error(
	    mock_gr_error, mock_fetch, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """'web' mode: a fetch failure is reported via gr.Error and yields empty results."""
	    mgr, _ = mock_client_manager_fixture
	    call_kwargs = base_orchestrator_args(generation_mode="web")
	    error_msg = "Connection timed out"
	    mock_fetch.side_effect = ConnectionError(error_msg)

	    with patch("gradio.Info"), patch("gradio.Warning"):
	        outcome = await card_generator.orchestrate_card_generation(
	            client_manager=mgr, cache=mock_response_cache_fixture, **call_kwargs
	        )
	    df, status_msg, count = outcome

	    mock_gr_error.assert_called_once_with(
	        f"Failed to get content from URL: {error_msg}"
	    )
	    # The failure result is an empty frame with the standard columns.
	    assert isinstance(df, pd.DataFrame)
	    assert df.empty
	    assert df.columns.tolist() == get_dataframe_columns()
	    assert status_msg == "Failed to get content from URL."
	    assert count == 0


	@patch("ankigen_core.card_generator.structured_output_completion")
	@patch("ankigen_core.card_generator.generate_cards_batch")
	async def test_orchestrate_generation_batch_error(
	    mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """A failing generate_cards_batch triggers a gr.Warning for that topic."""
	    mgr, _ = mock_client_manager_fixture
	    call_kwargs = base_orchestrator_args(generation_mode="subject")
	    error_msg = "LLM generation failed"

	    # Topic discovery succeeds so the orchestrator reaches card generation.
	    mock_soc.return_value = {
	        "topics": [
	            {"name": "Topic 1", "difficulty": "beginner", "description": "Desc 1"}
	        ]
	    }
	    # Card generation for that topic fails.
	    mock_gcb.side_effect = ValueError(error_msg)

	    with patch("gradio.Info"), patch("gradio.Warning") as mock_gr_warning:
	        await card_generator.orchestrate_card_generation(
	            client_manager=mgr, cache=mock_response_cache_fixture, **call_kwargs
	        )

	    # The batch error is reported through gr.Warning.
	    mock_gr_warning.assert_called_with(
	        "Failed to generate cards for 'Topic 1'. Skipping."
	    )
	    mock_soc.assert_called_once()  # topic generation was attempted
	    mock_gcb.assert_called_once()  # card generation was attempted


	@patch("ankigen_core.card_generator.gr.Error")
	async def test_orchestrate_path_mode_raises_not_implemented(
	    mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """'path' mode is reported as unsupported via gr.Error with empty results."""
	    mgr, _ = mock_client_manager_fixture
	    call_kwargs = base_orchestrator_args(generation_mode="path")

	    outcome = await card_generator.orchestrate_card_generation(
	        client_manager=mgr, cache=mock_response_cache_fixture, **call_kwargs
	    )
	    df, status_msg, count = outcome

	    mock_gr_error.assert_called_once_with("Unsupported generation mode selected: path")
	    assert isinstance(df, pd.DataFrame)
	    assert df.empty
	    assert df.columns.tolist() == get_dataframe_columns()
	    assert status_msg == "Unsupported mode."
	    assert count == 0


	@patch("ankigen_core.card_generator.gr.Error")
	async def test_orchestrate_invalid_mode_raises_value_error(
	    mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """An unknown generation mode is reported via gr.Error with empty results."""
	    mgr, _ = mock_client_manager_fixture
	    call_kwargs = base_orchestrator_args(generation_mode="invalid_mode")

	    outcome = await card_generator.orchestrate_card_generation(
	        client_manager=mgr, cache=mock_response_cache_fixture, **call_kwargs
	    )
	    df, status_msg, count = outcome

	    mock_gr_error.assert_called_once_with(
	        "Unsupported generation mode selected: invalid_mode"
	    )
	    assert isinstance(df, pd.DataFrame)
	    assert df.empty
	    assert df.columns.tolist() == get_dataframe_columns()
	    assert status_msg == "Unsupported mode."
	    assert count == 0


	@patch("ankigen_core.card_generator.gr.Error")
	async def test_orchestrate_no_api_key_raises_error(
	    mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """An empty API key is reported via gr.Error with empty results."""
	    mgr, _ = mock_client_manager_fixture
	    call_kwargs = base_orchestrator_args(api_key="")  # blank key

	    outcome = await card_generator.orchestrate_card_generation(
	        client_manager=mgr, cache=mock_response_cache_fixture, **call_kwargs
	    )
	    df, status_msg, count = outcome

	    mock_gr_error.assert_called_once_with("OpenAI API key is required")
	    assert isinstance(df, pd.DataFrame)
	    assert df.empty
	    assert df.columns.tolist() == get_dataframe_columns()
	    assert status_msg == "API key is required."
	    assert count == 0


	@patch("ankigen_core.card_generator.gr.Error")
	async def test_orchestrate_client_init_error_raises_error(
	    mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
	):
	    """A ValueError from initialize_client is reported via gr.Error and the status."""
	    mgr, _ = mock_client_manager_fixture
	    call_kwargs = base_orchestrator_args()
	    error_msg = "Invalid API Key"
	    mgr.initialize_client.side_effect = ValueError(error_msg)
	    # The same text is expected in the gr.Error call and the returned status.
	    expected_text = f"OpenAI Client Error: {error_msg}"

	    outcome = await card_generator.orchestrate_card_generation(
	        client_manager=mgr, cache=mock_response_cache_fixture, **call_kwargs
	    )
	    df, status_msg, count = outcome

	    mock_gr_error.assert_called_once_with(expected_text)
	    assert isinstance(df, pd.DataFrame)
	    assert df.empty
	    assert df.columns.tolist() == get_dataframe_columns()
	    assert status_msg == expected_text
	    assert count == 0


	# --- Tests for process_anki_card_data ---


	@pytest.fixture
	def sample_anki_card_data_list() -> list[AnkiCardData]:
	    """Return three AnkiCardData samples covering tag and source_url variants.

	    1. Basic note with two tags and a source URL.
	    2. Cloze note with no tags and no source URL.
	    3. Basic note with no tags but with a source URL.
	    """
	    tagged_with_source = AnkiCardData(
	        front="Question 1",
	        back="Answer 1",
	        tags=["tagA", "tagB"],
	        source_url="http://example.com/source1",
	        note_type="Basic",
	    )
	    cloze_without_source = AnkiCardData(
	        front="Question 2",
	        back="Answer 2",
	        tags=[],  # an empty list, not None
	        source_url=None,  # None is passed deliberately: no source for this card
	        note_type="Cloze",
	    )
	    untagged_with_source = AnkiCardData(
	        front="Question 3",
	        back="Answer 3",
	        tags=[],
	        source_url="http://example.com/source3",
	        note_type="Basic",
	    )
	    return [tagged_with_source, cloze_without_source, untagged_with_source]


	def test_process_anki_card_data_basic_conversion(sample_anki_card_data_list):
	    """process_anki_card_data turns each AnkiCardData into a flat dict."""
	    processed = card_generator.process_anki_card_data(sample_anki_card_data_list)
	    assert len(processed) == 3
	    first, second, third = processed[0], processed[1], processed[2]

	    # Card 1: tags joined with spaces, source link appended to the back.
	    assert isinstance(first, dict)
	    assert first["front"] == "Question 1"
	    expected_first_back = "Answer 1\\n\\n<hr><small>Source: <a href='http://example.com/source1'>http://example.com/source1</a></small>"
	    assert first["back"] == expected_first_back
	    assert first["tags"] == "tagA tagB"
	    assert first["note_type"] == "Basic"

	    # Card 2: no source_url, so the back is untouched; no tags -> empty string.
	    assert second["front"] == "Question 2"
	    assert second["back"] == "Answer 2"
	    assert second["tags"] == ""
	    assert second["note_type"] == "Cloze"

	    # Card 3: source link present, empty tag list -> empty string.
	    assert third["front"] == "Question 3"
	    assert "<hr><small>Source" in third["back"]
	    assert "http://example.com/source3" in third["back"]
	    assert third["tags"] == ""
	    assert third["note_type"] == "Basic"  # the fixture sets "Basic" explicitly


	def test_process_anki_card_data_empty_list():
	    """An empty input list yields an empty output list."""
	    result = card_generator.process_anki_card_data([])
	    assert result == []


	def test_process_anki_card_data_source_url_formatting(sample_anki_card_data_list):
	    """The source link is appended to the back only when source_url is set."""
	    with_source, without_source = sample_anki_card_data_list[:2]

	    # First sample carries a source_url: the back must end with the link block.
	    processed = card_generator.process_anki_card_data([with_source])
	    expected_back_html = "\\n\\n<hr><small>Source: <a href='http://example.com/source1'>http://example.com/source1</a></small>"
	    assert processed[0]["back"].endswith(expected_back_html)

	    # Second sample has no source_url: no link block at all.
	    processed_no_source = card_generator.process_anki_card_data([without_source])
	    assert "<hr><small>Source:" not in processed_no_source[0]["back"]


	def test_process_anki_card_data_tags_formatting(sample_anki_card_data_list):
	    """Tags are joined with single spaces; an empty tag list becomes ''."""
	    processed = card_generator.process_anki_card_data(sample_anki_card_data_list)
	    tag_strings = [processed[index]["tags"] for index in range(3)]
	    assert tag_strings[0] == "tagA tagB"
	    assert tag_strings[1] == ""  # fixture card 2 has tags=[]
	    assert tag_strings[2] == ""  # fixture card 3 has tags=[]


	def test_process_anki_card_data_note_type_handling(sample_anki_card_data_list):
	    """note_type is carried through, and is "Basic" when not given explicitly."""
	    processed = card_generator.process_anki_card_data(sample_anki_card_data_list)
	    assert processed[0]["note_type"] == "Basic"
	    assert processed[1]["note_type"] == "Cloze"
	    assert processed[2]["note_type"] == "Basic"  # set explicitly in the fixture

	    # Build a card without note_type so the model's own default applies.
	    # NOTE(review): AnkiCardData is expected to declare
	    # `note_type: Optional[str] = "Basic"`; confirm against the model.
	    # If so, any hasattr fallback inside process_anki_card_data is
	    # redundant but harmless.
	    defaulted_card = AnkiCardData(front="Q", back="A")
	    processed_single = card_generator.process_anki_card_data([defaulted_card])
	    assert processed_single[0]["note_type"] == "Basic"


	# --- Tests for deduplicate_cards ---


	def test_deduplicate_cards_removes_duplicates():
	    """Cards repeating an earlier 'front' are dropped; the first one is kept."""
	    input_pairs = [
	        ("Q1", "A1"),
	        ("Q2", "A2"),
	        ("Q1", "A1_variant"),  # repeats front Q1
	        ("Q3", "A3"),
	        ("Q2", "A2_variant"),  # repeats front Q2
	    ]
	    kept_pairs = [("Q1", "A1"), ("Q2", "A2"), ("Q3", "A3")]

	    cards_in = [{"front": front, "back": back} for front, back in input_pairs]
	    cards_expected = [{"front": front, "back": back} for front, back in kept_pairs]

	    assert card_generator.deduplicate_cards(cards_in) == cards_expected


	def test_deduplicate_cards_preserves_order():
	    """Unique cards come back in the order they were first seen."""
	    input_pairs = [
	        ("Q_alpha", "A_alpha"),
	        ("Q_beta", "A_beta"),
	        ("Q_gamma", "A_gamma"),
	        ("Q_alpha", "A_alpha_redux"),  # repeats front Q_alpha
	    ]
	    cards_in = [{"front": front, "back": back} for front, back in input_pairs]
	    # Everything except the trailing duplicate, in the same order.
	    cards_expected = [
	        {"front": front, "back": back} for front, back in input_pairs[:3]
	    ]

	    assert card_generator.deduplicate_cards(cards_in) == cards_expected


	def test_deduplicate_cards_empty_list():
	    """Deduplicating no cards returns an empty list."""
	    deduplicated = card_generator.deduplicate_cards([])
	    assert deduplicated == []


	def test_deduplicate_cards_all_unique():
	    """A list with no repeated fronts is returned unchanged."""
	    all_unique_cards = [
	        {"front": f"Unique{number}", "back": f"Ans{number}"}
	        for number in (1, 2, 3)
	    ]
	    deduplicated = card_generator.deduplicate_cards(all_unique_cards)
	    assert deduplicated == all_unique_cards


	def test_deduplicate_cards_missing_front_key():
	    """A card without a 'front' key is dropped and one warning is logged."""
	    keyless_card = {"foo": "bar", "back": "A2"}  # no 'front' entry
	    cards_in = [
	        {"front": "Q1", "back": "A1"},
	        keyless_card,
	        {"front": "Q3", "back": "A3"},
	    ]
	    cards_expected = [
	        {"front": "Q1", "back": "A1"},
	        {"front": "Q3", "back": "A3"},
	    ]

	    # Intercept card_generator's logger to inspect the warning it emits.
	    with patch.object(card_generator.logger, "warning") as mock_log_warning:
	        result = card_generator.deduplicate_cards(cards_in)

	    assert result == cards_expected
	    mock_log_warning.assert_called_once_with(
	        "Card skipped during deduplication due to missing 'front' key: {'foo': 'bar', 'back': 'A2'}"
	    )


	def test_deduplicate_cards_front_is_none():
	    """A card whose 'front' is None is dropped and one warning is logged."""
	    none_front_card = {"front": None, "back": "A2"}
	    cards_in = [
	        {"front": "Q1", "back": "A1"},
	        none_front_card,
	        {"front": "Q3", "back": "A3"},
	    ]
	    cards_expected = [
	        {"front": "Q1", "back": "A1"},
	        {"front": "Q3", "back": "A3"},
	    ]

	    with patch.object(card_generator.logger, "warning") as mock_log_warning:
	        result = card_generator.deduplicate_cards(cards_in)

	    assert result == cards_expected
	    # The expected message uses the "missing 'front' key" wording for a
	    # None front too, i.e. the same text as for an absent key.
	    mock_log_warning.assert_called_once_with(
	        "Card skipped during deduplication due to missing 'front' key: {'front': None, 'back': 'A2'}"
	    )


	# --- Tests for generate_cards_from_crawled_content ---


	@patch("ankigen_core.card_generator.deduplicate_cards")
	@patch("ankigen_core.card_generator.process_anki_card_data")
	def test_generate_cards_from_crawled_content_orchestration(
	    mock_process_anki_card_data,
	    mock_deduplicate_cards,
	    sample_anki_card_data_list,  # shared sample fixture
	):
	    """The function processes the cards, deduplicates that output, and returns it."""
	    processed_stub = [{"front": "Processed Q1", "back": "Processed A1"}]
	    deduplicated_stub = [{"front": "Unique Q1", "back": "Unique A1"}]
	    mock_process_anki_card_data.return_value = processed_stub
	    mock_deduplicate_cards.return_value = deduplicated_stub

	    result = card_generator.generate_cards_from_crawled_content(
	        sample_anki_card_data_list
	    )

	    # Input goes to the processing step; its output goes to deduplication.
	    mock_process_anki_card_data.assert_called_once_with(sample_anki_card_data_list)
	    mock_deduplicate_cards.assert_called_once_with(processed_stub)
	    assert result == deduplicated_stub


	def test_generate_cards_from_crawled_content_empty_input():
	    """An empty input still flows through both steps and returns []."""
	    process_target = "ankigen_core.card_generator.process_anki_card_data"
	    dedup_target = "ankigen_core.card_generator.deduplicate_cards"

	    with patch(process_target, return_value=[]) as mock_process:
	        with patch(dedup_target, return_value=[]) as mock_dedup:
	            result = card_generator.generate_cards_from_crawled_content([])

	    mock_process.assert_called_once_with([])
	    mock_dedup.assert_called_once_with([])
	    assert result == []


	# Example of an integration-style test (optional, as unit tests for sub-components are thorough)
	# This would not mock the internal calls.
	def test_generate_cards_from_crawled_content_integration(sample_anki_card_data_list):
	    """Run generate_cards_from_crawled_content without mocking its sub-steps.

	    Relies on process_anki_card_data and deduplicate_cards behaving as their
	    own unit tests describe. Two of the three inputs share a front, so two
	    cards should survive and the first "Q1" variant should be the one kept.
	    """
	    original_q1 = AnkiCardData(
	        front="Q1", back="A1", tags=["test"], note_type="Basic"
	    )
	    repeated_q1 = AnkiCardData(
	        front="Q1", back="A1_variant", tags=["test"], note_type="Basic"
	    )  # same front as original_q1
	    distinct_q2 = AnkiCardData(
	        front="Q2", back="A2", tags=["test"], note_type="Basic"
	    )

	    result = card_generator.generate_cards_from_crawled_content(
	        [original_q1, repeated_q1, distinct_q2]
	    )

	    # The duplicate is removed, leaving one card per distinct front.
	    assert len(result) == 2
	    result_fronts = [item["front"] for item in result]
	    assert "Q1" in result_fronts
	    assert "Q2" in result_fronts

	    # The surviving Q1 card must be the first variant (back "A1").
	    # Substring checks are used because the exact back depends on
	    # process_anki_card_data's output format.
	    surviving_q1 = next(item for item in result if item["front"] == "Q1")
	    assert "A1" in surviving_q1["back"]
	    assert "A1_variant" not in surviving_q1["back"]