# Tests for ankigen_core/card_generator.py
# NOTE(review): the async tests below carry no @pytest.mark.asyncio marker —
# presumably pytest-asyncio runs in "auto" mode; confirm in pytest config.
import pytest
from unittest.mock import patch, MagicMock, ANY

import pandas as pd

# Assuming Pydantic models, ResponseCache etc. are needed
from ankigen_core.models import Card, CardFront, CardBack, AnkiCardData
from ankigen_core.utils import ResponseCache
from ankigen_core.llm_interface import OpenAIClientManager  # Needed for type hints

# Module to test
from ankigen_core import card_generator
from ankigen_core.card_generator import (
    get_dataframe_columns,
)  # Import for use in error returns


# --- Constants Tests (Optional but good practice) ---


def test_constants_exist_and_have_expected_type():
    """Test that constants exist and are lists."""
    assert isinstance(card_generator.AVAILABLE_MODELS, list)
    assert isinstance(card_generator.GENERATION_MODES, list)
    assert len(card_generator.AVAILABLE_MODELS) > 0
    assert len(card_generator.GENERATION_MODES) > 0


# --- generate_cards_batch Tests ---


@pytest.fixture
def mock_openai_client_fixture():
    # Renamed to avoid conflict with llm_interface tests fixture
    """Provides a MagicMock OpenAI client."""
    return MagicMock()


@pytest.fixture
def mock_response_cache_fixture():
    """Provides a MagicMock ResponseCache."""
    cache = MagicMock(spec=ResponseCache)
    cache.get.return_value = None  # Default to cache miss
    return cache


@patch("ankigen_core.card_generator.structured_output_completion")
async def test_generate_cards_batch_success(
    mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
):
    """Test successful card generation using generate_cards_batch."""
    mock_openai_client = mock_openai_client_fixture
    mock_response_cache = mock_response_cache_fixture
    model = "gpt-test"
    topic = "Test Topic"
    num_cards = 2
    system_prompt = "System prompt"
    generate_cloze = False

    # Mock the response from structured_output_completion
    mock_soc.return_value = {
        "cards": [
            {
                "card_type": "basic",
                "front": {"question": "Q1"},
                "back": {"answer": "A1", "explanation": "E1", "example": "Ex1"},
                "metadata": {"difficulty": "beginner"},
            },
            {
                "card_type": "cloze",
                "front": {"question": "{{c1::Q2}}"},
                "back": {"answer": "A2_full", "explanation": "E2", "example": "Ex2"},
                "metadata": {"difficulty": "intermediate"},
            },
        ]
    }

    result_cards = await card_generator.generate_cards_batch(
        openai_client=mock_openai_client,
        cache=mock_response_cache,
        model=model,
        topic=topic,
        num_cards=num_cards,
        system_prompt=system_prompt,
        generate_cloze=generate_cloze,
    )

    assert len(result_cards) == 2
    assert isinstance(result_cards[0], Card)
    assert result_cards[0].card_type == "basic"
    assert result_cards[0].front.question == "Q1"
    assert result_cards[1].card_type == "cloze"
    assert result_cards[1].front.question == "{{c1::Q2}}"
    assert result_cards[1].metadata["difficulty"] == "intermediate"

    mock_soc.assert_called_once()
    call_args = mock_soc.call_args[1]  # Get keyword args
    assert call_args["openai_client"] == mock_openai_client
    assert call_args["cache"] == mock_response_cache
    assert call_args["model"] == model
    assert call_args["system_prompt"] == system_prompt
    assert topic in call_args["user_prompt"]
    assert str(num_cards) in call_args["user_prompt"]
    # Check cloze instruction is NOT present
    assert "generate Cloze deletion cards" not in call_args["user_prompt"]


@patch("ankigen_core.card_generator.structured_output_completion")
async def test_generate_cards_batch_cloze_prompt(
    mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
):
    """Test generate_cards_batch includes cloze instructions when requested."""
    mock_openai_client = mock_openai_client_fixture
    mock_response_cache = mock_response_cache_fixture
    mock_soc.return_value = {"cards": []}  # Return empty for simplicity

    await card_generator.generate_cards_batch(
        openai_client=mock_openai_client,
        cache=mock_response_cache,
        model="gpt-test",
        topic="Cloze Topic",
        num_cards=1,
        system_prompt="System",
        generate_cloze=True,
    )

    mock_soc.assert_called_once()
    call_args = mock_soc.call_args[1]
    # Check that specific cloze instructions are present
    assert "generate Cloze deletion cards" in call_args["user_prompt"]
    # Corrected check: Look for instruction text, not the JSON example syntax
    assert (
        "Format the question field using Anki's cloze syntax"
        in call_args["user_prompt"]
    )


@patch("ankigen_core.card_generator.structured_output_completion")
async def test_generate_cards_batch_api_error(
    mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
):
    """Test generate_cards_batch handles API errors by re-raising."""
    mock_openai_client = mock_openai_client_fixture
    mock_response_cache = mock_response_cache_fixture
    error_message = "API Error"
    mock_soc.side_effect = ValueError(error_message)  # Simulate error from SOC

    with pytest.raises(ValueError, match=error_message):
        await card_generator.generate_cards_batch(
            openai_client=mock_openai_client,
            cache=mock_response_cache,
            model="gpt-test",
            topic="Error Topic",
            num_cards=1,
            system_prompt="System",
            generate_cloze=False,
        )


@patch("ankigen_core.card_generator.structured_output_completion")
async def test_generate_cards_batch_invalid_response(
    mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
):
    """Test generate_cards_batch handles invalid JSON or missing keys."""
    mock_openai_client = mock_openai_client_fixture
    mock_response_cache = mock_response_cache_fixture
    mock_soc.return_value = {"wrong_key": []}  # Missing 'cards' key

    with pytest.raises(ValueError, match="Failed to generate cards"):
        await card_generator.generate_cards_batch(
            openai_client=mock_openai_client,
            cache=mock_response_cache,
            model="gpt-test",
            topic="Invalid Response Topic",
            num_cards=1,
            system_prompt="System",
            generate_cloze=False,
        )


# --- orchestrate_card_generation Tests ---


@pytest.fixture
def mock_client_manager_fixture():
    """Provides a MagicMock OpenAIClientManager (and the client it hands out)."""
    manager = MagicMock(spec=OpenAIClientManager)
    mock_client = MagicMock()  # Mock the client instance it returns
    manager.get_client.return_value = mock_client
    # Simulate successful initialization by default
    manager.initialize_client.return_value = None
    return manager, mock_client


def base_orchestrator_args(api_key="valid_key", **kwargs):
    """Base arguments for orchestrate_card_generation."""
    base_args = {
        "api_key_input": api_key,
        "subject": "Subject",
        "generation_mode": "subject",  # Default mode
        "source_text": "Source text",
        "url_input": "http://example.com",
        "model_name": "gpt-test",
        "topic_number": 1,  # Corresponds to num_cards in generate_cards_batch
        "cards_per_topic": 5,  # Corresponds to num_cards in generate_cards_batch
        "preference_prompt": "Pref prompt",  # Corresponds to system_prompt
        "generate_cloze": False,
    }
    base_args.update(kwargs)  # Update with any provided kwargs
    return base_args


@patch("ankigen_core.card_generator.structured_output_completion")
@patch("ankigen_core.card_generator.generate_cards_batch")
async def test_orchestrate_subject_mode(
    mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test orchestrate_card_generation in 'subject' mode."""
    manager, client = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args(generation_mode="subject")

    # Mock the first SOC call (for topics)
    mock_soc.return_value = {
        "topics": [
            {"name": "Topic 1", "difficulty": "beginner", "description": "Desc 1"}
        ]
    }
    # Mock return value from generate_cards_batch (called inside loop)
    mock_gcb.return_value = [
        Card(
            front=CardFront(question="Q1"),
            back=CardBack(answer="A1", explanation="E1", example="Ex1"),
        )
    ]

    # Patch gr.Info/Warning
    with patch("gradio.Info"), patch("gradio.Warning"):
        df_result, status, count = await card_generator.orchestrate_card_generation(
            client_manager=manager, cache=cache, **args
        )

    manager.initialize_client.assert_called_once_with(args["api_key_input"])
    manager.get_client.assert_called_once()

    # Check SOC call for topics
    mock_soc.assert_called_once()
    soc_call_args = mock_soc.call_args[1]
    assert soc_call_args["openai_client"] == client
    assert "Generate the top" in soc_call_args["user_prompt"]
    assert args["subject"] in soc_call_args["user_prompt"]

    # Check GCB call for the generated topic
    mock_gcb.assert_called_once_with(
        openai_client=client,
        cache=cache,
        model=args["model_name"],
        topic="Topic 1",  # Topic name from mock_soc response
        num_cards=args["cards_per_topic"],
        system_prompt=ANY,  # System prompt is constructed internally
        generate_cloze=args["generate_cloze"],
    )

    assert count == 1
    assert isinstance(df_result, pd.DataFrame)
    assert len(df_result) == 1
    assert df_result.iloc[0]["Question"] == "Q1"
    # Correct assertion to check for the returned HTML string (ignoring precise whitespace)
    assert "Generation complete!" in status
    assert "Total cards generated: 1" in status
    # NOTE(review): the original commented-out full-HTML expected-status literal
    # was garbled in this file (markup stripped). The substring checks above
    # stand in for it; reconstruct it from orchestrate_card_generation's output
    # if an exact-match assertion is wanted again.
    # expected_html_status = '''
    # ✅ Generation complete!
    # Total cards generated: 1
    # '''
    # assert status.strip() == expected_html_status.strip()


@patch("ankigen_core.card_generator.structured_output_completion")
@patch("ankigen_core.card_generator.generate_cards_batch")
async def test_orchestrate_text_mode(
    mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test orchestrate_card_generation in 'text' mode."""
    manager, client = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args(generation_mode="text")
    mock_soc.return_value = {"cards": []}

    await card_generator.orchestrate_card_generation(
        client_manager=manager, cache=cache, **args
    )

    mock_soc.assert_called_once()
    call_args = mock_soc.call_args[1]
    assert args["source_text"] in call_args["user_prompt"]


@patch("ankigen_core.card_generator.fetch_webpage_text")
@patch("ankigen_core.card_generator.structured_output_completion")
async def test_orchestrate_web_mode(
    mock_soc, mock_fetch, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test orchestrate_card_generation in 'web' mode."""
    manager, client = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args(generation_mode="web")
    fetched_text = "This is the fetched web page text."
    mock_fetch.return_value = fetched_text
    mock_soc.return_value = {
        "cards": []
    }  # Mock successful SOC call returning empty cards

    # Mock gr.Info and gr.Warning to avoid Gradio UI calls during test
    # Removed the incorrect pytest.raises and mock_gr_warning patch from here
    with patch("gradio.Info"), patch("gradio.Warning"):
        await card_generator.orchestrate_card_generation(
            client_manager=manager, cache=cache, **args
        )

    mock_fetch.assert_called_once_with(args["url_input"])
    mock_soc.assert_called_once()
    call_args = mock_soc.call_args[1]
    assert fetched_text in call_args["user_prompt"]


@patch("ankigen_core.card_generator.fetch_webpage_text")
@patch(
    "ankigen_core.card_generator.gr.Error"
)  # Mock gr.Error used by orchestrate_card_generation
async def test_orchestrate_web_mode_fetch_error(
    mock_gr_error, mock_fetch, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test 'web' mode handles errors during webpage fetching by calling gr.Error."""
    manager, _ = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args(generation_mode="web")
    error_msg = "Connection timed out"
    mock_fetch.side_effect = ConnectionError(error_msg)

    with patch("gradio.Info"), patch("gradio.Warning"):
        df, status_msg, count = await card_generator.orchestrate_card_generation(
            client_manager=manager, cache=cache, **args
        )

    mock_gr_error.assert_called_once_with(
        f"Failed to get content from URL: {error_msg}"
    )
    assert isinstance(df, pd.DataFrame)
    assert df.empty
    assert df.columns.tolist() == get_dataframe_columns()
    assert status_msg == "Failed to get content from URL."
    assert count == 0


@patch("ankigen_core.card_generator.structured_output_completion")  # Patch SOC
@patch("ankigen_core.card_generator.generate_cards_batch")
async def test_orchestrate_generation_batch_error(
    mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test orchestrator handles errors from generate_cards_batch."""
    manager, client = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args(generation_mode="subject")
    error_msg = "LLM generation failed"  # Define error_msg here

    # Mock the first SOC call (for topics) - needs to succeed
    mock_soc.return_value = {
        "topics": [
            {"name": "Topic 1", "difficulty": "beginner", "description": "Desc 1"}
        ]
    }
    # Configure GCB to raise an error
    mock_gcb.side_effect = ValueError(error_msg)

    # Patch gr.Info/Warning and assert Warning is called
    # Removed pytest.raises
    with patch("gradio.Info"), patch("gradio.Warning") as mock_gr_warning:
        # Add the call to the function back in
        await card_generator.orchestrate_card_generation(
            client_manager=manager, cache=cache, **args
        )
        # Assert that the warning was called due to the GCB error
        mock_gr_warning.assert_called_with(
            "Failed to generate cards for 'Topic 1'. Skipping."
        )

    mock_soc.assert_called_once()  # Ensure topic generation was attempted
    mock_gcb.assert_called_once()  # Ensure card generation was attempted


@patch("ankigen_core.card_generator.gr.Error")
async def test_orchestrate_path_mode_raises_not_implemented(
    mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test 'path' mode calls gr.Error for being unsupported."""
    manager, _ = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args(generation_mode="path")

    df, status_msg, count = await card_generator.orchestrate_card_generation(
        client_manager=manager, cache=cache, **args
    )

    mock_gr_error.assert_called_once_with("Unsupported generation mode selected: path")
    assert isinstance(df, pd.DataFrame)
    assert df.empty
    assert df.columns.tolist() == get_dataframe_columns()
    assert status_msg == "Unsupported mode."
    assert count == 0


@patch("ankigen_core.card_generator.gr.Error")
async def test_orchestrate_invalid_mode_raises_value_error(
    mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test invalid mode calls gr.Error."""
    manager, _ = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args(generation_mode="invalid_mode")

    df, status_msg, count = await card_generator.orchestrate_card_generation(
        client_manager=manager, cache=cache, **args
    )

    mock_gr_error.assert_called_once_with(
        "Unsupported generation mode selected: invalid_mode"
    )
    assert isinstance(df, pd.DataFrame)
    assert df.empty
    assert df.columns.tolist() == get_dataframe_columns()
    assert status_msg == "Unsupported mode."
    assert count == 0


@patch("ankigen_core.card_generator.gr.Error")
async def test_orchestrate_no_api_key_raises_error(
    mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test orchestrator calls gr.Error if API key is missing."""
    manager, _ = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args(api_key="")  # Empty API key

    df, status_msg, count = await card_generator.orchestrate_card_generation(
        client_manager=manager, cache=cache, **args
    )

    mock_gr_error.assert_called_once_with("OpenAI API key is required")
    assert isinstance(df, pd.DataFrame)
    assert df.empty
    assert df.columns.tolist() == get_dataframe_columns()
    assert status_msg == "API key is required."
    assert count == 0


@patch("ankigen_core.card_generator.gr.Error")
async def test_orchestrate_client_init_error_raises_error(
    mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
):
    """Test orchestrator calls gr.Error if client initialization fails."""
    manager, _ = mock_client_manager_fixture
    cache = mock_response_cache_fixture
    args = base_orchestrator_args()
    error_msg = "Invalid API Key"
    manager.initialize_client.side_effect = ValueError(error_msg)

    df, status_msg, count = await card_generator.orchestrate_card_generation(
        client_manager=manager, cache=cache, **args
    )

    mock_gr_error.assert_called_once_with(f"OpenAI Client Error: {error_msg}")
    assert isinstance(df, pd.DataFrame)
    assert df.empty
    assert df.columns.tolist() == get_dataframe_columns()
    assert status_msg == f"OpenAI Client Error: {error_msg}"
    assert count == 0


# --- Tests for process_anki_card_data ---


@pytest.fixture
def sample_anki_card_data_list() -> list[AnkiCardData]:
    """Provides a list of sample AnkiCardData objects for testing."""
    return [
        AnkiCardData(
            front="Question 1",
            back="Answer 1",
            tags=["tagA", "tagB"],
            source_url="http://example.com/source1",
            note_type="Basic",
        ),
        AnkiCardData(
            front="Question 2",
            back="Answer 2",
            tags=[],  # Changed from None to empty list
            source_url=None,  # This is Optional[str], so None is fine
            note_type="Cloze",
        ),
        AnkiCardData(
            front="Question 3",
            back="Answer 3",
            tags=[],  # Empty tags list is fine
            source_url="http://example.com/source3",
            note_type="Basic",  # Changed from None to "Basic"
        ),
    ]


def test_process_anki_card_data_basic_conversion(sample_anki_card_data_list):
    """Test basic conversion of AnkiCardData to dicts."""
    input_cards = sample_anki_card_data_list
    processed = card_generator.process_anki_card_data(input_cards)

    assert len(processed) == 3
    assert isinstance(processed[0], dict)
    assert processed[0]["front"] == "Question 1"
    # NOTE(review): HTML markup around the appended "Source" line was stripped
    # when this file was mangled — confirm the exact literal against
    # process_anki_card_data's output before relying on this equality check.
    assert (
        processed[0]["back"] == "Answer 1\n\nSource: http://example.com/source1"
    )
    assert processed[0]["tags"] == "tagA tagB"
    assert processed[0]["note_type"] == "Basic"

    assert processed[1]["front"] == "Question 2"
    assert processed[1]["back"] == "Answer 2"  # No source_url, so no extra HTML
    assert processed[1]["tags"] == ""  # No tags, so empty string
    assert processed[1]["note_type"] == "Cloze"

    assert processed[2]["front"] == "Question 3"
    assert "Source" in processed[2]["back"]
    assert "http://example.com/source3" in processed[2]["back"]
    assert processed[2]["tags"] == ""  # Empty tags list, so empty string
    assert processed[2]["note_type"] == "Basic"  # None should default to Basic


def test_process_anki_card_data_empty_list():
    """Test processing an empty list of cards."""
    processed = card_generator.process_anki_card_data([])
    assert processed == []


def test_process_anki_card_data_source_url_formatting(sample_anki_card_data_list):
    """Test that the source_url is correctly formatted and appended to the back."""
    # Test with the first card that has a source_url
    card_with_source = [sample_anki_card_data_list[0]]
    processed = card_generator.process_anki_card_data(card_with_source)
    # NOTE(review): markup in this literal was stripped during file mangling;
    # verify against the implementation's actual suffix.
    expected_back_html = "\n\nSource: http://example.com/source1"
    assert processed[0]["back"].endswith(expected_back_html)

    # Test with the second card that has no source_url
    card_without_source = [sample_anki_card_data_list[1]]
    processed_no_source = card_generator.process_anki_card_data(card_without_source)
    assert "Source:" not in processed_no_source[0]["back"]


def test_process_anki_card_data_tags_formatting(sample_anki_card_data_list):
    """Test tags are correctly joined into a space-separated string."""
    processed = card_generator.process_anki_card_data(sample_anki_card_data_list)
    assert processed[0]["tags"] == "tagA tagB"
    assert processed[1]["tags"] == ""  # None tags
    assert processed[2]["tags"] == ""  # Empty list tags


def test_process_anki_card_data_note_type_handling(sample_anki_card_data_list):
    """Test note_type handling, including default."""
    processed = card_generator.process_anki_card_data(sample_anki_card_data_list)
    assert processed[0]["note_type"] == "Basic"
    assert processed[1]["note_type"] == "Cloze"
    assert processed[2]["note_type"] == "Basic"  # Default for None

    # Test with a card where note_type is explicitly not set during AnkiCardData creation
    # (though Pydantic default in model definition would handle this, good to be robust)
    card_without_note_type_field = AnkiCardData(
        front="Q", back="A"
    )  # note_type will use Pydantic default
    processed_single = card_generator.process_anki_card_data(
        [card_without_note_type_field]
    )
    # The function itself now has: card_item.note_type if hasattr(card_item, 'note_type') else "Basic"
    # If AnkiCardData Pydantic model has a default for note_type (e.g. "Basic"), hasattr might be true.
    # Let's check the AnkiCardData model definition again.
    # AnkiCardData model has: note_type: Optional[str] = "Basic"
    # So, card_item.note_type will always exist and default to "Basic".
    # The hasattr check in process_anki_card_data might be redundant then, but harmless.
    assert processed_single[0]["note_type"] == "Basic"


# --- Tests for deduplicate_cards ---


def test_deduplicate_cards_removes_duplicates():
    """Test that duplicate cards (based on 'front' content) are removed."""
    cards_with_duplicates = [
        {"front": "Q1", "back": "A1"},
        {"front": "Q2", "back": "A2"},
        {"front": "Q1", "back": "A1_variant"},  # Duplicate front
        {"front": "Q3", "back": "A3"},
        {"front": "Q2", "back": "A2_variant"},  # Duplicate front
    ]
    expected_cards = [
        {"front": "Q1", "back": "A1"},
        {"front": "Q2", "back": "A2"},
        {"front": "Q3", "back": "A3"},
    ]
    assert card_generator.deduplicate_cards(cards_with_duplicates) == expected_cards


def test_deduplicate_cards_preserves_order():
    """Test that the order of first-seen unique cards is preserved."""
    ordered_cards = [
        {"front": "Q_alpha", "back": "A_alpha"},
        {"front": "Q_beta", "back": "A_beta"},
        {"front": "Q_gamma", "back": "A_gamma"},
        {"front": "Q_alpha", "back": "A_alpha_redux"},  # Duplicate
    ]
    expected_ordered_cards = [
        {"front": "Q_alpha", "back": "A_alpha"},
        {"front": "Q_beta", "back": "A_beta"},
        {"front": "Q_gamma", "back": "A_gamma"},
    ]
    assert card_generator.deduplicate_cards(ordered_cards) == expected_ordered_cards


def test_deduplicate_cards_empty_list():
    """Test deduplicating an empty list of cards."""
    assert card_generator.deduplicate_cards([]) == []


def test_deduplicate_cards_all_unique():
    """Test deduplicating a list where all cards are unique."""
    all_unique_cards = [
        {"front": "Unique1", "back": "Ans1"},
        {"front": "Unique2", "back": "Ans2"},
        {"front": "Unique3", "back": "Ans3"},
    ]
    assert card_generator.deduplicate_cards(all_unique_cards) == all_unique_cards


def test_deduplicate_cards_missing_front_key():
    """Test that cards missing the 'front' key are skipped and logged."""
    cards_with_missing_front = [
        {"front": "Q1", "back": "A1"},
        {"foo": "bar", "back": "A2"},  # Missing 'front' key
        {"front": "Q3", "back": "A3"},
    ]
    expected_cards = [
        {"front": "Q1", "back": "A1"},
        {"front": "Q3", "back": "A3"},
    ]
    # Patch the logger within card_generator to check for the warning
    with patch.object(card_generator.logger, "warning") as mock_log_warning:
        result = card_generator.deduplicate_cards(cards_with_missing_front)
        assert result == expected_cards
        mock_log_warning.assert_called_once_with(
            "Card skipped during deduplication due to missing 'front' key: {'foo': 'bar', 'back': 'A2'}"
        )


def test_deduplicate_cards_front_is_none():
    """Test that cards where 'front' value is None are skipped and logged."""
    cards_with_none_front = [
        {"front": "Q1", "back": "A1"},
        {"front": None, "back": "A2"},  # Front is None
        {"front": "Q3", "back": "A3"},
    ]
    expected_cards = [
        {"front": "Q1", "back": "A1"},
        {"front": "Q3", "back": "A3"},
    ]
    with patch.object(card_generator.logger, "warning") as mock_log_warning:
        result = card_generator.deduplicate_cards(cards_with_none_front)
        assert result == expected_cards
        mock_log_warning.assert_called_once_with(
            "Card skipped during deduplication due to missing 'front' key: {'front': None, 'back': 'A2'}"
        )
        # The log message says missing 'front' key for None value as well, due to
        # card.get('front') then checking if front_text is None.
# --- Tests for generate_cards_from_crawled_content ---


@patch("ankigen_core.card_generator.deduplicate_cards")
@patch("ankigen_core.card_generator.process_anki_card_data")
def test_generate_cards_from_crawled_content_orchestration(
    mock_process_anki_card_data,
    mock_deduplicate_cards,
    sample_anki_card_data_list,  # Use the existing fixture
):
    """Test that generate_cards_from_crawled_content correctly orchestrates calls."""
    # Setup mock return values
    mock_processed_list = [{"front": "Processed Q1", "back": "Processed A1"}]
    mock_process_anki_card_data.return_value = mock_processed_list

    mock_unique_list = [{"front": "Unique Q1", "back": "Unique A1"}]
    mock_deduplicate_cards.return_value = mock_unique_list

    input_anki_cards = sample_anki_card_data_list  # Sample AnkiCardData objects

    # Call the function under test
    result = card_generator.generate_cards_from_crawled_content(input_anki_cards)

    # Assertions: processing feeds deduplication, and its output is returned as-is
    mock_process_anki_card_data.assert_called_once_with(input_anki_cards)
    mock_deduplicate_cards.assert_called_once_with(mock_processed_list)
    assert result == mock_unique_list


def test_generate_cards_from_crawled_content_empty_input():
    """Test with an empty list of AnkiCardData objects."""
    with (
        patch(
            "ankigen_core.card_generator.process_anki_card_data", return_value=[]
        ) as mock_process,
        patch(
            "ankigen_core.card_generator.deduplicate_cards", return_value=[]
        ) as mock_dedup,
    ):
        result = card_generator.generate_cards_from_crawled_content([])
        mock_process.assert_called_once_with([])
        mock_dedup.assert_called_once_with([])
        assert result == []


# Example of an integration-style test (optional, as unit tests for sub-components are thorough)
# This would not mock the internal calls.
def test_generate_cards_from_crawled_content_integration(sample_anki_card_data_list):
    """
    A more integration-style test to ensure the flow works with real sub-functions.
    This relies on the correctness of process_anki_card_data and deduplicate_cards.
    """
    # Construct a list that will actually have duplicates after processing
    card1 = AnkiCardData(front="Q1", back="A1", tags=["test"], note_type="Basic")
    card2_dup = AnkiCardData(
        front="Q1", back="A1_variant", tags=["test"], note_type="Basic"
    )  # Duplicate front
    card3 = AnkiCardData(front="Q2", back="A2", tags=["test"], note_type="Basic")
    input_list = [card1, card2_dup, card3]

    result = card_generator.generate_cards_from_crawled_content(input_list)

    # Expected result after processing and deduplication:
    # Card1 (original) should be present. Card2_dup should be removed. Card3 should be present.

    # Check lengths
    assert len(result) == 2

    # Check content (simplified check based on front)
    result_fronts = [item["front"] for item in result]
    assert "Q1" in result_fronts
    assert "Q2" in result_fronts

    # Check that the first version of Q1 was kept (A1, not A1_variant)
    # This depends on the details of process_anki_card_data output
    q1_card_in_result = next(item for item in result if item["front"] == "Q1")
    assert (
        "A1" in q1_card_in_result["back"]
    )  # Basic check, might need refinement based on exact source_url append
    assert "A1_variant" not in q1_card_in_result["back"]
    # More detailed checks could verify the full structure if needed