# Tests for ankigen_core/card_generator.py
import pytest
from unittest.mock import patch, MagicMock, ANY
import pandas as pd
# Pydantic models and ResponseCache used by the tests below
from ankigen_core.models import Card, CardFront, CardBack, AnkiCardData
from ankigen_core.utils import ResponseCache
from ankigen_core.llm_interface import OpenAIClientManager # Needed for type hints
# Module to test
from ankigen_core import card_generator
from ankigen_core.card_generator import (
get_dataframe_columns,
) # Import for use in error returns
# --- Constants Tests (Optional but good practice) ---
def test_constants_exist_and_have_expected_type():
"""Test that constants exist and are lists."""
assert isinstance(card_generator.AVAILABLE_MODELS, list)
assert isinstance(card_generator.GENERATION_MODES, list)
assert len(card_generator.AVAILABLE_MODELS) > 0
assert len(card_generator.GENERATION_MODES) > 0
# --- generate_cards_batch Tests ---
@pytest.fixture
def mock_openai_client_fixture(): # Renamed to avoid conflict with llm_interface tests fixture
"""Provides a MagicMock OpenAI client."""
return MagicMock()
@pytest.fixture
def mock_response_cache_fixture():
"""Provides a MagicMock ResponseCache."""
cache = MagicMock(spec=ResponseCache)
cache.get.return_value = None # Default to cache miss
return cache
@patch("ankigen_core.card_generator.structured_output_completion")
async def test_generate_cards_batch_success(
mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
):
"""Test successful card generation using generate_cards_batch."""
mock_openai_client = mock_openai_client_fixture
mock_response_cache = mock_response_cache_fixture
model = "gpt-test"
topic = "Test Topic"
num_cards = 2
system_prompt = "System prompt"
generate_cloze = False
# Mock the response from structured_output_completion
mock_soc.return_value = {
"cards": [
{
"card_type": "basic",
"front": {"question": "Q1"},
"back": {"answer": "A1", "explanation": "E1", "example": "Ex1"},
"metadata": {"difficulty": "beginner"},
},
{
"card_type": "cloze",
"front": {"question": "{{c1::Q2}}"},
"back": {"answer": "A2_full", "explanation": "E2", "example": "Ex2"},
"metadata": {"difficulty": "intermediate"},
},
]
}
result_cards = await card_generator.generate_cards_batch(
openai_client=mock_openai_client,
cache=mock_response_cache,
model=model,
topic=topic,
num_cards=num_cards,
system_prompt=system_prompt,
generate_cloze=generate_cloze,
)
assert len(result_cards) == 2
assert isinstance(result_cards[0], Card)
assert result_cards[0].card_type == "basic"
assert result_cards[0].front.question == "Q1"
assert result_cards[1].card_type == "cloze"
assert result_cards[1].front.question == "{{c1::Q2}}"
assert result_cards[1].metadata["difficulty"] == "intermediate"
mock_soc.assert_called_once()
call_args = mock_soc.call_args[1] # Get keyword args
assert call_args["openai_client"] == mock_openai_client
assert call_args["cache"] == mock_response_cache
assert call_args["model"] == model
assert call_args["system_prompt"] == system_prompt
assert topic in call_args["user_prompt"]
assert str(num_cards) in call_args["user_prompt"]
# Check cloze instruction is NOT present
assert "generate Cloze deletion cards" not in call_args["user_prompt"]
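# A minimal sketch of how one entry in the mocked "cards" payload maps onto the Card model,
# based on the assertions above. It assumes Card/CardFront/CardBack accept these fields as
# keyword arguments; it is illustrative only, not the parsing done inside generate_cards_batch.
def _example_card_from_payload(entry: dict) -> Card:
    return Card(
        card_type=entry["card_type"],
        front=CardFront(**entry["front"]),
        back=CardBack(**entry["back"]),
        metadata=entry["metadata"],
    )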
@patch("ankigen_core.card_generator.structured_output_completion")
async def test_generate_cards_batch_cloze_prompt(
mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
):
"""Test generate_cards_batch includes cloze instructions when requested."""
mock_openai_client = mock_openai_client_fixture
mock_response_cache = mock_response_cache_fixture
mock_soc.return_value = {"cards": []} # Return empty for simplicity
await card_generator.generate_cards_batch(
openai_client=mock_openai_client,
cache=mock_response_cache,
model="gpt-test",
topic="Cloze Topic",
num_cards=1,
system_prompt="System",
generate_cloze=True,
)
mock_soc.assert_called_once()
call_args = mock_soc.call_args[1]
# Check that specific cloze instructions are present
assert "generate Cloze deletion cards" in call_args["user_prompt"]
    # Look for the instruction text itself, not the JSON example syntax
assert (
"Format the question field using Anki's cloze syntax"
in call_args["user_prompt"]
)
@patch("ankigen_core.card_generator.structured_output_completion")
async def test_generate_cards_batch_api_error(
mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
):
"""Test generate_cards_batch handles API errors by re-raising."""
mock_openai_client = mock_openai_client_fixture
mock_response_cache = mock_response_cache_fixture
error_message = "API Error"
mock_soc.side_effect = ValueError(error_message) # Simulate error from SOC
with pytest.raises(ValueError, match=error_message):
await card_generator.generate_cards_batch(
openai_client=mock_openai_client,
cache=mock_response_cache,
model="gpt-test",
topic="Error Topic",
num_cards=1,
system_prompt="System",
generate_cloze=False,
)
@patch("ankigen_core.card_generator.structured_output_completion")
async def test_generate_cards_batch_invalid_response(
mock_soc, mock_openai_client_fixture, mock_response_cache_fixture
):
"""Test generate_cards_batch handles invalid JSON or missing keys."""
mock_openai_client = mock_openai_client_fixture
mock_response_cache = mock_response_cache_fixture
mock_soc.return_value = {"wrong_key": []} # Missing 'cards' key
with pytest.raises(ValueError, match="Failed to generate cards"):
await card_generator.generate_cards_batch(
openai_client=mock_openai_client,
cache=mock_response_cache,
model="gpt-test",
topic="Invalid Response Topic",
num_cards=1,
system_prompt="System",
generate_cloze=False,
)
# --- orchestrate_card_generation Tests ---
@pytest.fixture
def mock_client_manager_fixture():
"""Provides a MagicMock OpenAIClientManager."""
manager = MagicMock(spec=OpenAIClientManager)
mock_client = MagicMock() # Mock the client instance it returns
manager.get_client.return_value = mock_client
# Simulate successful initialization by default
manager.initialize_client.return_value = None
return manager, mock_client
def base_orchestrator_args(api_key="valid_key", **kwargs):
"""Base arguments for orchestrate_card_generation."""
    base_args = {
        "api_key_input": api_key,
        "subject": "Subject",
        "generation_mode": "subject",  # Default mode
        "source_text": "Source text",
        "url_input": "http://example.com",
        "model_name": "gpt-test",
        "topic_number": 1,  # Number of topics requested in subject mode
        "cards_per_topic": 5,  # Passed to generate_cards_batch as num_cards
        "preference_prompt": "Pref prompt",  # Folded into the internally built system prompt
"generate_cloze": False,
"use_llm_judge": False,
}
base_args.update(kwargs) # Update with any provided kwargs
return base_args
@patch("ankigen_core.card_generator.structured_output_completion")
@patch("ankigen_core.card_generator.generate_cards_batch")
async def test_orchestrate_subject_mode(
mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test orchestrate_card_generation in 'subject' mode."""
manager, client = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(generation_mode="subject")
# Mock the first SOC call (for topics)
mock_soc.return_value = {
"topics": [
{"name": "Topic 1", "difficulty": "beginner", "description": "Desc 1"}
]
}
# Mock return value from generate_cards_batch (called inside loop)
mock_gcb.return_value = [
Card(
front=CardFront(question="Q1"),
back=CardBack(answer="A1", explanation="E1", example="Ex1"),
)
]
# Patch gr.Info/Warning
with patch("gradio.Info"), patch("gradio.Warning"):
df_result, status, count = await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
manager.initialize_client.assert_called_once_with(args["api_key_input"])
manager.get_client.assert_called_once()
# Check SOC call for topics
mock_soc.assert_called_once()
soc_call_args = mock_soc.call_args[1]
assert soc_call_args["openai_client"] == client
assert "Generate the top" in soc_call_args["user_prompt"]
assert args["subject"] in soc_call_args["user_prompt"]
# Check GCB call for the generated topic
mock_gcb.assert_called_once_with(
openai_client=client,
cache=cache,
model=args["model_name"],
topic="Topic 1", # Topic name from mock_soc response
num_cards=args["cards_per_topic"],
system_prompt=ANY, # System prompt is constructed internally
generate_cloze=args["generate_cloze"],
)
assert count == 1
assert isinstance(df_result, pd.DataFrame)
assert len(df_result) == 1
assert df_result.iloc[0]["Question"] == "Q1"
    # The status is returned as an HTML snippet; check key content rather than exact markup
    assert "Generation complete!" in status
    assert "Total cards generated: 1" in status
    assert "<div" in status  # Basic check for HTML structure
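# A minimal sketch of the status HTML these assertions tolerate. The exact markup is an
# assumption based on the expected snippet this test was originally written against; it is
# illustrative only, not a guarantee of what orchestrate_card_generation returns.
def _example_status_html(total_cards: int) -> str:
    return (
        '<div style="text-align: center">'
        "<p>✅ Generation complete!</p>"
        f"<p>Total cards generated: {total_cards}</p>"
        "</div>"
    )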
@patch("ankigen_core.card_generator.judge_cards")
@patch("ankigen_core.card_generator.structured_output_completion")
@patch("ankigen_core.card_generator.generate_cards_batch")
async def test_orchestrate_subject_mode_with_judge(
mock_gcb,
mock_soc,
mock_judge,
mock_client_manager_fixture,
mock_response_cache_fixture,
):
"""Test orchestrate_card_generation calls judge_cards when enabled."""
manager, client = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(generation_mode="subject", use_llm_judge=True)
mock_soc.return_value = {
"topics": [{"name": "T1", "difficulty": "d", "description": "d"}]
}
sample_card = Card(
front=CardFront(question="Q1"),
back=CardBack(answer="A1", explanation="E1", example="Ex1"),
)
mock_gcb.return_value = [sample_card]
mock_judge.return_value = [sample_card]
with patch("gradio.Info"), patch("gradio.Warning"):
await card_generator.orchestrate_card_generation(
client_manager=manager,
cache=cache,
**args,
)
mock_judge.assert_called_once_with(client, cache, args["model_name"], [sample_card])
@patch("ankigen_core.card_generator.structured_output_completion")
@patch("ankigen_core.card_generator.generate_cards_batch")
async def test_orchestrate_text_mode(
mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test orchestrate_card_generation in 'text' mode."""
manager, client = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(generation_mode="text")
mock_soc.return_value = {"cards": []}
await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
mock_soc.assert_called_once()
call_args = mock_soc.call_args[1]
assert args["source_text"] in call_args["user_prompt"]
@patch("ankigen_core.card_generator.fetch_webpage_text")
@patch("ankigen_core.card_generator.structured_output_completion")
async def test_orchestrate_web_mode(
mock_soc, mock_fetch, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test orchestrate_card_generation in 'web' mode."""
manager, client = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(generation_mode="web")
fetched_text = "This is the fetched web page text."
mock_fetch.return_value = fetched_text
mock_soc.return_value = {
"cards": []
} # Mock successful SOC call returning empty cards
    # Patch gr.Info and gr.Warning so no Gradio UI calls are made during the test
with patch("gradio.Info"), patch("gradio.Warning"):
await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
mock_fetch.assert_called_once_with(args["url_input"])
mock_soc.assert_called_once()
call_args = mock_soc.call_args[1]
assert fetched_text in call_args["user_prompt"]
@patch("ankigen_core.card_generator.fetch_webpage_text")
@patch(
"ankigen_core.card_generator.gr.Error"
) # Mock gr.Error used by orchestrate_card_generation
async def test_orchestrate_web_mode_fetch_error(
mock_gr_error, mock_fetch, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test 'web' mode handles errors during webpage fetching by calling gr.Error."""
manager, _ = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(generation_mode="web")
error_msg = "Connection timed out"
mock_fetch.side_effect = ConnectionError(error_msg)
with patch("gradio.Info"), patch("gradio.Warning"):
df, status_msg, count = await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
mock_gr_error.assert_called_once_with(
f"Failed to get content from URL: {error_msg}"
)
assert isinstance(df, pd.DataFrame)
assert df.empty
assert df.columns.tolist() == get_dataframe_columns()
assert status_msg == "Failed to get content from URL."
assert count == 0
@patch("ankigen_core.card_generator.structured_output_completion") # Patch SOC
@patch("ankigen_core.card_generator.generate_cards_batch")
async def test_orchestrate_generation_batch_error(
mock_gcb, mock_soc, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test orchestrator handles errors from generate_cards_batch."""
manager, client = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(generation_mode="subject")
    error_msg = "LLM generation failed"
# Mock the first SOC call (for topics) - needs to succeed
mock_soc.return_value = {
"topics": [
{"name": "Topic 1", "difficulty": "beginner", "description": "Desc 1"}
]
}
# Configure GCB to raise an error
mock_gcb.side_effect = ValueError(error_msg)
    # The orchestrator should catch the GCB error and surface it via gr.Warning
    with patch("gradio.Info"), patch("gradio.Warning") as mock_gr_warning:
await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
# Assert that the warning was called due to the GCB error
mock_gr_warning.assert_called_with(
"Failed to generate cards for 'Topic 1'. Skipping."
)
mock_soc.assert_called_once() # Ensure topic generation was attempted
mock_gcb.assert_called_once() # Ensure card generation was attempted
@patch("ankigen_core.card_generator.gr.Error")
async def test_orchestrate_path_mode_raises_not_implemented(
mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test 'path' mode calls gr.Error for being unsupported."""
manager, _ = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(generation_mode="path")
df, status_msg, count = await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
mock_gr_error.assert_called_once_with("Unsupported generation mode selected: path")
assert isinstance(df, pd.DataFrame)
assert df.empty
assert df.columns.tolist() == get_dataframe_columns()
assert status_msg == "Unsupported mode."
assert count == 0
@patch("ankigen_core.card_generator.gr.Error")
async def test_orchestrate_invalid_mode_raises_value_error(
mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test invalid mode calls gr.Error."""
manager, _ = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(generation_mode="invalid_mode")
df, status_msg, count = await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
mock_gr_error.assert_called_once_with(
"Unsupported generation mode selected: invalid_mode"
)
assert isinstance(df, pd.DataFrame)
assert df.empty
assert df.columns.tolist() == get_dataframe_columns()
assert status_msg == "Unsupported mode."
assert count == 0
@patch("ankigen_core.card_generator.gr.Error")
async def test_orchestrate_no_api_key_raises_error(
mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test orchestrator calls gr.Error if API key is missing."""
manager, _ = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args(api_key="") # Empty API key
df, status_msg, count = await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
mock_gr_error.assert_called_once_with("OpenAI API key is required")
assert isinstance(df, pd.DataFrame)
assert df.empty
assert df.columns.tolist() == get_dataframe_columns()
assert status_msg == "API key is required."
assert count == 0
@patch("ankigen_core.card_generator.gr.Error")
async def test_orchestrate_client_init_error_raises_error(
mock_gr_error, mock_client_manager_fixture, mock_response_cache_fixture
):
"""Test orchestrator calls gr.Error if client initialization fails."""
manager, _ = mock_client_manager_fixture
cache = mock_response_cache_fixture
args = base_orchestrator_args()
error_msg = "Invalid API Key"
manager.initialize_client.side_effect = ValueError(error_msg)
df, status_msg, count = await card_generator.orchestrate_card_generation(
client_manager=manager, cache=cache, **args
)
mock_gr_error.assert_called_once_with(f"OpenAI Client Error: {error_msg}")
assert isinstance(df, pd.DataFrame)
assert df.empty
assert df.columns.tolist() == get_dataframe_columns()
assert status_msg == f"OpenAI Client Error: {error_msg}"
assert count == 0
# --- Tests for process_anki_card_data ---
@pytest.fixture
def sample_anki_card_data_list() -> list[AnkiCardData]:
"""Provides a list of sample AnkiCardData objects for testing."""
return [
AnkiCardData(
front="Question 1",
back="Answer 1",
tags=["tagA", "tagB"],
source_url="http://example.com/source1",
note_type="Basic",
),
AnkiCardData(
front="Question 2",
back="Answer 2",
            tags=[],  # tags expects a list; use an empty list rather than None
source_url=None, # This is Optional[str], so None is fine
note_type="Cloze",
),
AnkiCardData(
front="Question 3",
back="Answer 3",
tags=[], # Empty tags list is fine
source_url="http://example.com/source3",
            note_type="Basic",  # Set explicitly; the model also defaults to "Basic"
),
]
def test_process_anki_card_data_basic_conversion(sample_anki_card_data_list):
"""Test basic conversion of AnkiCardData to dicts."""
input_cards = sample_anki_card_data_list
processed = card_generator.process_anki_card_data(input_cards)
assert len(processed) == 3
assert isinstance(processed[0], dict)
assert processed[0]["front"] == "Question 1"
assert (
processed[0]["back"]
== "Answer 1\\n\\n<hr><small>Source: <a href='http://example.com/source1'>http://example.com/source1</a></small>"
)
assert processed[0]["tags"] == "tagA tagB"
assert processed[0]["note_type"] == "Basic"
assert processed[1]["front"] == "Question 2"
assert processed[1]["back"] == "Answer 2" # No source_url, so no extra HTML
assert processed[1]["tags"] == "" # No tags, so empty string
assert processed[1]["note_type"] == "Cloze"
assert processed[2]["front"] == "Question 3"
assert "<hr><small>Source" in processed[2]["back"]
assert "http://example.com/source3" in processed[2]["back"]
assert processed[2]["tags"] == "" # Empty tags list, so empty string
    assert processed[2]["note_type"] == "Basic"  # Set explicitly in the fixture
def test_process_anki_card_data_empty_list():
"""Test processing an empty list of cards."""
processed = card_generator.process_anki_card_data([])
assert processed == []
def test_process_anki_card_data_source_url_formatting(sample_anki_card_data_list):
"""Test that the source_url is correctly formatted and appended to the back."""
# Test with the first card that has a source_url
card_with_source = [sample_anki_card_data_list[0]]
processed = card_generator.process_anki_card_data(card_with_source)
expected_back_html = "\\n\\n<hr><small>Source: <a href='http://example.com/source1'>http://example.com/source1</a></small>"
assert processed[0]["back"].endswith(expected_back_html)
# Test with the second card that has no source_url
card_without_source = [sample_anki_card_data_list[1]]
processed_no_source = card_generator.process_anki_card_data(card_without_source)
assert "<hr><small>Source:" not in processed_no_source[0]["back"]
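# A minimal sketch of the back-field formatting assumed by the assertions above: when a
# source_url is present, it is appended as an <hr>/<small> footer containing an anchor.
# The separator and markup are copied from the expected strings in these tests and should
# be treated as assumptions about the real process_anki_card_data, not its implementation.
def _reference_format_back(back: str, source_url=None) -> str:
    if source_url:
        return (
            f"{back}\\n\\n<hr><small>Source: "
            f"<a href='{source_url}'>{source_url}</a></small>"
        )
    return back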
def test_process_anki_card_data_tags_formatting(sample_anki_card_data_list):
"""Test tags are correctly joined into a space-separated string."""
processed = card_generator.process_anki_card_data(sample_anki_card_data_list)
assert processed[0]["tags"] == "tagA tagB"
    assert processed[1]["tags"] == ""  # Empty tags list
    assert processed[2]["tags"] == ""  # Empty tags list
def test_process_anki_card_data_note_type_handling(sample_anki_card_data_list):
"""Test note_type handling, including default."""
processed = card_generator.process_anki_card_data(sample_anki_card_data_list)
assert processed[0]["note_type"] == "Basic"
assert processed[1]["note_type"] == "Cloze"
    assert processed[2]["note_type"] == "Basic"  # Set explicitly in the fixture
# Test with a card where note_type is explicitly not set during AnkiCardData creation
# (though Pydantic default in model definition would handle this, good to be robust)
card_without_note_type_field = AnkiCardData(
front="Q", back="A"
) # note_type will use Pydantic default
processed_single = card_generator.process_anki_card_data(
[card_without_note_type_field]
)
    # AnkiCardData defines note_type: Optional[str] = "Basic", so the attribute always
    # exists and defaults to "Basic"; the hasattr fallback inside process_anki_card_data
    # is redundant but harmless.
assert processed_single[0]["note_type"] == "Basic"
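# A minimal sketch of the note_type handling these tests assume: the model defaults
# note_type to "Basic", and anything missing or falsy falls back to "Basic". Illustrative
# only; the real process_anki_card_data may differ in detail.
def _reference_note_type(card_item) -> str:
    return getattr(card_item, "note_type", None) or "Basic"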
# --- Tests for deduplicate_cards ---
def test_deduplicate_cards_removes_duplicates():
"""Test that duplicate cards (based on 'front' content) are removed."""
cards_with_duplicates = [
{"front": "Q1", "back": "A1"},
{"front": "Q2", "back": "A2"},
{"front": "Q1", "back": "A1_variant"}, # Duplicate front
{"front": "Q3", "back": "A3"},
{"front": "Q2", "back": "A2_variant"}, # Duplicate front
]
expected_cards = [
{"front": "Q1", "back": "A1"},
{"front": "Q2", "back": "A2"},
{"front": "Q3", "back": "A3"},
]
assert card_generator.deduplicate_cards(cards_with_duplicates) == expected_cards
def test_deduplicate_cards_preserves_order():
"""Test that the order of first-seen unique cards is preserved."""
ordered_cards = [
{"front": "Q_alpha", "back": "A_alpha"},
{"front": "Q_beta", "back": "A_beta"},
{"front": "Q_gamma", "back": "A_gamma"},
{"front": "Q_alpha", "back": "A_alpha_redux"}, # Duplicate
]
expected_ordered_cards = [
{"front": "Q_alpha", "back": "A_alpha"},
{"front": "Q_beta", "back": "A_beta"},
{"front": "Q_gamma", "back": "A_gamma"},
]
assert card_generator.deduplicate_cards(ordered_cards) == expected_ordered_cards
def test_deduplicate_cards_empty_list():
"""Test deduplicating an empty list of cards."""
assert card_generator.deduplicate_cards([]) == []
def test_deduplicate_cards_all_unique():
"""Test deduplicating a list where all cards are unique."""
all_unique_cards = [
{"front": "Unique1", "back": "Ans1"},
{"front": "Unique2", "back": "Ans2"},
{"front": "Unique3", "back": "Ans3"},
]
assert card_generator.deduplicate_cards(all_unique_cards) == all_unique_cards
def test_deduplicate_cards_missing_front_key():
"""Test that cards missing the 'front' key are skipped and logged."""
cards_with_missing_front = [
{"front": "Q1", "back": "A1"},
{"foo": "bar", "back": "A2"}, # Missing 'front' key
{"front": "Q3", "back": "A3"},
]
expected_cards = [
{"front": "Q1", "back": "A1"},
{"front": "Q3", "back": "A3"},
]
# Patch the logger within card_generator to check for the warning
with patch.object(card_generator.logger, "warning") as mock_log_warning:
result = card_generator.deduplicate_cards(cards_with_missing_front)
assert result == expected_cards
mock_log_warning.assert_called_once_with(
"Card skipped during deduplication due to missing 'front' key: {'foo': 'bar', 'back': 'A2'}"
)
def test_deduplicate_cards_front_is_none():
"""Test that cards where 'front' value is None are skipped and logged."""
cards_with_none_front = [
{"front": "Q1", "back": "A1"},
{"front": None, "back": "A2"}, # Front is None
{"front": "Q3", "back": "A3"},
]
expected_cards = [
{"front": "Q1", "back": "A1"},
{"front": "Q3", "back": "A3"},
]
with patch.object(card_generator.logger, "warning") as mock_log_warning:
result = card_generator.deduplicate_cards(cards_with_none_front)
assert result == expected_cards
mock_log_warning.assert_called_once_with(
"Card skipped during deduplication due to missing 'front' key: {'front': None, 'back': 'A2'}"
        )  # The same "missing 'front' key" message is logged when 'front' is None,
        # because deduplicate_cards reads card.get("front") and then checks for None.
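# A minimal reference sketch of the deduplication behaviour these tests assume: deduplicate
# by the 'front' value, keep the first occurrence, and skip cards whose 'front' is missing
# or None (the real implementation also logs a warning). Illustrative only, not the actual
# deduplicate_cards in ankigen_core.card_generator.
def _reference_deduplicate_cards(cards):
    seen = set()
    unique = []
    for card in cards:
        front = card.get("front")
        if front is None:
            continue  # real code logs a warning before skipping
        if front not in seen:
            seen.add(front)
            unique.append(card)
    return unique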
# --- Tests for generate_cards_from_crawled_content ---
@patch("ankigen_core.card_generator.deduplicate_cards")
@patch("ankigen_core.card_generator.process_anki_card_data")
def test_generate_cards_from_crawled_content_orchestration(
mock_process_anki_card_data,
mock_deduplicate_cards,
sample_anki_card_data_list, # Use the existing fixture
):
"""Test that generate_cards_from_crawled_content correctly orchestrates calls."""
# Setup mock return values
mock_processed_list = [{"front": "Processed Q1", "back": "Processed A1"}]
mock_process_anki_card_data.return_value = mock_processed_list
mock_unique_list = [{"front": "Unique Q1", "back": "Unique A1"}]
mock_deduplicate_cards.return_value = mock_unique_list
input_anki_cards = sample_anki_card_data_list # Sample AnkiCardData objects
# Call the function under test
result = card_generator.generate_cards_from_crawled_content(input_anki_cards)
# Assertions
mock_process_anki_card_data.assert_called_once_with(input_anki_cards)
mock_deduplicate_cards.assert_called_once_with(mock_processed_list)
assert result == mock_unique_list
def test_generate_cards_from_crawled_content_empty_input():
"""Test with an empty list of AnkiCardData objects."""
with (
patch(
"ankigen_core.card_generator.process_anki_card_data", return_value=[]
) as mock_process,
patch(
"ankigen_core.card_generator.deduplicate_cards", return_value=[]
) as mock_dedup,
):
result = card_generator.generate_cards_from_crawled_content([])
mock_process.assert_called_once_with([])
mock_dedup.assert_called_once_with([])
assert result == []
# Example of an integration-style test (optional, as unit tests for sub-components are thorough)
# This would not mock the internal calls.
def test_generate_cards_from_crawled_content_integration(sample_anki_card_data_list):
"""
A more integration-style test to ensure the flow works with real sub-functions.
This relies on the correctness of process_anki_card_data and deduplicate_cards.
"""
# Construct a list that will actually have duplicates after processing
card1 = AnkiCardData(front="Q1", back="A1", tags=["test"], note_type="Basic")
card2_dup = AnkiCardData(
front="Q1", back="A1_variant", tags=["test"], note_type="Basic"
) # Duplicate front
card3 = AnkiCardData(front="Q2", back="A2", tags=["test"], note_type="Basic")
input_list = [card1, card2_dup, card3]
result = card_generator.generate_cards_from_crawled_content(input_list)
# Expected result after processing and deduplication:
# Card1 (original) should be present. Card2_dup should be removed. Card3 should be present.
# Check lengths
assert len(result) == 2
# Check content (simplified check based on front)
result_fronts = [item["front"] for item in result]
assert "Q1" in result_fronts
assert "Q2" in result_fronts
# Check that the first version of Q1 was kept (A1, not A1_variant)
# This depends on the details of process_anki_card_data output
q1_card_in_result = next(item for item in result if item["front"] == "Q1")
assert (
"A1" in q1_card_in_result["back"]
) # Basic check, might need refinement based on exact source_url append
assert "A1_variant" not in q1_card_in_result["back"]
# More detailed checks could verify the full structure if needed
|