# teachingAssistant/tests/unit/domain/models/test_text_content.py
# Michael Hu
# refactor based on DDD
# 5009cb8
"""Unit tests for TextContent value object."""
import pytest
from src.domain.models.text_content import TextContent
class TestTextContent:
    """Test cases for the TextContent value object.

    The suite covers construction and field validation (text, language,
    encoding), the derived properties (word_count, character_count,
    is_empty), the truncate() method, and immutability.

    Multi-input checks are parametrized so that each input is collected and
    reported as its own test case instead of stopping at the first failure.
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_valid_text_content_creation(self) -> None:
        """Create a TextContent with every field given and check all accessors."""
        text = TextContent(
            text="Hello, world!",
            language="en",
            encoding="utf-8",
        )

        assert text.text == "Hello, world!"
        assert text.language == "en"
        assert text.encoding == "utf-8"
        assert text.word_count == 2
        assert text.character_count == 13
        assert text.is_empty is False

    def test_text_content_with_default_encoding(self) -> None:
        """Omitting the encoding must fall back to utf-8."""
        text = TextContent(text="Hello, world!", language="en")

        assert text.encoding == "utf-8"

    # ------------------------------------------------------------------
    # Text validation
    # ------------------------------------------------------------------

    def test_non_string_text_raises_error(self) -> None:
        """A non-string text must raise TypeError."""
        with pytest.raises(TypeError, match="Text must be a string"):
            TextContent(
                text=123,  # type: ignore
                language="en",
            )

    def test_empty_text_raises_error(self) -> None:
        """An empty text must raise ValueError."""
        with pytest.raises(
            ValueError, match="Text content cannot be empty or whitespace only"
        ):
            TextContent(text="", language="en")

    def test_whitespace_only_text_raises_error(self) -> None:
        """A text made only of whitespace must raise ValueError."""
        with pytest.raises(
            ValueError, match="Text content cannot be empty or whitespace only"
        ):
            TextContent(text=" \n\t ", language="en")

    def test_text_too_long_raises_error(self) -> None:
        """A text of 50,001 characters (one over the limit) must raise ValueError."""
        long_text = "a" * 50001

        with pytest.raises(ValueError, match="Text content too long"):
            TextContent(text=long_text, language="en")

    def test_text_at_max_length(self) -> None:
        """A text of exactly 50,000 characters must be accepted."""
        max_text = "a" * 50000

        text = TextContent(text=max_text, language="en")

        assert len(text.text) == 50000

    # ------------------------------------------------------------------
    # Language validation
    # ------------------------------------------------------------------

    def test_non_string_language_raises_error(self) -> None:
        """A non-string language must raise TypeError."""
        with pytest.raises(TypeError, match="Language must be a string"):
            TextContent(
                text="Hello",
                language=123,  # type: ignore
            )

    def test_empty_language_raises_error(self) -> None:
        """An empty language must raise ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(text="Hello", language="")

    def test_whitespace_language_raises_error(self) -> None:
        """A whitespace-only language must raise ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(text="Hello", language=" ")

    @pytest.mark.parametrize(
        "code",
        ["e", "ENG", "en-us", "en-USA", "123", "en_US"],
    )
    def test_invalid_language_code_format_raises_error(self, code: str) -> None:
        """A malformed language code must raise ValueError."""
        with pytest.raises(ValueError, match="Invalid language code format"):
            TextContent(text="Hello", language=code)

    @pytest.mark.parametrize(
        "code",
        ["en", "fr", "de", "es", "zh", "ja", "en-US", "fr-FR", "zh-CN"],
    )
    def test_valid_language_codes(self, code: str) -> None:
        """A well-formed language code must be accepted and stored unchanged."""
        text = TextContent(text="Hello", language=code)

        assert text.language == code

    # ------------------------------------------------------------------
    # Encoding validation
    # ------------------------------------------------------------------

    def test_non_string_encoding_raises_error(self) -> None:
        """A non-string encoding must raise TypeError."""
        with pytest.raises(TypeError, match="Encoding must be a string"):
            TextContent(
                text="Hello",
                language="en",
                encoding=123,  # type: ignore
            )

    def test_unsupported_encoding_raises_error(self) -> None:
        """An unknown encoding name must raise ValueError naming that encoding."""
        with pytest.raises(ValueError, match="Unsupported encoding: xyz"):
            TextContent(text="Hello", language="en", encoding="xyz")

    @pytest.mark.parametrize(
        "encoding",
        ['utf-8', 'utf-16', 'ascii', 'latin-1'],
    )
    def test_supported_encodings(self, encoding: str) -> None:
        """Each supported encoding must be accepted and stored unchanged."""
        text = TextContent(text="Hello", language="en", encoding=encoding)

        assert text.encoding == encoding

    def test_text_encoding_compatibility(self) -> None:
        """The text must be representable in the requested encoding."""
        # Pure-ASCII text with the ascii encoding is accepted.
        text = TextContent(text="Hello", language="en", encoding="ascii")
        assert text.encoding == "ascii"

        # Text containing a non-ASCII character cannot use the ascii encoding.
        with pytest.raises(
            ValueError, match="Text cannot be encoded with ascii encoding"
        ):
            TextContent(
                text="Héllo",  # Contains non-ASCII character
                language="en",
                encoding="ascii",
            )

    # ------------------------------------------------------------------
    # Derived properties
    # ------------------------------------------------------------------

    # The empty string is deliberately not listed here: construction rejects
    # it (see test_empty_text_raises_error), so its word count can never be
    # observed through a TextContent instance.
    @pytest.mark.parametrize(
        ("text_str", "expected_count"),
        [
            ("Hello world", 2),
            ("Hello", 1),
            ("Hello world test", 3),
            ("Hello, world! Test.", 3),  # Punctuation does not add words
        ],
    )
    def test_word_count_property(self, text_str: str, expected_count: int) -> None:
        """word_count must report the expected number of words."""
        text = TextContent(text=text_str, language="en")

        assert text.word_count == expected_count

    def test_character_count_property(self) -> None:
        """character_count must equal the length of the text."""
        text_str = "Hello, world!"

        text = TextContent(text=text_str, language="en")

        assert text.character_count == len(text_str)

    def test_is_empty_property(self) -> None:
        """is_empty must be False for any text that passes validation."""
        # Plain non-empty text.
        text = TextContent(text="Hello", language="en")
        assert text.is_empty is False

        # Surrounding whitespace does not make meaningful text empty.
        text2 = TextContent(text=" Hello ", language="en")
        assert text2.is_empty is False

    # ------------------------------------------------------------------
    # truncate()
    # ------------------------------------------------------------------

    def test_truncate_method(self) -> None:
        """truncate() must shorten long text and leave short text untouched."""
        text = TextContent(text="Hello, world! This is a test.", language="en")

        # Limit below the text length: result is a new, shorter TextContent
        # that keeps the language and encoding of the source.
        truncated = text.truncate(10)
        assert len(truncated.text) <= 10
        assert truncated.language == text.language
        assert truncated.encoding == text.encoding
        assert isinstance(truncated, TextContent)

        # Limit above the text length: the text comes back unchanged.
        not_truncated = text.truncate(100)
        assert not_truncated.text == text.text

    @pytest.mark.parametrize("max_length", [0, -1])
    def test_truncate_with_invalid_length(self, max_length: int) -> None:
        """truncate() must reject a zero or negative max_length."""
        text = TextContent(text="Hello", language="en")

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(max_length)

    def test_truncate_preserves_word_boundaries(self) -> None:
        """truncate() must strip trailing whitespace left by the cut."""
        text = TextContent(text="Hello world test", language="en")

        # The first 12 characters are "Hello world " (the cut lands right
        # after the space that follows "world"); the trailing space must be
        # stripped, leaving "Hello world".
        truncated = text.truncate(12)

        assert not truncated.text.endswith(" ")
        assert truncated.text == "Hello world"

    # ------------------------------------------------------------------
    # Immutability
    # ------------------------------------------------------------------

    def test_text_content_is_immutable(self) -> None:
        """Assigning to a field must raise AttributeError (frozen dataclass)."""
        text = TextContent(text="Hello", language="en")

        with pytest.raises(AttributeError):
            text.text = "Goodbye"  # type: ignore