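# NOTE(review): stray page text "Spaces: Build error" preceded this module;
# kept here as a comment so the file parses -- confirm it can be dropped.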
"""Unit tests for the TextContent value object."""
import pytest

from src.domain.models.text_content import TextContent
class TestTextContent:
    """Test cases for the TextContent value object.

    The tests exercise construction and validation of ``text``, ``language``
    and ``encoding``, the derived ``word_count`` / ``character_count`` /
    ``is_empty`` properties, the ``truncate`` method, and immutability.
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_valid_text_content_creation(self) -> None:
        """A fully specified instance exposes its fields and derived values."""
        text = TextContent(
            text="Hello, world!",
            language="en",
            encoding="utf-8",
        )

        assert text.text == "Hello, world!"
        assert text.language == "en"
        assert text.encoding == "utf-8"
        assert text.word_count == 2
        assert text.character_count == 13
        assert text.is_empty is False

    def test_text_content_with_default_encoding(self) -> None:
        """Omitting ``encoding`` yields "utf-8"."""
        text = TextContent(
            text="Hello, world!",
            language="en",
        )

        assert text.encoding == "utf-8"

    # ------------------------------------------------------------------
    # Text validation
    # ------------------------------------------------------------------

    def test_non_string_text_raises_error(self) -> None:
        """A non-string ``text`` raises TypeError."""
        with pytest.raises(TypeError, match="Text must be a string"):
            TextContent(
                text=123,  # type: ignore
                language="en",
            )

    def test_empty_text_raises_error(self) -> None:
        """An empty ``text`` raises ValueError."""
        with pytest.raises(
            ValueError,
            match="Text content cannot be empty or whitespace only",
        ):
            TextContent(
                text="",
                language="en",
            )

    def test_whitespace_only_text_raises_error(self) -> None:
        """A whitespace-only ``text`` raises ValueError."""
        with pytest.raises(
            ValueError,
            match="Text content cannot be empty or whitespace only",
        ):
            TextContent(
                text=" \n\t ",
                language="en",
            )

    def test_text_too_long_raises_error(self) -> None:
        """Text of 50,001 characters raises ValueError."""
        long_text = "a" * 50001

        with pytest.raises(ValueError, match="Text content too long"):
            TextContent(
                text=long_text,
                language="en",
            )

    def test_text_at_max_length(self) -> None:
        """Text of exactly 50,000 characters is accepted."""
        max_text = "a" * 50000

        text = TextContent(
            text=max_text,
            language="en",
        )

        assert len(text.text) == 50000

    # ------------------------------------------------------------------
    # Language validation
    # ------------------------------------------------------------------

    def test_non_string_language_raises_error(self) -> None:
        """A non-string ``language`` raises TypeError."""
        with pytest.raises(TypeError, match="Language must be a string"):
            TextContent(
                text="Hello",
                language=123,  # type: ignore
            )

    def test_empty_language_raises_error(self) -> None:
        """An empty ``language`` raises ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(
                text="Hello",
                language="",
            )

    def test_whitespace_language_raises_error(self) -> None:
        """A whitespace-only ``language`` raises ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(
                text="Hello",
                language=" ",
            )

    def test_invalid_language_code_format_raises_error(self) -> None:
        """Each malformed language code raises ValueError."""
        invalid_codes = ["e", "ENG", "en-us", "en-USA", "123", "en_US"]

        for code in invalid_codes:
            with pytest.raises(ValueError, match="Invalid language code format"):
                TextContent(
                    text="Hello",
                    language=code,
                )

    def test_valid_language_codes(self) -> None:
        """Two-letter codes and ``xx-YY`` region codes are stored unchanged."""
        valid_codes = [
            "en", "fr", "de", "es", "zh", "ja",
            "en-US", "fr-FR", "zh-CN",
        ]

        for code in valid_codes:
            text = TextContent(
                text="Hello",
                language=code,
            )
            assert text.language == code

    # ------------------------------------------------------------------
    # Encoding validation
    # ------------------------------------------------------------------

    def test_non_string_encoding_raises_error(self) -> None:
        """A non-string ``encoding`` raises TypeError."""
        with pytest.raises(TypeError, match="Encoding must be a string"):
            TextContent(
                text="Hello",
                language="en",
                encoding=123,  # type: ignore
            )

    def test_unsupported_encoding_raises_error(self) -> None:
        """An unknown encoding name raises ValueError naming that encoding."""
        with pytest.raises(ValueError, match="Unsupported encoding: xyz"):
            TextContent(
                text="Hello",
                language="en",
                encoding="xyz",
            )

    def test_supported_encodings(self) -> None:
        """Each encoding in the list below is accepted and stored unchanged."""
        supported_encodings = ["utf-8", "utf-16", "ascii", "latin-1"]

        for encoding in supported_encodings:
            text = TextContent(
                text="Hello",
                language="en",
                encoding=encoding,
            )
            assert text.encoding == encoding

    def test_text_encoding_compatibility(self) -> None:
        """Text must be representable in the requested encoding."""
        # Pure-ASCII text with the ASCII encoding is accepted.
        text = TextContent(
            text="Hello",
            language="en",
            encoding="ascii",
        )
        assert text.encoding == "ascii"

        # Non-ASCII text with the ASCII encoding is rejected.
        with pytest.raises(
            ValueError,
            match="Text cannot be encoded with ascii encoding",
        ):
            TextContent(
                text="Héllo",  # Contains a non-ASCII character
                language="en",
                encoding="ascii",
            )

    # ------------------------------------------------------------------
    # Derived properties
    # ------------------------------------------------------------------

    def test_word_count_property(self) -> None:
        """``word_count`` matches the expected number of words."""
        # Empty text is deliberately absent: it cannot be constructed (see
        # test_empty_text_raises_error), so it has no word count to check.
        test_cases = [
            ("Hello world", 2),
            ("Hello", 1),
            ("Hello world test", 3),
            ("Hello, world! Test.", 3),  # Punctuation does not add words
        ]

        for text_str, expected_count in test_cases:
            text = TextContent(text=text_str, language="en")
            assert text.word_count == expected_count

    def test_character_count_property(self) -> None:
        """``character_count`` equals ``len`` of the source string."""
        text_str = "Hello, world!"

        text = TextContent(text=text_str, language="en")

        assert text.character_count == len(text_str)

    def test_is_empty_property(self) -> None:
        """``is_empty`` is False for any constructible content."""
        # Plain non-empty text.
        text = TextContent(text="Hello", language="en")
        assert text.is_empty is False

        # Meaningful content surrounded by whitespace.
        text2 = TextContent(text=" Hello ", language="en")
        assert text2.is_empty is False

    # ------------------------------------------------------------------
    # truncate()
    # ------------------------------------------------------------------

    def test_truncate_method(self) -> None:
        """``truncate`` returns a TextContent no longer than ``max_length``."""
        text = TextContent(text="Hello, world! This is a test.", language="en")

        # Shorter limit: result is cut down and keeps language/encoding.
        truncated = text.truncate(10)
        assert len(truncated.text) <= 10
        assert truncated.language == text.language
        assert truncated.encoding == text.encoding
        assert isinstance(truncated, TextContent)

        # Limit beyond the text length: the text comes back unchanged.
        not_truncated = text.truncate(100)
        assert not_truncated.text == text.text

    def test_truncate_with_invalid_length(self) -> None:
        """Zero and negative ``max_length`` values raise ValueError."""
        text = TextContent(text="Hello", language="en")

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(0)

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(-1)

    def test_truncate_preserves_word_boundaries(self) -> None:
        """``truncate`` strips trailing whitespace left at the cut point."""
        text = TextContent(text="Hello world test", language="en")

        # The first 12 characters are "Hello world " (cut lands just after
        # the space), so the expected result is "Hello world".
        truncated = text.truncate(12)

        assert not truncated.text.endswith(" ")
        assert truncated.text == "Hello world"

    # ------------------------------------------------------------------
    # Immutability
    # ------------------------------------------------------------------

    def test_text_content_is_immutable(self) -> None:
        """Assigning to a field raises AttributeError."""
        text = TextContent(text="Hello", language="en")

        # NOTE(review): presumably TextContent is a frozen dataclass, whose
        # FrozenInstanceError subclasses AttributeError -- confirm in the model.
        with pytest.raises(AttributeError):
            text.text = "Goodbye"  # type: ignore