"""Unit tests for TextContent value object.""" import pytest from src.domain.models.text_content import TextContent class TestTextContent: """Test cases for TextContent value object.""" def test_valid_text_content_creation(self): """Test creating valid TextContent instance.""" text = TextContent( text="Hello, world!", language="en", encoding="utf-8" ) assert text.text == "Hello, world!" assert text.language == "en" assert text.encoding == "utf-8" assert text.word_count == 2 assert text.character_count == 13 assert text.is_empty is False def test_text_content_with_default_encoding(self): """Test creating TextContent with default encoding.""" text = TextContent( text="Hello, world!", language="en" ) assert text.encoding == "utf-8" def test_non_string_text_raises_error(self): """Test that non-string text raises TypeError.""" with pytest.raises(TypeError, match="Text must be a string"): TextContent( text=123, # type: ignore language="en" ) def test_empty_text_raises_error(self): """Test that empty text raises ValueError.""" with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"): TextContent( text="", language="en" ) def test_whitespace_only_text_raises_error(self): """Test that whitespace-only text raises ValueError.""" with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"): TextContent( text=" \n\t ", language="en" ) def test_text_too_long_raises_error(self): """Test that text over 50,000 characters raises ValueError.""" long_text = "a" * 50001 with pytest.raises(ValueError, match="Text content too long"): TextContent( text=long_text, language="en" ) def test_text_at_max_length(self): """Test text at maximum allowed length.""" max_text = "a" * 50000 text = TextContent( text=max_text, language="en" ) assert len(text.text) == 50000 def test_non_string_language_raises_error(self): """Test that non-string language raises TypeError.""" with pytest.raises(TypeError, match="Language must be a string"): TextContent( text="Hello", language=123 # type: ignore ) def test_empty_language_raises_error(self): """Test that empty language raises ValueError.""" with pytest.raises(ValueError, match="Language cannot be empty"): TextContent( text="Hello", language="" ) def test_whitespace_language_raises_error(self): """Test that whitespace-only language raises ValueError.""" with pytest.raises(ValueError, match="Language cannot be empty"): TextContent( text="Hello", language=" " ) def test_invalid_language_code_format_raises_error(self): """Test that invalid language code format raises ValueError.""" invalid_codes = ["e", "ENG", "en-us", "en-USA", "123", "en_US"] for code in invalid_codes: with pytest.raises(ValueError, match="Invalid language code format"): TextContent( text="Hello", language=code ) def test_valid_language_codes(self): """Test valid language code formats.""" valid_codes = ["en", "fr", "de", "es", "zh", "ja", "en-US", "fr-FR", "zh-CN"] for code in valid_codes: text = TextContent( text="Hello", language=code ) assert text.language == code def test_non_string_encoding_raises_error(self): """Test that non-string encoding raises TypeError.""" with pytest.raises(TypeError, match="Encoding must be a string"): TextContent( text="Hello", language="en", encoding=123 # type: ignore ) def test_unsupported_encoding_raises_error(self): """Test that unsupported encoding raises ValueError.""" with pytest.raises(ValueError, match="Unsupported encoding: xyz"): TextContent( text="Hello", language="en", encoding="xyz" ) def test_supported_encodings(self): """Test all supported encodings.""" supported_encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1'] for encoding in supported_encodings: text = TextContent( text="Hello", language="en", encoding=encoding ) assert text.encoding == encoding def test_text_encoding_compatibility(self): """Test that text is compatible with specified encoding.""" # ASCII text with UTF-8 encoding should work text = TextContent( text="Hello", language="en", encoding="ascii" ) assert text.encoding == "ascii" # Unicode text with ASCII encoding should fail with pytest.raises(ValueError, match="Text cannot be encoded with ascii encoding"): TextContent( text="Héllo", # Contains non-ASCII character language="en", encoding="ascii" ) def test_word_count_property(self): """Test word_count property calculation.""" test_cases = [ ("Hello world", 2), ("Hello", 1), ("Hello world test", 3), ("Hello, world! Test.", 3), # Multiple spaces and punctuation ("", 1), # Empty string split returns [''] ] for text_str, expected_count in test_cases: if text_str: # Skip empty string test as it would fail validation text = TextContent(text=text_str, language="en") assert text.word_count == expected_count def test_character_count_property(self): """Test character_count property.""" text_str = "Hello, world!" text = TextContent(text=text_str, language="en") assert text.character_count == len(text_str) def test_is_empty_property(self): """Test is_empty property.""" # Non-empty text text = TextContent(text="Hello", language="en") assert text.is_empty is False # Text with only meaningful content text2 = TextContent(text=" Hello ", language="en") assert text2.is_empty is False def test_truncate_method(self): """Test truncate method.""" text = TextContent(text="Hello, world! This is a test.", language="en") # Truncate to shorter length truncated = text.truncate(10) assert len(truncated.text) <= 10 assert truncated.language == text.language assert truncated.encoding == text.encoding assert isinstance(truncated, TextContent) # Truncate to longer length (should return same) not_truncated = text.truncate(100) assert not_truncated.text == text.text def test_truncate_with_invalid_length(self): """Test truncate with invalid max_length.""" text = TextContent(text="Hello", language="en") with pytest.raises(ValueError, match="Max length must be positive"): text.truncate(0) with pytest.raises(ValueError, match="Max length must be positive"): text.truncate(-1) def test_text_content_is_immutable(self): """Test that TextContent is immutable (frozen dataclass).""" text = TextContent(text="Hello", language="en") with pytest.raises(AttributeError): text.text = "Goodbye" # type: ignore def test_truncate_preserves_word_boundaries(self): """Test that truncate method preserves word boundaries by rstripping.""" text = TextContent(text="Hello world test", language="en") # Truncate in middle of word truncated = text.truncate(12) # "Hello world " -> "Hello world" after rstrip assert not truncated.text.endswith(" ") assert truncated.text == "Hello world"