Spaces:
Build error
Build error
File size: 8,783 Bytes
5009cb8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
"""Unit tests for TextContent value object."""
import pytest
from src.domain.models.text_content import TextContent
class TestTextContent:
    """Test cases for the TextContent value object.

    The tests cover construction and validation of the ``text``,
    ``language`` and ``encoding`` fields, the derived ``word_count``,
    ``character_count`` and ``is_empty`` properties, the ``truncate``
    method, and rejection of attribute assignment.
    """

    def test_valid_text_content_creation(self):
        """Test creating a valid TextContent instance with all fields given."""
        text = TextContent(
            text="Hello, world!",
            language="en",
            encoding="utf-8",
        )

        assert text.text == "Hello, world!"
        assert text.language == "en"
        assert text.encoding == "utf-8"
        assert text.word_count == 2
        assert text.character_count == 13
        assert text.is_empty is False

    def test_text_content_with_default_encoding(self):
        """Test that omitting ``encoding`` yields "utf-8"."""
        text = TextContent(
            text="Hello, world!",
            language="en",
        )

        assert text.encoding == "utf-8"

    def test_non_string_text_raises_error(self):
        """Test that non-string text raises TypeError."""
        with pytest.raises(TypeError, match="Text must be a string"):
            TextContent(
                text=123,  # type: ignore
                language="en",
            )

    def test_empty_text_raises_error(self):
        """Test that empty text raises ValueError."""
        with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"):
            TextContent(
                text="",
                language="en",
            )

    def test_whitespace_only_text_raises_error(self):
        """Test that whitespace-only text raises ValueError."""
        with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"):
            TextContent(
                text=" \n\t ",
                language="en",
            )

    def test_text_too_long_raises_error(self):
        """Test that text of 50,001 characters raises ValueError."""
        long_text = "a" * 50001

        with pytest.raises(ValueError, match="Text content too long"):
            TextContent(
                text=long_text,
                language="en",
            )

    def test_text_at_max_length(self):
        """Test that text of exactly 50,000 characters is accepted."""
        max_text = "a" * 50000

        text = TextContent(
            text=max_text,
            language="en",
        )

        assert len(text.text) == 50000

    def test_non_string_language_raises_error(self):
        """Test that non-string language raises TypeError."""
        with pytest.raises(TypeError, match="Language must be a string"):
            TextContent(
                text="Hello",
                language=123,  # type: ignore
            )

    def test_empty_language_raises_error(self):
        """Test that empty language raises ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(
                text="Hello",
                language="",
            )

    def test_whitespace_language_raises_error(self):
        """Test that whitespace-only language raises ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(
                text="Hello",
                language=" ",
            )

    def test_invalid_language_code_format_raises_error(self):
        """Test that each malformed language code raises ValueError."""
        invalid_codes = ["e", "ENG", "en-us", "en-USA", "123", "en_US"]

        for code in invalid_codes:
            with pytest.raises(ValueError, match="Invalid language code format"):
                TextContent(
                    text="Hello",
                    language=code,
                )

    def test_valid_language_codes(self):
        """Test that two-letter and region-qualified codes are accepted."""
        valid_codes = ["en", "fr", "de", "es", "zh", "ja", "en-US", "fr-FR", "zh-CN"]

        for code in valid_codes:
            text = TextContent(
                text="Hello",
                language=code,
            )
            assert text.language == code, f"language not preserved for {code!r}"

    def test_non_string_encoding_raises_error(self):
        """Test that non-string encoding raises TypeError."""
        with pytest.raises(TypeError, match="Encoding must be a string"):
            TextContent(
                text="Hello",
                language="en",
                encoding=123,  # type: ignore
            )

    def test_unsupported_encoding_raises_error(self):
        """Test that an unsupported encoding name raises ValueError."""
        with pytest.raises(ValueError, match="Unsupported encoding: xyz"):
            TextContent(
                text="Hello",
                language="en",
                encoding="xyz",
            )

    def test_supported_encodings(self):
        """Test that each encoding in the supported list is accepted."""
        supported_encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1']

        for encoding in supported_encodings:
            text = TextContent(
                text="Hello",
                language="en",
                encoding=encoding,
            )
            assert text.encoding == encoding, f"encoding not preserved for {encoding!r}"

    def test_text_encoding_compatibility(self):
        """Test that text must be encodable with the specified encoding."""
        # ASCII-only text with ASCII encoding should work
        text = TextContent(
            text="Hello",
            language="en",
            encoding="ascii",
        )
        assert text.encoding == "ascii"

        # Non-ASCII text with ASCII encoding should fail
        with pytest.raises(ValueError, match="Text cannot be encoded with ascii encoding"):
            TextContent(
                text="Héllo",  # Contains non-ASCII character
                language="en",
                encoding="ascii",
            )

    def test_word_count_property(self):
        """Test word_count for single-word, multi-word and punctuated text."""
        # Empty text is deliberately absent from the cases: construction
        # rejects it (see test_empty_text_raises_error), so word_count can
        # never be observed for an empty string.
        test_cases = [
            ("Hello world", 2),
            ("Hello", 1),
            ("Hello world test", 3),
            ("Hello, world! Test.", 3),  # Punctuation attached to words
        ]

        for text_str, expected_count in test_cases:
            text = TextContent(text=text_str, language="en")
            assert text.word_count == expected_count, (
                f"unexpected word_count for {text_str!r}"
            )

    def test_character_count_property(self):
        """Test that character_count equals the length of the text."""
        text_str = "Hello, world!"
        text = TextContent(text=text_str, language="en")

        assert text.character_count == len(text_str)

    def test_is_empty_property(self):
        """Test that is_empty is False for text containing non-whitespace."""
        # Non-empty text
        text = TextContent(text="Hello", language="en")
        assert text.is_empty is False

        # Text padded with surrounding whitespace still counts as non-empty
        text2 = TextContent(text=" Hello ", language="en")
        assert text2.is_empty is False

    def test_truncate_method(self):
        """Test that truncate returns a TextContent no longer than max_length."""
        text = TextContent(text="Hello, world! This is a test.", language="en")

        # Truncate to shorter length: language and encoding carry over
        truncated = text.truncate(10)
        assert len(truncated.text) <= 10
        assert truncated.language == text.language
        assert truncated.encoding == text.encoding
        assert isinstance(truncated, TextContent)

        # Truncate to a length beyond the text (text should be unchanged)
        not_truncated = text.truncate(100)
        assert not_truncated.text == text.text

    def test_truncate_with_invalid_length(self):
        """Test that zero and negative max_length raise ValueError."""
        text = TextContent(text="Hello", language="en")

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(0)

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(-1)

    def test_text_content_is_immutable(self):
        """Test that assigning to a field raises AttributeError.

        This is the behavior a frozen dataclass would give; the test only
        relies on the AttributeError, not on how immutability is implemented.
        """
        text = TextContent(text="Hello", language="en")

        with pytest.raises(AttributeError):
            text.text = "Goodbye"  # type: ignore

    def test_truncate_preserves_word_boundaries(self):
        """Test that truncate strips trailing whitespace left by the cut."""
        text = TextContent(text="Hello world test", language="en")

        # The 12-character cut lands just after the space following "world":
        # "Hello world " -> "Hello world" once trailing whitespace is stripped.
        truncated = text.truncate(12)
        assert not truncated.text.endswith(" ")
        assert truncated.text == "Hello world"