Spaces:

DroolingPanda
/

teachingAssistant

Build error

File size: 8,783 Bytes

5009cb8

"""Unit tests for TextContent value object."""

import pytest
from src.domain.models.text_content import TextContent


class TestTextContent:
    """Test cases for the TextContent value object.

    The suite covers construction and validation of the ``text``,
    ``language`` and ``encoding`` arguments, the derived ``word_count``,
    ``character_count`` and ``is_empty`` properties, the ``truncate``
    method, and attribute immutability.

    Note: the ``match`` argument of ``pytest.raises`` is a regular
    expression searched against the exception message, so the expected
    messages below only need to appear somewhere in the raised text.
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_valid_text_content_creation(self):
        """Test creating a valid TextContent with every argument given."""
        text = TextContent(
            text="Hello, world!",
            language="en",
            encoding="utf-8"
        )

        # Constructor arguments are exposed unchanged.
        assert text.text == "Hello, world!"
        assert text.language == "en"
        assert text.encoding == "utf-8"
        # Derived properties for the 13-character, two-word input.
        assert text.word_count == 2
        assert text.character_count == 13
        assert text.is_empty is False

    def test_text_content_with_default_encoding(self):
        """Test that omitting ``encoding`` yields "utf-8"."""
        text = TextContent(
            text="Hello, world!",
            language="en"
        )

        assert text.encoding == "utf-8"

    # ------------------------------------------------------------------
    # ``text`` validation
    # ------------------------------------------------------------------

    def test_non_string_text_raises_error(self):
        """Test that non-string text raises TypeError."""
        with pytest.raises(TypeError, match="Text must be a string"):
            TextContent(
                text=123,  # type: ignore
                language="en"
            )

    def test_empty_text_raises_error(self):
        """Test that empty text raises ValueError."""
        with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"):
            TextContent(
                text="",
                language="en"
            )

    def test_whitespace_only_text_raises_error(self):
        """Test that whitespace-only text raises ValueError."""
        with pytest.raises(ValueError, match="Text content cannot be empty or whitespace only"):
            TextContent(
                text="   \n\t  ",
                language="en"
            )

    def test_text_too_long_raises_error(self):
        """Test that text of 50,001 characters raises ValueError."""
        # One character past the accepted boundary exercised in
        # test_text_at_max_length.
        long_text = "a" * 50001
        with pytest.raises(ValueError, match="Text content too long"):
            TextContent(
                text=long_text,
                language="en"
            )

    def test_text_at_max_length(self):
        """Test that text of exactly 50,000 characters is accepted."""
        max_text = "a" * 50000
        text = TextContent(
            text=max_text,
            language="en"
        )
        assert len(text.text) == 50000

    # ------------------------------------------------------------------
    # ``language`` validation
    # ------------------------------------------------------------------

    def test_non_string_language_raises_error(self):
        """Test that non-string language raises TypeError."""
        with pytest.raises(TypeError, match="Language must be a string"):
            TextContent(
                text="Hello",
                language=123  # type: ignore
            )

    def test_empty_language_raises_error(self):
        """Test that empty language raises ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(
                text="Hello",
                language=""
            )

    def test_whitespace_language_raises_error(self):
        """Test that whitespace-only language raises ValueError."""
        with pytest.raises(ValueError, match="Language cannot be empty"):
            TextContent(
                text="Hello",
                language="   "
            )

    def test_invalid_language_code_format_raises_error(self):
        """Test that malformed language codes raise ValueError.

        The samples cover a too-short code, wrong letter case, a
        lowercase or over-long region part, digits, and an underscore
        separator.
        """
        invalid_codes = ["e", "ENG", "en-us", "en-USA", "123", "en_US"]

        for code in invalid_codes:
            with pytest.raises(ValueError, match="Invalid language code format"):
                TextContent(
                    text="Hello",
                    language=code
                )

    def test_valid_language_codes(self):
        """Test that two-letter and ``xx-YY`` language codes are accepted."""
        valid_codes = ["en", "fr", "de", "es", "zh", "ja", "en-US", "fr-FR", "zh-CN"]

        for code in valid_codes:
            content = TextContent(
                text="Hello",
                language=code
            )
            assert content.language == code, f"language code {code!r} was not preserved"

    # ------------------------------------------------------------------
    # ``encoding`` validation
    # ------------------------------------------------------------------

    def test_non_string_encoding_raises_error(self):
        """Test that non-string encoding raises TypeError."""
        with pytest.raises(TypeError, match="Encoding must be a string"):
            TextContent(
                text="Hello",
                language="en",
                encoding=123  # type: ignore
            )

    def test_unsupported_encoding_raises_error(self):
        """Test that an unknown encoding name raises ValueError."""
        with pytest.raises(ValueError, match="Unsupported encoding: xyz"):
            TextContent(
                text="Hello",
                language="en",
                encoding="xyz"
            )

    def test_supported_encodings(self):
        """Test that each encoding listed below is accepted and preserved."""
        supported_encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1']

        for encoding in supported_encodings:
            content = TextContent(
                text="Hello",
                language="en",
                encoding=encoding
            )
            assert content.encoding == encoding, f"encoding {encoding!r} was not preserved"

    def test_text_encoding_compatibility(self):
        """Test that text must be representable in the chosen encoding."""
        # Pure-ASCII text with the ASCII encoding should work.
        text = TextContent(
            text="Hello",
            language="en",
            encoding="ascii"
        )
        assert text.encoding == "ascii"

        # Non-ASCII text with the ASCII encoding should fail.
        with pytest.raises(ValueError, match="Text cannot be encoded with ascii encoding"):
            TextContent(
                text="Héllo",  # Contains non-ASCII character
                language="en",
                encoding="ascii"
            )

    # ------------------------------------------------------------------
    # Derived properties
    # ------------------------------------------------------------------

    def test_word_count_property(self):
        """Test word_count for single, multiple and irregularly spaced words.

        The empty string is deliberately absent: it is rejected at
        construction time (see test_empty_text_raises_error), so no
        word count can be observed for it.
        """
        test_cases = [
            ("Hello world", 2),
            ("Hello", 1),
            ("Hello world test", 3),
            ("Hello,  world!  Test.", 3),  # Multiple spaces and punctuation
        ]

        for text_str, expected_count in test_cases:
            content = TextContent(text=text_str, language="en")
            assert content.word_count == expected_count, (
                f"unexpected word count for {text_str!r}"
            )

    def test_character_count_property(self):
        """Test that character_count equals ``len`` of the input string."""
        text_str = "Hello, world!"
        text = TextContent(text=text_str, language="en")
        assert text.character_count == len(text_str)

    def test_is_empty_property(self):
        """Test that is_empty is False for text containing a word.

        Only the ``False`` outcome is exercised here; empty and
        whitespace-only inputs are rejected by the constructor in the
        validation tests above.
        """
        # Plain non-empty text.
        text = TextContent(text="Hello", language="en")
        assert text.is_empty is False

        # Text padded with leading and trailing whitespace.
        text2 = TextContent(text="  Hello  ", language="en")
        assert text2.is_empty is False

    # ------------------------------------------------------------------
    # truncate()
    # ------------------------------------------------------------------

    def test_truncate_method(self):
        """Test that truncate returns a TextContent within the limit."""
        text = TextContent(text="Hello, world! This is a test.", language="en")

        # Limit shorter than the text: the result is shortened and keeps
        # the language and encoding of the source object.
        truncated = text.truncate(10)
        assert len(truncated.text) <= 10
        assert truncated.language == text.language
        assert truncated.encoding == text.encoding
        assert isinstance(truncated, TextContent)

        # Limit longer than the text: the text comes back unchanged.
        not_truncated = text.truncate(100)
        assert not_truncated.text == text.text

    def test_truncate_with_invalid_length(self):
        """Test that zero and negative max_length raise ValueError."""
        text = TextContent(text="Hello", language="en")

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(0)

        with pytest.raises(ValueError, match="Max length must be positive"):
            text.truncate(-1)

    def test_text_content_is_immutable(self):
        """Test that assigning to an attribute raises AttributeError.

        NOTE(review): presumably TextContent is a frozen dataclass, whose
        FrozenInstanceError is a subclass of AttributeError -- confirm
        against the model definition.
        """
        text = TextContent(text="Hello", language="en")

        with pytest.raises(AttributeError):
            text.text = "Goodbye"  # type: ignore

    def test_truncate_preserves_word_boundaries(self):
        """Test that truncate drops trailing whitespace left at the cut.

        NOTE(review): despite the method name, this case only covers a
        cut that lands directly after a space; a cut inside a word is
        not exercised here.
        """
        text = TextContent(text="Hello world test", language="en")

        # The first 12 characters are "Hello world " (the cut falls right
        # after the space); the expected result has that space removed.
        truncated = text.truncate(12)
        assert not truncated.text.endswith(" ")
        assert truncated.text == "Hello world"