Spaces:
Sleeping
Sleeping
| """Unit tests for TranslationProviderBase abstract class.""" | |
| import pytest | |
| from unittest.mock import Mock, patch | |
| from src.infrastructure.base.translation_provider_base import TranslationProviderBase | |
| from src.domain.models.translation_request import TranslationRequest | |
| from src.domain.models.text_content import TextContent | |
| from src.domain.exceptions import TranslationFailedException | |
class ConcreteTranslationProvider(TranslationProviderBase):
    """Concrete TranslationProviderBase subclass used as a test double.

    The translated output, the availability flag, and whether a chunk
    translation raises are all configurable so individual tests can steer
    the code paths they exercise.
    """

    def __init__(self, provider_name="test", supported_languages=None, available=True):
        """Initialise the base class, then record the test-only switches."""
        super().__init__(provider_name, supported_languages)
        self._available = available
        self._should_fail = False
        self._translation_result = "Translated text"

    def _translate_chunk(self, text, source_language, target_language):
        """Return the canned result tagged with the language pair.

        Raises:
            Exception: when the failure switch has been turned on.
        """
        if not self._should_fail:
            return f"{self._translation_result} ({source_language}->{target_language})"
        raise Exception("Test translation error")

    def is_available(self):
        """Report the availability flag given at construction time."""
        return self._available

    def get_supported_languages(self):
        """Expose the ``supported_languages`` attribute unchanged."""
        return self.supported_languages

    def set_should_fail(self, should_fail):
        """Turn simulated chunk-translation failures on or off."""
        self._should_fail = should_fail

    def set_translation_result(self, result):
        """Replace the canned text returned by ``_translate_chunk``."""
        self._translation_result = result
class TestTranslationProviderBase:
    """Exercises TranslationProviderBase through ConcreteTranslationProvider."""

    def setup_method(self):
        """Build a default provider and an English-to-Spanish request."""
        self.provider = ConcreteTranslationProvider()
        self.source_text = TextContent(text="Hello world", language="en")
        self.request = TranslationRequest(
            source_text=self.source_text,
            target_language="es",
        )

    # -- construction -----------------------------------------------------

    def test_provider_initialization(self):
        """Name and language map are stored; chunk length starts at 1000."""
        language_map = {"en": ["es", "fr"], "es": ["en"]}
        created = ConcreteTranslationProvider("test_provider", language_map)

        assert created.provider_name == "test_provider"
        assert created.supported_languages == language_map
        assert created.max_chunk_length == 1000

    def test_provider_initialization_no_languages(self):
        """Omitting the language map yields an empty mapping."""
        created = ConcreteTranslationProvider("test_provider")

        assert created.provider_name == "test_provider"
        assert created.supported_languages == {}

    # -- translate() ------------------------------------------------------

    def test_translate_success(self):
        """A valid request produces TextContent in the target language."""
        translated = self.provider.translate(self.request)

        assert isinstance(translated, TextContent)
        assert translated.text == "Translated text (en->es)"
        assert translated.language == "es"
        assert translated.encoding == "utf-8"

    def test_translate_with_language_validation(self):
        """Unsupported source or target languages are rejected."""
        language_map = {"en": ["es", "fr"], "es": ["en"]}
        restricted = ConcreteTranslationProvider("test", language_map)

        # A supported pair goes through.
        assert isinstance(restricted.translate(self.request), TextContent)

        # Source language missing from the map.
        bad_source = TranslationRequest(
            source_text=TextContent(text="Hello", language="de"),
            target_language="es",
        )
        with pytest.raises(TranslationFailedException, match="Source language de not supported"):
            restricted.translate(bad_source)

        # Target language not listed for the source.
        bad_target = TranslationRequest(
            source_text=self.source_text,
            target_language="de",
        )
        with pytest.raises(TranslationFailedException, match="Translation from en to de not supported"):
            restricted.translate(bad_target)

    def test_translate_empty_text_fails(self):
        """An empty source string is rejected."""
        blank = TextContent(text="", language="en")
        request = TranslationRequest(source_text=blank, target_language="es")

        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(request)

    def test_translate_whitespace_text_fails(self):
        """A whitespace-only source string is rejected."""
        spaces = TextContent(text=" ", language="en")
        request = TranslationRequest(source_text=spaces, target_language="es")

        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(request)

    def test_translate_same_language_fails(self):
        """Identical source and target languages are rejected."""
        request = TranslationRequest(
            source_text=self.source_text,
            target_language="en",
        )

        with pytest.raises(TranslationFailedException, match="Source and target languages cannot be the same"):
            self.provider.translate(request)

    def test_translate_provider_error(self):
        """A failure inside the provider surfaces as TranslationFailedException."""
        self.provider.set_should_fail(True)

        with pytest.raises(TranslationFailedException, match="Translation failed"):
            self.provider.translate(self.request)

    def test_translate_long_text_chunking(self):
        """Text longer than the default chunk length still translates."""
        # 2000 characters, well above the default chunk length.
        lengthy = "This is a sentence. " * 100
        request = TranslationRequest(
            source_text=TextContent(text=lengthy, language="en"),
            target_language="es",
        )

        translated = self.provider.translate(request)

        assert isinstance(translated, TextContent)
        # The canned chunk output must appear in the combined result.
        assert "Translated text (en->es)" in translated.text

    # -- chunking helpers -------------------------------------------------

    def test_chunk_text_short_text(self):
        """Short text comes back as a single, unmodified chunk."""
        sample = "Hello world"

        pieces = self.provider._chunk_text(sample)

        assert len(pieces) == 1
        assert pieces[0] == sample

    def test_chunk_text_long_text(self):
        """Long text is split and no chunk exceeds the configured limit."""
        lengthy = "This is a sentence. " * 100
        self.provider.max_chunk_length = 50  # small limit to force splitting

        pieces = self.provider._chunk_text(lengthy)

        assert len(pieces) > 1
        assert all(len(piece) <= self.provider.max_chunk_length for piece in pieces)

    def test_split_into_sentences(self):
        """Text is split on '.', '!' and '?' sentence endings."""
        text = "First sentence. Second sentence! Third sentence? Fourth sentence."
        expected_fragments = (
            "First sentence",
            "Second sentence",
            "Third sentence",
            "Fourth sentence",
        )

        sentences = self.provider._split_into_sentences(text)

        assert len(sentences) == 4
        for fragment, sentence in zip(expected_fragments, sentences):
            assert fragment in sentence

    def test_split_into_sentences_no_punctuation(self):
        """Text without sentence punctuation stays in one piece."""
        text = "Just one long sentence without proper punctuation"

        sentences = self.provider._split_into_sentences(text)

        assert len(sentences) == 1
        assert sentences[0] == text

    def test_split_long_sentence(self):
        """An over-long sentence is broken on word boundaries."""
        wordy = "word " * 100
        self.provider.max_chunk_length = 20  # small limit to force splitting

        pieces = self.provider._split_long_sentence(wordy)

        assert len(pieces) > 1
        assert all(len(piece) <= self.provider.max_chunk_length for piece in pieces)

    def test_split_long_sentence_single_long_word(self):
        """A single word longer than the limit is kept whole."""
        giant_word = "a" * 100
        self.provider.max_chunk_length = 20

        pieces = self.provider._split_long_sentence(giant_word)

        assert len(pieces) == 1
        assert pieces[0] == giant_word  # emitted as-is despite exceeding the limit

    def test_reassemble_chunks(self):
        """Chunks are joined with single spaces."""
        parts = ["First chunk", "Second chunk", "Third chunk"]

        joined = self.provider._reassemble_chunks(parts)

        assert joined == "First chunk Second chunk Third chunk"

    def test_reassemble_chunks_with_empty(self):
        """Empty and blank chunks are dropped when joining."""
        parts = ["First chunk", "", "Third chunk", " "]

        joined = self.provider._reassemble_chunks(parts)

        assert joined == "First chunk Third chunk"

    # -- pre/post-processing ----------------------------------------------

    def test_preprocess_text(self):
        """Preprocessing trims and collapses whitespace."""
        raw = " Hello world \n\n with extra spaces "

        cleaned = self.provider._preprocess_text(raw)

        assert cleaned == "Hello world with extra spaces"

    def test_postprocess_text(self):
        """Postprocessing removes the space before punctuation."""
        raw = "Hello world . This is a test ! Another sentence ?"

        cleaned = self.provider._postprocess_text(raw)

        assert cleaned == "Hello world. This is a test! Another sentence?"

    def test_postprocess_text_sentence_spacing(self):
        """Postprocessing inserts the missing space between sentences."""
        raw = "First sentence.Second sentence!Third sentence?"

        cleaned = self.provider._postprocess_text(raw)

        assert cleaned == "First sentence. Second sentence! Third sentence?"

    # -- error handling ---------------------------------------------------

    def test_handle_provider_error(self):
        """Errors are wrapped with provider name and context, and chained."""
        cause = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as caught:
            self.provider._handle_provider_error(cause, "testing")

        assert "test error during testing: Original error" in str(caught.value)
        assert caught.value.__cause__ is cause

    def test_handle_provider_error_no_context(self):
        """Without a context the message carries only the provider name."""
        cause = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as caught:
            self.provider._handle_provider_error(cause)

        assert "test error: Original error" in str(caught.value)
        assert caught.value.__cause__ is cause

    # -- configuration and stats ------------------------------------------

    def test_set_chunk_size(self):
        """A positive chunk size is stored on the provider."""
        self.provider.set_chunk_size(500)

        assert self.provider.max_chunk_length == 500

    def test_set_chunk_size_invalid(self):
        """Zero and negative chunk sizes raise ValueError."""
        for bad_size in (0, -1):
            with pytest.raises(ValueError, match="Chunk size must be positive"):
                self.provider.set_chunk_size(bad_size)

    def test_get_translation_stats(self):
        """Stats describe the provider, the language pair and the text."""
        stats = self.provider.get_translation_stats(self.request)
        text = self.request.source_text.text

        assert stats['provider'] == 'test'
        assert stats['source_language'] == 'en'
        assert stats['target_language'] == 'es'
        assert stats['text_length'] == len(text)
        assert stats['word_count'] == len(text.split())
        assert stats['chunk_count'] >= 1
        assert 'max_chunk_length' in stats
        assert 'avg_chunk_length' in stats

    def test_get_translation_stats_empty_text(self):
        """Every numeric stat is zero for an empty source text."""
        request = TranslationRequest(
            source_text=TextContent(text="", language="en"),
            target_language="es",
        )

        stats = self.provider.get_translation_stats(request)

        zero_keys = (
            'text_length',
            'word_count',
            'chunk_count',
            'max_chunk_length',
            'avg_chunk_length',
        )
        for key in zero_keys:
            assert stats[key] == 0

    # -- abstract contract and simple accessors ---------------------------

    def test_abstract_methods_not_implemented(self):
        """Instantiating the abstract base class directly raises TypeError."""
        with pytest.raises(TypeError):
            TranslationProviderBase("test")

    def test_provider_unavailable(self):
        """The availability flag passed at construction is reported back."""
        offline = ConcreteTranslationProvider(available=False)

        assert offline.is_available() is False

    def test_no_supported_languages(self):
        """An explicitly empty language map is returned unchanged."""
        bare = ConcreteTranslationProvider(supported_languages={})

        assert bare.get_supported_languages() == {}