"""Unit tests for TranslationProviderBase abstract class.""" import pytest from unittest.mock import Mock, patch from src.infrastructure.base.translation_provider_base import TranslationProviderBase from src.domain.models.translation_request import TranslationRequest from src.domain.models.text_content import TextContent from src.domain.exceptions import TranslationFailedException class ConcreteTranslationProvider(TranslationProviderBase): """Concrete implementation for testing.""" def __init__(self, provider_name="test", supported_languages=None, available=True): super().__init__(provider_name, supported_languages) self._available = available self._should_fail = False self._translation_result = "Translated text" def _translate_chunk(self, text, source_language, target_language): if self._should_fail: raise Exception("Test translation error") return f"{self._translation_result} ({source_language}->{target_language})" def is_available(self): return self._available def get_supported_languages(self): return self.supported_languages def set_should_fail(self, should_fail): self._should_fail = should_fail def set_translation_result(self, result): self._translation_result = result class TestTranslationProviderBase: """Test cases for TranslationProviderBase abstract class.""" def setup_method(self): """Set up test fixtures.""" self.provider = ConcreteTranslationProvider() self.source_text = TextContent(text="Hello world", language="en") self.request = TranslationRequest( source_text=self.source_text, target_language="es" ) def test_provider_initialization(self): """Test provider initialization with default values.""" supported_langs = {"en": ["es", "fr"], "es": ["en"]} provider = ConcreteTranslationProvider("test_provider", supported_langs) assert provider.provider_name == "test_provider" assert provider.supported_languages == supported_langs assert provider.max_chunk_length == 1000 def test_provider_initialization_no_languages(self): """Test provider initialization without supported languages.""" provider = ConcreteTranslationProvider("test_provider") assert provider.provider_name == "test_provider" assert provider.supported_languages == {} def test_translate_success(self): """Test successful translation.""" result = self.provider.translate(self.request) assert isinstance(result, TextContent) assert result.text == "Translated text (en->es)" assert result.language == "es" assert result.encoding == "utf-8" def test_translate_with_language_validation(self): """Test translation with language validation.""" supported_langs = {"en": ["es", "fr"], "es": ["en"]} provider = ConcreteTranslationProvider("test", supported_langs) # Valid language pair should work result = provider.translate(self.request) assert isinstance(result, TextContent) # Invalid source language should fail invalid_request = TranslationRequest( source_text=TextContent(text="Hello", language="de"), target_language="es" ) with pytest.raises(TranslationFailedException, match="Source language de not supported"): provider.translate(invalid_request) # Invalid target language should fail invalid_request2 = TranslationRequest( source_text=self.source_text, target_language="de" ) with pytest.raises(TranslationFailedException, match="Translation from en to de not supported"): provider.translate(invalid_request2) def test_translate_empty_text_fails(self): """Test that empty text raises exception.""" empty_request = TranslationRequest( source_text=TextContent(text="", language="en"), target_language="es" ) with pytest.raises(TranslationFailedException, match="Source text cannot be empty"): self.provider.translate(empty_request) def test_translate_whitespace_text_fails(self): """Test that whitespace-only text raises exception.""" whitespace_request = TranslationRequest( source_text=TextContent(text=" ", language="en"), target_language="es" ) with pytest.raises(TranslationFailedException, match="Source text cannot be empty"): self.provider.translate(whitespace_request) def test_translate_same_language_fails(self): """Test that same source and target language raises exception.""" same_lang_request = TranslationRequest( source_text=self.source_text, target_language="en" ) with pytest.raises(TranslationFailedException, match="Source and target languages cannot be the same"): self.provider.translate(same_lang_request) def test_translate_provider_error(self): """Test handling of provider-specific errors.""" self.provider.set_should_fail(True) with pytest.raises(TranslationFailedException, match="Translation failed"): self.provider.translate(self.request) def test_translate_long_text_chunking(self): """Test translation of long text with chunking.""" # Create long text that will be chunked long_text = "This is a sentence. " * 100 # Much longer than default chunk size long_request = TranslationRequest( source_text=TextContent(text=long_text, language="en"), target_language="es" ) result = self.provider.translate(long_request) assert isinstance(result, TextContent) # Should contain multiple translated chunks assert "Translated text (en->es)" in result.text def test_chunk_text_short_text(self): """Test text chunking with short text.""" short_text = "Hello world" chunks = self.provider._chunk_text(short_text) assert len(chunks) == 1 assert chunks[0] == short_text def test_chunk_text_long_text(self): """Test text chunking with long text.""" # Create text longer than chunk size long_text = "This is a sentence. " * 100 self.provider.max_chunk_length = 50 # Small chunk size for testing chunks = self.provider._chunk_text(long_text) assert len(chunks) > 1 for chunk in chunks: assert len(chunk) <= self.provider.max_chunk_length def test_split_into_sentences(self): """Test sentence splitting.""" text = "First sentence. Second sentence! Third sentence? Fourth sentence." sentences = self.provider._split_into_sentences(text) assert len(sentences) == 4 assert "First sentence" in sentences[0] assert "Second sentence" in sentences[1] assert "Third sentence" in sentences[2] assert "Fourth sentence" in sentences[3] def test_split_into_sentences_no_punctuation(self): """Test sentence splitting with no punctuation.""" text = "Just one long sentence without proper punctuation" sentences = self.provider._split_into_sentences(text) assert len(sentences) == 1 assert sentences[0] == text def test_split_long_sentence(self): """Test splitting of long sentences by words.""" long_sentence = "word " * 100 # Very long sentence self.provider.max_chunk_length = 20 # Small chunk size chunks = self.provider._split_long_sentence(long_sentence) assert len(chunks) > 1 for chunk in chunks: assert len(chunk) <= self.provider.max_chunk_length def test_split_long_sentence_single_long_word(self): """Test splitting with a single very long word.""" long_word = "a" * 100 self.provider.max_chunk_length = 20 chunks = self.provider._split_long_sentence(long_word) assert len(chunks) == 1 assert chunks[0] == long_word # Should include the long word as-is def test_reassemble_chunks(self): """Test reassembling translated chunks.""" chunks = ["First chunk", "Second chunk", "Third chunk"] result = self.provider._reassemble_chunks(chunks) assert result == "First chunk Second chunk Third chunk" def test_reassemble_chunks_with_empty(self): """Test reassembling chunks with empty strings.""" chunks = ["First chunk", "", "Third chunk", " "] result = self.provider._reassemble_chunks(chunks) assert result == "First chunk Third chunk" def test_preprocess_text(self): """Test text preprocessing.""" messy_text = " Hello world \n\n with extra spaces " processed = self.provider._preprocess_text(messy_text) assert processed == "Hello world with extra spaces" def test_postprocess_text(self): """Test text postprocessing.""" messy_text = "Hello world . This is a test ! Another sentence ?" processed = self.provider._postprocess_text(messy_text) assert processed == "Hello world. This is a test! Another sentence?" def test_postprocess_text_sentence_spacing(self): """Test postprocessing fixes sentence spacing.""" text = "First sentence.Second sentence!Third sentence?" processed = self.provider._postprocess_text(text) assert processed == "First sentence. Second sentence! Third sentence?" def test_handle_provider_error(self): """Test provider error handling.""" original_error = ValueError("Original error") with pytest.raises(TranslationFailedException) as exc_info: self.provider._handle_provider_error(original_error, "testing") assert "test error during testing: Original error" in str(exc_info.value) assert exc_info.value.__cause__ is original_error def test_handle_provider_error_no_context(self): """Test provider error handling without context.""" original_error = ValueError("Original error") with pytest.raises(TranslationFailedException) as exc_info: self.provider._handle_provider_error(original_error) assert "test error: Original error" in str(exc_info.value) assert exc_info.value.__cause__ is original_error def test_set_chunk_size(self): """Test setting chunk size.""" self.provider.set_chunk_size(500) assert self.provider.max_chunk_length == 500 def test_set_chunk_size_invalid(self): """Test setting invalid chunk size.""" with pytest.raises(ValueError, match="Chunk size must be positive"): self.provider.set_chunk_size(0) with pytest.raises(ValueError, match="Chunk size must be positive"): self.provider.set_chunk_size(-1) def test_get_translation_stats(self): """Test getting translation statistics.""" stats = self.provider.get_translation_stats(self.request) assert stats['provider'] == 'test' assert stats['source_language'] == 'en' assert stats['target_language'] == 'es' assert stats['text_length'] == len(self.request.source_text.text) assert stats['word_count'] == len(self.request.source_text.text.split()) assert stats['chunk_count'] >= 1 assert 'max_chunk_length' in stats assert 'avg_chunk_length' in stats def test_get_translation_stats_empty_text(self): """Test getting translation statistics for empty text.""" empty_request = TranslationRequest( source_text=TextContent(text="", language="en"), target_language="es" ) stats = self.provider.get_translation_stats(empty_request) assert stats['text_length'] == 0 assert stats['word_count'] == 0 assert stats['chunk_count'] == 0 assert stats['max_chunk_length'] == 0 assert stats['avg_chunk_length'] == 0 def test_abstract_methods_not_implemented(self): """Test that abstract methods raise NotImplementedError.""" # Create instance of base class directly (should fail) with pytest.raises(TypeError): TranslationProviderBase("test") def test_provider_unavailable(self): """Test behavior when provider is unavailable.""" provider = ConcreteTranslationProvider(available=False) assert provider.is_available() is False def test_no_supported_languages(self): """Test behavior when no languages are supported.""" provider = ConcreteTranslationProvider(supported_languages={}) assert provider.get_supported_languages() == {}