Spaces:
Build error
Build error
"""Unit tests for TranslationProviderBase abstract class.""" | |
import pytest | |
from unittest.mock import Mock, patch | |
from src.infrastructure.base.translation_provider_base import TranslationProviderBase | |
from src.domain.models.translation_request import TranslationRequest | |
from src.domain.models.text_content import TextContent | |
from src.domain.exceptions import TranslationFailedException | |
class ConcreteTranslationProvider(TranslationProviderBase):
    """Minimal concrete provider used to exercise TranslationProviderBase.

    The stub returns a canned translation tagged with the requested language
    pair, and can be switched into a failing mode so the error-handling paths
    of the base class can be tested.
    """

    def __init__(self, provider_name="test", supported_languages=None, available=True):
        """Create the stub provider.

        Args:
            provider_name: Name forwarded to the base-class constructor.
            supported_languages: Language mapping forwarded to the base-class
                constructor unchanged (``None`` is passed through as-is).
            available: Value that :meth:`is_available` will report.
        """
        super().__init__(provider_name, supported_languages)
        self._available = available
        self._should_fail = False
        self._translation_result = "Translated text"

    def _translate_chunk(self, text, source_language, target_language):
        """Return the canned result tagged with the language pair.

        ``text`` is intentionally ignored: the output depends only on the
        configured result string and the two language codes.

        Raises:
            Exception: When failure mode was enabled via
                :meth:`set_should_fail`. A plain ``Exception`` is raised on
                purpose to simulate an arbitrary provider failure.
        """
        if self._should_fail:
            raise Exception("Test translation error")
        return f"{self._translation_result} ({source_language}->{target_language})"

    def is_available(self):
        """Return the availability flag given at construction time."""
        return self._available

    def get_supported_languages(self):
        """Return the ``supported_languages`` attribute of this provider."""
        return self.supported_languages

    def set_should_fail(self, should_fail):
        """Enable or disable failure mode for :meth:`_translate_chunk`."""
        self._should_fail = should_fail

    def set_translation_result(self, result):
        """Replace the canned text returned by :meth:`_translate_chunk`."""
        self._translation_result = result
class TestTranslationProviderBase:
    """Test cases for TranslationProviderBase, driven through a concrete stub."""

    def setup_method(self):
        """Build a fresh provider and a default en->es request for each test."""
        self.provider = ConcreteTranslationProvider()
        self.source_text = TextContent(text="Hello world", language="en")
        self.request = TranslationRequest(
            source_text=self.source_text,
            target_language="es",
        )

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_provider_initialization(self):
        """Explicit name and language map are stored; chunk length defaults to 1000."""
        supported_langs = {"en": ["es", "fr"], "es": ["en"]}
        provider = ConcreteTranslationProvider("test_provider", supported_langs)

        assert provider.provider_name == "test_provider"
        assert provider.supported_languages == supported_langs
        assert provider.max_chunk_length == 1000

    def test_provider_initialization_no_languages(self):
        """Omitting the language map yields an empty mapping."""
        provider = ConcreteTranslationProvider("test_provider")

        assert provider.provider_name == "test_provider"
        assert provider.supported_languages == {}

    # ------------------------------------------------------------------
    # translate()
    # ------------------------------------------------------------------

    def test_translate_success(self):
        """A valid request produces TextContent in the target language."""
        result = self.provider.translate(self.request)

        assert isinstance(result, TextContent)
        assert result.text == "Translated text (en->es)"
        assert result.language == "es"
        assert result.encoding == "utf-8"

    def test_translate_with_language_validation(self):
        """Language pairs are validated when a language map is configured."""
        supported_langs = {"en": ["es", "fr"], "es": ["en"]}
        provider = ConcreteTranslationProvider("test", supported_langs)

        # Valid language pair should work
        result = provider.translate(self.request)
        assert isinstance(result, TextContent)

        # Invalid source language should fail
        invalid_request = TranslationRequest(
            source_text=TextContent(text="Hello", language="de"),
            target_language="es",
        )
        with pytest.raises(TranslationFailedException, match="Source language de not supported"):
            provider.translate(invalid_request)

        # Invalid target language should fail
        invalid_request2 = TranslationRequest(
            source_text=self.source_text,
            target_language="de",
        )
        with pytest.raises(TranslationFailedException, match="Translation from en to de not supported"):
            provider.translate(invalid_request2)

    def test_translate_empty_text_fails(self):
        """Empty source text is rejected."""
        empty_request = TranslationRequest(
            source_text=TextContent(text="", language="en"),
            target_language="es",
        )
        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(empty_request)

    def test_translate_whitespace_text_fails(self):
        """Whitespace-only source text is rejected."""
        whitespace_request = TranslationRequest(
            source_text=TextContent(text=" ", language="en"),
            target_language="es",
        )
        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(whitespace_request)

    def test_translate_same_language_fails(self):
        """Identical source and target languages are rejected."""
        same_lang_request = TranslationRequest(
            source_text=self.source_text,
            target_language="en",
        )
        with pytest.raises(TranslationFailedException, match="Source and target languages cannot be the same"):
            self.provider.translate(same_lang_request)

    def test_translate_provider_error(self):
        """An error raised by the provider surfaces as TranslationFailedException."""
        self.provider.set_should_fail(True)

        with pytest.raises(TranslationFailedException, match="Translation failed"):
            self.provider.translate(self.request)

    def test_translate_long_text_chunking(self):
        """Text longer than the default chunk size still translates successfully."""
        # 2000 characters: longer than the default max_chunk_length of 1000.
        long_text = "This is a sentence. " * 100
        long_request = TranslationRequest(
            source_text=TextContent(text=long_text, language="en"),
            target_language="es",
        )

        result = self.provider.translate(long_request)

        assert isinstance(result, TextContent)
        # Only checks that translated output is present; the number of chunks
        # is not asserted here.
        assert "Translated text (en->es)" in result.text

    # ------------------------------------------------------------------
    # Chunking helpers
    # ------------------------------------------------------------------

    def test_chunk_text_short_text(self):
        """Short text comes back as a single unchanged chunk."""
        short_text = "Hello world"

        chunks = self.provider._chunk_text(short_text)

        assert len(chunks) == 1
        assert chunks[0] == short_text

    def test_chunk_text_long_text(self):
        """Long text is split into chunks no longer than max_chunk_length."""
        long_text = "This is a sentence. " * 100
        self.provider.max_chunk_length = 50  # Small chunk size for testing

        chunks = self.provider._chunk_text(long_text)

        assert len(chunks) > 1
        for chunk in chunks:
            assert len(chunk) <= self.provider.max_chunk_length

    def test_split_into_sentences(self):
        """Text is split on '.', '!' and '?' sentence terminators."""
        text = "First sentence. Second sentence! Third sentence? Fourth sentence."

        sentences = self.provider._split_into_sentences(text)

        assert len(sentences) == 4
        assert "First sentence" in sentences[0]
        assert "Second sentence" in sentences[1]
        assert "Third sentence" in sentences[2]
        assert "Fourth sentence" in sentences[3]

    def test_split_into_sentences_no_punctuation(self):
        """Text without terminators is returned as one sentence."""
        text = "Just one long sentence without proper punctuation"

        sentences = self.provider._split_into_sentences(text)

        assert len(sentences) == 1
        assert sentences[0] == text

    def test_split_long_sentence(self):
        """An over-long sentence is split on word boundaries."""
        long_sentence = "word " * 100  # Very long sentence
        self.provider.max_chunk_length = 20  # Small chunk size

        chunks = self.provider._split_long_sentence(long_sentence)

        assert len(chunks) > 1
        for chunk in chunks:
            assert len(chunk) <= self.provider.max_chunk_length

    def test_split_long_sentence_single_long_word(self):
        """A single word longer than the limit is kept intact."""
        long_word = "a" * 100
        self.provider.max_chunk_length = 20

        chunks = self.provider._split_long_sentence(long_word)

        assert len(chunks) == 1
        assert chunks[0] == long_word  # Should include the long word as-is

    def test_reassemble_chunks(self):
        """Chunks are joined with single spaces."""
        chunks = ["First chunk", "Second chunk", "Third chunk"]

        result = self.provider._reassemble_chunks(chunks)

        assert result == "First chunk Second chunk Third chunk"

    def test_reassemble_chunks_with_empty(self):
        """Empty and whitespace-only chunks are dropped when joining."""
        chunks = ["First chunk", "", "Third chunk", " "]

        result = self.provider._reassemble_chunks(chunks)

        assert result == "First chunk Third chunk"

    # ------------------------------------------------------------------
    # Pre/post-processing
    # ------------------------------------------------------------------

    def test_preprocess_text(self):
        """Preprocessing trims the text and normalises internal whitespace."""
        messy_text = " Hello world \n\n with extra spaces "

        processed = self.provider._preprocess_text(messy_text)

        assert processed == "Hello world with extra spaces"

    def test_postprocess_text(self):
        """Postprocessing removes the space before sentence punctuation."""
        messy_text = "Hello world . This is a test ! Another sentence ?"

        processed = self.provider._postprocess_text(messy_text)

        assert processed == "Hello world. This is a test! Another sentence?"

    def test_postprocess_text_sentence_spacing(self):
        """Postprocessing inserts the missing space after sentence punctuation."""
        text = "First sentence.Second sentence!Third sentence?"

        processed = self.provider._postprocess_text(text)

        assert processed == "First sentence. Second sentence! Third sentence?"

    # ------------------------------------------------------------------
    # Error handling
    # ------------------------------------------------------------------

    def test_handle_provider_error(self):
        """Errors are wrapped with provider name and context, keeping the cause."""
        original_error = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as exc_info:
            self.provider._handle_provider_error(original_error, "testing")

        # These checks must sit outside the ``with`` block: statements placed
        # after the raising call inside it would never execute.
        assert "test error during testing: Original error" in str(exc_info.value)
        assert exc_info.value.__cause__ is original_error

    def test_handle_provider_error_no_context(self):
        """Without a context string the message omits the 'during ...' part."""
        original_error = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as exc_info:
            self.provider._handle_provider_error(original_error)

        assert "test error: Original error" in str(exc_info.value)
        assert exc_info.value.__cause__ is original_error

    # ------------------------------------------------------------------
    # Configuration and statistics
    # ------------------------------------------------------------------

    def test_set_chunk_size(self):
        """set_chunk_size updates max_chunk_length."""
        self.provider.set_chunk_size(500)

        assert self.provider.max_chunk_length == 500

    def test_set_chunk_size_invalid(self):
        """Zero and negative chunk sizes are rejected with ValueError."""
        for invalid_size in (0, -1):
            with pytest.raises(ValueError, match="Chunk size must be positive"):
                self.provider.set_chunk_size(invalid_size)

    def test_get_translation_stats(self):
        """Statistics describe the provider, language pair and text size."""
        stats = self.provider.get_translation_stats(self.request)
        text = self.request.source_text.text

        assert stats['provider'] == 'test'
        assert stats['source_language'] == 'en'
        assert stats['target_language'] == 'es'
        assert stats['text_length'] == len(text)
        assert stats['word_count'] == len(text.split())
        assert stats['chunk_count'] >= 1
        assert 'max_chunk_length' in stats
        assert 'avg_chunk_length' in stats

    def test_get_translation_stats_empty_text(self):
        """Statistics for empty text are all zero."""
        empty_request = TranslationRequest(
            source_text=TextContent(text="", language="en"),
            target_language="es",
        )

        stats = self.provider.get_translation_stats(empty_request)

        assert stats['text_length'] == 0
        assert stats['word_count'] == 0
        assert stats['chunk_count'] == 0
        assert stats['max_chunk_length'] == 0
        assert stats['avg_chunk_length'] == 0

    # ------------------------------------------------------------------
    # Abstract contract and stub behaviour
    # ------------------------------------------------------------------

    def test_abstract_methods_not_implemented(self):
        """The base class cannot be instantiated directly (raises TypeError)."""
        with pytest.raises(TypeError):
            TranslationProviderBase("test")

    def test_provider_unavailable(self):
        """is_available reports False when the stub is built as unavailable."""
        provider = ConcreteTranslationProvider(available=False)

        assert provider.is_available() is False

    def test_no_supported_languages(self):
        """An empty language map is returned unchanged."""
        provider = ConcreteTranslationProvider(supported_languages={})

        assert provider.get_supported_languages() == {}