# teachingAssistant/tests/unit/infrastructure/base/test_translation_provider_base.py
# Michael Hu
# Create unit tests for infrastructure layer
# 93dc283
"""Unit tests for TranslationProviderBase abstract class."""
import pytest
from unittest.mock import Mock, patch
from src.infrastructure.base.translation_provider_base import TranslationProviderBase
from src.domain.models.translation_request import TranslationRequest
from src.domain.models.text_content import TextContent
from src.domain.exceptions import TranslationFailedException
class ConcreteTranslationProvider(TranslationProviderBase):
    """Configurable stand-in subclass of ``TranslationProviderBase`` for tests.

    The chunk translator returns a canned string tagged with the language
    pair, or raises when failure mode has been switched on.
    """

    def __init__(self, provider_name="test", supported_languages=None, available=True):
        """Forward name/languages to the base initializer and set test knobs.

        Args:
            provider_name: Passed through to the base class.
            supported_languages: Passed through to the base class.
            available: Value later reported by ``is_available``.
        """
        super().__init__(provider_name, supported_languages)
        self._translation_result = "Translated text"
        self._should_fail = False
        self._available = available

    def _translate_chunk(self, text, source_language, target_language):
        """Return the canned result tagged with the language pair.

        Raises:
            Exception: When failure mode was enabled via ``set_should_fail``.
        """
        if not self._should_fail:
            return f"{self._translation_result} ({source_language}->{target_language})"
        raise Exception("Test translation error")

    def is_available(self):
        """Report the availability flag given at construction time."""
        return self._available

    def get_supported_languages(self):
        """Return the ``supported_languages`` attribute unchanged."""
        return self.supported_languages

    def set_should_fail(self, should_fail):
        """Toggle whether ``_translate_chunk`` raises."""
        self._should_fail = should_fail

    def set_translation_result(self, result):
        """Replace the canned text used by ``_translate_chunk``."""
        self._translation_result = result
class TestTranslationProviderBase:
    """Tests for ``TranslationProviderBase`` exercised via ``ConcreteTranslationProvider``."""

    def setup_method(self):
        """Create a fresh provider and a default en->es request for each test."""
        self.source_text = TextContent(text="Hello world", language="en")
        self.request = TranslationRequest(
            source_text=self.source_text,
            target_language="es",
        )
        self.provider = ConcreteTranslationProvider()

    def test_provider_initialization(self):
        """Explicit name and language map are stored; chunk length is 1000."""
        language_map = {"en": ["es", "fr"], "es": ["en"]}
        built = ConcreteTranslationProvider("test_provider", language_map)

        assert built.provider_name == "test_provider"
        assert built.supported_languages == language_map
        assert built.max_chunk_length == 1000

    def test_provider_initialization_no_languages(self):
        """Omitting the language map yields an empty dict."""
        built = ConcreteTranslationProvider("test_provider")

        assert built.provider_name == "test_provider"
        assert built.supported_languages == {}

    def test_translate_success(self):
        """A valid request produces a ``TextContent`` in the target language."""
        translated = self.provider.translate(self.request)

        assert isinstance(translated, TextContent)
        assert translated.text == "Translated text (en->es)"
        assert translated.language == "es"
        assert translated.encoding == "utf-8"

    def test_translate_with_language_validation(self):
        """Language pairs outside the configured map are rejected."""
        language_map = {"en": ["es", "fr"], "es": ["en"]}
        restricted = ConcreteTranslationProvider("test", language_map)

        # A pair present in the map translates normally.
        assert isinstance(restricted.translate(self.request), TextContent)

        # Source language missing from the map.
        unknown_source = TranslationRequest(
            source_text=TextContent(text="Hello", language="de"),
            target_language="es",
        )
        with pytest.raises(TranslationFailedException, match="Source language de not supported"):
            restricted.translate(unknown_source)

        # Target language not listed for the given source.
        unknown_target = TranslationRequest(
            source_text=self.source_text,
            target_language="de",
        )
        with pytest.raises(TranslationFailedException, match="Translation from en to de not supported"):
            restricted.translate(unknown_target)

    def test_translate_empty_text_fails(self):
        """An empty source string is rejected."""
        blank = TextContent(text="", language="en")
        req = TranslationRequest(source_text=blank, target_language="es")

        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(req)

    def test_translate_whitespace_text_fails(self):
        """A whitespace-only source string is rejected."""
        spaces = TextContent(text=" ", language="en")
        req = TranslationRequest(source_text=spaces, target_language="es")

        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(req)

    def test_translate_same_language_fails(self):
        """Identical source and target languages are rejected."""
        req = TranslationRequest(
            source_text=self.source_text,
            target_language="en",
        )

        with pytest.raises(TranslationFailedException, match="Source and target languages cannot be the same"):
            self.provider.translate(req)

    def test_translate_provider_error(self):
        """An error raised while translating a chunk surfaces as a translation failure."""
        self.provider.set_should_fail(True)

        with pytest.raises(TranslationFailedException, match="Translation failed"):
            self.provider.translate(self.request)

    def test_translate_long_text_chunking(self):
        """Text longer than the chunk limit still translates to a ``TextContent``."""
        # 100 repetitions of a 20-character sentence: well past the 1000 default.
        repeated = "This is a sentence. " * 100
        req = TranslationRequest(
            source_text=TextContent(text=repeated, language="en"),
            target_language="es",
        )

        translated = self.provider.translate(req)

        assert isinstance(translated, TextContent)
        # The canned chunk translation must appear in the combined output.
        assert "Translated text (en->es)" in translated.text

    def test_chunk_text_short_text(self):
        """Text below the limit comes back as a single unchanged chunk."""
        snippet = "Hello world"

        pieces = self.provider._chunk_text(snippet)

        assert len(pieces) == 1
        assert pieces[0] == snippet

    def test_chunk_text_long_text(self):
        """Long text is split into several chunks, none exceeding the limit."""
        repeated = "This is a sentence. " * 100
        self.provider.max_chunk_length = 50  # small limit to force splitting

        pieces = self.provider._chunk_text(repeated)

        assert len(pieces) > 1
        assert all(len(piece) <= self.provider.max_chunk_length for piece in pieces)

    def test_split_into_sentences(self):
        """Text is split on '.', '!' and '?' sentence endings."""
        paragraph = "First sentence. Second sentence! Third sentence? Fourth sentence."
        expected_fragments = (
            "First sentence",
            "Second sentence",
            "Third sentence",
            "Fourth sentence",
        )

        parts = self.provider._split_into_sentences(paragraph)

        assert len(parts) == 4
        for fragment, part in zip(expected_fragments, parts):
            assert fragment in part

    def test_split_into_sentences_no_punctuation(self):
        """Text without sentence punctuation stays as one sentence."""
        unpunctuated = "Just one long sentence without proper punctuation"

        parts = self.provider._split_into_sentences(unpunctuated)

        assert len(parts) == 1
        assert parts[0] == unpunctuated

    def test_split_long_sentence(self):
        """An over-long sentence is broken into chunks within the limit."""
        wordy = "word " * 100
        self.provider.max_chunk_length = 20  # small limit to force splitting

        pieces = self.provider._split_long_sentence(wordy)

        assert len(pieces) > 1
        assert all(len(piece) <= self.provider.max_chunk_length for piece in pieces)

    def test_split_long_sentence_single_long_word(self):
        """A single word longer than the limit is kept whole."""
        giant_word = "a" * 100
        self.provider.max_chunk_length = 20

        pieces = self.provider._split_long_sentence(giant_word)

        assert len(pieces) == 1
        assert pieces[0] == giant_word  # emitted as-is, not cut mid-word

    def test_reassemble_chunks(self):
        """Chunks are joined with single spaces."""
        joined = self.provider._reassemble_chunks(
            ["First chunk", "Second chunk", "Third chunk"]
        )

        assert joined == "First chunk Second chunk Third chunk"

    def test_reassemble_chunks_with_empty(self):
        """Empty and whitespace-only chunks are dropped when joining."""
        joined = self.provider._reassemble_chunks(
            ["First chunk", "", "Third chunk", " "]
        )

        assert joined == "First chunk Third chunk"

    def test_preprocess_text(self):
        """Preprocessing trims the text and collapses whitespace runs."""
        raw = " Hello world \n\n with extra spaces "

        cleaned = self.provider._preprocess_text(raw)

        assert cleaned == "Hello world with extra spaces"

    def test_postprocess_text(self):
        """Postprocessing removes spaces before sentence punctuation."""
        raw = "Hello world . This is a test ! Another sentence ?"

        cleaned = self.provider._postprocess_text(raw)

        assert cleaned == "Hello world. This is a test! Another sentence?"

    def test_postprocess_text_sentence_spacing(self):
        """Postprocessing inserts a space after sentence punctuation."""
        raw = "First sentence.Second sentence!Third sentence?"

        cleaned = self.provider._postprocess_text(raw)

        assert cleaned == "First sentence. Second sentence! Third sentence?"

    def test_handle_provider_error(self):
        """The wrapped error carries provider name, context and original cause."""
        root_cause = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as exc_info:
            self.provider._handle_provider_error(root_cause, "testing")

        raised = exc_info.value
        assert "test error during testing: Original error" in str(raised)
        assert raised.__cause__ is root_cause

    def test_handle_provider_error_no_context(self):
        """Without a context argument the message omits the 'during' part."""
        root_cause = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as exc_info:
            self.provider._handle_provider_error(root_cause)

        raised = exc_info.value
        assert "test error: Original error" in str(raised)
        assert raised.__cause__ is root_cause

    def test_set_chunk_size(self):
        """A positive chunk size is stored on the provider."""
        self.provider.set_chunk_size(500)

        assert self.provider.max_chunk_length == 500

    def test_set_chunk_size_invalid(self):
        """Zero and negative chunk sizes are rejected with ``ValueError``."""
        for bad_size in (0, -1):
            with pytest.raises(ValueError, match="Chunk size must be positive"):
                self.provider.set_chunk_size(bad_size)

    def test_get_translation_stats(self):
        """Statistics describe the request's languages, size and chunking."""
        raw = self.request.source_text.text

        stats = self.provider.get_translation_stats(self.request)

        assert stats["provider"] == "test"
        assert stats["source_language"] == "en"
        assert stats["target_language"] == "es"
        assert stats["text_length"] == len(raw)
        assert stats["word_count"] == len(raw.split())
        assert stats["chunk_count"] >= 1
        assert "max_chunk_length" in stats
        assert "avg_chunk_length" in stats

    def test_get_translation_stats_empty_text(self):
        """All numeric statistics are zero for an empty source text."""
        req = TranslationRequest(
            source_text=TextContent(text="", language="en"),
            target_language="es",
        )

        stats = self.provider.get_translation_stats(req)

        for key in (
            "text_length",
            "word_count",
            "chunk_count",
            "max_chunk_length",
            "avg_chunk_length",
        ):
            assert stats[key] == 0

    def test_abstract_methods_not_implemented(self):
        """Instantiating the abstract base class directly raises ``TypeError``."""
        with pytest.raises(TypeError):
            TranslationProviderBase("test")

    def test_provider_unavailable(self):
        """A provider built with ``available=False`` reports itself unavailable."""
        offline = ConcreteTranslationProvider(available=False)

        assert offline.is_available() is False

    def test_no_supported_languages(self):
        """An explicitly empty language map is returned unchanged."""
        bare = ConcreteTranslationProvider(supported_languages={})

        assert bare.get_supported_languages() == {}