# NOTE: this file was extracted from a "Spaces" web file view.
# Page metadata at extraction time: Space status "Build error";
# file size 13,325 bytes; revision 93dc283.
# The viewer's line-number gutter (1-325) was page chrome, not file content.
"""Unit tests for TranslationProviderBase abstract class."""
import pytest
from unittest.mock import Mock, patch
from src.infrastructure.base.translation_provider_base import TranslationProviderBase
from src.domain.models.translation_request import TranslationRequest
from src.domain.models.text_content import TextContent
from src.domain.exceptions import TranslationFailedException
class ConcreteTranslationProvider(TranslationProviderBase):
    """Stub provider that lets the tests drive the base-class code paths.

    No real translation takes place: each chunk is answered with a canned
    string tagged with the language pair, or with a generic exception once
    failure has been switched on through ``set_should_fail``.
    """

    def __init__(self, provider_name="test", supported_languages=None, available=True):
        """Initialise the base class, then set the stub's test knobs."""
        super().__init__(provider_name, supported_languages)
        # Knobs the tests flip to steer the stub's behaviour.
        self._translation_result = "Translated text"
        self._should_fail = False
        self._available = available

    # -- test controls -----------------------------------------------------

    def set_translation_result(self, result):
        """Replace the canned text returned for every translated chunk."""
        self._translation_result = result

    def set_should_fail(self, should_fail):
        """Switch the simulated chunk-translation failure on or off."""
        self._should_fail = should_fail

    # -- provider interface ------------------------------------------------

    def get_supported_languages(self):
        """Return the language mapping held by the provider."""
        return self.supported_languages

    def is_available(self):
        """Report the availability flag given at construction time."""
        return self._available

    def _translate_chunk(self, text, source_language, target_language):
        """Return the canned result for one chunk, or raise when failing.

        ``text`` is ignored; only the language pair shows up in the output.
        """
        if not self._should_fail:
            return f"{self._translation_result} ({source_language}->{target_language})"
        raise Exception("Test translation error")
class TestTranslationProviderBase:
    """Test cases for TranslationProviderBase abstract class.

    Every test goes through ``ConcreteTranslationProvider``, a stub subclass
    whose chunk translation returns a canned string, so the assertions below
    exercise the shared base-class logic (validation, chunking, text clean-up,
    error wrapping and statistics) rather than a real translation backend.
    """

    def setup_method(self):
        """Set up test fixtures: a default provider and an en->es request."""
        self.provider = ConcreteTranslationProvider()
        self.source_text = TextContent(text="Hello world", language="en")
        self.request = TranslationRequest(
            source_text=self.source_text,
            target_language="es"
        )

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_provider_initialization(self):
        """Test initialization with an explicit name and language mapping.

        Also pins the chunk length a freshly built provider starts with.
        """
        supported_langs = {"en": ["es", "fr"], "es": ["en"]}
        provider = ConcreteTranslationProvider("test_provider", supported_langs)

        assert provider.provider_name == "test_provider"
        assert provider.supported_languages == supported_langs
        assert provider.max_chunk_length == 1000

    def test_provider_initialization_no_languages(self):
        """Test provider initialization without supported languages."""
        provider = ConcreteTranslationProvider("test_provider")

        assert provider.provider_name == "test_provider"
        assert provider.supported_languages == {}

    # ------------------------------------------------------------------
    # translate()
    # ------------------------------------------------------------------

    def test_translate_success(self):
        """Test successful translation."""
        result = self.provider.translate(self.request)

        assert isinstance(result, TextContent)
        assert result.text == "Translated text (en->es)"
        assert result.language == "es"
        assert result.encoding == "utf-8"

    def test_translate_with_language_validation(self):
        """Test translation with language validation."""
        supported_langs = {"en": ["es", "fr"], "es": ["en"]}
        provider = ConcreteTranslationProvider("test", supported_langs)

        # Valid language pair should work
        result = provider.translate(self.request)
        assert isinstance(result, TextContent)

        # Invalid source language should fail
        invalid_request = TranslationRequest(
            source_text=TextContent(text="Hello", language="de"),
            target_language="es"
        )
        with pytest.raises(TranslationFailedException, match="Source language de not supported"):
            provider.translate(invalid_request)

        # Invalid target language should fail
        invalid_request2 = TranslationRequest(
            source_text=self.source_text,
            target_language="de"
        )
        with pytest.raises(TranslationFailedException, match="Translation from en to de not supported"):
            provider.translate(invalid_request2)

    def test_translate_empty_text_fails(self):
        """Test that empty text raises exception."""
        empty_request = TranslationRequest(
            source_text=TextContent(text="", language="en"),
            target_language="es"
        )

        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(empty_request)

    def test_translate_whitespace_text_fails(self):
        """Test that whitespace-only text raises exception."""
        whitespace_request = TranslationRequest(
            source_text=TextContent(text=" ", language="en"),
            target_language="es"
        )

        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(whitespace_request)

    def test_translate_same_language_fails(self):
        """Test that same source and target language raises exception."""
        same_lang_request = TranslationRequest(
            source_text=self.source_text,
            target_language="en"
        )

        with pytest.raises(TranslationFailedException, match="Source and target languages cannot be the same"):
            self.provider.translate(same_lang_request)

    def test_translate_provider_error(self):
        """Test handling of provider-specific errors."""
        self.provider.set_should_fail(True)

        with pytest.raises(TranslationFailedException, match="Translation failed"):
            self.provider.translate(self.request)

    def test_translate_long_text_chunking(self):
        """Test translation of long text with chunking."""
        # 2000 characters: well above the 1000-character chunk length that
        # test_provider_initialization pins for a fresh provider.
        long_text = "This is a sentence. " * 100
        long_request = TranslationRequest(
            source_text=TextContent(text=long_text, language="en"),
            target_language="es"
        )

        result = self.provider.translate(long_request)

        assert isinstance(result, TextContent)
        translated_chunk = "Translated text (en->es)"
        assert translated_chunk in result.text
        # The stub answers every chunk with the same canned string, so seeing
        # it more than once is what actually shows that several chunks were
        # translated and joined; a bare membership check would also pass if
        # the text had not been chunked at all.
        assert result.text.count(translated_chunk) > 1

    # ------------------------------------------------------------------
    # Chunking helpers
    # ------------------------------------------------------------------

    def test_chunk_text_short_text(self):
        """Test text chunking with short text."""
        short_text = "Hello world"
        chunks = self.provider._chunk_text(short_text)

        assert len(chunks) == 1
        assert chunks[0] == short_text

    def test_chunk_text_long_text(self):
        """Test text chunking with long text."""
        # Create text longer than chunk size
        long_text = "This is a sentence. " * 100
        self.provider.max_chunk_length = 50  # Small chunk size for testing

        chunks = self.provider._chunk_text(long_text)

        assert len(chunks) > 1
        for chunk in chunks:
            assert len(chunk) <= self.provider.max_chunk_length

    def test_split_into_sentences(self):
        """Test sentence splitting."""
        text = "First sentence. Second sentence! Third sentence? Fourth sentence."
        sentences = self.provider._split_into_sentences(text)

        assert len(sentences) == 4
        assert "First sentence" in sentences[0]
        assert "Second sentence" in sentences[1]
        assert "Third sentence" in sentences[2]
        assert "Fourth sentence" in sentences[3]

    def test_split_into_sentences_no_punctuation(self):
        """Test sentence splitting with no punctuation."""
        text = "Just one long sentence without proper punctuation"
        sentences = self.provider._split_into_sentences(text)

        assert len(sentences) == 1
        assert sentences[0] == text

    def test_split_long_sentence(self):
        """Test splitting of long sentences by words."""
        long_sentence = "word " * 100  # Very long sentence
        self.provider.max_chunk_length = 20  # Small chunk size

        chunks = self.provider._split_long_sentence(long_sentence)

        assert len(chunks) > 1
        for chunk in chunks:
            assert len(chunk) <= self.provider.max_chunk_length

    def test_split_long_sentence_single_long_word(self):
        """Test splitting with a single very long word."""
        long_word = "a" * 100
        self.provider.max_chunk_length = 20

        chunks = self.provider._split_long_sentence(long_word)

        assert len(chunks) == 1
        assert chunks[0] == long_word  # Should include the long word as-is

    def test_reassemble_chunks(self):
        """Test reassembling translated chunks."""
        chunks = ["First chunk", "Second chunk", "Third chunk"]
        result = self.provider._reassemble_chunks(chunks)

        assert result == "First chunk Second chunk Third chunk"

    def test_reassemble_chunks_with_empty(self):
        """Test reassembling chunks with empty strings."""
        chunks = ["First chunk", "", "Third chunk", " "]
        result = self.provider._reassemble_chunks(chunks)

        assert result == "First chunk Third chunk"

    # ------------------------------------------------------------------
    # Text clean-up
    # ------------------------------------------------------------------

    def test_preprocess_text(self):
        """Test text preprocessing."""
        messy_text = " Hello world \n\n with extra spaces "
        processed = self.provider._preprocess_text(messy_text)

        assert processed == "Hello world with extra spaces"

    def test_postprocess_text(self):
        """Test text postprocessing."""
        messy_text = "Hello world . This is a test ! Another sentence ?"
        processed = self.provider._postprocess_text(messy_text)

        assert processed == "Hello world. This is a test! Another sentence?"

    def test_postprocess_text_sentence_spacing(self):
        """Test postprocessing fixes sentence spacing."""
        text = "First sentence.Second sentence!Third sentence?"
        processed = self.provider._postprocess_text(text)

        assert processed == "First sentence. Second sentence! Third sentence?"

    # ------------------------------------------------------------------
    # Error wrapping
    # ------------------------------------------------------------------

    def test_handle_provider_error(self):
        """Test provider error handling."""
        original_error = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as exc_info:
            self.provider._handle_provider_error(original_error, "testing")

        # Inspect the exception only after the ``with`` block has exited:
        # statements placed after the raising call inside it never execute.
        assert "test error during testing: Original error" in str(exc_info.value)
        assert exc_info.value.__cause__ is original_error

    def test_handle_provider_error_no_context(self):
        """Test provider error handling without context."""
        original_error = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as exc_info:
            self.provider._handle_provider_error(original_error)

        # Checked outside the ``with`` block so the assertions really run.
        assert "test error: Original error" in str(exc_info.value)
        assert exc_info.value.__cause__ is original_error

    # ------------------------------------------------------------------
    # Configuration and statistics
    # ------------------------------------------------------------------

    def test_set_chunk_size(self):
        """Test setting chunk size."""
        self.provider.set_chunk_size(500)
        assert self.provider.max_chunk_length == 500

    def test_set_chunk_size_invalid(self):
        """Test setting invalid chunk size."""
        # Zero and negative sizes are checked in separate blocks so that each
        # call has to raise on its own.
        with pytest.raises(ValueError, match="Chunk size must be positive"):
            self.provider.set_chunk_size(0)

        with pytest.raises(ValueError, match="Chunk size must be positive"):
            self.provider.set_chunk_size(-1)

    def test_get_translation_stats(self):
        """Test getting translation statistics."""
        stats = self.provider.get_translation_stats(self.request)

        assert stats['provider'] == 'test'
        assert stats['source_language'] == 'en'
        assert stats['target_language'] == 'es'
        assert stats['text_length'] == len(self.request.source_text.text)
        assert stats['word_count'] == len(self.request.source_text.text.split())
        assert stats['chunk_count'] >= 1
        assert 'max_chunk_length' in stats
        assert 'avg_chunk_length' in stats

    def test_get_translation_stats_empty_text(self):
        """Test getting translation statistics for empty text."""
        empty_request = TranslationRequest(
            source_text=TextContent(text="", language="en"),
            target_language="es"
        )

        stats = self.provider.get_translation_stats(empty_request)

        assert stats['text_length'] == 0
        assert stats['word_count'] == 0
        assert stats['chunk_count'] == 0
        assert stats['max_chunk_length'] == 0
        assert stats['avg_chunk_length'] == 0

    # ------------------------------------------------------------------
    # Abstract contract and simple accessors
    # ------------------------------------------------------------------

    def test_abstract_methods_not_implemented(self):
        """Test that the abstract base class cannot be instantiated directly."""
        # Create instance of base class directly (should fail)
        with pytest.raises(TypeError):
            TranslationProviderBase("test")

    def test_provider_unavailable(self):
        """Test behavior when provider is unavailable."""
        provider = ConcreteTranslationProvider(available=False)
        assert provider.is_available() is False

    def test_no_supported_languages(self):
        """Test behavior when no languages are supported."""
        provider = ConcreteTranslationProvider(supported_languages={})
        assert provider.get_supported_languages() == {}