File size: 13,325 Bytes
93dc283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
"""Unit tests for TranslationProviderBase abstract class."""

import pytest
from unittest.mock import Mock, patch

from src.infrastructure.base.translation_provider_base import TranslationProviderBase
from src.domain.models.translation_request import TranslationRequest
from src.domain.models.text_content import TextContent
from src.domain.exceptions import TranslationFailedException


class ConcreteTranslationProvider(TranslationProviderBase):
    """Minimal stub subclass used to exercise TranslationProviderBase.

    ``_translate_chunk`` either returns a canned string tagged with the
    language pair or raises, depending on flags that tests can toggle
    through the setter helpers below.
    """

    def __init__(self, provider_name="test", supported_languages=None, available=True):
        """Forward name and language map to the base class, then set stub state."""
        super().__init__(provider_name, supported_languages)
        # Switches and canned output that individual tests may adjust later.
        self._translation_result = "Translated text"
        self._should_fail = False
        self._available = available

    def _translate_chunk(self, text, source_language, target_language):
        """Return the canned result tagged with the language pair, or raise if failing."""
        if not self._should_fail:
            return f"{self._translation_result} ({source_language}->{target_language})"
        raise Exception("Test translation error")

    def is_available(self):
        """Report the availability flag supplied at construction time."""
        return self._available

    def get_supported_languages(self):
        """Return the ``supported_languages`` attribute unchanged."""
        return self.supported_languages

    def set_should_fail(self, should_fail):
        """Choose whether ``_translate_chunk`` raises instead of returning."""
        self._should_fail = should_fail

    def set_translation_result(self, result):
        """Replace the canned text that ``_translate_chunk`` returns."""
        self._translation_result = result


class TestTranslationProviderBase:
    """Tests for TranslationProviderBase, driven through ConcreteTranslationProvider."""

    @staticmethod
    def _build_request(text, source_language, target_language):
        """Wrap ``text`` in a TextContent and return a TranslationRequest for it."""
        content = TextContent(text=text, language=source_language)
        return TranslationRequest(source_text=content, target_language=target_language)

    def setup_method(self):
        """Create a default provider plus an en->es request before every test."""
        self.provider = ConcreteTranslationProvider()
        self.source_text = TextContent(text="Hello world", language="en")
        self.request = TranslationRequest(
            source_text=self.source_text,
            target_language="es",
        )

    def test_provider_initialization(self):
        """Name and language map are stored; max_chunk_length defaults to 1000."""
        language_map = {"en": ["es", "fr"], "es": ["en"]}
        candidate = ConcreteTranslationProvider("test_provider", language_map)

        assert candidate.provider_name == "test_provider"
        assert candidate.supported_languages == language_map
        assert candidate.max_chunk_length == 1000

    def test_provider_initialization_no_languages(self):
        """Omitting the language map leaves supported_languages as an empty dict."""
        candidate = ConcreteTranslationProvider("test_provider")

        assert candidate.provider_name == "test_provider"
        assert candidate.supported_languages == {}

    def test_translate_success(self):
        """A valid request yields a TextContent in the target language."""
        translated = self.provider.translate(self.request)

        assert isinstance(translated, TextContent)
        assert translated.text == "Translated text (en->es)"
        assert translated.language == "es"
        assert translated.encoding == "utf-8"

    def test_translate_with_language_validation(self):
        """Unsupported source or target languages are rejected when a map is set."""
        language_map = {"en": ["es", "fr"], "es": ["en"]}
        restricted = ConcreteTranslationProvider("test", language_map)

        # en->es is listed in the map, so this must go through.
        assert isinstance(restricted.translate(self.request), TextContent)

        # "de" is not a key of the map: unsupported source language.
        bad_source = self._build_request("Hello", "de", "es")
        with pytest.raises(TranslationFailedException, match="Source language de not supported"):
            restricted.translate(bad_source)

        # "de" is not among the targets listed for "en": unsupported pair.
        bad_target = TranslationRequest(
            source_text=self.source_text,
            target_language="de",
        )
        with pytest.raises(TranslationFailedException, match="Translation from en to de not supported"):
            restricted.translate(bad_target)

    def test_translate_empty_text_fails(self):
        """An empty source string is rejected."""
        blank = self._build_request("", "en", "es")

        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(blank)

    def test_translate_whitespace_text_fails(self):
        """A source string made only of spaces is rejected like an empty one."""
        spaces_only = self._build_request("   ", "en", "es")

        with pytest.raises(TranslationFailedException, match="Source text cannot be empty"):
            self.provider.translate(spaces_only)

    def test_translate_same_language_fails(self):
        """Requesting a translation into the source language is rejected."""
        identity_request = TranslationRequest(
            source_text=self.source_text,
            target_language="en",
        )

        with pytest.raises(TranslationFailedException, match="Source and target languages cannot be the same"):
            self.provider.translate(identity_request)

    def test_translate_provider_error(self):
        """An exception raised inside the stub surfaces as TranslationFailedException."""
        self.provider.set_should_fail(True)

        with pytest.raises(TranslationFailedException, match="Translation failed"):
            self.provider.translate(self.request)

    def test_translate_long_text_chunking(self):
        """Text longer than the default chunk length still translates."""
        # 2000 characters, above the 1000-character default asserted in
        # test_provider_initialization.
        lengthy = "This is a sentence. " * 100
        lengthy_request = self._build_request(lengthy, "en", "es")

        translated = self.provider.translate(lengthy_request)

        assert isinstance(translated, TextContent)
        # Only checks that the stub's marker appears in the output; the number
        # of chunks is not asserted here.
        assert "Translated text (en->es)" in translated.text

    def test_chunk_text_short_text(self):
        """Text that fits in one chunk comes back as a single unchanged chunk."""
        brief = "Hello world"
        pieces = self.provider._chunk_text(brief)

        assert len(pieces) == 1
        assert pieces[0] == brief

    def test_chunk_text_long_text(self):
        """Long text is cut into several chunks, none above the configured limit."""
        lengthy = "This is a sentence. " * 100
        self.provider.max_chunk_length = 50  # small limit to force chunking

        pieces = self.provider._chunk_text(lengthy)

        assert len(pieces) > 1
        assert all(len(piece) <= self.provider.max_chunk_length for piece in pieces)

    def test_split_into_sentences(self):
        """Sentences ending in '.', '!' and '?' are separated in order."""
        paragraph = "First sentence. Second sentence! Third sentence? Fourth sentence."
        expected_fragments = (
            "First sentence",
            "Second sentence",
            "Third sentence",
            "Fourth sentence",
        )

        parts = self.provider._split_into_sentences(paragraph)

        assert len(parts) == 4
        for fragment, part in zip(expected_fragments, parts):
            assert fragment in part

    def test_split_into_sentences_no_punctuation(self):
        """Text without sentence punctuation is returned as one sentence."""
        unpunctuated = "Just one long sentence without proper punctuation"
        parts = self.provider._split_into_sentences(unpunctuated)

        assert len(parts) == 1
        assert parts[0] == unpunctuated

    def test_split_long_sentence(self):
        """A long sentence is broken into chunks within the limit."""
        repeated_words = "word " * 100
        self.provider.max_chunk_length = 20  # small limit to force splitting

        pieces = self.provider._split_long_sentence(repeated_words)

        assert len(pieces) > 1
        assert all(len(piece) <= self.provider.max_chunk_length for piece in pieces)

    def test_split_long_sentence_single_long_word(self):
        """A single word longer than the limit is kept whole in one chunk."""
        oversized_word = "a" * 100
        self.provider.max_chunk_length = 20

        pieces = self.provider._split_long_sentence(oversized_word)

        assert len(pieces) == 1
        assert pieces[0] == oversized_word  # emitted as-is, not truncated

    def test_reassemble_chunks(self):
        """Chunks are joined with single spaces."""
        joined = self.provider._reassemble_chunks(
            ["First chunk", "Second chunk", "Third chunk"]
        )

        assert joined == "First chunk Second chunk Third chunk"

    def test_reassemble_chunks_with_empty(self):
        """Empty and whitespace-only chunks are dropped when joining."""
        joined = self.provider._reassemble_chunks(
            ["First chunk", "", "Third chunk", "   "]
        )

        assert joined == "First chunk Third chunk"

    def test_preprocess_text(self):
        """Preprocessing trims the ends and collapses whitespace runs."""
        untidy = "  Hello    world  \n\n  with   extra   spaces  "
        cleaned = self.provider._preprocess_text(untidy)

        assert cleaned == "Hello world with extra spaces"

    def test_postprocess_text(self):
        """Postprocessing collapses spaces and removes gaps before punctuation."""
        untidy = "Hello  world  .  This  is  a  test  !  Another  sentence  ?"
        cleaned = self.provider._postprocess_text(untidy)

        assert cleaned == "Hello world. This is a test! Another sentence?"

    def test_postprocess_text_sentence_spacing(self):
        """Postprocessing inserts a space after sentence-ending punctuation."""
        cramped = "First sentence.Second sentence!Third sentence?"
        cleaned = self.provider._postprocess_text(cramped)

        assert cleaned == "First sentence. Second sentence! Third sentence?"

    def test_handle_provider_error(self):
        """The wrapped error names the provider and context and chains the cause."""
        root_cause = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as captured:
            self.provider._handle_provider_error(root_cause, "testing")

        assert "test error during testing: Original error" in str(captured.value)
        assert captured.value.__cause__ is root_cause

    def test_handle_provider_error_no_context(self):
        """Without a context argument the message omits the 'during ...' part."""
        root_cause = ValueError("Original error")

        with pytest.raises(TranslationFailedException) as captured:
            self.provider._handle_provider_error(root_cause)

        assert "test error: Original error" in str(captured.value)
        assert captured.value.__cause__ is root_cause

    def test_set_chunk_size(self):
        """set_chunk_size stores the new value in max_chunk_length."""
        self.provider.set_chunk_size(500)

        assert self.provider.max_chunk_length == 500

    def test_set_chunk_size_invalid(self):
        """Zero and negative chunk sizes are rejected with ValueError."""
        for rejected_size in (0, -1):
            with pytest.raises(ValueError, match="Chunk size must be positive"):
                self.provider.set_chunk_size(rejected_size)

    def test_get_translation_stats(self):
        """Stats describe the provider, the language pair and the text size."""
        stats = self.provider.get_translation_stats(self.request)
        source = self.request.source_text.text

        expected_values = {
            'provider': 'test',
            'source_language': 'en',
            'target_language': 'es',
            'text_length': len(source),
            'word_count': len(source.split()),
        }
        for key, expected in expected_values.items():
            assert stats[key] == expected
        assert stats['chunk_count'] >= 1
        assert 'max_chunk_length' in stats
        assert 'avg_chunk_length' in stats

    def test_get_translation_stats_empty_text(self):
        """For empty text every numeric statistic is zero."""
        blank = self._build_request("", "en", "es")

        stats = self.provider.get_translation_stats(blank)

        zero_keys = (
            'text_length',
            'word_count',
            'chunk_count',
            'max_chunk_length',
            'avg_chunk_length',
        )
        for key in zero_keys:
            assert stats[key] == 0

    def test_abstract_methods_not_implemented(self):
        """Instantiating TranslationProviderBase directly raises TypeError."""
        # The base class is expected to refuse direct construction.
        with pytest.raises(TypeError):
            TranslationProviderBase("test")

    def test_provider_unavailable(self):
        """A stub built with available=False reports itself unavailable."""
        offline = ConcreteTranslationProvider(available=False)

        assert offline.is_available() is False

    def test_no_supported_languages(self):
        """An explicitly empty language map is returned unchanged."""
        languageless = ConcreteTranslationProvider(supported_languages={})

        assert languageless.get_supported_languages() == {}