Michael Hu committed on
Commit
271b76a
·
1 Parent(s): 93dc283

Create integration tests for the complete pipeline

Browse files
tests/integration/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Integration tests package
tests/integration/test_audio_processing_pipeline.py ADDED
@@ -0,0 +1,452 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Integration tests for the complete audio processing pipeline."""
2
+
3
+ import os
4
+ import tempfile
5
+ import time
6
+ import pytest
7
+ from pathlib import Path
8
+ from unittest.mock import Mock, patch, MagicMock
9
+ from typing import Dict, Any, Optional
10
+
11
+ from src.application.services.audio_processing_service import AudioProcessingApplicationService
12
+ from src.application.dtos.audio_upload_dto import AudioUploadDto
13
+ from src.application.dtos.processing_request_dto import ProcessingRequestDto
14
+ from src.application.dtos.processing_result_dto import ProcessingResultDto
15
+ from src.infrastructure.config.dependency_container import DependencyContainer
16
+ from src.infrastructure.config.app_config import AppConfig
17
+ from src.domain.models.audio_content import AudioContent
18
+ from src.domain.models.text_content import TextContent
19
+ from src.domain.models.voice_settings import VoiceSettings
20
+ from src.domain.exceptions import (
21
+ SpeechRecognitionException,
22
+ TranslationFailedException,
23
+ SpeechSynthesisException
24
+ )
25
+
26
+
27
+ class TestAudioProcessingPipeline:
28
+ """Integration tests for the complete audio processing pipeline."""
29
+
30
+ @pytest.fixture
31
+ def temp_dir(self):
32
+ """Create temporary directory for test files."""
33
+ with tempfile.TemporaryDirectory() as temp_dir:
34
+ yield temp_dir
35
+
36
+ @pytest.fixture
37
+ def mock_config(self, temp_dir):
38
+ """Create mock configuration for testing."""
39
+ config = Mock(spec=AppConfig)
40
+
41
+ # Processing configuration
42
+ config.get_processing_config.return_value = {
43
+ 'max_file_size_mb': 50,
44
+ 'supported_audio_formats': ['wav', 'mp3', 'flac'],
45
+ 'temp_dir': temp_dir,
46
+ 'cleanup_temp_files': True
47
+ }
48
+
49
+ # Logging configuration
50
+ config.get_logging_config.return_value = {
51
+ 'level': 'INFO',
52
+ 'enable_file_logging': False,
53
+ 'log_file_path': os.path.join(temp_dir, 'test.log'),
54
+ 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
55
+ }
56
+
57
+ # STT configuration
58
+ config.get_stt_config.return_value = {
59
+ 'preferred_providers': ['whisper-small', 'whisper-medium', 'parakeet']
60
+ }
61
+
62
+ # TTS configuration
63
+ config.get_tts_config.return_value = {
64
+ 'preferred_providers': ['kokoro', 'dia', 'cosyvoice2', 'dummy']
65
+ }
66
+
67
+ return config
68
+
69
+ @pytest.fixture
70
+ def mock_container(self, mock_config):
71
+ """Create mock dependency container for testing."""
72
+ container = Mock(spec=DependencyContainer)
73
+ container.resolve.return_value = mock_config
74
+
75
+ # Mock STT provider
76
+ mock_stt_provider = Mock()
77
+ mock_stt_provider.transcribe.return_value = TextContent(
78
+ text="Hello, this is a test transcription.",
79
+ language="en"
80
+ )
81
+ container.get_stt_provider.return_value = mock_stt_provider
82
+
83
+ # Mock translation provider
84
+ mock_translation_provider = Mock()
85
+ mock_translation_provider.translate.return_value = TextContent(
86
+ text="Hola, esta es una transcripción de prueba.",
87
+ language="es"
88
+ )
89
+ container.get_translation_provider.return_value = mock_translation_provider
90
+
91
+ # Mock TTS provider
92
+ mock_tts_provider = Mock()
93
+ mock_audio_content = AudioContent(
94
+ data=b"fake_audio_data",
95
+ format="wav",
96
+ sample_rate=22050,
97
+ duration=2.5
98
+ )
99
+ mock_tts_provider.synthesize.return_value = mock_audio_content
100
+ container.get_tts_provider.return_value = mock_tts_provider
101
+
102
+ return container
103
+
104
+ @pytest.fixture
105
+ def audio_service(self, mock_container, mock_config):
106
+ """Create audio processing service for testing."""
107
+ return AudioProcessingApplicationService(mock_container, mock_config)
108
+
109
+ @pytest.fixture
110
+ def sample_audio_upload(self):
111
+ """Create sample audio upload DTO."""
112
+ return AudioUploadDto(
113
+ filename="test_audio.wav",
114
+ content=b"fake_wav_audio_data",
115
+ content_type="audio/wav",
116
+ size=1024
117
+ )
118
+
119
+ @pytest.fixture
120
+ def sample_processing_request(self, sample_audio_upload):
121
+ """Create sample processing request DTO."""
122
+ return ProcessingRequestDto(
123
+ audio=sample_audio_upload,
124
+ asr_model="whisper-small",
125
+ target_language="es",
126
+ source_language="en",
127
+ voice="kokoro",
128
+ speed=1.0,
129
+ requires_translation=True
130
+ )
131
+
132
+ def test_complete_pipeline_success(self, audio_service, sample_processing_request):
133
+ """Test successful execution of the complete audio processing pipeline."""
134
+ # Execute the pipeline
135
+ result = audio_service.process_audio_pipeline(sample_processing_request)
136
+
137
+ # Verify successful result
138
+ assert isinstance(result, ProcessingResultDto)
139
+ assert result.success is True
140
+ assert result.error_message is None
141
+ assert result.original_text == "Hello, this is a test transcription."
142
+ assert result.translated_text == "Hola, esta es una transcripción de prueba."
143
+ assert result.audio_path is not None
144
+ assert result.processing_time > 0
145
+ assert result.metadata is not None
146
+ assert 'correlation_id' in result.metadata
147
+
148
+ def test_pipeline_without_translation(self, audio_service, sample_audio_upload):
149
+ """Test pipeline execution without translation (same language)."""
150
+ request = ProcessingRequestDto(
151
+ audio=sample_audio_upload,
152
+ asr_model="whisper-small",
153
+ target_language="en",
154
+ source_language="en",
155
+ voice="kokoro",
156
+ speed=1.0,
157
+ requires_translation=False
158
+ )
159
+
160
+ result = audio_service.process_audio_pipeline(request)
161
+
162
+ assert result.success is True
163
+ assert result.original_text == "Hello, this is a test transcription."
164
+ assert result.translated_text is None # No translation performed
165
+ assert result.audio_path is not None
166
+
167
+ def test_pipeline_with_different_voice_settings(self, audio_service, sample_audio_upload):
168
+ """Test pipeline with different voice settings."""
169
+ request = ProcessingRequestDto(
170
+ audio=sample_audio_upload,
171
+ asr_model="whisper-medium",
172
+ target_language="fr",
173
+ source_language="en",
174
+ voice="dia",
175
+ speed=1.5,
176
+ requires_translation=True
177
+ )
178
+
179
+ result = audio_service.process_audio_pipeline(request)
180
+
181
+ assert result.success is True
182
+ assert result.metadata['voice'] == "dia"
183
+ assert result.metadata['speed'] == 1.5
184
+ assert result.metadata['asr_model'] == "whisper-medium"
185
+
186
+ def test_pipeline_performance_metrics(self, audio_service, sample_processing_request):
187
+ """Test that pipeline captures performance metrics."""
188
+ start_time = time.time()
189
+ result = audio_service.process_audio_pipeline(sample_processing_request)
190
+ end_time = time.time()
191
+
192
+ assert result.success is True
193
+ assert result.processing_time > 0
194
+ assert result.processing_time <= (end_time - start_time) + 0.1 # Allow small margin
195
+ assert 'correlation_id' in result.metadata
196
+
197
+ def test_pipeline_with_large_file(self, audio_service, mock_config):
198
+ """Test pipeline behavior with large audio files."""
199
+ # Create large audio upload
200
+ large_audio = AudioUploadDto(
201
+ filename="large_audio.wav",
202
+ content=b"x" * (10 * 1024 * 1024), # 10MB
203
+ content_type="audio/wav",
204
+ size=10 * 1024 * 1024
205
+ )
206
+
207
+ request = ProcessingRequestDto(
208
+ audio=large_audio,
209
+ asr_model="whisper-small",
210
+ target_language="es",
211
+ voice="kokoro",
212
+ speed=1.0,
213
+ requires_translation=True
214
+ )
215
+
216
+ result = audio_service.process_audio_pipeline(request)
217
+
218
+ assert result.success is True
219
+ assert result.metadata['file_size'] == 10 * 1024 * 1024
220
+
221
+ def test_pipeline_file_cleanup(self, audio_service, sample_processing_request, temp_dir):
222
+ """Test that temporary files are properly cleaned up."""
223
+ # Count files before processing
224
+ files_before = len(list(Path(temp_dir).rglob("*")))
225
+
226
+ result = audio_service.process_audio_pipeline(sample_processing_request)
227
+
228
+ # Verify processing succeeded
229
+ assert result.success is True
230
+
231
+ # Verify cleanup occurred (no additional temp files)
232
+ files_after = len(list(Path(temp_dir).rglob("*")))
233
+ assert files_after <= files_before + 1 # Allow for output file
234
+
235
+ def test_pipeline_correlation_id_tracking(self, audio_service, sample_processing_request):
236
+ """Test that correlation IDs are properly tracked throughout the pipeline."""
237
+ result = audio_service.process_audio_pipeline(sample_processing_request)
238
+
239
+ assert result.success is True
240
+ assert 'correlation_id' in result.metadata
241
+
242
+ correlation_id = result.metadata['correlation_id']
243
+ assert isinstance(correlation_id, str)
244
+ assert len(correlation_id) > 0
245
+
246
+ # Verify correlation ID is used in status tracking
247
+ status = audio_service.get_processing_status(correlation_id)
248
+ assert status['correlation_id'] == correlation_id
249
+
250
+ def test_pipeline_metadata_completeness(self, audio_service, sample_processing_request):
251
+ """Test that pipeline result contains complete metadata."""
252
+ result = audio_service.process_audio_pipeline(sample_processing_request)
253
+
254
+ assert result.success is True
255
+ assert result.metadata is not None
256
+
257
+ expected_metadata_keys = [
258
+ 'correlation_id', 'asr_model', 'target_language',
259
+ 'voice', 'speed', 'translation_required'
260
+ ]
261
+
262
+ for key in expected_metadata_keys:
263
+ assert key in result.metadata
264
+
265
+ def test_pipeline_supported_configurations(self, audio_service):
266
+ """Test retrieval of supported pipeline configurations."""
267
+ config = audio_service.get_supported_configurations()
268
+
269
+ assert 'asr_models' in config
270
+ assert 'voices' in config
271
+ assert 'languages' in config
272
+ assert 'audio_formats' in config
273
+ assert 'max_file_size_mb' in config
274
+ assert 'speed_range' in config
275
+
276
+ assert isinstance(config['asr_models'], list)
277
+ assert isinstance(config['voices'], list)
278
+ assert isinstance(config['languages'], list)
279
+ assert len(config['asr_models']) > 0
280
+ assert len(config['voices']) > 0
281
+
282
+ def test_pipeline_context_manager(self, mock_container, mock_config):
283
+ """Test audio service as context manager."""
284
+ with AudioProcessingApplicationService(mock_container, mock_config) as service:
285
+ assert service is not None
286
+
287
+ # Service should be usable within context
288
+ config = service.get_supported_configurations()
289
+ assert config is not None
290
+
291
+ def test_pipeline_multiple_requests(self, audio_service, sample_audio_upload):
292
+ """Test processing multiple requests in sequence."""
293
+ requests = []
294
+ for i in range(3):
295
+ request = ProcessingRequestDto(
296
+ audio=sample_audio_upload,
297
+ asr_model="whisper-small",
298
+ target_language="es",
299
+ voice="kokoro",
300
+ speed=1.0,
301
+ requires_translation=True
302
+ )
303
+ requests.append(request)
304
+
305
+ results = []
306
+ for request in requests:
307
+ result = audio_service.process_audio_pipeline(request)
308
+ results.append(result)
309
+
310
+ # Verify all requests succeeded
311
+ for result in results:
312
+ assert result.success is True
313
+ assert result.original_text is not None
314
+ assert result.translated_text is not None
315
+
316
+ # Verify each request has unique correlation ID
317
+ correlation_ids = [r.metadata['correlation_id'] for r in results]
318
+ assert len(set(correlation_ids)) == 3 # All unique
319
+
320
def test_pipeline_concurrent_processing(self, audio_service, sample_processing_request):
    """Test pipeline behavior under concurrent processing.

    Submits the same request to ``process_audio_pipeline`` from three
    threads at once and requires every call to return a result whose
    ``success`` flag is True. Workers are joined with a timeout so a
    deadlocked pipeline fails the test instead of hanging the run.
    """
    import threading
    import queue

    worker_count = 3
    join_timeout_seconds = 30
    results_queue = queue.Queue()

    def process_request():
        # An exception raised inside a worker thread would not fail the
        # test on its own, so it is handed back through the queue.
        try:
            result = audio_service.process_audio_pipeline(sample_processing_request)
            results_queue.put(result)
        except Exception as e:
            results_queue.put(e)

    # Daemon threads: a stuck worker must not keep the interpreter alive.
    threads = [
        threading.Thread(target=process_request, daemon=True)
        for _ in range(worker_count)
    ]
    for thread in threads:
        thread.start()

    # Bounded wait; join() returns silently on timeout, so check liveness.
    for thread in threads:
        thread.join(timeout=join_timeout_seconds)
        assert not thread.is_alive(), "Concurrent processing did not finish in time"

    # Verify all results
    results = []
    while not results_queue.empty():
        result = results_queue.get()
        if isinstance(result, Exception):
            pytest.fail(f"Concurrent processing failed: {result}")
        results.append(result)

    assert len(results) == worker_count
    for result in results:
        assert result.success is True
356
+
357
def test_pipeline_memory_usage(self, audio_service, sample_processing_request):
    """Test pipeline memory usage and cleanup.

    Processes the same request five times and checks that the resident
    set size of the current process grows by less than 50 MB. The test is
    skipped when the optional ``psutil`` package is not installed.
    """
    import gc

    # psutil is a third-party package; skip rather than error when absent.
    psutil = pytest.importorskip("psutil")

    process = psutil.Process(os.getpid())

    # Collect garbage before sampling so leftovers from earlier tests do
    # not distort the baseline.
    gc.collect()
    memory_before = process.memory_info().rss

    # Process multiple requests
    for _ in range(5):
        result = audio_service.process_audio_pipeline(sample_processing_request)
        assert result.success is True

    gc.collect()
    memory_after = process.memory_info().rss
    memory_increase = memory_after - memory_before

    # Memory increase should be reasonable (less than 50MB for test data)
    assert memory_increase < 50 * 1024 * 1024
375
+
376
def test_pipeline_with_streaming_synthesis(self, audio_service, sample_processing_request, mock_container):
    """Test pipeline with streaming TTS synthesis.

    Configures ``synthesize_stream`` on the mocked TTS provider to yield
    three small ``AudioContent`` chunks, runs the pipeline once, and
    checks that it succeeds and reports an output path.
    NOTE(review): whether the service actually calls ``synthesize_stream``
    is not visible from this test - confirm against the service.
    """
    # Mock streaming TTS provider
    mock_tts_provider = mock_container.get_tts_provider.return_value

    def mock_stream(*args, **kwargs):
        for i in range(3):
            yield AudioContent(
                data=f"chunk_{i}".encode(),
                format="wav",
                sample_rate=22050,
                duration=0.5
            )

    # side_effect calls mock_stream on every invocation, so each caller
    # gets a fresh generator. A single generator object stored as
    # return_value would be empty after its first consumer.
    mock_tts_provider.synthesize_stream.side_effect = mock_stream

    result = audio_service.process_audio_pipeline(sample_processing_request)

    assert result.success is True
    assert result.audio_path is not None
396
+
397
+ def test_pipeline_configuration_validation(self, audio_service):
398
+ """Test pipeline configuration validation."""
399
+ config = audio_service.get_supported_configurations()
400
+
401
+ # Verify configuration structure
402
+ assert isinstance(config['asr_models'], list)
403
+ assert isinstance(config['voices'], list)
404
+ assert isinstance(config['languages'], list)
405
+ assert isinstance(config['audio_formats'], list)
406
+ assert isinstance(config['max_file_size_mb'], (int, float))
407
+ assert isinstance(config['speed_range'], dict)
408
+
409
+ # Verify speed range
410
+ speed_range = config['speed_range']
411
+ assert 'min' in speed_range
412
+ assert 'max' in speed_range
413
+ assert speed_range['min'] < speed_range['max']
414
+ assert speed_range['min'] > 0
415
+ assert speed_range['max'] <= 3.0
416
+
417
+ def test_pipeline_error_recovery_logging(self, audio_service, sample_processing_request, mock_container):
418
+ """Test that error recovery attempts are properly logged."""
419
+ # Mock STT provider to fail first time, succeed second time
420
+ mock_stt_provider = mock_container.get_stt_provider.return_value
421
+ mock_stt_provider.transcribe.side_effect = [
422
+ SpeechRecognitionException("First attempt failed"),
423
+ TextContent(text="Recovered transcription", language="en")
424
+ ]
425
+
426
+ with patch('src.application.services.audio_processing_service.logger') as mock_logger:
427
+ result = audio_service.process_audio_pipeline(sample_processing_request)
428
+
429
+ assert result.success is True
430
+ # Verify error and recovery were logged
431
+ mock_logger.warning.assert_called()
432
+ mock_logger.info.assert_called()
433
+
434
def test_pipeline_end_to_end_timing(self, audio_service, sample_processing_request):
    """Test end-to-end pipeline timing and performance.

    Measures the elapsed time around one pipeline call and checks that the
    reported ``processing_time`` is positive and no larger than that
    measurement (plus a small tolerance), and that the call finishes in
    under five seconds.
    """
    # perf_counter is monotonic, so the interval cannot go negative if the
    # system clock is adjusted while the test runs.
    start_time = time.perf_counter()
    result = audio_service.process_audio_pipeline(sample_processing_request)
    total_time = time.perf_counter() - start_time

    assert result.success is True
    assert result.processing_time > 0
    # Same 0.1s margin as test_pipeline_performance_metrics: the service
    # may time itself with a different or coarser clock.
    assert result.processing_time <= total_time + 0.1

    # For mock providers, processing should be fast
    assert total_time < 5.0  # Should complete within 5 seconds

    # Verify timing metadata
    assert result.metadata is not None
    assert 'correlation_id' in result.metadata
tests/integration/test_file_handling.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Integration tests for file handling and cleanup."""
2
+
3
+ import os
4
+ import tempfile
5
+ import shutil
6
+ import time
7
+ import pytest
8
+ from pathlib import Path
9
+ from unittest.mock import Mock, patch, MagicMock
10
+ from typing import List, Dict, Any
11
+
12
+ from src.application.services.audio_processing_service import AudioProcessingApplicationService
13
+ from src.application.dtos.audio_upload_dto import AudioUploadDto
14
+ from src.application.dtos.processing_request_dto import ProcessingRequestDto
15
+ from src.infrastructure.config.dependency_container import DependencyContainer
16
+ from src.infrastructure.config.app_config import AppConfig
17
+ from src.domain.models.audio_content import AudioContent
18
+ from src.domain.models.text_content import TextContent
19
+
20
+
21
+ class TestFileHandling:
22
+ """Integration tests for file handling and cleanup."""
23
+
24
+ @pytest.fixture
25
+ def temp_base_dir(self):
26
+ """Create base temporary directory for all tests."""
27
+ with tempfile.TemporaryDirectory() as temp_dir:
28
+ yield temp_dir
29
+
30
+ @pytest.fixture
31
+ def mock_config(self, temp_base_dir):
32
+ """Create mock configuration with temporary directories."""
33
+ config = Mock(spec=AppConfig)
34
+
35
+ # Processing configuration with temp directory
36
+ config.get_processing_config.return_value = {
37
+ 'max_file_size_mb': 50,
38
+ 'supported_audio_formats': ['wav', 'mp3', 'flac', 'ogg'],
39
+ 'temp_dir': temp_base_dir,
40
+ 'cleanup_temp_files': True,
41
+ 'max_temp_file_age_hours': 24,
42
+ 'temp_file_prefix': 'audio_processing_'
43
+ }
44
+
45
+ # Logging configuration
46
+ config.get_logging_config.return_value = {
47
+ 'level': 'INFO',
48
+ 'enable_file_logging': True,
49
+ 'log_file_path': os.path.join(temp_base_dir, 'processing.log'),
50
+ 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
51
+ }
52
+
53
+ # STT configuration
54
+ config.get_stt_config.return_value = {
55
+ 'preferred_providers': ['whisper-small']
56
+ }
57
+
58
+ # TTS configuration
59
+ config.get_tts_config.return_value = {
60
+ 'preferred_providers': ['dummy']
61
+ }
62
+
63
+ return config
64
+
65
+ @pytest.fixture
66
+ def mock_container(self, mock_config):
67
+ """Create mock dependency container."""
68
+ container = Mock(spec=DependencyContainer)
69
+ container.resolve.return_value = mock_config
70
+
71
+ # Mock providers
72
+ mock_stt_provider = Mock()
73
+ mock_stt_provider.transcribe.return_value = TextContent(
74
+ text="Test transcription",
75
+ language="en"
76
+ )
77
+ container.get_stt_provider.return_value = mock_stt_provider
78
+
79
+ mock_translation_provider = Mock()
80
+ mock_translation_provider.translate.return_value = TextContent(
81
+ text="Prueba de transcripción",
82
+ language="es"
83
+ )
84
+ container.get_translation_provider.return_value = mock_translation_provider
85
+
86
+ mock_tts_provider = Mock()
87
+ mock_tts_provider.synthesize.return_value = AudioContent(
88
+ data=b"synthesized_audio_data",
89
+ format="wav",
90
+ sample_rate=22050,
91
+ duration=2.0
92
+ )
93
+ container.get_tts_provider.return_value = mock_tts_provider
94
+
95
+ return container
96
+
97
+ @pytest.fixture
98
+ def audio_service(self, mock_container, mock_config):
99
+ """Create audio processing service."""
100
+ return AudioProcessingApplicationService(mock_container, mock_config)
101
+
102
+ @pytest.fixture
103
+ def sample_audio_files(self, temp_base_dir):
104
+ """Create sample audio files for testing."""
105
+ files = {}
106
+
107
+ # Create different audio file types
108
+ audio_formats = {
109
+ 'wav': b'RIFF\x24\x00\x00\x00WAVEfmt \x10\x00\x00\x00',
110
+ 'mp3': b'\xff\xfb\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00',
111
+ 'flac': b'fLaC\x00\x00\x00\x22\x10\x00\x10\x00',
112
+ 'ogg': b'OggS\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00'
113
+ }
114
+
115
+ for format_name, header in audio_formats.items():
116
+ file_path = os.path.join(temp_base_dir, f'test_audio.{format_name}')
117
+ with open(file_path, 'wb') as f:
118
+ f.write(header + b'\x00' * 1000) # Add some padding
119
+ files[format_name] = file_path
120
+
121
+ yield files
122
+
123
+ # Cleanup
124
+ for file_path in files.values():
125
+ if os.path.exists(file_path):
126
+ os.remove(file_path)
127
+
128
+ def test_temp_directory_creation(self, audio_service, temp_base_dir):
129
+ """Test temporary directory creation and structure."""
130
+ # Create a processing request to trigger temp directory creation
131
+ audio_upload = AudioUploadDto(
132
+ filename="test.wav",
133
+ content=b"fake_audio_data",
134
+ content_type="audio/wav",
135
+ size=len(b"fake_audio_data")
136
+ )
137
+
138
+ request = ProcessingRequestDto(
139
+ audio=audio_upload,
140
+ asr_model="whisper-small",
141
+ target_language="es",
142
+ voice="dummy",
143
+ speed=1.0,
144
+ requires_translation=True
145
+ )
146
+
147
+ # Process and check temp directory creation
148
+ result = audio_service.process_audio_pipeline(request)
149
+
150
+ assert result.success is True
151
+
152
+ # Verify base temp directory exists
153
+ assert os.path.exists(temp_base_dir)
154
+ assert os.path.isdir(temp_base_dir)
155
+
156
+ def test_input_file_handling(self, audio_service, sample_audio_files):
157
+ """Test handling of different input audio file formats."""
158
+ for format_name, file_path in sample_audio_files.items():
159
+ with open(file_path, 'rb') as f:
160
+ content = f.read()
161
+
162
+ audio_upload = AudioUploadDto(
163
+ filename=f"test.{format_name}",
164
+ content=content,
165
+ content_type=f"audio/{format_name}",
166
+ size=len(content)
167
+ )
168
+
169
+ request = ProcessingRequestDto(
170
+ audio=audio_upload,
171
+ asr_model="whisper-small",
172
+ target_language="en",
173
+ voice="dummy",
174
+ speed=1.0,
175
+ requires_translation=False
176
+ )
177
+
178
+ result = audio_service.process_audio_pipeline(request)
179
+
180
+ assert result.success is True, f"Failed to process {format_name} file"
181
+ assert result.audio_path is not None
182
+ assert os.path.exists(result.audio_path)
183
+
184
def test_output_file_generation(self, audio_service, temp_base_dir):
    """Test output audio file generation.

    Runs one translation request and checks that the reported output file
    exists, is non-empty, and lives somewhere under the temporary base
    directory.
    """
    payload = b"input_audio_data"
    audio_upload = AudioUploadDto(
        filename="input.wav",
        content=payload,
        content_type="audio/wav",
        size=len(payload)
    )

    request = ProcessingRequestDto(
        audio=audio_upload,
        asr_model="whisper-small",
        target_language="es",
        voice="dummy",
        speed=1.0,
        requires_translation=True
    )

    result = audio_service.process_audio_pipeline(request)

    assert result.success is True
    assert result.audio_path is not None

    # Verify output file exists and has content
    assert os.path.exists(result.audio_path)
    assert os.path.getsize(result.audio_path) > 0

    # Verify file is in expected location. Compare resolved paths rather
    # than substrings: a substring test accepts look-alike sibling
    # directories and fails when the temp dir is reached through a symlink.
    base_dir = Path(temp_base_dir).resolve()
    assert base_dir in Path(result.audio_path).resolve().parents
213
+
214
+ def test_temp_file_cleanup_success(self, audio_service, temp_base_dir):
215
+ """Test temporary file cleanup after successful processing."""
216
+ initial_files = set(os.listdir(temp_base_dir))
217
+
218
+ audio_upload = AudioUploadDto(
219
+ filename="cleanup_test.wav",
220
+ content=b"cleanup_test_data",
221
+ content_type="audio/wav",
222
+ size=len(b"cleanup_test_data")
223
+ )
224
+
225
+ request = ProcessingRequestDto(
226
+ audio=audio_upload,
227
+ asr_model="whisper-small",
228
+ target_language="es",
229
+ voice="dummy",
230
+ speed=1.0,
231
+ requires_translation=True
232
+ )
233
+
234
+ result = audio_service.process_audio_pipeline(request)
235
+
236
+ assert result.success is True
237
+
238
+ # Check that temporary processing files are cleaned up
239
+ # (output file should remain)
240
+ final_files = set(os.listdir(temp_base_dir))
241
+ new_files = final_files - initial_files
242
+
243
+ # Should only have the output file and possibly log files
244
+ assert len(new_files) <= 2 # output file + possible log file
245
+
246
+ def test_temp_file_cleanup_on_error(self, audio_service, temp_base_dir, mock_container):
247
+ """Test temporary file cleanup when processing fails."""
248
+ # Mock STT provider to fail
249
+ mock_stt_provider = mock_container.get_stt_provider.return_value
250
+ mock_stt_provider.transcribe.side_effect = Exception("STT failed")
251
+
252
+ initial_files = set(os.listdir(temp_base_dir))
253
+
254
+ audio_upload = AudioUploadDto(
255
+ filename="error_test.wav",
256
+ content=b"error_test_data",
257
+ content_type="audio/wav",
258
+ size=len(b"error_test_data")
259
+ )
260
+
261
+ request = ProcessingRequestDto(
262
+ audio=audio_upload,
263
+ asr_model="whisper-small",
264
+ target_language="es",
265
+ voice="dummy",
266
+ speed=1.0,
267
+ requires_translation=True
268
+ )
269
+
270
+ result = audio_service.process_audio_pipeline(request)
271
+
272
+ assert result.success is False
273
+
274
+ # Verify cleanup occurred even on error
275
+ final_files = set(os.listdir(temp_base_dir))
276
+ new_files = final_files - initial_files
277
+
278
+ # Should have minimal new files (possibly just log files)
279
+ assert len(new_files) <= 1
280
+
281
+ def test_large_file_handling(self, audio_service, temp_base_dir):
282
+ """Test handling of large audio files."""
283
+ # Create large audio content (5MB)
284
+ large_content = b"x" * (5 * 1024 * 1024)
285
+
286
+ audio_upload = AudioUploadDto(
287
+ filename="large_file.wav",
288
+ content=large_content,
289
+ content_type="audio/wav",
290
+ size=len(large_content)
291
+ )
292
+
293
+ request = ProcessingRequestDto(
294
+ audio=audio_upload,
295
+ asr_model="whisper-small",
296
+ target_language="es",
297
+ voice="dummy",
298
+ speed=1.0,
299
+ requires_translation=True
300
+ )
301
+
302
+ result = audio_service.process_audio_pipeline(request)
303
+
304
+ assert result.success is True
305
+ assert result.audio_path is not None
306
+ assert os.path.exists(result.audio_path)
307
+
308
def test_concurrent_file_handling(self, audio_service, temp_base_dir):
    """Test concurrent file handling and cleanup.

    Processes three distinct uploads from three threads at once and
    requires each to succeed and to report an output file that exists.
    Workers are joined with a timeout so a deadlock fails the test instead
    of hanging the run.
    """
    import threading
    import queue

    worker_count = 3
    join_timeout_seconds = 30
    results_queue = queue.Queue()

    def process_file(file_id):
        # Exceptions travel back through the queue; raised inside a worker
        # thread they would not fail the test on their own.
        try:
            payload = f"concurrent_data_{file_id}".encode()
            audio_upload = AudioUploadDto(
                filename=f"concurrent_{file_id}.wav",
                content=payload,
                content_type="audio/wav",
                size=len(payload)
            )

            request = ProcessingRequestDto(
                audio=audio_upload,
                asr_model="whisper-small",
                target_language="es",
                voice="dummy",
                speed=1.0,
                requires_translation=True
            )

            result = audio_service.process_audio_pipeline(request)
            results_queue.put((file_id, result))
        except Exception as e:
            results_queue.put((file_id, e))

    # Daemon threads: a stuck worker must not keep the interpreter alive.
    threads = [
        threading.Thread(target=process_file, args=(i,), daemon=True)
        for i in range(worker_count)
    ]
    for thread in threads:
        thread.start()

    # Bounded wait; join() returns silently on timeout, so check liveness.
    for thread in threads:
        thread.join(timeout=join_timeout_seconds)
        assert not thread.is_alive(), "Concurrent file handling did not finish in time"

    # Verify results
    results = {}
    while not results_queue.empty():
        file_id, result = results_queue.get()
        if isinstance(result, Exception):
            pytest.fail(f"Concurrent processing failed for file {file_id}: {result}")
        results[file_id] = result

    assert len(results) == worker_count
    for result in results.values():
        assert result.success is True
        assert result.audio_path is not None
        assert os.path.exists(result.audio_path)
362
+
363
def test_file_permission_handling(self, audio_service, temp_base_dir):
    """Test file permission handling.

    Runs one request and checks that the output file is both readable and
    writable by its owner.
    """
    payload = b"permission_test_data"
    audio_upload = AudioUploadDto(
        filename="permission_test.wav",
        content=payload,
        content_type="audio/wav",
        size=len(payload)
    )

    request = ProcessingRequestDto(
        audio=audio_upload,
        asr_model="whisper-small",
        target_language="es",
        voice="dummy",
        speed=1.0,
        requires_translation=True
    )

    result = audio_service.process_audio_pipeline(request)

    assert result.success is True
    assert result.audio_path is not None

    # Verify file permissions: BOTH owner bits must be set. A bare
    # `st_mode & 0o600` is truthy when only one of them is present.
    owner_rw = 0o600
    file_stat = os.stat(result.audio_path)
    assert (file_stat.st_mode & owner_rw) == owner_rw  # At least owner read/write
389
+
390
def test_disk_space_monitoring(self, audio_service, temp_base_dir):
    """Test disk space monitoring during processing.

    Takes a disk-usage snapshot for the temp directory, runs one request
    with a 15,000-byte payload, and checks that free space is still
    non-zero afterwards. The initial snapshot is reported in the failure
    message to help diagnose an exhausted disk.
    """
    # Get initial disk space (shutil is imported at module level)
    initial_space = shutil.disk_usage(temp_base_dir)

    payload = b"space_test_data" * 1000  # Larger content
    audio_upload = AudioUploadDto(
        filename="space_test.wav",
        content=payload,
        content_type="audio/wav",
        size=len(payload)
    )

    request = ProcessingRequestDto(
        audio=audio_upload,
        asr_model="whisper-small",
        target_language="es",
        voice="dummy",
        speed=1.0,
        requires_translation=True
    )

    result = audio_service.process_audio_pipeline(request)

    assert result.success is True

    # Verify disk space hasn't been exhausted
    final_space = shutil.disk_usage(temp_base_dir)
    assert final_space.free > 0, (
        f"No free disk space left (free before processing: {initial_space.free} bytes)"
    )
420
+
421
def test_file_naming_conventions(self, audio_service, temp_base_dir):
    """Check that output files get unique names of the form ``output_*.wav``."""

    def run_pipeline(index):
        # One upload per index so every request carries distinct content.
        payload = f"naming_test_data_{index}".encode()
        upload = AudioUploadDto(
            filename=f"naming_test_{index}.wav",
            content=payload,
            content_type="audio/wav",
            size=len(payload),
        )
        return audio_service.process_audio_pipeline(
            ProcessingRequestDto(
                audio=upload,
                asr_model="whisper-small",
                target_language="es",
                voice="dummy",
                speed=1.0,
                requires_translation=True,
            )
        )

    outcomes = [run_pipeline(index) for index in range(3)]

    # Every run must succeed and report an output path.
    for outcome in outcomes:
        assert outcome.success is True
        assert outcome.audio_path is not None

    # No two runs may share an output path.
    output_paths = [outcome.audio_path for outcome in outcomes]
    assert len(output_paths) == len(set(output_paths))

    # Each file name follows the expected prefix/suffix convention.
    for name in map(os.path.basename, output_paths):
        assert name.startswith("output_")
        assert name.endswith(".wav")
460
+
461
def test_file_encoding_handling(self, audio_service, temp_base_dir):
    """Check that an upload whose name has non-ASCII characters is processed."""
    payload = b"encoding_test_data"

    # Accented Latin and CJK characters in the uploaded file name.
    request = ProcessingRequestDto(
        audio=AudioUploadDto(
            filename="test_file_ñáéíóú_测试.wav",
            content=payload,
            content_type="audio/wav",
            size=len(payload),
        ),
        asr_model="whisper-small",
        target_language="es",
        voice="dummy",
        speed=1.0,
        requires_translation=True,
    )

    outcome = audio_service.process_audio_pipeline(request)

    assert outcome.success is True
    output_path = outcome.audio_path
    assert output_path is not None
    assert os.path.exists(output_path)
487
+
488
def test_file_cleanup_context_manager(self, mock_container, mock_config, temp_base_dir):
    """Check that leaving the service's ``with`` block leaves at most one new file."""
    files_before = set(os.listdir(temp_base_dir))

    with AudioProcessingApplicationService(mock_container, mock_config) as service:
        payload = b"context_test_data"
        request = ProcessingRequestDto(
            audio=AudioUploadDto(
                filename="context_test.wav",
                content=payload,
                content_type="audio/wav",
                size=len(payload),
            ),
            asr_model="whisper-small",
            target_language="es",
            voice="dummy",
            speed=1.0,
            requires_translation=True,
        )

        outcome = service.process_audio_pipeline(request)
        assert outcome.success is True

    # Compare the directory listing after the context has exited.
    leftovers = set(os.listdir(temp_base_dir)) - files_before

    # A single leftover is tolerated (possibly just a log file).
    assert len(leftovers) <= 1
518
+
519
def test_file_recovery_after_interruption(self, audio_service, temp_base_dir, mock_container):
    """Check that a ``KeyboardInterrupt`` raised during synthesis propagates.

    The mocked TTS provider raises ``KeyboardInterrupt`` and the test only
    asserts that the exception reaches the caller.
    """
    # Make the synthesis step simulate a user interruption.
    mock_container.get_tts_provider.return_value.synthesize.side_effect = (
        KeyboardInterrupt("Simulated interruption")
    )

    payload = b"interruption_test_data"
    request = ProcessingRequestDto(
        audio=AudioUploadDto(
            filename="interruption_test.wav",
            content=payload,
            content_type="audio/wav",
            size=len(payload),
        ),
        asr_model="whisper-small",
        target_language="es",
        voice="dummy",
        speed=1.0,
        requires_translation=True,
    )

    # The interruption is expected to surface rather than be swallowed.
    with pytest.raises(KeyboardInterrupt):
        audio_service.process_audio_pipeline(request)

    # NOTE(review): cleanup after the interruption is not asserted here.
    # (In a real implementation, this would be handled by signal handlers.)
547
+
548
def test_file_metadata_preservation(self, audio_service, temp_base_dir):
    """Check that a successful result carries metadata and an output file.

    The metadata check is limited to the presence of a ``correlation_id``;
    the original file name is not looked up in the metadata.
    """
    payload = b"metadata_test_data"
    upload = AudioUploadDto(
        filename="metadata_test.wav",
        content=payload,
        content_type="audio/wav",
        size=len(payload),
    )

    outcome = audio_service.process_audio_pipeline(
        ProcessingRequestDto(
            audio=upload,
            asr_model="whisper-small",
            target_language="es",
            voice="dummy",
            speed=1.0,
            requires_translation=True,
        )
    )

    assert outcome.success is True
    assert outcome.metadata is not None

    # The run must be traceable through a correlation id.
    assert outcome.metadata.get('correlation_id') is not None

    # The synthesized audio must exist on disk.
    assert outcome.audio_path is not None
    assert os.path.exists(outcome.audio_path)
tests/integration/test_performance_and_errors.py ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Integration tests for performance and error scenario testing."""
2
+
3
+ import time
4
+ import pytest
5
+ import threading
6
+ import queue
7
+ import psutil
8
+ import os
9
+ from unittest.mock import Mock, patch, MagicMock
10
+ from typing import List, Dict, Any, Optional
11
+
12
+ from src.application.services.audio_processing_service import AudioProcessingApplicationService
13
+ from src.application.dtos.audio_upload_dto import AudioUploadDto
14
+ from src.application.dtos.processing_request_dto import ProcessingRequestDto
15
+ from src.application.dtos.processing_result_dto import ProcessingResultDto
16
+ from src.infrastructure.config.dependency_container import DependencyContainer
17
+ from src.infrastructure.config.app_config import AppConfig
18
+ from src.domain.models.audio_content import AudioContent
19
+ from src.domain.models.text_content import TextContent
20
+ from src.domain.exceptions import (
21
+ SpeechRecognitionException,
22
+ TranslationFailedException,
23
+ SpeechSynthesisException,
24
+ AudioProcessingException,
25
+ ProviderNotAvailableException
26
+ )
27
+
28
+
29
class TestPerformanceAndErrors:
    """Integration tests for performance and error scenarios.

    All STT, translation and TTS providers are ``Mock`` objects handed out by
    a mocked ``DependencyContainer``, so these tests exercise the service's
    orchestration and error handling rather than real models.

    Elapsed time is measured with ``time.perf_counter()`` so the timing
    assertions are not affected by wall-clock adjustments.
    """

    # Longest time to wait for one worker thread. Joining with a bound turns
    # a hung worker into a test failure instead of a stalled test run.
    _THREAD_JOIN_TIMEOUT = 30.0

    @staticmethod
    def _make_request(filename, content, content_type="audio/wav"):
        """Build the standard translation request used throughout this class.

        Args:
            filename: Name reported for the uploaded file.
            content: Raw bytes of the upload; its length is used as the size.
            content_type: MIME type reported for the upload.

        Returns:
            A ``ProcessingRequestDto`` targeting Spanish with the
            ``whisper-small`` ASR model and the ``kokoro`` voice.
        """
        audio_upload = AudioUploadDto(
            filename=filename,
            content=content,
            content_type=content_type,
            size=len(content),
        )
        return ProcessingRequestDto(
            audio=audio_upload,
            asr_model="whisper-small",
            target_language="es",
            voice="kokoro",
            speed=1.0,
            requires_translation=True,
        )

    @pytest.fixture
    def mock_config(self, tmp_path):
        """Create mock configuration for testing."""
        config = Mock(spec=AppConfig)

        # Processing configuration
        config.get_processing_config.return_value = {
            'max_file_size_mb': 100,
            'supported_audio_formats': ['wav', 'mp3', 'flac'],
            'temp_dir': str(tmp_path),
            'cleanup_temp_files': True,
            'processing_timeout': 300,  # 5 minutes
            'max_concurrent_requests': 10,
        }

        # Logging configuration
        config.get_logging_config.return_value = {
            'level': 'INFO',
            'enable_file_logging': False,
            'log_file_path': str(tmp_path / 'test.log'),
            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        }

        # STT configuration
        config.get_stt_config.return_value = {
            'preferred_providers': ['whisper-small', 'whisper-medium', 'parakeet'],
            'provider_timeout': 60.0,
            'max_retries': 2,
        }

        # TTS configuration
        config.get_tts_config.return_value = {
            'preferred_providers': ['kokoro', 'dia', 'cosyvoice2', 'dummy'],
            'provider_timeout': 30.0,
            'max_retries': 3,
        }

        # Translation configuration
        config.get_translation_config.return_value = {
            'provider_timeout': 45.0,
            'max_retries': 2,
            'chunk_size': 512,
        }

        return config

    @pytest.fixture
    def mock_container(self, mock_config):
        """Create mock dependency container."""
        container = Mock(spec=DependencyContainer)
        container.resolve.return_value = mock_config

        # Mock providers with configurable behavior
        self._setup_mock_providers(container)

        return container

    def _setup_mock_providers(self, container):
        """Attach succeeding STT, translation and TTS mocks to ``container``."""
        # Mock STT provider
        mock_stt_provider = Mock()
        mock_stt_provider.transcribe.return_value = TextContent(
            text="Performance test transcription",
            language="en",
        )
        container.get_stt_provider.return_value = mock_stt_provider

        # Mock translation provider
        mock_translation_provider = Mock()
        mock_translation_provider.translate.return_value = TextContent(
            text="Transcripción de prueba de rendimiento",
            language="es",
        )
        container.get_translation_provider.return_value = mock_translation_provider

        # Mock TTS provider
        mock_tts_provider = Mock()
        mock_tts_provider.synthesize.return_value = AudioContent(
            data=b"performance_test_audio_data",
            format="wav",
            sample_rate=22050,
            duration=3.0,
        )
        container.get_tts_provider.return_value = mock_tts_provider

    @pytest.fixture
    def audio_service(self, mock_container, mock_config):
        """Create audio processing service."""
        return AudioProcessingApplicationService(mock_container, mock_config)

    @pytest.fixture
    def sample_request(self):
        """Create sample processing request."""
        return self._make_request(
            "performance_test.wav", b"performance_test_audio_data"
        )

    def test_processing_time_performance(self, audio_service, sample_request):
        """Test processing time performance benchmarks."""
        # Warm up
        audio_service.process_audio_pipeline(sample_request)

        # Measure processing time
        start_time = time.perf_counter()
        result = audio_service.process_audio_pipeline(sample_request)
        processing_time = time.perf_counter() - start_time

        assert result.success is True
        assert result.processing_time > 0
        assert result.processing_time <= processing_time + 0.1  # Allow small margin

        # Performance benchmark: should complete within reasonable time
        assert processing_time < 5.0  # Should complete within 5 seconds for mock providers

    def test_memory_usage_performance(self, audio_service, sample_request):
        """Test memory usage during processing."""
        process = psutil.Process(os.getpid())

        # Measure initial memory
        initial_memory = process.memory_info().rss

        # Process multiple requests
        for _ in range(10):
            result = audio_service.process_audio_pipeline(sample_request)
            assert result.success is True

        # Measure final memory
        memory_increase = process.memory_info().rss - initial_memory

        # Memory increase should be reasonable (less than 100MB for test data)
        assert memory_increase < 100 * 1024 * 1024

    def test_concurrent_processing_performance(self, audio_service, sample_request):
        """Test performance under concurrent load."""
        num_threads = 5
        results_queue = queue.Queue()

        def process_request():
            # Workers report either (result, elapsed) or the raised exception;
            # an exception escaping a thread would otherwise go unnoticed.
            try:
                started = time.perf_counter()
                result = audio_service.process_audio_pipeline(sample_request)
                results_queue.put((result, time.perf_counter() - started))
            except Exception as e:
                results_queue.put(e)

        # Start concurrent processing. Daemon threads cannot keep the
        # interpreter alive if one of them hangs.
        start_time = time.perf_counter()
        threads = [
            threading.Thread(target=process_request, daemon=True)
            for _ in range(num_threads)
        ]
        for thread in threads:
            thread.start()

        # Wait for completion, but never indefinitely.
        for thread in threads:
            thread.join(timeout=self._THREAD_JOIN_TIMEOUT)
            assert not thread.is_alive(), "worker thread did not finish in time"

        total_time = time.perf_counter() - start_time

        # Collect results
        results = []
        processing_times = []

        while not results_queue.empty():
            item = results_queue.get()
            if isinstance(item, Exception):
                pytest.fail(f"Concurrent processing failed: {item}")
            result, proc_time = item
            results.append(result)
            processing_times.append(proc_time)

        # Verify all succeeded
        assert len(results) == num_threads
        for result in results:
            assert result.success is True

        # Performance checks
        avg_processing_time = sum(processing_times) / len(processing_times)
        assert avg_processing_time < 10.0  # Average should be reasonable
        assert total_time < 15.0  # Total concurrent time should be reasonable

    def test_large_file_performance(self, audio_service):
        """Test performance with large audio files."""
        # Create large audio file (10MB)
        large_content = b"x" * (10 * 1024 * 1024)
        request = self._make_request("large_performance_test.wav", large_content)

        start_time = time.perf_counter()
        result = audio_service.process_audio_pipeline(request)
        processing_time = time.perf_counter() - start_time

        assert result.success is True
        # Large files should still complete within reasonable time
        assert processing_time < 30.0

    def test_stt_provider_failure_recovery(self, audio_service, sample_request, mock_container):
        """Test recovery from STT provider failures."""
        mock_stt_provider = mock_container.get_stt_provider.return_value

        # Mock first call to fail, second to succeed
        mock_stt_provider.transcribe.side_effect = [
            SpeechRecognitionException("STT provider temporarily unavailable"),
            TextContent(text="Recovered transcription", language="en"),
        ]

        result = audio_service.process_audio_pipeline(sample_request)

        assert result.success is True
        assert "Recovered transcription" in result.original_text

    def test_translation_provider_failure_recovery(self, audio_service, sample_request, mock_container):
        """Test recovery from translation provider failures."""
        mock_translation_provider = mock_container.get_translation_provider.return_value

        # Mock first call to fail, second to succeed
        mock_translation_provider.translate.side_effect = [
            TranslationFailedException("Translation service temporarily unavailable"),
            TextContent(text="Traducción recuperada", language="es"),
        ]

        result = audio_service.process_audio_pipeline(sample_request)

        assert result.success is True
        assert "Traducción recuperada" in result.translated_text

    def test_tts_provider_failure_recovery(self, audio_service, sample_request, mock_container):
        """Test recovery from TTS provider failures."""
        mock_tts_provider = mock_container.get_tts_provider.return_value

        # Mock first call to fail, second to succeed
        mock_tts_provider.synthesize.side_effect = [
            SpeechSynthesisException("TTS provider temporarily unavailable"),
            AudioContent(
                data=b"recovered_audio_data",
                format="wav",
                sample_rate=22050,
                duration=2.5,
            ),
        ]

        result = audio_service.process_audio_pipeline(sample_request)

        assert result.success is True
        assert result.audio_path is not None

    def test_multiple_provider_failures(self, audio_service, sample_request, mock_container):
        """Test handling of multiple provider failures."""
        # Mock every provider to fail on every call
        mock_stt_provider = mock_container.get_stt_provider.return_value
        mock_translation_provider = mock_container.get_translation_provider.return_value
        mock_tts_provider = mock_container.get_tts_provider.return_value

        mock_stt_provider.transcribe.side_effect = SpeechRecognitionException("STT failed")
        mock_translation_provider.translate.side_effect = TranslationFailedException("Translation failed")
        mock_tts_provider.synthesize.side_effect = SpeechSynthesisException("TTS failed")

        result = audio_service.process_audio_pipeline(sample_request)

        assert result.success is False
        assert result.error_message is not None
        assert result.error_code is not None

    def test_timeout_handling(self, audio_service, sample_request, mock_container):
        """Test that a slow (2 second) STT provider still yields a successful result."""
        mock_stt_provider = mock_container.get_stt_provider.return_value

        def slow_transcribe(*args, **kwargs):
            time.sleep(2.0)  # Simulate slow processing
            return TextContent(text="Slow transcription", language="en")

        mock_stt_provider.transcribe.side_effect = slow_transcribe

        start_time = time.perf_counter()
        result = audio_service.process_audio_pipeline(sample_request)
        processing_time = time.perf_counter() - start_time

        # Should complete despite slow provider
        assert result.success is True
        assert processing_time >= 2.0  # Should include the delay

    def test_invalid_input_handling(self, audio_service):
        """Test handling of invalid input data."""
        # Test with invalid audio format
        request = self._make_request(
            "invalid.xyz", b"invalid_audio_data", content_type="audio/xyz"
        )

        result = audio_service.process_audio_pipeline(request)

        assert result.success is False
        assert result.error_code is not None
        assert "format" in result.error_message.lower() or "unsupported" in result.error_message.lower()

    def test_oversized_file_handling(self, audio_service, mock_config):
        """Test handling of oversized files."""
        # Lower the size limit on the shared config dict.
        # NOTE(review): the service fixture is already built at this point, so
        # this presumably relies on the limit being read at processing time --
        # confirm against the service implementation.
        mock_config.get_processing_config.return_value['max_file_size_mb'] = 1

        # Create file larger than limit
        large_content = b"x" * (2 * 1024 * 1024)  # 2MB
        request = self._make_request("oversized.wav", large_content)

        result = audio_service.process_audio_pipeline(request)

        assert result.success is False
        assert result.error_code is not None
        assert "size" in result.error_message.lower() or "large" in result.error_message.lower()

    def test_corrupted_audio_handling(self, audio_service):
        """Test handling of corrupted audio data."""
        request = self._make_request("corrupted.wav", b"corrupted_data_not_audio")

        result = audio_service.process_audio_pipeline(request)

        # Should handle gracefully (success depends on implementation)
        assert result.error_message is None or "audio" in result.error_message.lower()

    def test_network_error_simulation(self, audio_service, sample_request, mock_container):
        """Test handling of network-related errors."""
        mock_translation_provider = mock_container.get_translation_provider.return_value

        # Simulate two network errors followed by a successful translation
        mock_translation_provider.translate.side_effect = [
            ConnectionError("Network connection failed"),
            TimeoutError("Request timed out"),
            TextContent(text="Network recovered translation", language="es"),
        ]

        result = audio_service.process_audio_pipeline(sample_request)

        # Should recover from network errors
        assert result.success is True
        assert "Network recovered translation" in result.translated_text

    def test_resource_exhaustion_handling(self, audio_service, sample_request):
        """Test that 20 back-to-back requests all succeed."""
        results = []

        for _ in range(20):  # Process many requests
            result = audio_service.process_audio_pipeline(sample_request)
            # Fail at the first unsuccessful request rather than at the end.
            assert result.success is True
            results.append(result)

        # Verify all completed
        assert len(results) == 20

    def test_error_correlation_tracking(self, audio_service, sample_request, mock_container):
        """Test that a failed result still carries a correlation id."""
        mock_stt_provider = mock_container.get_stt_provider.return_value
        mock_stt_provider.transcribe.side_effect = SpeechRecognitionException("STT correlation test error")

        result = audio_service.process_audio_pipeline(sample_request)

        assert result.success is False
        assert result.metadata is not None
        assert 'correlation_id' in result.metadata

        # The correlation id must be a non-empty string
        correlation_id = result.metadata['correlation_id']
        assert isinstance(correlation_id, str)
        assert len(correlation_id) > 0

    def test_graceful_degradation(self, audio_service, sample_request, mock_container):
        """Test graceful degradation when some features fail."""
        # Mock translation to fail but allow STT and TTS to succeed
        mock_translation_provider = mock_container.get_translation_provider.return_value
        mock_translation_provider.translate.side_effect = TranslationFailedException("Translation unavailable")

        # Modify request to not require translation
        sample_request.requires_translation = False
        sample_request.target_language = "en"  # Same as source

        result = audio_service.process_audio_pipeline(sample_request)

        # Should succeed without translation
        assert result.success is True
        assert result.translated_text is None  # No translation performed

    def test_circuit_breaker_behavior(self, audio_service, sample_request, mock_container):
        """Test that repeated TTS failures produce failed results with error codes.

        Only the reported outcome of each attempt is checked; provider call
        counts are not inspected.
        """
        mock_tts_provider = mock_container.get_tts_provider.return_value

        # Every synthesis attempt fails
        mock_tts_provider.synthesize.side_effect = SpeechSynthesisException("Repeated TTS failure")

        # Multiple attempts
        results = [
            audio_service.process_audio_pipeline(sample_request) for _ in range(5)
        ]

        for result in results:
            assert result.success is False
            assert result.error_code is not None

    def test_performance_metrics_collection(self, audio_service, sample_request):
        """Test collection of performance metrics."""
        result = audio_service.process_audio_pipeline(sample_request)

        assert result.success is True
        assert result.processing_time > 0
        assert result.metadata is not None

        # Verify performance-related metadata
        metadata = result.metadata
        for key in ('correlation_id', 'asr_model', 'target_language', 'voice'):
            assert key in metadata

    def test_stress_testing(self, audio_service, sample_request):
        """Test system behavior under stress conditions."""
        num_requests = 50

        start_time = time.perf_counter()
        results = [
            audio_service.process_audio_pipeline(sample_request)
            for _ in range(num_requests)
        ]
        total_time = time.perf_counter() - start_time

        # Verify all requests completed
        assert len(results) == num_requests

        # Calculate success rate
        successful_results = [r for r in results if r.success]
        success_rate = len(successful_results) / len(results)

        # Should maintain high success rate under stress
        assert success_rate >= 0.95  # At least 95% success rate

        # Performance should remain reasonable
        avg_time_per_request = total_time / num_requests
        assert avg_time_per_request < 1.0  # Average less than 1 second per request
tests/integration/test_provider_integration.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Integration tests for provider integration and switching."""
2
+
3
+ import pytest
4
+ from unittest.mock import Mock, patch, MagicMock
5
+ from typing import Dict, Any, List
6
+
7
+ from src.infrastructure.config.dependency_container import DependencyContainer
8
+ from src.infrastructure.config.app_config import AppConfig
9
+ from src.infrastructure.tts.provider_factory import TTSProviderFactory
10
+ from src.infrastructure.stt.provider_factory import STTProviderFactory
11
+ from src.infrastructure.translation.provider_factory import TranslationProviderFactory
12
+ from src.domain.models.audio_content import AudioContent
13
+ from src.domain.models.text_content import TextContent
14
+ from src.domain.models.speech_synthesis_request import SpeechSynthesisRequest
15
+ from src.domain.models.translation_request import TranslationRequest
16
+ from src.domain.models.voice_settings import VoiceSettings
17
+ from src.domain.exceptions import (
18
+ SpeechRecognitionException,
19
+ TranslationFailedException,
20
+ SpeechSynthesisException,
21
+ ProviderNotAvailableException
22
+ )
23
+
24
+
25
+ class TestProviderIntegration:
26
+ """Integration tests for provider integration and switching."""
27
+
28
@pytest.fixture
def mock_config(self):
    """Provide an ``AppConfig``-spec'd mock with TTS, STT and translation settings."""
    config = Mock(spec=AppConfig)

    # Text-to-speech settings
    tts_settings = config.tts
    tts_settings.preferred_providers = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
    tts_settings.fallback_enabled = True
    tts_settings.provider_timeout = 30.0

    # Speech-to-text settings
    stt_settings = config.stt
    stt_settings.default_model = 'whisper-small'
    stt_settings.fallback_models = ['whisper-medium', 'parakeet']
    stt_settings.provider_timeout = 60.0

    # Translation settings
    translation_settings = config.translation
    translation_settings.default_provider = 'nllb'
    translation_settings.fallback_enabled = True
    translation_settings.chunk_size = 512

    return config
49
+
50
@pytest.fixture
def dependency_container(self, mock_config):
    """Provide a real ``DependencyContainer`` built from the mocked configuration."""
    return DependencyContainer(mock_config)
55
+
56
@pytest.fixture
def sample_audio_content(self):
    """Provide a small placeholder ``AudioContent`` (WAV, 16 kHz, 2.5 s)."""
    audio_fields = {
        "data": b"fake_audio_data",
        "format": "wav",
        "sample_rate": 16000,
        "duration": 2.5,
    }
    return AudioContent(**audio_fields)
65
+
66
@pytest.fixture
def sample_text_content(self):
    """Provide a short English ``TextContent`` used as synthesis input."""
    message = "Hello, this is a test message."
    return TextContent(text=message, language="en")
73
+
74
def test_tts_provider_switching(self, dependency_container, sample_text_content):
    """Request each TTS provider by name from the container and synthesize with it.

    For every provider name the matching provider module is patched and a
    stub provider class is attached to it; the provider returned by the
    container must then produce that stub's audio.
    """
    synthesis_request = SpeechSynthesisRequest(
        text=sample_text_content.text,
        voice_settings=VoiceSettings(
            voice_id="test_voice",
            speed=1.0,
            language="en",
        ),
    )

    for provider_name in ('kokoro', 'dia', 'cosyvoice2', 'dummy'):
        module_path = f'src.infrastructure.tts.{provider_name}_provider'
        with patch(module_path) as provider_module:
            # Stub instance whose audio payload embeds the provider name.
            stub_provider = Mock()
            stub_provider.synthesize.return_value = AudioContent(
                data=f"{provider_name}_audio_data".encode(),
                format="wav",
                sample_rate=22050,
                duration=2.0,
            )
            # Stub class returning that instance, installed on the patched module.
            stub_class = Mock(return_value=stub_provider)
            setattr(provider_module, f'{provider_name.title()}Provider', stub_class)

            provider = dependency_container.get_tts_provider(provider_name)
            result = provider.synthesize(synthesis_request)

            assert isinstance(result, AudioContent)
            assert provider_name.encode() in result.data
            stub_provider.synthesize.assert_called_once()
113
+
114
def test_tts_provider_fallback(self, dependency_container, sample_text_content):
    """Check that the container's default TTS provider yields the fallback audio.

    The TTS factory class is patched so that ``get_provider_with_fallback``
    returns a working stub provider.
    """
    request = SpeechSynthesisRequest(
        text=sample_text_content.text,
        voice_settings=VoiceSettings(
            voice_id="test_voice",
            speed=1.0,
            language="en",
        ),
    )

    with patch('src.infrastructure.tts.provider_factory.TTSProviderFactory') as factory_cls:
        stub_factory = Mock()
        factory_cls.return_value = stub_factory

        # Primary provider: configured to raise on synthesis.
        # NOTE(review): this stub is never handed to the factory, so the
        # failure path is not actually exercised here -- confirm intent.
        failing_provider = Mock()
        failing_provider.synthesize.side_effect = SpeechSynthesisException("Provider 1 failed")

        # Fallback provider: returns recognisable audio bytes.
        working_provider = Mock()
        working_provider.synthesize.return_value = AudioContent(
            data=b"fallback_audio_data",
            format="wav",
            sample_rate=22050,
            duration=2.0,
        )
        stub_factory.get_provider_with_fallback.return_value = working_provider

        # No name given: ask the container for its default provider.
        audio = dependency_container.get_tts_provider().synthesize(request)

        assert isinstance(audio, AudioContent)
        assert b"fallback_audio_data" in audio.data
151
+
152
def test_stt_provider_switching(self, dependency_container, sample_audio_content):
    """Resolve each STT provider by name and check its transcription result.

    The STT factory class is patched per provider so that ``create_provider``
    hands back a stub whose transcription text embeds the provider name.
    """
    for provider_name in ('whisper-small', 'whisper-medium', 'parakeet'):
        with patch('src.infrastructure.stt.provider_factory.STTProviderFactory') as factory_cls:
            stub_provider = Mock()
            stub_provider.transcribe.return_value = TextContent(
                text=f"Transcription from {provider_name}",
                language="en",
            )

            stub_factory = Mock()
            stub_factory.create_provider.return_value = stub_provider
            factory_cls.return_value = stub_factory

            provider = dependency_container.get_stt_provider(provider_name)
            transcript = provider.transcribe(sample_audio_content, provider_name)

            assert isinstance(transcript, TextContent)
            assert provider_name in transcript.text
            stub_provider.transcribe.assert_called_once()
177
+
178
def test_stt_provider_fallback(self, dependency_container, sample_audio_content):
    """Check that the container's default STT provider yields the fallback text.

    The STT factory class is patched so that ``create_provider_with_fallback``
    returns a working stub provider.
    """
    with patch('src.infrastructure.stt.provider_factory.STTProviderFactory') as factory_cls:
        stub_factory = Mock()
        factory_cls.return_value = stub_factory

        # Primary provider: configured to raise on transcription.
        # NOTE(review): this stub is never handed to the factory, so the
        # failure path is not actually exercised here -- confirm intent.
        failing_provider = Mock()
        failing_provider.transcribe.side_effect = SpeechRecognitionException("Provider 1 failed")

        # Fallback provider: returns a recognisable transcription.
        working_provider = Mock()
        working_provider.transcribe.return_value = TextContent(
            text="Fallback transcription successful",
            language="en",
        )
        stub_factory.create_provider_with_fallback.return_value = working_provider

        # No name given: ask the container for its default provider.
        provider = dependency_container.get_stt_provider()
        transcript = provider.transcribe(sample_audio_content, "whisper-small")

        assert isinstance(transcript, TextContent)
        assert "Fallback transcription successful" in transcript.text
202
+
203
def test_translation_provider_integration(self, dependency_container):
    """Translate an English sentence to Spanish through the container's provider.

    The translation factory class is patched so that ``get_default_provider``
    returns a stub producing a fixed Spanish translation.
    """
    request = TranslationRequest(
        text="Hello, how are you?",
        source_language="en",
        target_language="es",
    )
    factory_path = 'src.infrastructure.translation.provider_factory.TranslationProviderFactory'

    with patch(factory_path) as factory_cls:
        stub_provider = Mock()
        stub_provider.translate.return_value = TextContent(
            text="Hola, ¿cómo estás?",
            language="es",
        )

        stub_factory = Mock()
        stub_factory.get_default_provider.return_value = stub_provider
        factory_cls.return_value = stub_factory

        translated = dependency_container.get_translation_provider().translate(request)

        assert isinstance(translated, TextContent)
        assert translated.text == "Hola, ¿cómo estás?"
        assert translated.language == "es"
229
+
230
def test_provider_availability_checking(self, dependency_container):
    """Check the availability queries of a stubbed TTS provider factory.

    Both stubbed entry points are exercised: the bulk
    ``get_available_providers`` listing and the per-provider
    ``is_provider_available`` predicate. The predicate was previously
    configured but never called, leaving half of the setup untested.

    NOTE(review): the assertions run against the stub factory itself, not
    against ``dependency_container`` -- confirm whether the container should
    be queried instead.
    """
    available = ['kokoro', 'dummy']

    with patch('src.infrastructure.tts.provider_factory.TTSProviderFactory') as factory_cls:
        stub_factory = Mock()
        factory_cls.return_value = stub_factory

        # Only 'kokoro' and 'dummy' are reported as available.
        stub_factory.is_provider_available.side_effect = lambda name: name in ['kokoro', 'dummy']
        stub_factory.get_available_providers.return_value = available

        # Bulk listing.
        listed = stub_factory.get_available_providers()
        assert 'kokoro' in listed
        assert 'dummy' in listed
        assert 'dia' not in listed  # Not available in mock

        # Per-provider predicate must agree with the listing.
        assert stub_factory.is_provider_available('kokoro')
        assert stub_factory.is_provider_available('dummy')
        assert not stub_factory.is_provider_available('dia')
246
+
247
def test_provider_configuration_loading(self, dependency_container, mock_config):
    """Check that the container resolves a TTS, an STT and a translation provider."""
    # Text-to-speech, resolved by explicit name.
    assert dependency_container.get_tts_provider('dummy') is not None

    # Speech-to-text, resolved by explicit name.
    assert dependency_container.get_stt_provider('whisper-small') is not None

    # Translation, resolved through the container's default.
    assert dependency_container.get_translation_provider() is not None
260
+
261
def test_provider_error_handling(self, dependency_container, sample_audio_content):
    """Check that a failing STT provider raises SpeechRecognitionException.

    The STT factory class is patched so that ``create_provider`` returns a
    stub whose ``transcribe`` always raises.
    """
    with patch('src.infrastructure.stt.provider_factory.STTProviderFactory') as factory_cls:
        broken_provider = Mock()
        broken_provider.transcribe.side_effect = SpeechRecognitionException("Provider unavailable")

        stub_factory = Mock()
        stub_factory.create_provider.return_value = broken_provider
        factory_cls.return_value = stub_factory

        provider = dependency_container.get_stt_provider('whisper-small')

        # The exception must reach the caller.
        with pytest.raises(SpeechRecognitionException):
            provider.transcribe(sample_audio_content, 'whisper-small')
277
+
278
def test_provider_performance_monitoring(self, dependency_container, sample_text_content):
    """Check that synthesis latency is observable around a slow TTS provider.

    The TTS factory class is patched so that ``create_provider`` returns a
    stub whose ``synthesize`` sleeps for a fixed delay. The elapsed time of
    resolving the provider and synthesizing is then measured.

    Timing uses ``time.perf_counter()`` instead of ``time.time()``: the wall
    clock can be adjusted (e.g. by NTP) while the test runs, which makes
    interval measurements unreliable. A small tolerance absorbs timer
    granularity so the lower bound does not flake.
    """
    import time

    simulated_delay = 0.1  # seconds the stub provider spends "processing"
    timer_tolerance = 0.02  # slack for sleep/clock granularity

    voice_settings = VoiceSettings(
        voice_id="test_voice",
        speed=1.0,
        language="en"
    )
    synthesis_request = SpeechSynthesisRequest(
        text=sample_text_content.text,
        voice_settings=voice_settings
    )

    with patch('src.infrastructure.tts.provider_factory.TTSProviderFactory') as mock_factory_class:
        mock_factory = Mock()
        mock_factory_class.return_value = mock_factory

        mock_provider = Mock()

        def slow_synthesize(request):
            time.sleep(simulated_delay)  # Simulate processing time
            return AudioContent(
                data=b"slow_audio_data",
                format="wav",
                sample_rate=22050,
                duration=2.0
            )

        mock_provider.synthesize.side_effect = slow_synthesize
        mock_factory.create_provider.return_value = mock_provider

        # Measure with a monotonic, high-resolution clock.
        start_time = time.perf_counter()
        provider = dependency_container.get_tts_provider('dummy')
        result = provider.synthesize(synthesis_request)
        processing_time = time.perf_counter() - start_time

        assert isinstance(result, AudioContent)
        # Should take at least the simulated delay, within timer tolerance.
        assert processing_time >= simulated_delay - timer_tolerance
321
+
322
def test_provider_resource_cleanup(self, dependency_container):
    """Smoke-test that ``cleanup()`` runs after providers have been resolved."""
    resolved = (
        dependency_container.get_tts_provider('dummy'),
        dependency_container.get_stt_provider('whisper-small'),
        dependency_container.get_translation_provider(),
    )
    for provider in resolved:
        assert provider is not None

    # Only verifies that cleanup does not raise; checking that individual
    # providers were released would require mocking the actual providers.
    dependency_container.cleanup()
338
+
339
def test_provider_concurrent_access(self, dependency_container, sample_text_content):
    """Run TTS synthesis from three threads and check every call succeeds.

    Each worker resolves the 'dummy' provider, stubs its ``synthesize``
    method, and pushes either the result or the raised exception onto a
    shared queue. The main thread inspects the queue after all workers join.
    """
    import threading
    import queue

    worker_count = 3
    request = SpeechSynthesisRequest(
        text=sample_text_content.text,
        voice_settings=VoiceSettings(
            voice_id="test_voice",
            speed=1.0,
            language="en",
        ),
    )
    results_queue = queue.Queue()

    def synthesize_audio():
        # Exceptions are forwarded through the queue because an exception
        # raised inside a worker thread would not fail the test by itself.
        try:
            provider = dependency_container.get_tts_provider('dummy')
            stub_audio = AudioContent(
                data=b"concurrent_audio_data",
                format="wav",
                sample_rate=22050,
                duration=2.0,
            )
            # NOTE(review): if the container hands every thread the same
            # provider object, these per-thread patches overlap -- confirm.
            with patch.object(provider, 'synthesize', return_value=stub_audio):
                results_queue.put(provider.synthesize(request))
        except Exception as e:
            results_queue.put(e)

    workers = [threading.Thread(target=synthesize_audio) for _ in range(worker_count)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Drain the queue; the first forwarded exception fails the test.
    outcomes = []
    while not results_queue.empty():
        outcome = results_queue.get()
        if isinstance(outcome, Exception):
            pytest.fail(f"Concurrent access failed: {outcome}")
        outcomes.append(outcome)

    assert len(outcomes) == 3
    assert all(isinstance(outcome, AudioContent) for outcome in outcomes)
394
+
395
def test_provider_configuration_updates(self, dependency_container, mock_config):
    """Check that the preferred TTS provider list on the config can be replaced.

    Only the configuration object is inspected. Verifying that provider
    selection follows the change would require an actual implementation of
    dynamic configuration updates.
    """
    # Starting point: 'kokoro' is among the preferred providers.
    assert 'kokoro' in mock_config.tts.preferred_providers

    # Swap in a different preference list.
    mock_config.tts.preferred_providers = ['dia', 'dummy']

    # The new list must be what the config reports afterwards.
    preferred_now = mock_config.tts.preferred_providers
    for expected_name in ('dia', 'dummy'):
        assert expected_name in preferred_now
409
+
410
def test_provider_health_checking(self, dependency_container):
    """Check the per-provider status reported by a stubbed health check.

    The assertions run against the stub factory's ``check_provider_health``
    return value; ``dependency_container`` itself is not queried.
    """
    stubbed_report = {
        'kokoro': {'status': 'healthy', 'response_time': 0.1},
        'dia': {'status': 'unhealthy', 'error': 'Connection timeout'},
        'dummy': {'status': 'healthy', 'response_time': 0.05}
    }

    with patch('src.infrastructure.tts.provider_factory.TTSProviderFactory') as factory_cls:
        stub_factory = Mock()
        factory_cls.return_value = stub_factory
        stub_factory.check_provider_health.return_value = stubbed_report

        report = stub_factory.check_provider_health()

        assert report['kokoro']['status'] == 'healthy'
        assert report['dia']['status'] == 'unhealthy'
        assert report['dummy']['status'] == 'healthy'
428
+
429
def test_provider_load_balancing(self, dependency_container):
    """Check that six unnamed provider requests are spread evenly over three providers.

    ``create_provider`` on the stub factory is driven by a local selector.
    When called without a name, the selector picks the provider with the
    fewest calls so far; ties go to the earliest entry. The assertions run
    against the stub factory; ``dependency_container`` is not queried.
    """
    with patch('src.infrastructure.tts.provider_factory.TTSProviderFactory') as factory_cls:
        stub_factory = Mock()
        factory_cls.return_value = stub_factory

        # Running tally of how often each provider has been selected.
        provider_calls = {'kokoro': 0, 'dia': 0, 'dummy': 0}

        def mock_get_provider(name=None):
            if name is None:
                # Least-used selection; with equal counts this cycles through
                # the providers in their declared order.
                name = min(provider_calls, key=provider_calls.get)
                provider_calls[name] += 1

            chosen = Mock()
            # Assigned after construction: Mock(name=...) would name the
            # mock itself rather than set a ``name`` attribute.
            chosen.name = name
            return chosen

        stub_factory.create_provider.side_effect = mock_get_provider

        # Six requests over three providers -> two each.
        selected_names = [stub_factory.create_provider().name for _ in range(6)]

        assert selected_names.count('kokoro') == 2
        assert selected_names.count('dia') == 2
        assert selected_names.count('dummy') == 2