Michael Hu committed on
Commit
6aea21a
·
1 Parent(s): 8a0c4b0

Implement domain services

Browse files
src/domain/services/__init__.py CHANGED
@@ -1,3 +1,7 @@
1
  """Domain services package."""
2
 
3
- # Services will be added in subsequent tasks
 
 
 
 
 
1
  """Domain services package."""
2
 
3
+ from .audio_processing_service import AudioProcessingService
4
+
5
+ __all__ = [
6
+ 'AudioProcessingService'
7
+ ]
src/domain/services/audio_processing_service.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Concrete implementation of audio processing service."""
2
+
3
+ import time
4
+ from typing import TYPE_CHECKING
5
+
6
+ from ..interfaces.audio_processing import IAudioProcessingService
7
+ from ..interfaces.speech_recognition import ISpeechRecognitionService
8
+ from ..interfaces.translation import ITranslationService
9
+ from ..interfaces.speech_synthesis import ISpeechSynthesisService
10
+ from ..models.processing_result import ProcessingResult
11
+ from ..models.translation_request import TranslationRequest
12
+ from ..models.speech_synthesis_request import SpeechSynthesisRequest
13
+ from ..exceptions import (
14
+ AudioProcessingException,
15
+ SpeechRecognitionException,
16
+ TranslationFailedException,
17
+ SpeechSynthesisException
18
+ )
19
+
20
+ if TYPE_CHECKING:
21
+ from ..models.audio_content import AudioContent
22
+ from ..models.voice_settings import VoiceSettings
23
+
24
+
25
class AudioProcessingService(IAudioProcessingService):
    """Concrete implementation of audio processing pipeline orchestration.

    Chains three injected services -- speech recognition (STT), translation
    and speech synthesis (TTS) -- and reports the outcome of a run as a
    ``ProcessingResult`` rather than propagating exceptions to the caller.
    """

    # Longest input clip accepted by the pipeline, in seconds (5 minutes).
    MAX_AUDIO_DURATION_SECONDS = 300

    # STT model handed to the recognition service unless overridden.
    DEFAULT_STT_MODEL = "whisper-base"

    def __init__(
        self,
        speech_recognition_service: ISpeechRecognitionService,
        translation_service: ITranslationService,
        speech_synthesis_service: ISpeechSynthesisService,
        stt_model: str = DEFAULT_STT_MODEL
    ):
        """
        Initialize the audio processing service with injected dependencies.

        Args:
            speech_recognition_service: Service for speech-to-text conversion
            translation_service: Service for text translation
            speech_synthesis_service: Service for text-to-speech synthesis
            stt_model: Model name passed to the speech recognition service;
                defaults to ``DEFAULT_STT_MODEL``
        """
        self._speech_recognition_service = speech_recognition_service
        self._translation_service = translation_service
        self._speech_synthesis_service = speech_synthesis_service
        self._stt_model = stt_model

    def process_audio_pipeline(
        self,
        audio: 'AudioContent',
        target_language: str,
        voice_settings: 'VoiceSettings'
    ) -> 'ProcessingResult':
        """
        Process audio through the complete pipeline: STT -> Translation -> TTS.

        Failures are not raised to the caller: any ``Exception`` raised during
        validation or one of the stages is converted into a failure result
        that carries the error message and the elapsed time.

        Args:
            audio: The input audio content
            target_language: The target language for translation
            voice_settings: Voice settings for TTS synthesis

        Returns:
            ProcessingResult: Built with ``ProcessingResult.success_result``
            when every stage completes, otherwise with
            ``ProcessingResult.failure_result``
        """
        # perf_counter() is monotonic, so the elapsed time can never come out
        # negative the way a wall-clock difference can after a clock change.
        start_time = time.perf_counter()

        try:
            # Validate inputs
            self._validate_pipeline_inputs(audio, target_language, voice_settings)

            # Step 1: Speech Recognition (STT)
            original_text = self._perform_speech_recognition(audio)

            # Step 2: Translation
            translated_text = self._perform_translation(original_text, target_language)

            # Step 3: Speech Synthesis (TTS)
            audio_output = self._perform_speech_synthesis(translated_text, voice_settings)

            return ProcessingResult.success_result(
                original_text=original_text,
                translated_text=translated_text,
                audio_output=audio_output,
                processing_time=time.perf_counter() - start_time
            )

        except (
            AudioProcessingException,
            SpeechRecognitionException,
            TranslationFailedException,
            SpeechSynthesisException
        ) as e:
            # Expected domain failures, including input validation errors:
            # report the message as-is instead of labelling it "unexpected".
            return ProcessingResult.failure_result(
                error_message=str(e),
                processing_time=time.perf_counter() - start_time
            )
        except Exception as e:
            # Anything else is a genuinely unexpected failure.
            error_message = f"Unexpected error in audio processing pipeline: {str(e)}"
            return ProcessingResult.failure_result(
                error_message=error_message,
                processing_time=time.perf_counter() - start_time
            )

    def _validate_pipeline_inputs(
        self,
        audio: 'AudioContent',
        target_language: str,
        voice_settings: 'VoiceSettings'
    ) -> None:
        """
        Validate inputs for the audio processing pipeline.

        Checks run in order and stop at the first failure: audio present,
        target language non-blank, voice settings present, voice language
        equal to the target language, duration within the limit, and a
        valid audio format.

        Args:
            audio: The input audio content
            target_language: The target language for translation
            voice_settings: Voice settings for TTS synthesis

        Raises:
            AudioProcessingException: If validation fails
        """
        if audio is None:
            raise AudioProcessingException("Audio content cannot be None")

        if not target_language or not target_language.strip():
            raise AudioProcessingException("Target language cannot be empty")

        if voice_settings is None:
            raise AudioProcessingException("Voice settings cannot be None")

        # The synthesized voice must speak the language being translated into.
        # This is an exact string comparison (no case or whitespace folding).
        if voice_settings.language != target_language:
            raise AudioProcessingException(
                f"Voice settings language ({voice_settings.language}) must match "
                f"target language ({target_language})"
            )

        # Reject clips longer than the processing limit.
        if audio.duration > self.MAX_AUDIO_DURATION_SECONDS:
            raise AudioProcessingException(
                f"Audio duration ({audio.duration:.1f}s) exceeds maximum allowed "
                f"duration ({self.MAX_AUDIO_DURATION_SECONDS}s)"
            )

        # Validate audio format is supported
        if not audio.is_valid_format:
            raise AudioProcessingException(f"Unsupported audio format: {audio.format}")

    def _perform_speech_recognition(self, audio: 'AudioContent') -> 'TextContent':
        """
        Perform speech recognition on the input audio.

        Args:
            audio: The input audio content

        Returns:
            TextContent: The transcribed text

        Raises:
            SpeechRecognitionException: If transcription fails; the original
                error is attached as ``__cause__``
        """
        try:
            return self._speech_recognition_service.transcribe(audio, self._stt_model)
        except Exception as e:
            raise SpeechRecognitionException(f"Speech recognition failed: {str(e)}") from e

    def _perform_translation(self, text: 'TextContent', target_language: str) -> 'TextContent':
        """
        Perform translation of the transcribed text.

        The translation service is skipped when the text is already in the
        target language; the input object is then returned unchanged.

        Args:
            text: The text to translate
            target_language: The target language for translation

        Returns:
            TextContent: The translated text

        Raises:
            TranslationFailedException: If translation fails; the original
                error is attached as ``__cause__``
        """
        try:
            # Check if translation is needed
            if text.language == target_language:
                return text

            translation_request = TranslationRequest(
                source_text=text,
                target_language=target_language
            )

            return self._translation_service.translate(translation_request)
        except Exception as e:
            raise TranslationFailedException(f"Translation failed: {str(e)}") from e

    def _perform_speech_synthesis(
        self,
        text: 'TextContent',
        voice_settings: 'VoiceSettings'
    ) -> 'AudioContent':
        """
        Perform speech synthesis on the translated text.

        Args:
            text: The text to synthesize
            voice_settings: Voice settings for synthesis

        Returns:
            AudioContent: The synthesized audio

        Raises:
            SpeechSynthesisException: If synthesis fails; the original error
                is attached as ``__cause__``
        """
        try:
            synthesis_request = SpeechSynthesisRequest(
                text_content=text,
                voice_settings=voice_settings
            )

            return self._speech_synthesis_service.synthesize(synthesis_request)
        except Exception as e:
            raise SpeechSynthesisException(f"Speech synthesis failed: {str(e)}") from e
tests/unit/domain/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Domain services tests package."""
tests/unit/domain/services/test_audio_processing_service.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for AudioProcessingService."""
2
+
3
+ import pytest
4
+ from unittest.mock import Mock, MagicMock
5
+ from src.domain.services.audio_processing_service import AudioProcessingService
6
+ from src.domain.models.audio_content import AudioContent
7
+ from src.domain.models.text_content import TextContent
8
+ from src.domain.models.voice_settings import VoiceSettings
9
+ from src.domain.models.translation_request import TranslationRequest
10
+ from src.domain.models.speech_synthesis_request import SpeechSynthesisRequest
11
+ from src.domain.exceptions import (
12
+ AudioProcessingException,
13
+ SpeechRecognitionException,
14
+ TranslationFailedException,
15
+ SpeechSynthesisException
16
+ )
17
+
18
+
19
class TestAudioProcessingService:
    """Unit tests for AudioProcessingService driven through mocked services."""

    @pytest.fixture
    def mock_stt_service(self):
        """Stand-in for the speech recognition dependency."""
        return Mock()

    @pytest.fixture
    def mock_translation_service(self):
        """Stand-in for the translation dependency."""
        return Mock()

    @pytest.fixture
    def mock_tts_service(self):
        """Stand-in for the speech synthesis dependency."""
        return Mock()

    @pytest.fixture
    def audio_processing_service(self, mock_stt_service, mock_translation_service, mock_tts_service):
        """Service under test, wired to the three mocks above."""
        service = AudioProcessingService(
            speech_recognition_service=mock_stt_service,
            translation_service=mock_translation_service,
            speech_synthesis_service=mock_tts_service
        )
        return service

    @pytest.fixture
    def sample_audio(self):
        """Ten-second WAV clip used as the default pipeline input."""
        return AudioContent(
            data=b"fake_audio_data",
            format="wav",
            sample_rate=22050,
            duration=10.0,
            filename="test.wav"
        )

    @pytest.fixture
    def sample_voice_settings(self):
        """Spanish voice settings matching the default target language."""
        return VoiceSettings(
            voice_id="test_voice",
            speed=1.0,
            language="es"
        )

    @pytest.fixture
    def sample_text_content(self):
        """English text content sample."""
        return TextContent(
            text="Hello world",
            language="en"
        )

    def test_successful_pipeline_processing(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        mock_tts_service,
        sample_audio,
        sample_voice_settings,
        sample_text_content
    ):
        """All three stages succeed and the result carries every artefact."""
        # Arrange: each mocked stage hands back a canned value.
        transcript = TextContent(text="Hello world", language="en")
        translation = TextContent(text="Hola mundo", language="es")
        synthesized = AudioContent(
            data=b"synthesized_audio",
            format="wav",
            sample_rate=22050,
            duration=5.0
        )
        mock_stt_service.transcribe.return_value = transcript
        mock_translation_service.translate.return_value = translation
        mock_tts_service.synthesize.return_value = synthesized

        # Act
        outcome = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert: result contents
        assert outcome.success is True
        assert outcome.original_text == transcript
        assert outcome.translated_text == translation
        assert outcome.audio_output == synthesized
        assert outcome.error_message is None
        assert outcome.processing_time >= 0

        # Assert: every stage was invoked exactly once
        mock_stt_service.transcribe.assert_called_once_with(sample_audio, "whisper-base")
        mock_translation_service.translate.assert_called_once()
        mock_tts_service.synthesize.assert_called_once()

    def test_no_translation_needed_same_language(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        mock_tts_service,
        sample_audio
    ):
        """Transcript already in the target language bypasses translation."""
        # Arrange
        transcript = TextContent(text="Hola mundo", language="es")
        spanish_voice = VoiceSettings(voice_id="test_voice", speed=1.0, language="es")
        synthesized = AudioContent(
            data=b"synthesized_audio",
            format="wav",
            sample_rate=22050,
            duration=5.0
        )
        mock_stt_service.transcribe.return_value = transcript
        mock_tts_service.synthesize.return_value = synthesized

        # Act
        outcome = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=spanish_voice
        )

        # Assert
        assert outcome.success is True
        assert outcome.original_text == transcript
        # The "translated" text is simply the transcript itself.
        assert outcome.translated_text == transcript
        assert outcome.audio_output == synthesized

        # The translation stage must have been skipped entirely.
        mock_translation_service.translate.assert_not_called()

    def test_validation_error_none_audio(self, audio_processing_service, sample_voice_settings):
        """Missing audio yields a failure result, not an exception."""
        outcome = audio_processing_service.process_audio_pipeline(
            audio=None,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        assert outcome.success is False
        assert "Audio content cannot be None" in outcome.error_message

    def test_validation_error_empty_target_language(self, audio_processing_service, sample_audio, sample_voice_settings):
        """An empty target language is rejected during validation."""
        outcome = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="",
            voice_settings=sample_voice_settings
        )

        assert outcome.success is False
        assert "Target language cannot be empty" in outcome.error_message

    def test_validation_error_language_mismatch(self, audio_processing_service, sample_audio):
        """Voice language differing from the target language is rejected."""
        # Arrange: English voice, Spanish target.
        english_voice = VoiceSettings(voice_id="test_voice", speed=1.0, language="en")

        # Act
        outcome = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=english_voice
        )

        # Assert
        assert outcome.success is False
        assert "Voice settings language (en) must match target language (es)" in outcome.error_message

    def test_validation_error_audio_too_long(self, audio_processing_service, sample_voice_settings):
        """Audio longer than the 300s limit is rejected."""
        # Arrange: 400 seconds is over the limit.
        oversized_audio = AudioContent(
            data=b"fake_audio_data",
            format="wav",
            sample_rate=22050,
            duration=400.0
        )

        # Act
        outcome = audio_processing_service.process_audio_pipeline(
            audio=oversized_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert outcome.success is False
        assert "exceeds maximum allowed duration" in outcome.error_message

    def test_stt_failure_handling(
        self,
        audio_processing_service,
        mock_stt_service,
        sample_audio,
        sample_voice_settings
    ):
        """An error raised by the STT service becomes a failure result."""
        # Arrange
        mock_stt_service.transcribe.side_effect = Exception("STT service unavailable")

        # Act
        outcome = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert outcome.success is False
        assert "Speech recognition failed" in outcome.error_message
        assert outcome.processing_time >= 0

    def test_translation_failure_handling(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        sample_audio,
        sample_voice_settings
    ):
        """An error raised by the translation service becomes a failure result."""
        # Arrange: STT succeeds, translation blows up.
        mock_stt_service.transcribe.return_value = TextContent(text="Hello world", language="en")
        mock_translation_service.translate.side_effect = Exception("Translation service unavailable")

        # Act
        outcome = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert outcome.success is False
        assert "Translation failed" in outcome.error_message
        assert outcome.processing_time >= 0

    def test_tts_failure_handling(
        self,
        audio_processing_service,
        mock_stt_service,
        mock_translation_service,
        mock_tts_service,
        sample_audio,
        sample_voice_settings
    ):
        """An error raised by the TTS service becomes a failure result."""
        # Arrange: the first two stages succeed, synthesis blows up.
        mock_stt_service.transcribe.return_value = TextContent(text="Hello world", language="en")
        mock_translation_service.translate.return_value = TextContent(text="Hola mundo", language="es")
        mock_tts_service.synthesize.side_effect = Exception("TTS service unavailable")

        # Act
        outcome = audio_processing_service.process_audio_pipeline(
            audio=sample_audio,
            target_language="es",
            voice_settings=sample_voice_settings
        )

        # Assert
        assert outcome.success is False
        assert "Speech synthesis failed" in outcome.error_message
        assert outcome.processing_time >= 0