File size: 12,131 Bytes
c549dab
 
f7492cb
cd1309d
 
c72d839
 
 
 
 
 
 
 
 
 
 
c549dab
cd1309d
f7492cb
 
 
 
 
 
 
 
 
 
 
2d2f2b9
cd1309d
 
 
 
c549dab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd1309d
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd1309d
f7492cb
 
0ee4f42
f7492cb
 
 
 
 
 
 
 
 
cd1309d
f7492cb
c549dab
 
f7492cb
cd1309d
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c549dab
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd1309d
c72d839
c549dab
 
cd1309d
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
c549dab
 
f7492cb
 
 
 
 
 
 
c549dab
f7492cb
c549dab
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
 
cd1309d
c549dab
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c549dab
 
 
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34f1262
f0248ed
c549dab
 
f7492cb
 
 
 
 
c549dab
 
f7492cb
c549dab
 
 
f7492cb
 
 
c549dab
 
f7492cb
 
c549dab
f7492cb
 
 
 
c549dab
f7492cb
 
c549dab
 
f7492cb
 
 
 
 
 
 
 
 
 
c549dab
f7492cb
0ee4f42
 
 
f7492cb
0ee4f42
 
 
c549dab
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c549dab
f7492cb
 
 
 
c549dab
 
 
 
f7492cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd1309d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
"""
Main entry point for the Audio Translation Web Application
Handles file upload, processing pipeline, and UI rendering using DDD architecture
"""

import logging
# Configure logging before the remaining imports below run.
# Records at INFO and above are formatted as
# "<time> - <logger name> - <level> - <message>" and sent to two handlers:
# a file handler writing to "app.log" (relative to the working directory)
# and a stream handler for console output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler()
    ]
)
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

import streamlit as st
import os
from typing import Optional

# Import application services and DTOs
from src.application.services.audio_processing_service import AudioProcessingApplicationService
from src.application.services.configuration_service import ConfigurationApplicationService
from src.application.dtos.audio_upload_dto import AudioUploadDto
from src.application.dtos.processing_request_dto import ProcessingRequestDto
from src.application.dtos.processing_result_dto import ProcessingResultDto

# Import infrastructure setup
from src.infrastructure.config.container_setup import initialize_global_container, get_global_container

# Ensure the working directories exist at import time (paths are relative to
# the current working directory). exist_ok=True makes this a no-op when the
# directories are already present.
# NOTE(review): presumably used by the processing pipeline for uploaded and
# generated audio -- nothing in this file reads or writes them directly.
os.makedirs("temp/uploads", exist_ok=True)
os.makedirs("temp/outputs", exist_ok=True)

def configure_page():
    """Apply the Streamlit page settings and inject the app's CSS overrides."""
    logger.info("Configuring Streamlit page")

    page_settings = {
        "page_title": "Audio Translator",
        "page_icon": "🎧",
        "layout": "wide",
        "initial_sidebar_state": "expanded",
    }
    st.set_page_config(**page_settings)

    # Raw <style> block: hides the main menu and deploy button and adjusts
    # container/alert spacing. It is HTML, hence unsafe_allow_html below.
    css_overrides = """
        <style>
            .reportview-container {margin-top: -2em;}
            #MainMenu {visibility: hidden;}
            .stDeployButton {display:none;}
            .stAlert {padding: 20px !important;}
        </style>
    """
    st.markdown(css_overrides, unsafe_allow_html=True)

def create_audio_upload_dto(uploaded_file) -> AudioUploadDto:
    """
    Create AudioUploadDto from Streamlit uploaded file.

    The content type is derived from the lower-cased file extension;
    extensions that are not in the known map fall back to 'audio/wav'.

    Args:
        uploaded_file: Streamlit UploadedFile object

    Returns:
        AudioUploadDto: DTO containing upload information

    Raises:
        ValueError: If reading the upload or building the DTO fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        content = uploaded_file.getbuffer().tobytes()

        # Determine content type based on file extension
        file_ext = os.path.splitext(uploaded_file.name.lower())[1]
        content_type_map = {
            '.wav': 'audio/wav',
            '.mp3': 'audio/mpeg',
            '.m4a': 'audio/mp4',
            '.flac': 'audio/flac',
            '.ogg': 'audio/ogg'
        }
        content_type = content_type_map.get(file_ext, 'audio/wav')

        return AudioUploadDto(
            filename=uploaded_file.name,
            content=content,
            content_type=content_type,
            size=len(content)
        )
    except Exception as e:
        # Keep the traceback in the log and chain the cause explicitly so the
        # original failure is not lost behind the generic ValueError.
        logger.error(f"Failed to create AudioUploadDto: {e}", exc_info=True)
        raise ValueError(f"Invalid audio file: {str(e)}") from e

def handle_file_processing(
    audio_upload: AudioUploadDto,
    asr_model: str,
    target_language: str,
    voice: str,
    speed: float,
    source_language: Optional[str] = None
) -> ProcessingResultDto:
    """
    Execute the complete processing pipeline using application services.

    Renders a progress bar and a status placeholder in the Streamlit page
    while the request runs. Exceptions raised during processing are not
    propagated: they are logged, shown in the UI, and converted into an
    error result.

    Args:
        audio_upload: Audio upload DTO
        asr_model: ASR model to use
        target_language: Target language for translation
        voice: Voice for TTS
        speed: Speech speed
        source_language: Source language (optional)

    Returns:
        ProcessingResultDto: The result returned by the application service,
        or an error result with code 'SYSTEM_ERROR' if an exception occurred.
    """
    logger.info(f"Starting processing for: {audio_upload.filename} using {asr_model} model")
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # Get application service from container
        container = get_global_container()
        audio_service = container.resolve(AudioProcessingApplicationService)

        # Create processing request
        request = ProcessingRequestDto(
            audio=audio_upload,
            asr_model=asr_model,
            target_language=target_language,
            voice=voice,
            speed=speed,
            source_language=source_language
        )

        # Update progress and status
        status_text.markdown("🔍 **Performing Speech Recognition...**")
        progress_bar.progress(10)

        # Process through application service
        with st.spinner("Processing audio pipeline..."):
            result = audio_service.process_audio_pipeline(request)

        if result.success:
            progress_bar.progress(100)
            status_text.success("✅ Processing Complete!")
            logger.info(f"Processing completed successfully in {result.processing_time:.2f}s")
        else:
            status_text.error(f"❌ Processing Failed: {result.error_message}")
            logger.error(f"Processing failed: {result.error_message}")

        return result

    except Exception as e:
        # Top-level boundary for the pipeline: report the failure everywhere
        # (log with traceback, status line, full exception widget) and hand
        # the caller a result object instead of raising.
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        status_text.error(f"❌ Processing Failed: {str(e)}")
        st.exception(e)

        # Return error result
        return ProcessingResultDto.error_result(
            error_message=str(e),
            error_code='SYSTEM_ERROR'
        )

def render_results(result: ProcessingResultDto):
    """
    Render a processing result in the Streamlit page.

    A failed result shows only the error message (and error code, if any).
    A successful result shows recognized/translated text and metadata in a
    wide left column, and the audio player, download button and processing
    time in a narrow right column.

    Args:
        result: Processing result DTO
    """
    logger.info("Rendering results")
    st.divider()

    # Failure path: report and stop before any result widgets are drawn.
    if not result.success:
        st.error(f"Processing failed: {result.error_message}")
        if result.error_code:
            st.code(f"Error Code: {result.error_code}")
        return

    text_column, audio_column = st.columns([2, 1])

    with text_column:
        # Each text section is rendered only when its text is non-empty.
        text_sections = (
            ("Recognition Results", result.original_text),
            ("Translation Results", result.translated_text),
        )
        for heading, body in text_sections:
            if body:
                st.subheader(heading)
                st.code(body, language="text")

        if result.metadata:
            with st.expander("Processing Details"):
                st.json(result.metadata)

    with audio_column:
        if result.has_audio_output and result.audio_path:
            st.subheader("Audio Output")
            audio_path = result.audio_path

            if not os.path.exists(audio_path):
                st.warning("Audio file not found or not accessible")
            else:
                # Inline player first, then a best-effort download button.
                st.audio(audio_path)
                try:
                    with open(audio_path, "rb") as audio_file:
                        st.download_button(
                            label="Download Audio",
                            data=audio_file,
                            file_name="translated_audio.wav",
                            mime="audio/wav"
                        )
                except Exception as download_error:
                    st.warning(f"Download not available: {str(download_error)}")

        # Shown for every successful result, with or without audio.
        st.metric("Processing Time", f"{result.processing_time:.2f}s")

def get_supported_configurations() -> dict:
    """
    Fetch the supported configurations from the audio processing service.

    If resolving the service or querying it raises, a warning is logged and
    a built-in fallback configuration is returned instead.

    Returns:
        dict: Supported configurations
    """
    # Defaults used only when the application service is unavailable.
    fallback_config = {
        'asr_models': ['whisper-small', 'parakeet'],
        'voices': ['kokoro', 'dia', 'cosyvoice2', 'dummy'],
        'languages': ['en', 'zh', 'es', 'fr', 'de'],
        'audio_formats': ['wav', 'mp3'],
        'max_file_size_mb': 100,
        'speed_range': {'min': 0.5, 'max': 2.0}
    }

    try:
        service = get_global_container().resolve(AudioProcessingApplicationService)
        return service.get_supported_configurations()
    except Exception as e:
        logger.warning(f"Failed to get configurations: {e}")
        return fallback_config

def initialize_session_state():
    """Seed session state with defaults for any keys that are not set yet."""
    session_defaults = (
        ('processing_result', None),
        ('container_initialized', False),
    )
    for key, default_value in session_defaults:
        # Existing values are left alone; only missing keys are created.
        if key not in st.session_state:
            st.session_state[key] = default_value

def initialize_application():
    """
    Initialize the application with dependency injection container.

    Does nothing when the session is already flagged as initialized. On
    failure the error is logged and shown in the UI, and the script run is
    stopped via ``st.stop()``.
    """
    # Use .get() with a default: attribute access on st.session_state raises
    # AttributeError when the key has not been seeded yet, which would crash
    # the app if this runs before initialize_session_state().
    if st.session_state.get('container_initialized', False):
        return

    try:
        logger.info("Initializing application container")
        initialize_global_container()
        st.session_state.container_initialized = True
        logger.info("Application container initialized successfully")
    except Exception as e:
        logger.error(f"Failed to initialize application: {e}", exc_info=True)
        st.error(f"Application initialization failed: {str(e)}")
        st.stop()

def main():
    """
    Main application workflow.

    Configures the page, seeds session state, initializes the dependency
    container, renders the settings widgets and file uploader, and, when the
    user presses the process button, runs the pipeline and stores the result
    in session state so it is re-rendered on later script runs.
    """
    logger.info("Starting application")

    # Order matters here:
    # 1. Page config goes first so no other Streamlit element (e.g. the
    #    st.error emitted when initialization fails) is rendered before
    #    st.set_page_config.
    # 2. Session state is seeded before initialize_application(), which
    #    reads the 'container_initialized' flag.
    configure_page()
    initialize_session_state()
    initialize_application()

    st.title("🎧 High-Quality Audio Translation System")
    st.markdown("Upload English Audio → Get Chinese Speech Output")

    # Get supported configurations
    config = get_supported_configurations()

    # Voice selection in sidebar
    st.sidebar.header("TTS Settings")

    # Map voice display names to internal IDs
    voice_options = {
        "Kokoro": "kokoro",
        "Dia": "dia",
        "CosyVoice2": "cosyvoice2",
        "Dummy (Test)": "dummy"
    }

    selected_voice_display = st.sidebar.selectbox(
        "Select Voice",
        list(voice_options.keys()),
        index=0
    )
    selected_voice = voice_options[selected_voice_display]

    # Slider arguments: min, max, default value, step
    speed = st.sidebar.slider(
        "Speech Speed",
        config['speed_range']['min'],
        config['speed_range']['max'],
        1.0,
        0.1
    )

    # Model selection
    asr_model = st.selectbox(
        "Select Speech Recognition Model",
        options=config['asr_models'],
        index=0,
        help="Choose the ASR model for speech recognition"
    )

    # Language selection (display name -> language code)
    language_options = {
        "Chinese (Mandarin)": "zh",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "English": "en"
    }

    selected_language_display = st.selectbox(
        "Target Language",
        list(language_options.keys()),
        index=0,
        help="Select the target language for translation"
    )
    target_language = language_options[selected_language_display]

    # File upload
    uploaded_file = st.file_uploader(
        f"Select Audio File ({', '.join(config['audio_formats']).upper()})",
        type=config['audio_formats'],
        accept_multiple_files=False,
        help=f"Maximum file size: {config['max_file_size_mb']}MB"
    )

    if uploaded_file:
        logger.info(f"File uploaded: {uploaded_file.name}")

        try:
            # Create audio upload DTO
            audio_upload = create_audio_upload_dto(uploaded_file)

            # Display file information
            st.info(f"📁 **File:** {audio_upload.filename} ({audio_upload.size / 1024:.1f} KB)")

            # Process button
            if st.button("🚀 Process Audio", type="primary"):
                # Process the audio
                result = handle_file_processing(
                    audio_upload=audio_upload,
                    asr_model=asr_model,
                    target_language=target_language,
                    voice=selected_voice,
                    speed=speed,
                    source_language="en"  # Assume English source for now
                )

                # Store result in session state
                st.session_state.processing_result = result

            # Display results if available (also covers script runs where
            # the button was not pressed but an earlier result is stored)
            if st.session_state.processing_result:
                render_results(st.session_state.processing_result)

        except Exception as e:
            st.error(f"Error processing file: {str(e)}")
            logger.error(f"File processing error: {e}")

# Run the application workflow only when this module is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()