Michael Hu committed on
Commit
8023ba2
·
1 Parent(s): c28c1de

feat(logging): add detailed logging for audio upload and configuration handling

Browse files
Files changed (2) hide show
  1. app.py +37 -4
  2. src/application/dtos/audio_upload_dto.py +27 -0
app.py CHANGED
@@ -64,12 +64,16 @@ def create_audio_upload_dto(audio_file_path: str) -> AudioUploadDto:
64
  raise ValueError("No audio file provided or file does not exist")
65
 
66
  filename = os.path.basename(audio_file_path)
 
 
67
 
68
  with open(audio_file_path, 'rb') as f:
69
  content = f.read()
70
 
71
  # Determine content type based on file extension
72
  file_ext = os.path.splitext(filename.lower())[1]
 
 
73
  content_type_map = {
74
  '.wav': 'audio/wav',
75
  '.mp3': 'audio/mpeg',
@@ -78,6 +82,11 @@ def create_audio_upload_dto(audio_file_path: str) -> AudioUploadDto:
78
  '.ogg': 'audio/ogg'
79
  }
80
  content_type = content_type_map.get(file_ext, 'audio/wav')
 
 
 
 
 
81
 
82
  return AudioUploadDto(
83
  filename=filename,
@@ -107,15 +116,18 @@ def get_supported_configurations() -> dict:
107
  return config
108
  except Exception as e:
109
  logger.error(f"Failed to get configurations: {e}", exc_info=True)
 
110
  # Return fallback configurations
111
- return {
112
  'asr_models': ['whisper-small', 'parakeet'],
113
  'voices': ['chatterbox'],
114
  'languages': ['en', 'zh', 'es', 'fr', 'de'],
115
- 'audio_formats': ['wav', 'mp3'],
116
  'max_file_size_mb': 100,
117
  'speed_range': {'min': 0.5, 'max': 2.0}
118
  }
 
 
119
 
120
  def process_audio_pipeline(
121
  audio_file,
@@ -144,9 +156,12 @@ def process_audio_pipeline(
144
  return "❌ No audio file provided", "", "", None, ""
145
 
146
  logger.info(f"Starting processing for: {audio_file} using {asr_model} model")
 
147
 
148
  # Create audio upload DTO
 
149
  audio_upload = create_audio_upload_dto(audio_file)
 
150
 
151
  # Get application service from container
152
  container = get_global_container()
@@ -208,10 +223,19 @@ def create_interface():
208
  # Get supported configurations
209
  config = get_supported_configurations()
210
 
 
 
 
 
 
 
 
 
 
211
  # Language options mapping
212
  language_options = {
213
  "Chinese (Mandarin)": "zh",
214
- "Spanish": "es",
215
  "French": "fr",
216
  "German": "de",
217
  "English": "en"
@@ -232,10 +256,19 @@ def create_interface():
232
  )
233
 
234
  # Create the interface using gr.Interface for better compatibility
 
 
 
235
  interface = gr.Interface(
236
  fn=process_wrapper,
237
  inputs=[
238
- gr.Audio(label="Upload Audio File", type="filepath"),
 
 
 
 
 
 
239
  gr.Dropdown(
240
  choices=config['asr_models'],
241
  value=config['asr_models'][0] if config['asr_models'] else "parakeet",
 
64
  raise ValueError("No audio file provided or file does not exist")
65
 
66
  filename = os.path.basename(audio_file_path)
67
+ logger.info(f"Creating AudioUploadDto for file: {filename}")
68
+ logger.info(f"Full file path: {audio_file_path}")
69
 
70
  with open(audio_file_path, 'rb') as f:
71
  content = f.read()
72
 
73
  # Determine content type based on file extension
74
  file_ext = os.path.splitext(filename.lower())[1]
75
+ logger.info(f"Detected file extension: {file_ext}")
76
+
77
  content_type_map = {
78
  '.wav': 'audio/wav',
79
  '.mp3': 'audio/mpeg',
 
82
  '.ogg': 'audio/ogg'
83
  }
84
  content_type = content_type_map.get(file_ext, 'audio/wav')
85
+ logger.info(f"Mapped content type: {content_type}")
86
+
87
+ # Log file size info
88
+ file_size = len(content)
89
+ logger.info(f"File size: {file_size} bytes ({file_size / 1024 / 1024:.2f} MB)")
90
 
91
  return AudioUploadDto(
92
  filename=filename,
 
116
  return config
117
  except Exception as e:
118
  logger.error(f"Failed to get configurations: {e}", exc_info=True)
119
+ logger.warning("Using fallback configurations - this may indicate a configuration service issue")
120
  # Return fallback configurations
121
+ fallback_config = {
122
  'asr_models': ['whisper-small', 'parakeet'],
123
  'voices': ['chatterbox'],
124
  'languages': ['en', 'zh', 'es', 'fr', 'de'],
125
+ 'audio_formats': ['wav', 'mp3', 'm4a', 'flac', 'ogg'], # Updated to include all supported formats
126
  'max_file_size_mb': 100,
127
  'speed_range': {'min': 0.5, 'max': 2.0}
128
  }
129
+ logger.info(f"Using fallback configuration: {fallback_config}")
130
+ return fallback_config
131
 
132
  def process_audio_pipeline(
133
  audio_file,
 
156
  return "❌ No audio file provided", "", "", None, ""
157
 
158
  logger.info(f"Starting processing for: {audio_file} using {asr_model} model")
159
+ logger.info(f"Audio file exists: {os.path.exists(audio_file) if audio_file else 'N/A'}")
160
 
161
  # Create audio upload DTO
162
+ logger.info("Creating AudioUploadDto...")
163
  audio_upload = create_audio_upload_dto(audio_file)
164
+ logger.info(f"AudioUploadDto created successfully - Content-Type: {audio_upload.content_type}")
165
 
166
  # Get application service from container
167
  container = get_global_container()
 
223
  # Get supported configurations
224
  config = get_supported_configurations()
225
 
226
+ # Log configuration details for debugging
227
+ logger.info("=== Gradio Interface Configuration ===")
228
+ logger.info(f"Supported ASR models: {config.get('asr_models', [])}")
229
+ logger.info(f"Supported voices: {config.get('voices', [])}")
230
+ logger.info(f"Supported audio formats: {config.get('audio_formats', [])}")
231
+ logger.info(f"Max file size: {config.get('max_file_size_mb', 0)} MB")
232
+ logger.info(f"Speed range: {config.get('speed_range', {})}")
233
+ logger.info("=== End Configuration ===")
234
+
235
  # Language options mapping
236
  language_options = {
237
  "Chinese (Mandarin)": "zh",
238
+ "Spanish": "es",
239
  "French": "fr",
240
  "German": "de",
241
  "English": "en"
 
256
  )
257
 
258
  # Create the interface using gr.Interface for better compatibility
259
+ logger.info("Creating Gradio interface with updated file type support...")
260
+ logger.info("Updated file types for Audio component: .wav, .mp3, .m4a, .flac, .ogg")
261
+
262
  interface = gr.Interface(
263
  fn=process_wrapper,
264
  inputs=[
265
+ gr.Audio(
266
+ label="Upload Audio File",
267
+ type="filepath",
268
+ file_types=[".wav", ".mp3", ".m4a", ".flac", ".ogg"],
269
+ # Accept both file extensions and MIME types
270
+ # This explicitly allows mp3 files to pass Gradio's frontend validation
271
+ ),
272
  gr.Dropdown(
273
  choices=config['asr_models'],
274
  value=config['asr_models'][0] if config['asr_models'] else "parakeet",
src/application/dtos/audio_upload_dto.py CHANGED
@@ -20,6 +20,33 @@ class AudioUploadDto:
20
 
21
  def __post_init__(self):
22
  """Validate the DTO after initialization"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  self._validate()
24
  if self.size is None:
25
  self.size = len(self.content)
 
20
 
21
  def __post_init__(self):
22
  """Validate the DTO after initialization"""
23
+ # Add logging for debugging mp3 validation issues
24
+ import logging
25
+ logger = logging.getLogger(__name__)
26
+
27
+ logger.info(f"Validating AudioUploadDto - Filename: {self.filename}")
28
+ logger.info(f"Content-Type: {self.content_type}")
29
+ logger.info(f"File size: {len(self.content)} bytes")
30
+
31
+ # Check file extension and MIME type mapping
32
+ _, ext = os.path.splitext(self.filename.lower())
33
+ logger.info(f"File extension: {ext}")
34
+
35
+ content_type_map = {
36
+ '.wav': 'audio/wav',
37
+ '.mp3': 'audio/mpeg',
38
+ '.m4a': 'audio/mp4',
39
+ '.flac': 'audio/flac',
40
+ '.ogg': 'audio/ogg'
41
+ }
42
+ expected_content_type = content_type_map.get(ext)
43
+ logger.info(f"Expected content type for {ext}: {expected_content_type}")
44
+ logger.info(f"Actual content type: {self.content_type}")
45
+
46
+ # Check mimetypes.guess_type result
47
+ guessed_type = mimetypes.guess_type(self.filename)[0]
48
+ logger.info(f"mimetypes.guess_type result: {guessed_type}")
49
+
50
  self._validate()
51
  if self.size is None:
52
  self.size = len(self.content)