Spaces:
Sleeping
Sleeping
Michael Hu
committed on
Commit
·
8023ba2
1
Parent(s):
c28c1de
feat(logging): add detailed logging for audio upload and configuration handling
Browse files
- app.py +37 -4
- src/application/dtos/audio_upload_dto.py +27 -0
app.py
CHANGED
|
@@ -64,12 +64,16 @@ def create_audio_upload_dto(audio_file_path: str) -> AudioUploadDto:
|
|
| 64 |
raise ValueError("No audio file provided or file does not exist")
|
| 65 |
|
| 66 |
filename = os.path.basename(audio_file_path)
|
|
|
|
|
|
|
| 67 |
|
| 68 |
with open(audio_file_path, 'rb') as f:
|
| 69 |
content = f.read()
|
| 70 |
|
| 71 |
# Determine content type based on file extension
|
| 72 |
file_ext = os.path.splitext(filename.lower())[1]
|
|
|
|
|
|
|
| 73 |
content_type_map = {
|
| 74 |
'.wav': 'audio/wav',
|
| 75 |
'.mp3': 'audio/mpeg',
|
|
@@ -78,6 +82,11 @@ def create_audio_upload_dto(audio_file_path: str) -> AudioUploadDto:
|
|
| 78 |
'.ogg': 'audio/ogg'
|
| 79 |
}
|
| 80 |
content_type = content_type_map.get(file_ext, 'audio/wav')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
return AudioUploadDto(
|
| 83 |
filename=filename,
|
|
@@ -107,15 +116,18 @@ def get_supported_configurations() -> dict:
|
|
| 107 |
return config
|
| 108 |
except Exception as e:
|
| 109 |
logger.error(f"Failed to get configurations: {e}", exc_info=True)
|
|
|
|
| 110 |
# Return fallback configurations
|
| 111 |
-
|
| 112 |
'asr_models': ['whisper-small', 'parakeet'],
|
| 113 |
'voices': ['chatterbox'],
|
| 114 |
'languages': ['en', 'zh', 'es', 'fr', 'de'],
|
| 115 |
-
'audio_formats': ['wav', 'mp3'],
|
| 116 |
'max_file_size_mb': 100,
|
| 117 |
'speed_range': {'min': 0.5, 'max': 2.0}
|
| 118 |
}
|
|
|
|
|
|
|
| 119 |
|
| 120 |
def process_audio_pipeline(
|
| 121 |
audio_file,
|
|
@@ -144,9 +156,12 @@ def process_audio_pipeline(
|
|
| 144 |
return "❌ No audio file provided", "", "", None, ""
|
| 145 |
|
| 146 |
logger.info(f"Starting processing for: {audio_file} using {asr_model} model")
|
|
|
|
| 147 |
|
| 148 |
# Create audio upload DTO
|
|
|
|
| 149 |
audio_upload = create_audio_upload_dto(audio_file)
|
|
|
|
| 150 |
|
| 151 |
# Get application service from container
|
| 152 |
container = get_global_container()
|
|
@@ -208,10 +223,19 @@ def create_interface():
|
|
| 208 |
# Get supported configurations
|
| 209 |
config = get_supported_configurations()
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
# Language options mapping
|
| 212 |
language_options = {
|
| 213 |
"Chinese (Mandarin)": "zh",
|
| 214 |
-
"Spanish": "es",
|
| 215 |
"French": "fr",
|
| 216 |
"German": "de",
|
| 217 |
"English": "en"
|
|
@@ -232,10 +256,19 @@ def create_interface():
|
|
| 232 |
)
|
| 233 |
|
| 234 |
# Create the interface using gr.Interface for better compatibility
|
|
|
|
|
|
|
|
|
|
| 235 |
interface = gr.Interface(
|
| 236 |
fn=process_wrapper,
|
| 237 |
inputs=[
|
| 238 |
-
gr.Audio(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
gr.Dropdown(
|
| 240 |
choices=config['asr_models'],
|
| 241 |
value=config['asr_models'][0] if config['asr_models'] else "parakeet",
|
|
|
|
| 64 |
raise ValueError("No audio file provided or file does not exist")
|
| 65 |
|
| 66 |
filename = os.path.basename(audio_file_path)
|
| 67 |
+
logger.info(f"Creating AudioUploadDto for file: {filename}")
|
| 68 |
+
logger.info(f"Full file path: {audio_file_path}")
|
| 69 |
|
| 70 |
with open(audio_file_path, 'rb') as f:
|
| 71 |
content = f.read()
|
| 72 |
|
| 73 |
# Determine content type based on file extension
|
| 74 |
file_ext = os.path.splitext(filename.lower())[1]
|
| 75 |
+
logger.info(f"Detected file extension: {file_ext}")
|
| 76 |
+
|
| 77 |
content_type_map = {
|
| 78 |
'.wav': 'audio/wav',
|
| 79 |
'.mp3': 'audio/mpeg',
|
|
|
|
| 82 |
'.ogg': 'audio/ogg'
|
| 83 |
}
|
| 84 |
content_type = content_type_map.get(file_ext, 'audio/wav')
|
| 85 |
+
logger.info(f"Mapped content type: {content_type}")
|
| 86 |
+
|
| 87 |
+
# Log file size info
|
| 88 |
+
file_size = len(content)
|
| 89 |
+
logger.info(f"File size: {file_size} bytes ({file_size / 1024 / 1024:.2f} MB)")
|
| 90 |
|
| 91 |
return AudioUploadDto(
|
| 92 |
filename=filename,
|
|
|
|
| 116 |
return config
|
| 117 |
except Exception as e:
|
| 118 |
logger.error(f"Failed to get configurations: {e}", exc_info=True)
|
| 119 |
+
logger.warning("Using fallback configurations - this may indicate a configuration service issue")
|
| 120 |
# Return fallback configurations
|
| 121 |
+
fallback_config = {
|
| 122 |
'asr_models': ['whisper-small', 'parakeet'],
|
| 123 |
'voices': ['chatterbox'],
|
| 124 |
'languages': ['en', 'zh', 'es', 'fr', 'de'],
|
| 125 |
+
'audio_formats': ['wav', 'mp3', 'm4a', 'flac', 'ogg'], # Updated to include all supported formats
|
| 126 |
'max_file_size_mb': 100,
|
| 127 |
'speed_range': {'min': 0.5, 'max': 2.0}
|
| 128 |
}
|
| 129 |
+
logger.info(f"Using fallback configuration: {fallback_config}")
|
| 130 |
+
return fallback_config
|
| 131 |
|
| 132 |
def process_audio_pipeline(
|
| 133 |
audio_file,
|
|
|
|
| 156 |
return "❌ No audio file provided", "", "", None, ""
|
| 157 |
|
| 158 |
logger.info(f"Starting processing for: {audio_file} using {asr_model} model")
|
| 159 |
+
logger.info(f"Audio file exists: {os.path.exists(audio_file) if audio_file else 'N/A'}")
|
| 160 |
|
| 161 |
# Create audio upload DTO
|
| 162 |
+
logger.info("Creating AudioUploadDto...")
|
| 163 |
audio_upload = create_audio_upload_dto(audio_file)
|
| 164 |
+
logger.info(f"AudioUploadDto created successfully - Content-Type: {audio_upload.content_type}")
|
| 165 |
|
| 166 |
# Get application service from container
|
| 167 |
container = get_global_container()
|
|
|
|
| 223 |
# Get supported configurations
|
| 224 |
config = get_supported_configurations()
|
| 225 |
|
| 226 |
+
# Log configuration details for debugging
|
| 227 |
+
logger.info("=== Gradio Interface Configuration ===")
|
| 228 |
+
logger.info(f"Supported ASR models: {config.get('asr_models', [])}")
|
| 229 |
+
logger.info(f"Supported voices: {config.get('voices', [])}")
|
| 230 |
+
logger.info(f"Supported audio formats: {config.get('audio_formats', [])}")
|
| 231 |
+
logger.info(f"Max file size: {config.get('max_file_size_mb', 0)} MB")
|
| 232 |
+
logger.info(f"Speed range: {config.get('speed_range', {})}")
|
| 233 |
+
logger.info("=== End Configuration ===")
|
| 234 |
+
|
| 235 |
# Language options mapping
|
| 236 |
language_options = {
|
| 237 |
"Chinese (Mandarin)": "zh",
|
| 238 |
+
"Spanish": "es",
|
| 239 |
"French": "fr",
|
| 240 |
"German": "de",
|
| 241 |
"English": "en"
|
|
|
|
| 256 |
)
|
| 257 |
|
| 258 |
# Create the interface using gr.Interface for better compatibility
|
| 259 |
+
logger.info("Creating Gradio interface with updated file type support...")
|
| 260 |
+
logger.info("Updated file types for Audio component: .wav, .mp3, .m4a, .flac, .ogg")
|
| 261 |
+
|
| 262 |
interface = gr.Interface(
|
| 263 |
fn=process_wrapper,
|
| 264 |
inputs=[
|
| 265 |
+
gr.Audio(
|
| 266 |
+
label="Upload Audio File",
|
| 267 |
+
type="filepath",
|
| 268 |
+
file_types=[".wav", ".mp3", ".m4a", ".flac", ".ogg"],
|
| 269 |
+
# Accept both file extensions and MIME types
|
| 270 |
+
# This explicitly allows mp3 files to pass Gradio's frontend validation
|
| 271 |
+
),
|
| 272 |
gr.Dropdown(
|
| 273 |
choices=config['asr_models'],
|
| 274 |
value=config['asr_models'][0] if config['asr_models'] else "parakeet",
|
src/application/dtos/audio_upload_dto.py
CHANGED
|
@@ -20,6 +20,33 @@ class AudioUploadDto:
|
|
| 20 |
|
| 21 |
def __post_init__(self):
|
| 22 |
"""Validate the DTO after initialization"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
self._validate()
|
| 24 |
if self.size is None:
|
| 25 |
self.size = len(self.content)
|
|
|
|
| 20 |
|
| 21 |
def __post_init__(self):
|
| 22 |
"""Validate the DTO after initialization"""
|
| 23 |
+
# Add logging for debugging mp3 validation issues
|
| 24 |
+
import logging
|
| 25 |
+
logger = logging.getLogger(__name__)
|
| 26 |
+
|
| 27 |
+
logger.info(f"Validating AudioUploadDto - Filename: {self.filename}")
|
| 28 |
+
logger.info(f"Content-Type: {self.content_type}")
|
| 29 |
+
logger.info(f"File size: {len(self.content)} bytes")
|
| 30 |
+
|
| 31 |
+
# Check file extension and MIME type mapping
|
| 32 |
+
_, ext = os.path.splitext(self.filename.lower())
|
| 33 |
+
logger.info(f"File extension: {ext}")
|
| 34 |
+
|
| 35 |
+
content_type_map = {
|
| 36 |
+
'.wav': 'audio/wav',
|
| 37 |
+
'.mp3': 'audio/mpeg',
|
| 38 |
+
'.m4a': 'audio/mp4',
|
| 39 |
+
'.flac': 'audio/flac',
|
| 40 |
+
'.ogg': 'audio/ogg'
|
| 41 |
+
}
|
| 42 |
+
expected_content_type = content_type_map.get(ext)
|
| 43 |
+
logger.info(f"Expected content type for {ext}: {expected_content_type}")
|
| 44 |
+
logger.info(f"Actual content type: {self.content_type}")
|
| 45 |
+
|
| 46 |
+
# Check mimetypes.guess_type result
|
| 47 |
+
guessed_type = mimetypes.guess_type(self.filename)[0]
|
| 48 |
+
logger.info(f"mimetypes.guess_type result: {guessed_type}")
|
| 49 |
+
|
| 50 |
self._validate()
|
| 51 |
if self.size is None:
|
| 52 |
self.size = len(self.content)
|