Spaces:
Build error
Build error
Michael Hu
commited on
Commit
·
fdc056d
1
Parent(s):
f7aaf3b
add more logs
Browse files- DEVELOPER_GUIDE.md +2 -2
- src/application/dtos/dto_validation.py +36 -36
- src/application/error_handling/error_mapper.py +1 -1
- src/application/error_handling/structured_logger.py +1 -1
- src/application/services/configuration_service.py +4 -4
- src/infrastructure/base/file_utils.py +9 -9
- src/infrastructure/base/stt_provider_base.py +4 -4
- src/infrastructure/base/translation_provider_base.py +2 -2
- src/infrastructure/base/tts_provider_base.py +1 -1
- src/infrastructure/config/app_config.py +8 -8
- src/infrastructure/config/dependency_container.py +9 -3
- src/infrastructure/stt/legacy_compatibility.py +26 -26
- src/infrastructure/stt/parakeet_provider.py +8 -8
- src/infrastructure/stt/provider_factory.py +17 -17
- src/infrastructure/stt/whisper_provider.py +8 -8
- src/infrastructure/translation/nllb_provider.py +3 -3
- src/infrastructure/translation/provider_factory.py +3 -3
- src/infrastructure/tts/dia_provider.py +118 -35
- src/infrastructure/tts/dummy_provider.py +15 -15
- src/infrastructure/tts/kokoro_provider.py +5 -5
- src/infrastructure/tts/provider_factory.py +24 -6
- src/infrastructure/utils/dependency_installer.py +304 -0
- tests/unit/application/error_handling/test_structured_logger.py +1 -1
- utils/stt.py +21 -21
- utils/translation.py +4 -4
- utils/tts.py +16 -16
- utils/tts_dia.py +28 -28
- utils/tts_dummy.py +12 -12
- utils/tts_kokoro.py +23 -23
DEVELOPER_GUIDE.md
CHANGED
@@ -173,7 +173,7 @@ def _register_default_providers(self):
|
|
173 |
self._providers['my_tts'] = MyTTSProvider
|
174 |
logger.info("Registered MyTTS provider")
|
175 |
except ImportError as e:
|
176 |
-
logger.
|
177 |
```
|
178 |
|
179 |
### Step 3: Add Configuration Support
|
@@ -590,7 +590,7 @@ import logging
|
|
590 |
logger = logging.getLogger(__name__)
|
591 |
|
592 |
# Use appropriate log levels
|
593 |
-
logger.
|
594 |
logger.info("General information about program execution")
|
595 |
logger.warning("Something unexpected happened")
|
596 |
logger.error("A serious error occurred")
|
|
|
173 |
self._providers['my_tts'] = MyTTSProvider
|
174 |
logger.info("Registered MyTTS provider")
|
175 |
except ImportError as e:
|
176 |
+
logger.info(f"MyTTS provider not available: {e}")
|
177 |
```
|
178 |
|
179 |
### Step 3: Add Configuration Support
|
|
|
590 |
logger = logging.getLogger(__name__)
|
591 |
|
592 |
# Use appropriate log levels
|
593 |
+
logger.info("Detailed debugging information")
|
594 |
logger.info("General information about program execution")
|
595 |
logger.warning("Something unexpected happened")
|
596 |
logger.error("A serious error occurred")
|
src/application/dtos/dto_validation.py
CHANGED
@@ -15,13 +15,13 @@ T = TypeVar('T')
|
|
15 |
|
16 |
class ValidationError(Exception):
|
17 |
"""Custom exception for DTO validation errors"""
|
18 |
-
|
19 |
def __init__(self, message: str, field: str = None, value: Any = None):
|
20 |
self.message = message
|
21 |
self.field = field
|
22 |
self.value = value
|
23 |
super().__init__(self.message)
|
24 |
-
|
25 |
def __str__(self):
|
26 |
if self.field:
|
27 |
return f"Validation error for field '{self.field}': {self.message}"
|
@@ -30,13 +30,13 @@ class ValidationError(Exception):
|
|
30 |
|
31 |
def validate_dto(dto_instance: Any) -> bool:
|
32 |
"""Validate a DTO instance
|
33 |
-
|
34 |
Args:
|
35 |
dto_instance: The DTO instance to validate
|
36 |
-
|
37 |
Returns:
|
38 |
bool: True if validation passes
|
39 |
-
|
40 |
Raises:
|
41 |
ValidationError: If validation fails
|
42 |
"""
|
@@ -44,11 +44,11 @@ def validate_dto(dto_instance: Any) -> bool:
|
|
44 |
# Call the DTO's validation method if it exists
|
45 |
if hasattr(dto_instance, '_validate'):
|
46 |
dto_instance._validate()
|
47 |
-
|
48 |
# Additional validation can be added here
|
49 |
-
logger.
|
50 |
return True
|
51 |
-
|
52 |
except ValueError as e:
|
53 |
logger.error(f"Validation failed for {type(dto_instance).__name__}: {e}")
|
54 |
raise ValidationError(str(e)) from e
|
@@ -59,10 +59,10 @@ def validate_dto(dto_instance: Any) -> bool:
|
|
59 |
|
60 |
def validation_required(func: Callable[..., T]) -> Callable[..., T]:
|
61 |
"""Decorator to ensure DTO validation before method execution
|
62 |
-
|
63 |
Args:
|
64 |
func: The method to decorate
|
65 |
-
|
66 |
Returns:
|
67 |
Decorated function that validates 'self' before execution
|
68 |
"""
|
@@ -75,23 +75,23 @@ def validation_required(func: Callable[..., T]) -> Callable[..., T]:
|
|
75 |
raise
|
76 |
except Exception as e:
|
77 |
raise ValidationError(f"Error in {func.__name__}: {e}") from e
|
78 |
-
|
79 |
return wrapper
|
80 |
|
81 |
|
82 |
-
def validate_field(value: Any, field_name: str, validator: Callable[[Any], bool],
|
83 |
error_message: str = None) -> Any:
|
84 |
"""Validate a single field value
|
85 |
-
|
86 |
Args:
|
87 |
value: The value to validate
|
88 |
field_name: Name of the field being validated
|
89 |
validator: Function that returns True if value is valid
|
90 |
error_message: Custom error message
|
91 |
-
|
92 |
Returns:
|
93 |
The validated value
|
94 |
-
|
95 |
Raises:
|
96 |
ValidationError: If validation fails
|
97 |
"""
|
@@ -108,37 +108,37 @@ def validate_field(value: Any, field_name: str, validator: Callable[[Any], bool]
|
|
108 |
|
109 |
def validate_required(value: Any, field_name: str) -> Any:
|
110 |
"""Validate that a field is not None or empty
|
111 |
-
|
112 |
Args:
|
113 |
value: The value to validate
|
114 |
field_name: Name of the field being validated
|
115 |
-
|
116 |
Returns:
|
117 |
The validated value
|
118 |
-
|
119 |
Raises:
|
120 |
ValidationError: If field is None or empty
|
121 |
"""
|
122 |
if value is None:
|
123 |
raise ValidationError(f"Field '{field_name}' is required", field_name, value)
|
124 |
-
|
125 |
if isinstance(value, (str, list, dict)) and len(value) == 0:
|
126 |
raise ValidationError(f"Field '{field_name}' cannot be empty", field_name, value)
|
127 |
-
|
128 |
return value
|
129 |
|
130 |
|
131 |
def validate_type(value: Any, field_name: str, expected_type: Union[type, tuple]) -> Any:
|
132 |
"""Validate that a field is of the expected type
|
133 |
-
|
134 |
Args:
|
135 |
value: The value to validate
|
136 |
field_name: Name of the field being validated
|
137 |
expected_type: Expected type or tuple of types
|
138 |
-
|
139 |
Returns:
|
140 |
The validated value
|
141 |
-
|
142 |
Raises:
|
143 |
ValidationError: If type doesn't match
|
144 |
"""
|
@@ -148,30 +148,30 @@ def validate_type(value: Any, field_name: str, expected_type: Union[type, tuple]
|
|
148 |
expected_str = " or ".join(type_names)
|
149 |
else:
|
150 |
expected_str = expected_type.__name__
|
151 |
-
|
152 |
actual_type = type(value).__name__
|
153 |
raise ValidationError(
|
154 |
f"Field '{field_name}' must be of type {expected_str}, got {actual_type}",
|
155 |
field_name, value
|
156 |
)
|
157 |
-
|
158 |
return value
|
159 |
|
160 |
|
161 |
-
def validate_range(value: Union[int, float], field_name: str,
|
162 |
-
min_value: Union[int, float] = None,
|
163 |
max_value: Union[int, float] = None) -> Union[int, float]:
|
164 |
"""Validate that a numeric value is within a specified range
|
165 |
-
|
166 |
Args:
|
167 |
value: The numeric value to validate
|
168 |
field_name: Name of the field being validated
|
169 |
min_value: Minimum allowed value (inclusive)
|
170 |
max_value: Maximum allowed value (inclusive)
|
171 |
-
|
172 |
Returns:
|
173 |
The validated value
|
174 |
-
|
175 |
Raises:
|
176 |
ValidationError: If value is outside the range
|
177 |
"""
|
@@ -180,27 +180,27 @@ def validate_range(value: Union[int, float], field_name: str,
|
|
180 |
f"Field '{field_name}' must be >= {min_value}, got {value}",
|
181 |
field_name, value
|
182 |
)
|
183 |
-
|
184 |
if max_value is not None and value > max_value:
|
185 |
raise ValidationError(
|
186 |
f"Field '{field_name}' must be <= {max_value}, got {value}",
|
187 |
field_name, value
|
188 |
)
|
189 |
-
|
190 |
return value
|
191 |
|
192 |
|
193 |
def validate_choices(value: Any, field_name: str, choices: list) -> Any:
|
194 |
"""Validate that a value is one of the allowed choices
|
195 |
-
|
196 |
Args:
|
197 |
value: The value to validate
|
198 |
field_name: Name of the field being validated
|
199 |
choices: List of allowed values
|
200 |
-
|
201 |
Returns:
|
202 |
The validated value
|
203 |
-
|
204 |
Raises:
|
205 |
ValidationError: If value is not in choices
|
206 |
"""
|
@@ -209,5 +209,5 @@ def validate_choices(value: Any, field_name: str, choices: list) -> Any:
|
|
209 |
f"Field '{field_name}' must be one of {choices}, got '{value}'",
|
210 |
field_name, value
|
211 |
)
|
212 |
-
|
213 |
return value
|
|
|
15 |
|
16 |
class ValidationError(Exception):
|
17 |
"""Custom exception for DTO validation errors"""
|
18 |
+
|
19 |
def __init__(self, message: str, field: str = None, value: Any = None):
|
20 |
self.message = message
|
21 |
self.field = field
|
22 |
self.value = value
|
23 |
super().__init__(self.message)
|
24 |
+
|
25 |
def __str__(self):
|
26 |
if self.field:
|
27 |
return f"Validation error for field '{self.field}': {self.message}"
|
|
|
30 |
|
31 |
def validate_dto(dto_instance: Any) -> bool:
|
32 |
"""Validate a DTO instance
|
33 |
+
|
34 |
Args:
|
35 |
dto_instance: The DTO instance to validate
|
36 |
+
|
37 |
Returns:
|
38 |
bool: True if validation passes
|
39 |
+
|
40 |
Raises:
|
41 |
ValidationError: If validation fails
|
42 |
"""
|
|
|
44 |
# Call the DTO's validation method if it exists
|
45 |
if hasattr(dto_instance, '_validate'):
|
46 |
dto_instance._validate()
|
47 |
+
|
48 |
# Additional validation can be added here
|
49 |
+
logger.info(f"Successfully validated {type(dto_instance).__name__}")
|
50 |
return True
|
51 |
+
|
52 |
except ValueError as e:
|
53 |
logger.error(f"Validation failed for {type(dto_instance).__name__}: {e}")
|
54 |
raise ValidationError(str(e)) from e
|
|
|
59 |
|
60 |
def validation_required(func: Callable[..., T]) -> Callable[..., T]:
|
61 |
"""Decorator to ensure DTO validation before method execution
|
62 |
+
|
63 |
Args:
|
64 |
func: The method to decorate
|
65 |
+
|
66 |
Returns:
|
67 |
Decorated function that validates 'self' before execution
|
68 |
"""
|
|
|
75 |
raise
|
76 |
except Exception as e:
|
77 |
raise ValidationError(f"Error in {func.__name__}: {e}") from e
|
78 |
+
|
79 |
return wrapper
|
80 |
|
81 |
|
82 |
+
def validate_field(value: Any, field_name: str, validator: Callable[[Any], bool],
|
83 |
error_message: str = None) -> Any:
|
84 |
"""Validate a single field value
|
85 |
+
|
86 |
Args:
|
87 |
value: The value to validate
|
88 |
field_name: Name of the field being validated
|
89 |
validator: Function that returns True if value is valid
|
90 |
error_message: Custom error message
|
91 |
+
|
92 |
Returns:
|
93 |
The validated value
|
94 |
+
|
95 |
Raises:
|
96 |
ValidationError: If validation fails
|
97 |
"""
|
|
|
108 |
|
109 |
def validate_required(value: Any, field_name: str) -> Any:
|
110 |
"""Validate that a field is not None or empty
|
111 |
+
|
112 |
Args:
|
113 |
value: The value to validate
|
114 |
field_name: Name of the field being validated
|
115 |
+
|
116 |
Returns:
|
117 |
The validated value
|
118 |
+
|
119 |
Raises:
|
120 |
ValidationError: If field is None or empty
|
121 |
"""
|
122 |
if value is None:
|
123 |
raise ValidationError(f"Field '{field_name}' is required", field_name, value)
|
124 |
+
|
125 |
if isinstance(value, (str, list, dict)) and len(value) == 0:
|
126 |
raise ValidationError(f"Field '{field_name}' cannot be empty", field_name, value)
|
127 |
+
|
128 |
return value
|
129 |
|
130 |
|
131 |
def validate_type(value: Any, field_name: str, expected_type: Union[type, tuple]) -> Any:
|
132 |
"""Validate that a field is of the expected type
|
133 |
+
|
134 |
Args:
|
135 |
value: The value to validate
|
136 |
field_name: Name of the field being validated
|
137 |
expected_type: Expected type or tuple of types
|
138 |
+
|
139 |
Returns:
|
140 |
The validated value
|
141 |
+
|
142 |
Raises:
|
143 |
ValidationError: If type doesn't match
|
144 |
"""
|
|
|
148 |
expected_str = " or ".join(type_names)
|
149 |
else:
|
150 |
expected_str = expected_type.__name__
|
151 |
+
|
152 |
actual_type = type(value).__name__
|
153 |
raise ValidationError(
|
154 |
f"Field '{field_name}' must be of type {expected_str}, got {actual_type}",
|
155 |
field_name, value
|
156 |
)
|
157 |
+
|
158 |
return value
|
159 |
|
160 |
|
161 |
+
def validate_range(value: Union[int, float], field_name: str,
|
162 |
+
min_value: Union[int, float] = None,
|
163 |
max_value: Union[int, float] = None) -> Union[int, float]:
|
164 |
"""Validate that a numeric value is within a specified range
|
165 |
+
|
166 |
Args:
|
167 |
value: The numeric value to validate
|
168 |
field_name: Name of the field being validated
|
169 |
min_value: Minimum allowed value (inclusive)
|
170 |
max_value: Maximum allowed value (inclusive)
|
171 |
+
|
172 |
Returns:
|
173 |
The validated value
|
174 |
+
|
175 |
Raises:
|
176 |
ValidationError: If value is outside the range
|
177 |
"""
|
|
|
180 |
f"Field '{field_name}' must be >= {min_value}, got {value}",
|
181 |
field_name, value
|
182 |
)
|
183 |
+
|
184 |
if max_value is not None and value > max_value:
|
185 |
raise ValidationError(
|
186 |
f"Field '{field_name}' must be <= {max_value}, got {value}",
|
187 |
field_name, value
|
188 |
)
|
189 |
+
|
190 |
return value
|
191 |
|
192 |
|
193 |
def validate_choices(value: Any, field_name: str, choices: list) -> Any:
|
194 |
"""Validate that a value is one of the allowed choices
|
195 |
+
|
196 |
Args:
|
197 |
value: The value to validate
|
198 |
field_name: Name of the field being validated
|
199 |
choices: List of allowed values
|
200 |
+
|
201 |
Returns:
|
202 |
The validated value
|
203 |
+
|
204 |
Raises:
|
205 |
ValidationError: If value is not in choices
|
206 |
"""
|
|
|
209 |
f"Field '{field_name}' must be one of {choices}, got '{value}'",
|
210 |
field_name, value
|
211 |
)
|
212 |
+
|
213 |
return value
|
src/application/error_handling/error_mapper.py
CHANGED
@@ -262,7 +262,7 @@ class ErrorMapper:
|
|
262 |
if context:
|
263 |
mapping = self._enhance_mapping_with_context(mapping, exception, context)
|
264 |
|
265 |
-
logger.
|
266 |
return mapping
|
267 |
|
268 |
except Exception as e:
|
|
|
262 |
if context:
|
263 |
mapping = self._enhance_mapping_with_context(mapping, exception, context)
|
264 |
|
265 |
+
logger.info(f"Mapped {type(exception).__name__} to {mapping.error_code}")
|
266 |
return mapping
|
267 |
|
268 |
except Exception as e:
|
src/application/error_handling/structured_logger.py
CHANGED
@@ -125,7 +125,7 @@ class StructuredLogger:
|
|
125 |
if self.logger.isEnabledFor(logging.DEBUG):
|
126 |
log_data = self._get_log_data(message, LogLevel.DEBUG.value, context, extra)
|
127 |
# Use 'structured_data' to avoid conflicts with LogRecord attributes
|
128 |
-
self.logger.
|
129 |
|
130 |
def info(self, message: str, context: Optional[LogContext] = None,
|
131 |
extra: Optional[Dict[str, Any]] = None) -> None:
|
|
|
125 |
if self.logger.isEnabledFor(logging.DEBUG):
|
126 |
log_data = self._get_log_data(message, LogLevel.DEBUG.value, context, extra)
|
127 |
# Use 'structured_data' to avoid conflicts with LogRecord attributes
|
128 |
+
self.logger.info(message, extra={'structured_data': log_data})
|
129 |
|
130 |
def info(self, message: str, context: Optional[LogContext] = None,
|
131 |
extra: Optional[Dict[str, Any]] = None) -> None:
|
src/application/services/configuration_service.py
CHANGED
@@ -153,7 +153,7 @@ class ConfigurationApplicationService:
|
|
153 |
# Update the actual config object
|
154 |
if hasattr(self._config.tts, key):
|
155 |
setattr(self._config.tts, key, value)
|
156 |
-
logger.
|
157 |
else:
|
158 |
logger.warning(f"Unknown TTS configuration key: {key}")
|
159 |
|
@@ -192,7 +192,7 @@ class ConfigurationApplicationService:
|
|
192 |
# Update the actual config object
|
193 |
if hasattr(self._config.stt, key):
|
194 |
setattr(self._config.stt, key, value)
|
195 |
-
logger.
|
196 |
else:
|
197 |
logger.warning(f"Unknown STT configuration key: {key}")
|
198 |
|
@@ -231,7 +231,7 @@ class ConfigurationApplicationService:
|
|
231 |
# Update the actual config object
|
232 |
if hasattr(self._config.translation, key):
|
233 |
setattr(self._config.translation, key, value)
|
234 |
-
logger.
|
235 |
else:
|
236 |
logger.warning(f"Unknown translation configuration key: {key}")
|
237 |
|
@@ -270,7 +270,7 @@ class ConfigurationApplicationService:
|
|
270 |
# Update the actual config object
|
271 |
if hasattr(self._config.processing, key):
|
272 |
setattr(self._config.processing, key, value)
|
273 |
-
logger.
|
274 |
else:
|
275 |
logger.warning(f"Unknown processing configuration key: {key}")
|
276 |
|
|
|
153 |
# Update the actual config object
|
154 |
if hasattr(self._config.tts, key):
|
155 |
setattr(self._config.tts, key, value)
|
156 |
+
logger.info(f"Updated TTS config: {key} = {value}")
|
157 |
else:
|
158 |
logger.warning(f"Unknown TTS configuration key: {key}")
|
159 |
|
|
|
192 |
# Update the actual config object
|
193 |
if hasattr(self._config.stt, key):
|
194 |
setattr(self._config.stt, key, value)
|
195 |
+
logger.info(f"Updated STT config: {key} = {value}")
|
196 |
else:
|
197 |
logger.warning(f"Unknown STT configuration key: {key}")
|
198 |
|
|
|
231 |
# Update the actual config object
|
232 |
if hasattr(self._config.translation, key):
|
233 |
setattr(self._config.translation, key, value)
|
234 |
+
logger.info(f"Updated translation config: {key} = {value}")
|
235 |
else:
|
236 |
logger.warning(f"Unknown translation configuration key: {key}")
|
237 |
|
|
|
270 |
# Update the actual config object
|
271 |
if hasattr(self._config.processing, key):
|
272 |
setattr(self._config.processing, key, value)
|
273 |
+
logger.info(f"Updated processing config: {key} = {value}")
|
274 |
else:
|
275 |
logger.warning(f"Unknown processing configuration key: {key}")
|
276 |
|
src/infrastructure/base/file_utils.py
CHANGED
@@ -27,7 +27,7 @@ class FileManager:
|
|
27 |
self.base_dir = Path(tempfile.gettempdir()) / "tts_app"
|
28 |
|
29 |
self.base_dir.mkdir(exist_ok=True)
|
30 |
-
logger.
|
31 |
|
32 |
def create_temp_file(self, suffix: str = ".tmp", prefix: str = "temp", content: bytes = None) -> Path:
|
33 |
"""
|
@@ -51,7 +51,7 @@ class FileManager:
|
|
51 |
else:
|
52 |
file_path.touch()
|
53 |
|
54 |
-
logger.
|
55 |
return file_path
|
56 |
|
57 |
def create_unique_filename(self, base_name: str, extension: str = "", content_hash: bool = False, content: bytes = None) -> str:
|
@@ -103,7 +103,7 @@ class FileManager:
|
|
103 |
with open(file_path, 'wb') as f:
|
104 |
f.write(audio_data)
|
105 |
|
106 |
-
logger.
|
107 |
return file_path
|
108 |
|
109 |
def save_text_file(self, text_content: str, encoding: str = "utf-8", prefix: str = "text") -> Path:
|
@@ -124,7 +124,7 @@ class FileManager:
|
|
124 |
with open(file_path, 'w', encoding=encoding) as f:
|
125 |
f.write(text_content)
|
126 |
|
127 |
-
logger.
|
128 |
return file_path
|
129 |
|
130 |
def cleanup_file(self, file_path: Union[str, Path]) -> bool:
|
@@ -141,7 +141,7 @@ class FileManager:
|
|
141 |
path = Path(file_path)
|
142 |
if path.exists() and path.is_file():
|
143 |
path.unlink()
|
144 |
-
logger.
|
145 |
return True
|
146 |
return False
|
147 |
except Exception as e:
|
@@ -223,7 +223,7 @@ class FileManager:
|
|
223 |
"""
|
224 |
path = Path(dir_path)
|
225 |
path.mkdir(parents=True, exist_ok=True)
|
226 |
-
logger.
|
227 |
return path
|
228 |
|
229 |
def get_disk_usage(self) -> dict:
|
@@ -282,7 +282,7 @@ class AudioFileGenerator:
|
|
282 |
wav_file.setframerate(sample_rate)
|
283 |
wav_file.writeframes(audio_data)
|
284 |
|
285 |
-
logger.
|
286 |
return path
|
287 |
|
288 |
except Exception as e:
|
@@ -318,7 +318,7 @@ class AudioFileGenerator:
|
|
318 |
|
319 |
sf.write(str(path), audio_array, sample_rate)
|
320 |
|
321 |
-
logger.
|
322 |
return path
|
323 |
|
324 |
except ImportError:
|
@@ -406,4 +406,4 @@ class ErrorHandler:
|
|
406 |
debug_msg += f" ({context})"
|
407 |
debug_msg += f": {message}"
|
408 |
|
409 |
-
self.logger.
|
|
|
27 |
self.base_dir = Path(tempfile.gettempdir()) / "tts_app"
|
28 |
|
29 |
self.base_dir.mkdir(exist_ok=True)
|
30 |
+
logger.info(f"FileManager initialized with base directory: {self.base_dir}")
|
31 |
|
32 |
def create_temp_file(self, suffix: str = ".tmp", prefix: str = "temp", content: bytes = None) -> Path:
|
33 |
"""
|
|
|
51 |
else:
|
52 |
file_path.touch()
|
53 |
|
54 |
+
logger.info(f"Created temporary file: {file_path}")
|
55 |
return file_path
|
56 |
|
57 |
def create_unique_filename(self, base_name: str, extension: str = "", content_hash: bool = False, content: bytes = None) -> str:
|
|
|
103 |
with open(file_path, 'wb') as f:
|
104 |
f.write(audio_data)
|
105 |
|
106 |
+
logger.info(f"Saved audio file: {file_path} ({len(audio_data)} bytes)")
|
107 |
return file_path
|
108 |
|
109 |
def save_text_file(self, text_content: str, encoding: str = "utf-8", prefix: str = "text") -> Path:
|
|
|
124 |
with open(file_path, 'w', encoding=encoding) as f:
|
125 |
f.write(text_content)
|
126 |
|
127 |
+
logger.info(f"Saved text file: {file_path} ({len(text_content)} characters)")
|
128 |
return file_path
|
129 |
|
130 |
def cleanup_file(self, file_path: Union[str, Path]) -> bool:
|
|
|
141 |
path = Path(file_path)
|
142 |
if path.exists() and path.is_file():
|
143 |
path.unlink()
|
144 |
+
logger.info(f"Cleaned up file: {path}")
|
145 |
return True
|
146 |
return False
|
147 |
except Exception as e:
|
|
|
223 |
"""
|
224 |
path = Path(dir_path)
|
225 |
path.mkdir(parents=True, exist_ok=True)
|
226 |
+
logger.info(f"Ensured directory exists: {path}")
|
227 |
return path
|
228 |
|
229 |
def get_disk_usage(self) -> dict:
|
|
|
282 |
wav_file.setframerate(sample_rate)
|
283 |
wav_file.writeframes(audio_data)
|
284 |
|
285 |
+
logger.info(f"Saved WAV file: {path} (sample_rate={sample_rate}, channels={channels})")
|
286 |
return path
|
287 |
|
288 |
except Exception as e:
|
|
|
318 |
|
319 |
sf.write(str(path), audio_array, sample_rate)
|
320 |
|
321 |
+
logger.info(f"Converted numpy array to WAV: {path}")
|
322 |
return path
|
323 |
|
324 |
except ImportError:
|
|
|
406 |
debug_msg += f" ({context})"
|
407 |
debug_msg += f": {message}"
|
408 |
|
409 |
+
self.logger.info(debug_msg)
|
src/infrastructure/base/stt_provider_base.py
CHANGED
@@ -145,7 +145,7 @@ class STTProviderBase(ISpeechRecognitionService, ABC):
|
|
145 |
# Convert to required format if needed
|
146 |
processed_file = self._convert_audio_format(temp_file, audio)
|
147 |
|
148 |
-
logger.
|
149 |
return processed_file
|
150 |
|
151 |
except Exception as e:
|
@@ -191,7 +191,7 @@ class STTProviderBase(ISpeechRecognitionService, ABC):
|
|
191 |
# Export converted audio
|
192 |
standardized_audio.export(output_path, format="wav")
|
193 |
|
194 |
-
logger.
|
195 |
return output_path
|
196 |
|
197 |
except ImportError:
|
@@ -273,7 +273,7 @@ class STTProviderBase(ISpeechRecognitionService, ABC):
|
|
273 |
try:
|
274 |
if file_path.exists():
|
275 |
file_path.unlink()
|
276 |
-
logger.
|
277 |
except Exception as e:
|
278 |
logger.warning(f"Failed to cleanup temp file {file_path}: {str(e)}")
|
279 |
|
@@ -294,7 +294,7 @@ class STTProviderBase(ISpeechRecognitionService, ABC):
|
|
294 |
file_age = current_time - file_path.stat().st_mtime
|
295 |
if file_age > max_age_seconds:
|
296 |
file_path.unlink()
|
297 |
-
logger.
|
298 |
|
299 |
except Exception as e:
|
300 |
logger.warning(f"Failed to cleanup old temp files: {str(e)}")
|
|
|
145 |
# Convert to required format if needed
|
146 |
processed_file = self._convert_audio_format(temp_file, audio)
|
147 |
|
148 |
+
logger.info(f"Audio preprocessed and saved to: {processed_file}")
|
149 |
return processed_file
|
150 |
|
151 |
except Exception as e:
|
|
|
191 |
# Export converted audio
|
192 |
standardized_audio.export(output_path, format="wav")
|
193 |
|
194 |
+
logger.info(f"Audio converted from {audio.format} to WAV: {output_path}")
|
195 |
return output_path
|
196 |
|
197 |
except ImportError:
|
|
|
273 |
try:
|
274 |
if file_path.exists():
|
275 |
file_path.unlink()
|
276 |
+
logger.info(f"Cleaned up temp file: {file_path}")
|
277 |
except Exception as e:
|
278 |
logger.warning(f"Failed to cleanup temp file {file_path}: {str(e)}")
|
279 |
|
|
|
294 |
file_age = current_time - file_path.stat().st_mtime
|
295 |
if file_age > max_age_seconds:
|
296 |
file_path.unlink()
|
297 |
+
logger.info(f"Cleaned up old temp file: {file_path}")
|
298 |
|
299 |
except Exception as e:
|
300 |
logger.warning(f"Failed to cleanup old temp files: {str(e)}")
|
src/infrastructure/base/translation_provider_base.py
CHANGED
@@ -56,7 +56,7 @@ class TranslationProviderBase(ITranslationService, ABC):
|
|
56 |
# Translate each chunk
|
57 |
translated_chunks = []
|
58 |
for i, chunk in enumerate(text_chunks):
|
59 |
-
logger.
|
60 |
translated_chunk = self._translate_chunk(
|
61 |
chunk,
|
62 |
request.source_text.language,
|
@@ -160,7 +160,7 @@ class TranslationProviderBase(ITranslationService, ABC):
|
|
160 |
if current_chunk.strip():
|
161 |
chunks.append(current_chunk.strip())
|
162 |
|
163 |
-
logger.
|
164 |
return chunks
|
165 |
|
166 |
def _split_into_sentences(self, text: str) -> List[str]:
|
|
|
56 |
# Translate each chunk
|
57 |
translated_chunks = []
|
58 |
for i, chunk in enumerate(text_chunks):
|
59 |
+
logger.info(f"Translating chunk {i+1}/{len(text_chunks)}")
|
60 |
translated_chunk = self._translate_chunk(
|
61 |
chunk,
|
62 |
request.source_text.language,
|
|
|
160 |
if current_chunk.strip():
|
161 |
chunks.append(current_chunk.strip())
|
162 |
|
163 |
+
logger.info(f"Text chunked into {len(chunks)} pieces")
|
164 |
return chunks
|
165 |
|
166 |
def _split_into_sentences(self, text: str) -> List[str]:
|
src/infrastructure/base/tts_provider_base.py
CHANGED
@@ -322,7 +322,7 @@ class TTSProviderBase(ISpeechSynthesisService, ABC):
|
|
322 |
file_age = current_time - file_path.stat().st_mtime
|
323 |
if file_age > max_age_seconds:
|
324 |
file_path.unlink()
|
325 |
-
logger.
|
326 |
|
327 |
except Exception as e:
|
328 |
logger.warning(f"Failed to cleanup temp files: {str(e)}")
|
|
|
322 |
file_age = current_time - file_path.stat().st_mtime
|
323 |
if file_age > max_age_seconds:
|
324 |
file_path.unlink()
|
325 |
+
logger.info(f"Cleaned up old temp file: {file_path}")
|
326 |
|
327 |
except Exception as e:
|
328 |
logger.warning(f"Failed to cleanup temp files: {str(e)}")
|
src/infrastructure/config/app_config.py
CHANGED
@@ -73,14 +73,14 @@ class AppConfig:
|
|
73 |
"""
|
74 |
self.config_file = config_file
|
75 |
self._config_data: Dict[str, Any] = {}
|
76 |
-
|
77 |
# Initialize configuration sections
|
78 |
self.tts = TTSConfig()
|
79 |
self.stt = STTConfig()
|
80 |
self.translation = TranslationConfig()
|
81 |
self.processing = ProcessingConfig()
|
82 |
self.logging = LoggingConfig()
|
83 |
-
|
84 |
# Load configuration
|
85 |
self._load_configuration()
|
86 |
|
@@ -89,16 +89,16 @@ class AppConfig:
|
|
89 |
try:
|
90 |
# Load from environment variables first
|
91 |
self._load_from_environment()
|
92 |
-
|
93 |
# Load from config file if provided
|
94 |
if self.config_file and os.path.exists(self.config_file):
|
95 |
self._load_from_file()
|
96 |
-
|
97 |
# Validate configuration
|
98 |
self._validate_configuration()
|
99 |
-
|
100 |
logger.info("Configuration loaded successfully")
|
101 |
-
|
102 |
except Exception as e:
|
103 |
logger.error(f"Failed to load configuration: {e}")
|
104 |
# Use default configuration
|
@@ -158,7 +158,7 @@ class AppConfig:
|
|
158 |
"""Load configuration from file (JSON or YAML)."""
|
159 |
try:
|
160 |
import json
|
161 |
-
|
162 |
with open(self.config_file, 'r') as f:
|
163 |
if self.config_file.endswith('.json'):
|
164 |
self._config_data = json.load(f)
|
@@ -175,7 +175,7 @@ class AppConfig:
|
|
175 |
|
176 |
# Apply configuration from file
|
177 |
self._apply_config_data()
|
178 |
-
|
179 |
except Exception as e:
|
180 |
logger.error(f"Failed to load config file {self.config_file}: {e}")
|
181 |
|
|
|
73 |
"""
|
74 |
self.config_file = config_file
|
75 |
self._config_data: Dict[str, Any] = {}
|
76 |
+
|
77 |
# Initialize configuration sections
|
78 |
self.tts = TTSConfig()
|
79 |
self.stt = STTConfig()
|
80 |
self.translation = TranslationConfig()
|
81 |
self.processing = ProcessingConfig()
|
82 |
self.logging = LoggingConfig()
|
83 |
+
|
84 |
# Load configuration
|
85 |
self._load_configuration()
|
86 |
|
|
|
89 |
try:
|
90 |
# Load from environment variables first
|
91 |
self._load_from_environment()
|
92 |
+
|
93 |
# Load from config file if provided
|
94 |
if self.config_file and os.path.exists(self.config_file):
|
95 |
self._load_from_file()
|
96 |
+
|
97 |
# Validate configuration
|
98 |
self._validate_configuration()
|
99 |
+
|
100 |
logger.info("Configuration loaded successfully")
|
101 |
+
|
102 |
except Exception as e:
|
103 |
logger.error(f"Failed to load configuration: {e}")
|
104 |
# Use default configuration
|
|
|
158 |
"""Load configuration from file (JSON or YAML)."""
|
159 |
try:
|
160 |
import json
|
161 |
+
|
162 |
with open(self.config_file, 'r') as f:
|
163 |
if self.config_file.endswith('.json'):
|
164 |
self._config_data = json.load(f)
|
|
|
175 |
|
176 |
# Apply configuration from file
|
177 |
self._apply_config_data()
|
178 |
+
|
179 |
except Exception as e:
|
180 |
logger.error(f"Failed to load config file {self.config_file}: {e}")
|
181 |
|
src/infrastructure/config/dependency_container.py
CHANGED
@@ -309,19 +309,25 @@ class DependencyContainer:
|
|
309 |
Returns:
|
310 |
ISpeechSynthesisService: TTS provider instance
|
311 |
"""
|
|
|
312 |
factory = self.resolve(TTSProviderFactory)
|
313 |
|
314 |
if provider_name:
|
|
|
315 |
try:
|
316 |
-
|
|
|
|
|
317 |
except Exception as e:
|
318 |
-
logger.warning(f"Failed to create specific TTS provider {provider_name}: {e}")
|
319 |
-
logger.info("Falling back to default provider selection")
|
320 |
# Fall back to default provider selection
|
321 |
preferred_providers = self._config.tts.preferred_providers
|
|
|
322 |
return factory.get_provider_with_fallback(preferred_providers, **kwargs)
|
323 |
else:
|
324 |
preferred_providers = self._config.tts.preferred_providers
|
|
|
325 |
return factory.get_provider_with_fallback(preferred_providers, **kwargs)
|
326 |
|
327 |
def get_stt_provider(self, provider_name: Optional[str] = None) -> ISpeechRecognitionService:
|
|
|
309 |
Returns:
|
310 |
ISpeechSynthesisService: TTS provider instance
|
311 |
"""
|
312 |
+
logger.info(f"🎯 Requesting TTS provider: {provider_name or 'default'}")
|
313 |
factory = self.resolve(TTSProviderFactory)
|
314 |
|
315 |
if provider_name:
|
316 |
+
logger.info(f"🔧 Attempting to create specific TTS provider: {provider_name}")
|
317 |
try:
|
318 |
+
provider = factory.create_provider(provider_name, **kwargs)
|
319 |
+
logger.info(f"✅ Successfully created TTS provider: {provider_name}")
|
320 |
+
return provider
|
321 |
except Exception as e:
|
322 |
+
logger.warning(f"❌ Failed to create specific TTS provider {provider_name}: {e}")
|
323 |
+
logger.info("🔄 Falling back to default provider selection")
|
324 |
# Fall back to default provider selection
|
325 |
preferred_providers = self._config.tts.preferred_providers
|
326 |
+
logger.info(f"📋 Preferred providers for fallback: {preferred_providers}")
|
327 |
return factory.get_provider_with_fallback(preferred_providers, **kwargs)
|
328 |
else:
|
329 |
preferred_providers = self._config.tts.preferred_providers
|
330 |
+
logger.info(f"📋 Using preferred providers: {preferred_providers}")
|
331 |
return factory.get_provider_with_fallback(preferred_providers, **kwargs)
|
332 |
|
333 |
def get_stt_provider(self, provider_name: Optional[str] = None) -> ISpeechRecognitionService:
|
src/infrastructure/stt/legacy_compatibility.py
CHANGED
@@ -14,37 +14,37 @@ logger = logging.getLogger(__name__)
|
|
14 |
def transcribe_audio(audio_path: Union[str, Path], model_name: str = "parakeet") -> str:
|
15 |
"""
|
16 |
Convert audio file to text using specified STT model (legacy interface).
|
17 |
-
|
18 |
This function maintains backward compatibility with the original utils/stt.py interface.
|
19 |
-
|
20 |
Args:
|
21 |
audio_path: Path to input audio file
|
22 |
model_name: Name of the STT model/provider to use (whisper or parakeet)
|
23 |
-
|
24 |
Returns:
|
25 |
str: Transcribed English text
|
26 |
-
|
27 |
Raises:
|
28 |
SpeechRecognitionException: If transcription fails
|
29 |
"""
|
30 |
logger.info(f"Starting transcription for: {audio_path} using {model_name} model")
|
31 |
-
|
32 |
try:
|
33 |
# Convert path to Path object
|
34 |
audio_path = Path(audio_path)
|
35 |
-
|
36 |
if not audio_path.exists():
|
37 |
raise SpeechRecognitionException(f"Audio file not found: {audio_path}")
|
38 |
-
|
39 |
# Read audio file and create AudioContent
|
40 |
with open(audio_path, 'rb') as f:
|
41 |
audio_data = f.read()
|
42 |
-
|
43 |
# Determine audio format from file extension
|
44 |
audio_format = audio_path.suffix.lower().lstrip('.')
|
45 |
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
46 |
audio_format = 'wav' # Default fallback
|
47 |
-
|
48 |
# Create AudioContent (we'll use reasonable placeholder values)
|
49 |
# The provider will handle the actual audio analysis during preprocessing
|
50 |
try:
|
@@ -64,7 +64,7 @@ def transcribe_audio(audio_path: Union[str, Path], model_name: str = "parakeet")
|
|
64 |
duration=1.0, # Minimum valid duration
|
65 |
filename=audio_path.name
|
66 |
)
|
67 |
-
|
68 |
# Get the appropriate provider
|
69 |
try:
|
70 |
provider = STTProviderFactory.create_provider(model_name)
|
@@ -72,14 +72,14 @@ def transcribe_audio(audio_path: Union[str, Path], model_name: str = "parakeet")
|
|
72 |
# Fallback to any available provider
|
73 |
logger.warning(f"Requested provider {model_name} not available, using fallback")
|
74 |
provider = STTProviderFactory.create_provider_with_fallback(model_name)
|
75 |
-
|
76 |
# Get the default model for the provider
|
77 |
model = provider.get_default_model()
|
78 |
-
|
79 |
# Transcribe audio
|
80 |
text_content = provider.transcribe(audio_content, model)
|
81 |
result = text_content.text
|
82 |
-
|
83 |
logger.info(f"Transcription completed: {result}")
|
84 |
return result
|
85 |
|
@@ -91,33 +91,33 @@ def transcribe_audio(audio_path: Union[str, Path], model_name: str = "parakeet")
|
|
91 |
def create_audio_content_from_file(audio_path: Union[str, Path]) -> AudioContent:
|
92 |
"""
|
93 |
Create AudioContent from an audio file with proper metadata detection.
|
94 |
-
|
95 |
Args:
|
96 |
audio_path: Path to the audio file
|
97 |
-
|
98 |
Returns:
|
99 |
AudioContent: The audio content object
|
100 |
-
|
101 |
Raises:
|
102 |
SpeechRecognitionException: If file cannot be processed
|
103 |
"""
|
104 |
try:
|
105 |
from pydub import AudioSegment
|
106 |
-
|
107 |
audio_path = Path(audio_path)
|
108 |
-
|
109 |
# Load audio file to get metadata
|
110 |
audio_segment = AudioSegment.from_file(audio_path)
|
111 |
-
|
112 |
# Read raw audio data
|
113 |
with open(audio_path, 'rb') as f:
|
114 |
audio_data = f.read()
|
115 |
-
|
116 |
# Determine format
|
117 |
audio_format = audio_path.suffix.lower().lstrip('.')
|
118 |
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
119 |
audio_format = 'wav'
|
120 |
-
|
121 |
# Create AudioContent with actual metadata
|
122 |
return AudioContent(
|
123 |
data=audio_data,
|
@@ -126,18 +126,18 @@ def create_audio_content_from_file(audio_path: Union[str, Path]) -> AudioContent
|
|
126 |
duration=len(audio_segment) / 1000.0, # Convert ms to seconds
|
127 |
filename=audio_path.name
|
128 |
)
|
129 |
-
|
130 |
except ImportError:
|
131 |
# Fallback without pydub
|
132 |
logger.warning("pydub not available, using placeholder metadata")
|
133 |
-
|
134 |
with open(audio_path, 'rb') as f:
|
135 |
audio_data = f.read()
|
136 |
-
|
137 |
audio_format = Path(audio_path).suffix.lower().lstrip('.')
|
138 |
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
139 |
audio_format = 'wav'
|
140 |
-
|
141 |
return AudioContent(
|
142 |
data=audio_data,
|
143 |
format=audio_format,
|
@@ -145,6 +145,6 @@ def create_audio_content_from_file(audio_path: Union[str, Path]) -> AudioContent
|
|
145 |
duration=1.0, # Placeholder
|
146 |
filename=Path(audio_path).name
|
147 |
)
|
148 |
-
|
149 |
except Exception as e:
|
150 |
raise SpeechRecognitionException(f"Failed to create AudioContent from file: {str(e)}") from e
|
|
|
14 |
def transcribe_audio(audio_path: Union[str, Path], model_name: str = "parakeet") -> str:
|
15 |
"""
|
16 |
Convert audio file to text using specified STT model (legacy interface).
|
17 |
+
|
18 |
This function maintains backward compatibility with the original utils/stt.py interface.
|
19 |
+
|
20 |
Args:
|
21 |
audio_path: Path to input audio file
|
22 |
model_name: Name of the STT model/provider to use (whisper or parakeet)
|
23 |
+
|
24 |
Returns:
|
25 |
str: Transcribed English text
|
26 |
+
|
27 |
Raises:
|
28 |
SpeechRecognitionException: If transcription fails
|
29 |
"""
|
30 |
logger.info(f"Starting transcription for: {audio_path} using {model_name} model")
|
31 |
+
|
32 |
try:
|
33 |
# Convert path to Path object
|
34 |
audio_path = Path(audio_path)
|
35 |
+
|
36 |
if not audio_path.exists():
|
37 |
raise SpeechRecognitionException(f"Audio file not found: {audio_path}")
|
38 |
+
|
39 |
# Read audio file and create AudioContent
|
40 |
with open(audio_path, 'rb') as f:
|
41 |
audio_data = f.read()
|
42 |
+
|
43 |
# Determine audio format from file extension
|
44 |
audio_format = audio_path.suffix.lower().lstrip('.')
|
45 |
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
46 |
audio_format = 'wav' # Default fallback
|
47 |
+
|
48 |
# Create AudioContent (we'll use reasonable placeholder values)
|
49 |
# The provider will handle the actual audio analysis during preprocessing
|
50 |
try:
|
|
|
64 |
duration=1.0, # Minimum valid duration
|
65 |
filename=audio_path.name
|
66 |
)
|
67 |
+
|
68 |
# Get the appropriate provider
|
69 |
try:
|
70 |
provider = STTProviderFactory.create_provider(model_name)
|
|
|
72 |
# Fallback to any available provider
|
73 |
logger.warning(f"Requested provider {model_name} not available, using fallback")
|
74 |
provider = STTProviderFactory.create_provider_with_fallback(model_name)
|
75 |
+
|
76 |
# Get the default model for the provider
|
77 |
model = provider.get_default_model()
|
78 |
+
|
79 |
# Transcribe audio
|
80 |
text_content = provider.transcribe(audio_content, model)
|
81 |
result = text_content.text
|
82 |
+
|
83 |
logger.info(f"Transcription completed: {result}")
|
84 |
return result
|
85 |
|
|
|
91 |
def create_audio_content_from_file(audio_path: Union[str, Path]) -> AudioContent:
|
92 |
"""
|
93 |
Create AudioContent from an audio file with proper metadata detection.
|
94 |
+
|
95 |
Args:
|
96 |
audio_path: Path to the audio file
|
97 |
+
|
98 |
Returns:
|
99 |
AudioContent: The audio content object
|
100 |
+
|
101 |
Raises:
|
102 |
SpeechRecognitionException: If file cannot be processed
|
103 |
"""
|
104 |
try:
|
105 |
from pydub import AudioSegment
|
106 |
+
|
107 |
audio_path = Path(audio_path)
|
108 |
+
|
109 |
# Load audio file to get metadata
|
110 |
audio_segment = AudioSegment.from_file(audio_path)
|
111 |
+
|
112 |
# Read raw audio data
|
113 |
with open(audio_path, 'rb') as f:
|
114 |
audio_data = f.read()
|
115 |
+
|
116 |
# Determine format
|
117 |
audio_format = audio_path.suffix.lower().lstrip('.')
|
118 |
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
119 |
audio_format = 'wav'
|
120 |
+
|
121 |
# Create AudioContent with actual metadata
|
122 |
return AudioContent(
|
123 |
data=audio_data,
|
|
|
126 |
duration=len(audio_segment) / 1000.0, # Convert ms to seconds
|
127 |
filename=audio_path.name
|
128 |
)
|
129 |
+
|
130 |
except ImportError:
|
131 |
# Fallback without pydub
|
132 |
logger.warning("pydub not available, using placeholder metadata")
|
133 |
+
|
134 |
with open(audio_path, 'rb') as f:
|
135 |
audio_data = f.read()
|
136 |
+
|
137 |
audio_format = Path(audio_path).suffix.lower().lstrip('.')
|
138 |
if audio_format not in ['wav', 'mp3', 'flac', 'ogg']:
|
139 |
audio_format = 'wav'
|
140 |
+
|
141 |
return AudioContent(
|
142 |
data=audio_data,
|
143 |
format=audio_format,
|
|
|
145 |
duration=1.0, # Placeholder
|
146 |
filename=Path(audio_path).name
|
147 |
)
|
148 |
+
|
149 |
except Exception as e:
|
150 |
raise SpeechRecognitionException(f"Failed to create AudioContent from file: {str(e)}") from e
|
src/infrastructure/stt/parakeet_provider.py
CHANGED
@@ -42,11 +42,11 @@ class ParakeetSTTProvider(STTProviderBase):
|
|
42 |
self._load_model(model)
|
43 |
|
44 |
logger.info(f"Starting Parakeet transcription with model {model}")
|
45 |
-
|
46 |
# Perform transcription
|
47 |
output = self.model.transcribe([str(audio_path)])
|
48 |
result = output[0].text if output and len(output) > 0 else ""
|
49 |
-
|
50 |
logger.info("Parakeet transcription completed successfully")
|
51 |
return result
|
52 |
|
@@ -62,9 +62,9 @@ class ParakeetSTTProvider(STTProviderBase):
|
|
62 |
"""
|
63 |
try:
|
64 |
import nemo.collections.asr as nemo_asr
|
65 |
-
|
66 |
logger.info(f"Loading Parakeet model: {model_name}")
|
67 |
-
|
68 |
# Map model names to actual model identifiers
|
69 |
model_mapping = {
|
70 |
"parakeet-tdt-0.6b-v2": "nvidia/parakeet-tdt-0.6b-v2",
|
@@ -72,12 +72,12 @@ class ParakeetSTTProvider(STTProviderBase):
|
|
72 |
"parakeet-ctc-0.6b": "nvidia/parakeet-ctc-0.6b",
|
73 |
"default": "nvidia/parakeet-tdt-0.6b-v2"
|
74 |
}
|
75 |
-
|
76 |
actual_model_name = model_mapping.get(model_name, model_mapping["default"])
|
77 |
-
|
78 |
self.model = nemo_asr.models.ASRModel.from_pretrained(model_name=actual_model_name)
|
79 |
logger.info(f"Parakeet model {model_name} loaded successfully")
|
80 |
-
|
81 |
except ImportError as e:
|
82 |
raise SpeechRecognitionException(
|
83 |
"nemo_toolkit not available. Please install with: pip install -U 'nemo_toolkit[asr]'"
|
@@ -108,7 +108,7 @@ class ParakeetSTTProvider(STTProviderBase):
|
|
108 |
"""
|
109 |
return [
|
110 |
"parakeet-tdt-0.6b-v2",
|
111 |
-
"parakeet-tdt-1.1b",
|
112 |
"parakeet-ctc-0.6b"
|
113 |
]
|
114 |
|
|
|
42 |
self._load_model(model)
|
43 |
|
44 |
logger.info(f"Starting Parakeet transcription with model {model}")
|
45 |
+
|
46 |
# Perform transcription
|
47 |
output = self.model.transcribe([str(audio_path)])
|
48 |
result = output[0].text if output and len(output) > 0 else ""
|
49 |
+
|
50 |
logger.info("Parakeet transcription completed successfully")
|
51 |
return result
|
52 |
|
|
|
62 |
"""
|
63 |
try:
|
64 |
import nemo.collections.asr as nemo_asr
|
65 |
+
|
66 |
logger.info(f"Loading Parakeet model: {model_name}")
|
67 |
+
|
68 |
# Map model names to actual model identifiers
|
69 |
model_mapping = {
|
70 |
"parakeet-tdt-0.6b-v2": "nvidia/parakeet-tdt-0.6b-v2",
|
|
|
72 |
"parakeet-ctc-0.6b": "nvidia/parakeet-ctc-0.6b",
|
73 |
"default": "nvidia/parakeet-tdt-0.6b-v2"
|
74 |
}
|
75 |
+
|
76 |
actual_model_name = model_mapping.get(model_name, model_mapping["default"])
|
77 |
+
|
78 |
self.model = nemo_asr.models.ASRModel.from_pretrained(model_name=actual_model_name)
|
79 |
logger.info(f"Parakeet model {model_name} loaded successfully")
|
80 |
+
|
81 |
except ImportError as e:
|
82 |
raise SpeechRecognitionException(
|
83 |
"nemo_toolkit not available. Please install with: pip install -U 'nemo_toolkit[asr]'"
|
|
|
108 |
"""
|
109 |
return [
|
110 |
"parakeet-tdt-0.6b-v2",
|
111 |
+
"parakeet-tdt-1.1b",
|
112 |
"parakeet-ctc-0.6b"
|
113 |
]
|
114 |
|
src/infrastructure/stt/provider_factory.py
CHANGED
@@ -36,21 +36,21 @@ class STTProviderFactory:
|
|
36 |
SpeechRecognitionException: If provider is not available or creation fails
|
37 |
"""
|
38 |
provider_name = provider_name.lower()
|
39 |
-
|
40 |
if provider_name not in cls._providers:
|
41 |
raise SpeechRecognitionException(f"Unknown STT provider: {provider_name}")
|
42 |
|
43 |
provider_class = cls._providers[provider_name]
|
44 |
-
|
45 |
try:
|
46 |
provider = provider_class()
|
47 |
-
|
48 |
if not provider.is_available():
|
49 |
raise SpeechRecognitionException(f"STT provider {provider_name} is not available")
|
50 |
-
|
51 |
logger.info(f"Created STT provider: {provider_name}")
|
52 |
return provider
|
53 |
-
|
54 |
except Exception as e:
|
55 |
logger.error(f"Failed to create STT provider {provider_name}: {str(e)}")
|
56 |
raise SpeechRecognitionException(f"Failed to create STT provider {provider_name}: {str(e)}") from e
|
@@ -79,7 +79,7 @@ class STTProviderFactory:
|
|
79 |
for provider_name in cls._fallback_order:
|
80 |
if provider_name.lower() == preferred_provider.lower():
|
81 |
continue # Skip the preferred provider we already tried
|
82 |
-
|
83 |
try:
|
84 |
logger.info(f"Trying fallback STT provider: {provider_name}")
|
85 |
return cls.create_provider(provider_name)
|
@@ -98,15 +98,15 @@ class STTProviderFactory:
|
|
98 |
list[str]: List of available provider names
|
99 |
"""
|
100 |
available = []
|
101 |
-
|
102 |
for provider_name, provider_class in cls._providers.items():
|
103 |
try:
|
104 |
provider = provider_class()
|
105 |
if provider.is_available():
|
106 |
available.append(provider_name)
|
107 |
except Exception as e:
|
108 |
-
logger.
|
109 |
-
|
110 |
return available
|
111 |
|
112 |
@classmethod
|
@@ -121,12 +121,12 @@ class STTProviderFactory:
|
|
121 |
Optional[dict]: Provider information or None if not found
|
122 |
"""
|
123 |
provider_name = provider_name.lower()
|
124 |
-
|
125 |
if provider_name not in cls._providers:
|
126 |
return None
|
127 |
|
128 |
provider_class = cls._providers[provider_name]
|
129 |
-
|
130 |
try:
|
131 |
provider = provider_class()
|
132 |
return {
|
@@ -137,7 +137,7 @@ class STTProviderFactory:
|
|
137 |
"default_model": provider.get_default_model() if provider.is_available() else None
|
138 |
}
|
139 |
except Exception as e:
|
140 |
-
logger.
|
141 |
return {
|
142 |
"name": provider_name,
|
143 |
"available": False,
|
@@ -160,15 +160,15 @@ class STTProviderFactory:
|
|
160 |
# Legacy compatibility - create an ASRFactory alias
|
161 |
class ASRFactory:
|
162 |
"""Legacy ASRFactory for backward compatibility."""
|
163 |
-
|
164 |
@staticmethod
|
165 |
def get_model(model_name: str = "parakeet") -> STTProviderBase:
|
166 |
"""
|
167 |
Get STT provider by model name (legacy interface).
|
168 |
-
|
169 |
Args:
|
170 |
model_name: Name of the model/provider to use
|
171 |
-
|
172 |
Returns:
|
173 |
STTProviderBase: The provider instance
|
174 |
"""
|
@@ -178,9 +178,9 @@ class ASRFactory:
|
|
178 |
"parakeet": "parakeet",
|
179 |
"faster-whisper": "whisper"
|
180 |
}
|
181 |
-
|
182 |
provider_name = provider_mapping.get(model_name.lower(), model_name.lower())
|
183 |
-
|
184 |
try:
|
185 |
return STTProviderFactory.create_provider(provider_name)
|
186 |
except SpeechRecognitionException:
|
|
|
36 |
SpeechRecognitionException: If provider is not available or creation fails
|
37 |
"""
|
38 |
provider_name = provider_name.lower()
|
39 |
+
|
40 |
if provider_name not in cls._providers:
|
41 |
raise SpeechRecognitionException(f"Unknown STT provider: {provider_name}")
|
42 |
|
43 |
provider_class = cls._providers[provider_name]
|
44 |
+
|
45 |
try:
|
46 |
provider = provider_class()
|
47 |
+
|
48 |
if not provider.is_available():
|
49 |
raise SpeechRecognitionException(f"STT provider {provider_name} is not available")
|
50 |
+
|
51 |
logger.info(f"Created STT provider: {provider_name}")
|
52 |
return provider
|
53 |
+
|
54 |
except Exception as e:
|
55 |
logger.error(f"Failed to create STT provider {provider_name}: {str(e)}")
|
56 |
raise SpeechRecognitionException(f"Failed to create STT provider {provider_name}: {str(e)}") from e
|
|
|
79 |
for provider_name in cls._fallback_order:
|
80 |
if provider_name.lower() == preferred_provider.lower():
|
81 |
continue # Skip the preferred provider we already tried
|
82 |
+
|
83 |
try:
|
84 |
logger.info(f"Trying fallback STT provider: {provider_name}")
|
85 |
return cls.create_provider(provider_name)
|
|
|
98 |
list[str]: List of available provider names
|
99 |
"""
|
100 |
available = []
|
101 |
+
|
102 |
for provider_name, provider_class in cls._providers.items():
|
103 |
try:
|
104 |
provider = provider_class()
|
105 |
if provider.is_available():
|
106 |
available.append(provider_name)
|
107 |
except Exception as e:
|
108 |
+
logger.info(f"Provider {provider_name} not available: {str(e)}")
|
109 |
+
|
110 |
return available
|
111 |
|
112 |
@classmethod
|
|
|
121 |
Optional[dict]: Provider information or None if not found
|
122 |
"""
|
123 |
provider_name = provider_name.lower()
|
124 |
+
|
125 |
if provider_name not in cls._providers:
|
126 |
return None
|
127 |
|
128 |
provider_class = cls._providers[provider_name]
|
129 |
+
|
130 |
try:
|
131 |
provider = provider_class()
|
132 |
return {
|
|
|
137 |
"default_model": provider.get_default_model() if provider.is_available() else None
|
138 |
}
|
139 |
except Exception as e:
|
140 |
+
logger.info(f"Failed to get info for provider {provider_name}: {str(e)}")
|
141 |
return {
|
142 |
"name": provider_name,
|
143 |
"available": False,
|
|
|
160 |
# Legacy compatibility - create an ASRFactory alias
|
161 |
class ASRFactory:
|
162 |
"""Legacy ASRFactory for backward compatibility."""
|
163 |
+
|
164 |
@staticmethod
|
165 |
def get_model(model_name: str = "parakeet") -> STTProviderBase:
|
166 |
"""
|
167 |
Get STT provider by model name (legacy interface).
|
168 |
+
|
169 |
Args:
|
170 |
model_name: Name of the model/provider to use
|
171 |
+
|
172 |
Returns:
|
173 |
STTProviderBase: The provider instance
|
174 |
"""
|
|
|
178 |
"parakeet": "parakeet",
|
179 |
"faster-whisper": "whisper"
|
180 |
}
|
181 |
+
|
182 |
provider_name = provider_mapping.get(model_name.lower(), model_name.lower())
|
183 |
+
|
184 |
try:
|
185 |
return STTProviderFactory.create_provider(provider_name)
|
186 |
except SpeechRecognitionException:
|
src/infrastructure/stt/whisper_provider.py
CHANGED
@@ -36,7 +36,7 @@ class WhisperSTTProvider(STTProviderBase):
|
|
36 |
except ImportError:
|
37 |
# Fallback to CPU if torch is not available
|
38 |
self._device = "cpu"
|
39 |
-
|
40 |
self._compute_type = "float16" if self._device == "cuda" else "int8"
|
41 |
logger.info(f"Whisper provider initialized with device: {self._device}, compute_type: {self._compute_type}")
|
42 |
|
@@ -57,7 +57,7 @@ class WhisperSTTProvider(STTProviderBase):
|
|
57 |
self._load_model(model)
|
58 |
|
59 |
logger.info(f"Starting Whisper transcription with model {model}")
|
60 |
-
|
61 |
# Perform transcription
|
62 |
segments, info = self.model.transcribe(
|
63 |
str(audio_path),
|
@@ -72,7 +72,7 @@ class WhisperSTTProvider(STTProviderBase):
|
|
72 |
result_text = ""
|
73 |
for segment in segments:
|
74 |
result_text += segment.text + " "
|
75 |
-
logger.
|
76 |
|
77 |
result = result_text.strip()
|
78 |
logger.info("Whisper transcription completed successfully")
|
@@ -90,18 +90,18 @@ class WhisperSTTProvider(STTProviderBase):
|
|
90 |
"""
|
91 |
try:
|
92 |
from faster_whisper import WhisperModel as FasterWhisperModel
|
93 |
-
|
94 |
logger.info(f"Loading Whisper model: {model_name}")
|
95 |
logger.info(f"Using device: {self._device}, compute_type: {self._compute_type}")
|
96 |
-
|
97 |
self.model = FasterWhisperModel(
|
98 |
model_name,
|
99 |
device=self._device,
|
100 |
compute_type=self._compute_type
|
101 |
)
|
102 |
-
|
103 |
logger.info(f"Whisper model {model_name} loaded successfully")
|
104 |
-
|
105 |
except ImportError as e:
|
106 |
raise SpeechRecognitionException(
|
107 |
"faster-whisper not available. Please install with: pip install faster-whisper"
|
@@ -134,7 +134,7 @@ class WhisperSTTProvider(STTProviderBase):
|
|
134 |
"tiny",
|
135 |
"tiny.en",
|
136 |
"base",
|
137 |
-
"base.en",
|
138 |
"small",
|
139 |
"small.en",
|
140 |
"medium",
|
|
|
36 |
except ImportError:
|
37 |
# Fallback to CPU if torch is not available
|
38 |
self._device = "cpu"
|
39 |
+
|
40 |
self._compute_type = "float16" if self._device == "cuda" else "int8"
|
41 |
logger.info(f"Whisper provider initialized with device: {self._device}, compute_type: {self._compute_type}")
|
42 |
|
|
|
57 |
self._load_model(model)
|
58 |
|
59 |
logger.info(f"Starting Whisper transcription with model {model}")
|
60 |
+
|
61 |
# Perform transcription
|
62 |
segments, info = self.model.transcribe(
|
63 |
str(audio_path),
|
|
|
72 |
result_text = ""
|
73 |
for segment in segments:
|
74 |
result_text += segment.text + " "
|
75 |
+
logger.info(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
|
76 |
|
77 |
result = result_text.strip()
|
78 |
logger.info("Whisper transcription completed successfully")
|
|
|
90 |
"""
|
91 |
try:
|
92 |
from faster_whisper import WhisperModel as FasterWhisperModel
|
93 |
+
|
94 |
logger.info(f"Loading Whisper model: {model_name}")
|
95 |
logger.info(f"Using device: {self._device}, compute_type: {self._compute_type}")
|
96 |
+
|
97 |
self.model = FasterWhisperModel(
|
98 |
model_name,
|
99 |
device=self._device,
|
100 |
compute_type=self._compute_type
|
101 |
)
|
102 |
+
|
103 |
logger.info(f"Whisper model {model_name} loaded successfully")
|
104 |
+
|
105 |
except ImportError as e:
|
106 |
raise SpeechRecognitionException(
|
107 |
"faster-whisper not available. Please install with: pip install faster-whisper"
|
|
|
134 |
"tiny",
|
135 |
"tiny.en",
|
136 |
"base",
|
137 |
+
"base.en",
|
138 |
"small",
|
139 |
"small.en",
|
140 |
"medium",
|
src/infrastructure/translation/nllb_provider.py
CHANGED
@@ -430,7 +430,7 @@ class NLLBTranslationProvider(TranslationProviderBase):
|
|
430 |
# For simplicity, assume all languages can translate to all other languages
|
431 |
# In practice, you might want to be more specific about supported pairs
|
432 |
supported_languages[lang_code] = [
|
433 |
-
target for target in self.LANGUAGE_MAPPINGS.keys()
|
434 |
if target != lang_code
|
435 |
]
|
436 |
|
@@ -465,7 +465,7 @@ class NLLBTranslationProvider(TranslationProviderBase):
|
|
465 |
source_nllb = self._map_language_code(source_language)
|
466 |
target_nllb = self._map_language_code(target_language)
|
467 |
|
468 |
-
logger.
|
469 |
|
470 |
# Tokenize with source language specification
|
471 |
inputs = self._tokenizer(
|
@@ -490,7 +490,7 @@ class NLLBTranslationProvider(TranslationProviderBase):
|
|
490 |
# Post-process the translation
|
491 |
translated = self._postprocess_text(translated)
|
492 |
|
493 |
-
logger.
|
494 |
return translated
|
495 |
|
496 |
except Exception as e:
|
|
|
430 |
# For simplicity, assume all languages can translate to all other languages
|
431 |
# In practice, you might want to be more specific about supported pairs
|
432 |
supported_languages[lang_code] = [
|
433 |
+
target for target in self.LANGUAGE_MAPPINGS.keys()
|
434 |
if target != lang_code
|
435 |
]
|
436 |
|
|
|
465 |
source_nllb = self._map_language_code(source_language)
|
466 |
target_nllb = self._map_language_code(target_language)
|
467 |
|
468 |
+
logger.info(f"Translating chunk from {source_nllb} to {target_nllb}")
|
469 |
|
470 |
# Tokenize with source language specification
|
471 |
inputs = self._tokenizer(
|
|
|
490 |
# Post-process the translation
|
491 |
translated = self._postprocess_text(translated)
|
492 |
|
493 |
+
logger.info(f"Chunk translation completed: {len(text)} -> {len(translated)} chars")
|
494 |
return translated
|
495 |
|
496 |
except Exception as e:
|
src/infrastructure/translation/provider_factory.py
CHANGED
@@ -67,7 +67,7 @@ class TranslationProviderFactory:
|
|
67 |
|
68 |
# Return cached instance if available and requested
|
69 |
if use_cache and cache_key in self._provider_cache:
|
70 |
-
logger.
|
71 |
return self._provider_cache[cache_key]
|
72 |
|
73 |
# Check if provider type is registered
|
@@ -86,7 +86,7 @@ class TranslationProviderFactory:
|
|
86 |
final_config.update(config)
|
87 |
|
88 |
logger.info(f"Creating {provider_type.value} translation provider")
|
89 |
-
logger.
|
90 |
|
91 |
# Create provider instance
|
92 |
provider = provider_class(**final_config)
|
@@ -258,7 +258,7 @@ class TranslationProviderFactory:
|
|
258 |
# Cache the result
|
259 |
self._availability_cache[provider_type] = is_available
|
260 |
|
261 |
-
logger.
|
262 |
return is_available
|
263 |
|
264 |
except Exception as e:
|
|
|
67 |
|
68 |
# Return cached instance if available and requested
|
69 |
if use_cache and cache_key in self._provider_cache:
|
70 |
+
logger.info(f"Returning cached {provider_type.value} provider")
|
71 |
return self._provider_cache[cache_key]
|
72 |
|
73 |
# Check if provider type is registered
|
|
|
86 |
final_config.update(config)
|
87 |
|
88 |
logger.info(f"Creating {provider_type.value} translation provider")
|
89 |
+
logger.info(f"Provider config: {final_config}")
|
90 |
|
91 |
# Create provider instance
|
92 |
provider = provider_class(**final_config)
|
|
|
258 |
# Cache the result
|
259 |
self._availability_cache[provider_type] = is_available
|
260 |
|
261 |
+
logger.info(f"Provider {provider_type.value} availability: {is_available}")
|
262 |
return is_available
|
263 |
|
264 |
except Exception as e:
|
src/infrastructure/tts/dia_provider.py
CHANGED
@@ -19,19 +19,70 @@ DIA_AVAILABLE = False
|
|
19 |
DEFAULT_SAMPLE_RATE = 24000
|
20 |
|
21 |
# Try to import Dia dependencies
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
logger.info("Dia TTS
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
logger.
|
32 |
-
|
33 |
-
logger.
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
|
37 |
class DiaTTSProvider(TTSProviderBase):
|
@@ -48,26 +99,58 @@ class DiaTTSProvider(TTSProviderBase):
|
|
48 |
|
49 |
def _ensure_model(self):
|
50 |
"""Ensure the model is loaded."""
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
logger.
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
def is_available(self) -> bool:
|
69 |
"""Check if Dia TTS is available."""
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
def get_available_voices(self) -> list[str]:
|
73 |
"""Get available voices for Dia."""
|
@@ -81,7 +164,7 @@ class DiaTTSProvider(TTSProviderBase):
|
|
81 |
|
82 |
try:
|
83 |
import torch
|
84 |
-
|
85 |
# Extract parameters from request
|
86 |
text = request.text_content.text
|
87 |
|
@@ -120,7 +203,7 @@ class DiaTTSProvider(TTSProviderBase):
|
|
120 |
|
121 |
try:
|
122 |
import torch
|
123 |
-
|
124 |
# Extract parameters from request
|
125 |
text = request.text_content.text
|
126 |
|
@@ -158,13 +241,13 @@ class DiaTTSProvider(TTSProviderBase):
|
|
158 |
try:
|
159 |
# Create an in-memory buffer
|
160 |
buffer = io.BytesIO()
|
161 |
-
|
162 |
# Write audio data to buffer as WAV
|
163 |
sf.write(buffer, audio_array, sample_rate, format='WAV')
|
164 |
-
|
165 |
# Get bytes from buffer
|
166 |
buffer.seek(0)
|
167 |
return buffer.read()
|
168 |
-
|
169 |
except Exception as e:
|
170 |
raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e
|
|
|
19 |
DEFAULT_SAMPLE_RATE = 24000
|
20 |
|
21 |
# Try to import Dia dependencies
|
22 |
+
def _check_and_install_dia_dependencies():
|
23 |
+
"""Check and install Dia dependencies if needed."""
|
24 |
+
global DIA_AVAILABLE
|
25 |
+
|
26 |
+
logger.info("🔍 Checking Dia TTS dependencies...")
|
27 |
+
|
28 |
+
try:
|
29 |
+
logger.info("Attempting to import torch...")
|
30 |
+
import torch
|
31 |
+
logger.info("✓ Successfully imported torch")
|
32 |
+
|
33 |
+
logger.info("Attempting to import dia.model...")
|
34 |
+
from dia.model import Dia
|
35 |
+
logger.info("✓ Successfully imported dia.model")
|
36 |
+
|
37 |
+
DIA_AVAILABLE = True
|
38 |
+
logger.info("✅ Dia TTS engine is available")
|
39 |
+
return True
|
40 |
+
except ImportError as e:
|
41 |
+
logger.warning(f"⚠️ Dia TTS engine dependencies not available: {e}")
|
42 |
+
logger.info(f"ImportError details: {type(e).__name__}: {e}")
|
43 |
+
except ModuleNotFoundError as e:
|
44 |
+
if "dac" in str(e):
|
45 |
+
logger.warning("❌ Dia TTS engine is not available due to missing 'dac' module")
|
46 |
+
elif "dia" in str(e):
|
47 |
+
logger.warning("❌ Dia TTS engine is not available due to missing 'dia' module")
|
48 |
+
else:
|
49 |
+
logger.warning(f"❌ Dia TTS engine is not available: {str(e)}")
|
50 |
+
logger.info(f"ModuleNotFoundError details: {type(e).__name__}: {e}")
|
51 |
+
|
52 |
+
# Try to install missing dependencies
|
53 |
+
logger.info("🔧 Attempting to install Dia TTS dependencies...")
|
54 |
+
try:
|
55 |
+
installer = get_dependency_installer()
|
56 |
+
success, errors = installer.install_dia_dependencies()
|
57 |
+
|
58 |
+
if success:
|
59 |
+
logger.info("✅ Successfully installed Dia TTS dependencies")
|
60 |
+
# Try importing again after installation
|
61 |
+
try:
|
62 |
+
logger.info("Re-attempting import after installation...")
|
63 |
+
import torch
|
64 |
+
from dia.model import Dia
|
65 |
+
DIA_AVAILABLE = True
|
66 |
+
logger.info("🎉 Dia TTS engine is now available after installation")
|
67 |
+
return True
|
68 |
+
except Exception as e:
|
69 |
+
logger.error(f"❌ Dia TTS still not available after installation: {e}")
|
70 |
+
logger.info(f"Post-installation import error: {type(e).__name__}: {e}")
|
71 |
+
DIA_AVAILABLE = False
|
72 |
+
return False
|
73 |
+
else:
|
74 |
+
logger.error(f"❌ Failed to install Dia TTS dependencies: {errors}")
|
75 |
+
DIA_AVAILABLE = False
|
76 |
+
return False
|
77 |
+
except Exception as e:
|
78 |
+
logger.error(f"❌ Error during dependency installation: {e}")
|
79 |
+
logger.info(f"Installation error details: {type(e).__name__}: {e}")
|
80 |
+
DIA_AVAILABLE = False
|
81 |
+
return False
|
82 |
+
|
83 |
+
# Initial check
|
84 |
+
logger.info("🚀 Initializing Dia TTS provider...")
|
85 |
+
_check_and_install_dia_dependencies()
|
86 |
|
87 |
|
88 |
class DiaTTSProvider(TTSProviderBase):
|
|
|
99 |
|
100 |
def _ensure_model(self):
|
101 |
"""Ensure the model is loaded."""
|
102 |
+
global DIA_AVAILABLE
|
103 |
+
|
104 |
+
if self.model is None:
|
105 |
+
logger.info("🔄 Ensuring Dia model is loaded...")
|
106 |
+
|
107 |
+
# If Dia is not available, try to install dependencies
|
108 |
+
if not DIA_AVAILABLE:
|
109 |
+
logger.info("⚠️ Dia not available, attempting to install dependencies...")
|
110 |
+
if _check_and_install_dia_dependencies():
|
111 |
+
DIA_AVAILABLE = True
|
112 |
+
logger.info("✅ Dependencies installed, Dia is now available")
|
113 |
+
else:
|
114 |
+
logger.error("❌ Failed to install dependencies, Dia remains unavailable")
|
115 |
+
return False
|
116 |
+
|
117 |
+
if DIA_AVAILABLE:
|
118 |
+
try:
|
119 |
+
logger.info("📥 Loading Dia model from pretrained...")
|
120 |
+
import torch
|
121 |
+
from dia.model import Dia
|
122 |
+
self.model = Dia.from_pretrained()
|
123 |
+
logger.info("🎉 Dia model successfully loaded")
|
124 |
+
except ImportError as e:
|
125 |
+
logger.error(f"❌ Failed to import Dia dependencies: {str(e)}")
|
126 |
+
self.model = None
|
127 |
+
except FileNotFoundError as e:
|
128 |
+
logger.error(f"❌ Failed to load Dia model files: {str(e)}")
|
129 |
+
logger.info("ℹ️ This might be the first time loading the model. It will be downloaded automatically.")
|
130 |
+
self.model = None
|
131 |
+
except Exception as e:
|
132 |
+
logger.error(f"❌ Failed to initialize Dia model: {str(e)}")
|
133 |
+
logger.info(f"Model initialization error: {type(e).__name__}: {e}")
|
134 |
+
self.model = None
|
135 |
+
|
136 |
+
is_available = self.model is not None
|
137 |
+
logger.info(f"Model availability check result: {is_available}")
|
138 |
+
return is_available
|
139 |
|
140 |
def is_available(self) -> bool:
|
141 |
"""Check if Dia TTS is available."""
|
142 |
+
logger.info(f"🔍 Checking Dia availability: DIA_AVAILABLE={DIA_AVAILABLE}")
|
143 |
+
|
144 |
+
if not DIA_AVAILABLE:
|
145 |
+
logger.info("❌ Dia dependencies not available")
|
146 |
+
return False
|
147 |
+
|
148 |
+
model_available = self._ensure_model()
|
149 |
+
logger.info(f"🔍 Model availability: {model_available}")
|
150 |
+
|
151 |
+
result = DIA_AVAILABLE and model_available
|
152 |
+
logger.info(f"🎯 Dia TTS availability result: {result}")
|
153 |
+
return result
|
154 |
|
155 |
def get_available_voices(self) -> list[str]:
|
156 |
"""Get available voices for Dia."""
|
|
|
164 |
|
165 |
try:
|
166 |
import torch
|
167 |
+
|
168 |
# Extract parameters from request
|
169 |
text = request.text_content.text
|
170 |
|
|
|
203 |
|
204 |
try:
|
205 |
import torch
|
206 |
+
|
207 |
# Extract parameters from request
|
208 |
text = request.text_content.text
|
209 |
|
|
|
241 |
try:
|
242 |
# Create an in-memory buffer
|
243 |
buffer = io.BytesIO()
|
244 |
+
|
245 |
# Write audio data to buffer as WAV
|
246 |
sf.write(buffer, audio_array, sample_rate, format='WAV')
|
247 |
+
|
248 |
# Get bytes from buffer
|
249 |
buffer.seek(0)
|
250 |
return buffer.read()
|
251 |
+
|
252 |
except Exception as e:
|
253 |
raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e
|
src/infrastructure/tts/dummy_provider.py
CHANGED
@@ -44,14 +44,14 @@ class DummyTTSProvider(TTSProviderBase):
|
|
44 |
sample_rate = 24000
|
45 |
# Rough approximation of speech duration adjusted by speed
|
46 |
duration = min(len(text) / (20 * speed), 10)
|
47 |
-
|
48 |
# Create time array
|
49 |
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
|
50 |
-
|
51 |
# Generate sine wave (440 Hz base frequency)
|
52 |
frequency = 440
|
53 |
audio = 0.5 * np.sin(2 * np.pi * frequency * t)
|
54 |
-
|
55 |
# Add some variation based on voice setting
|
56 |
voice = request.voice_settings.voice_id
|
57 |
if voice == 'male':
|
@@ -66,7 +66,7 @@ class DummyTTSProvider(TTSProviderBase):
|
|
66 |
|
67 |
# Convert to bytes
|
68 |
audio_bytes = self._numpy_to_bytes(audio, sample_rate)
|
69 |
-
|
70 |
logger.info(f"Generated dummy audio: duration={duration:.2f}s, voice={voice}")
|
71 |
return audio_bytes, sample_rate
|
72 |
|
@@ -84,24 +84,24 @@ class DummyTTSProvider(TTSProviderBase):
|
|
84 |
sample_rate = 24000
|
85 |
chunk_duration = 1.0 # 1 second chunks
|
86 |
total_duration = min(len(text) / (20 * speed), 10)
|
87 |
-
|
88 |
chunks_count = int(np.ceil(total_duration / chunk_duration))
|
89 |
-
|
90 |
for chunk_idx in range(chunks_count):
|
91 |
start_time = chunk_idx * chunk_duration
|
92 |
end_time = min((chunk_idx + 1) * chunk_duration, total_duration)
|
93 |
actual_duration = end_time - start_time
|
94 |
-
|
95 |
if actual_duration <= 0:
|
96 |
break
|
97 |
-
|
98 |
# Create time array for this chunk
|
99 |
t = np.linspace(0, actual_duration, int(sample_rate * actual_duration), endpoint=False)
|
100 |
-
|
101 |
# Generate sine wave
|
102 |
frequency = 440
|
103 |
audio = 0.5 * np.sin(2 * np.pi * frequency * t)
|
104 |
-
|
105 |
# Apply voice variations
|
106 |
voice = request.voice_settings.voice_id
|
107 |
if voice == 'male':
|
@@ -113,10 +113,10 @@ class DummyTTSProvider(TTSProviderBase):
|
|
113 |
|
114 |
# Convert to bytes
|
115 |
audio_bytes = self._numpy_to_bytes(audio, sample_rate)
|
116 |
-
|
117 |
# Check if this is the final chunk
|
118 |
is_final = (chunk_idx == chunks_count - 1)
|
119 |
-
|
120 |
yield audio_bytes, sample_rate, is_final
|
121 |
|
122 |
except Exception as e:
|
@@ -127,13 +127,13 @@ class DummyTTSProvider(TTSProviderBase):
|
|
127 |
try:
|
128 |
# Create an in-memory buffer
|
129 |
buffer = io.BytesIO()
|
130 |
-
|
131 |
# Write audio data to buffer as WAV
|
132 |
sf.write(buffer, audio_array, sample_rate, format='WAV')
|
133 |
-
|
134 |
# Get bytes from buffer
|
135 |
buffer.seek(0)
|
136 |
return buffer.read()
|
137 |
-
|
138 |
except Exception as e:
|
139 |
raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e
|
|
|
44 |
sample_rate = 24000
|
45 |
# Rough approximation of speech duration adjusted by speed
|
46 |
duration = min(len(text) / (20 * speed), 10)
|
47 |
+
|
48 |
# Create time array
|
49 |
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
|
50 |
+
|
51 |
# Generate sine wave (440 Hz base frequency)
|
52 |
frequency = 440
|
53 |
audio = 0.5 * np.sin(2 * np.pi * frequency * t)
|
54 |
+
|
55 |
# Add some variation based on voice setting
|
56 |
voice = request.voice_settings.voice_id
|
57 |
if voice == 'male':
|
|
|
66 |
|
67 |
# Convert to bytes
|
68 |
audio_bytes = self._numpy_to_bytes(audio, sample_rate)
|
69 |
+
|
70 |
logger.info(f"Generated dummy audio: duration={duration:.2f}s, voice={voice}")
|
71 |
return audio_bytes, sample_rate
|
72 |
|
|
|
84 |
sample_rate = 24000
|
85 |
chunk_duration = 1.0 # 1 second chunks
|
86 |
total_duration = min(len(text) / (20 * speed), 10)
|
87 |
+
|
88 |
chunks_count = int(np.ceil(total_duration / chunk_duration))
|
89 |
+
|
90 |
for chunk_idx in range(chunks_count):
|
91 |
start_time = chunk_idx * chunk_duration
|
92 |
end_time = min((chunk_idx + 1) * chunk_duration, total_duration)
|
93 |
actual_duration = end_time - start_time
|
94 |
+
|
95 |
if actual_duration <= 0:
|
96 |
break
|
97 |
+
|
98 |
# Create time array for this chunk
|
99 |
t = np.linspace(0, actual_duration, int(sample_rate * actual_duration), endpoint=False)
|
100 |
+
|
101 |
# Generate sine wave
|
102 |
frequency = 440
|
103 |
audio = 0.5 * np.sin(2 * np.pi * frequency * t)
|
104 |
+
|
105 |
# Apply voice variations
|
106 |
voice = request.voice_settings.voice_id
|
107 |
if voice == 'male':
|
|
|
113 |
|
114 |
# Convert to bytes
|
115 |
audio_bytes = self._numpy_to_bytes(audio, sample_rate)
|
116 |
+
|
117 |
# Check if this is the final chunk
|
118 |
is_final = (chunk_idx == chunks_count - 1)
|
119 |
+
|
120 |
yield audio_bytes, sample_rate, is_final
|
121 |
|
122 |
except Exception as e:
|
|
|
127 |
try:
|
128 |
# Create an in-memory buffer
|
129 |
buffer = io.BytesIO()
|
130 |
+
|
131 |
# Write audio data to buffer as WAV
|
132 |
sf.write(buffer, audio_array, sample_rate, format='WAV')
|
133 |
+
|
134 |
# Get bytes from buffer
|
135 |
buffer.seek(0)
|
136 |
return buffer.read()
|
137 |
+
|
138 |
except Exception as e:
|
139 |
raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e
|
src/infrastructure/tts/kokoro_provider.py
CHANGED
@@ -77,7 +77,7 @@ class KokoroTTSProvider(TTSProviderBase):
|
|
77 |
|
78 |
# Generate speech using Kokoro
|
79 |
generator = self.pipeline(text, voice=voice, speed=speed)
|
80 |
-
|
81 |
for _, _, audio in generator:
|
82 |
# Convert numpy array to bytes
|
83 |
audio_bytes = self._numpy_to_bytes(audio, sample_rate=24000)
|
@@ -101,7 +101,7 @@ class KokoroTTSProvider(TTSProviderBase):
|
|
101 |
|
102 |
# Generate speech stream using Kokoro
|
103 |
generator = self.pipeline(text, voice=voice, speed=speed)
|
104 |
-
|
105 |
chunk_count = 0
|
106 |
for _, _, audio in generator:
|
107 |
chunk_count += 1
|
@@ -119,13 +119,13 @@ class KokoroTTSProvider(TTSProviderBase):
|
|
119 |
try:
|
120 |
# Create an in-memory buffer
|
121 |
buffer = io.BytesIO()
|
122 |
-
|
123 |
# Write audio data to buffer as WAV
|
124 |
sf.write(buffer, audio_array, sample_rate, format='WAV')
|
125 |
-
|
126 |
# Get bytes from buffer
|
127 |
buffer.seek(0)
|
128 |
return buffer.read()
|
129 |
-
|
130 |
except Exception as e:
|
131 |
raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e
|
|
|
77 |
|
78 |
# Generate speech using Kokoro
|
79 |
generator = self.pipeline(text, voice=voice, speed=speed)
|
80 |
+
|
81 |
for _, _, audio in generator:
|
82 |
# Convert numpy array to bytes
|
83 |
audio_bytes = self._numpy_to_bytes(audio, sample_rate=24000)
|
|
|
101 |
|
102 |
# Generate speech stream using Kokoro
|
103 |
generator = self.pipeline(text, voice=voice, speed=speed)
|
104 |
+
|
105 |
chunk_count = 0
|
106 |
for _, _, audio in generator:
|
107 |
chunk_count += 1
|
|
|
119 |
try:
|
120 |
# Create an in-memory buffer
|
121 |
buffer = io.BytesIO()
|
122 |
+
|
123 |
# Write audio data to buffer as WAV
|
124 |
sf.write(buffer, audio_array, sample_rate, format='WAV')
|
125 |
+
|
126 |
# Get bytes from buffer
|
127 |
buffer.seek(0)
|
128 |
return buffer.read()
|
129 |
+
|
130 |
except Exception as e:
|
131 |
raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e
|
src/infrastructure/tts/provider_factory.py
CHANGED
@@ -31,7 +31,7 @@ class TTSProviderFactory:
|
|
31 |
self._providers['kokoro'] = KokoroTTSProvider
|
32 |
logger.info("Registered Kokoro TTS provider")
|
33 |
except ImportError as e:
|
34 |
-
logger.
|
35 |
|
36 |
# Try to register Dia provider
|
37 |
try:
|
@@ -56,18 +56,23 @@ class TTSProviderFactory:
|
|
56 |
self._providers['cosyvoice2'] = CosyVoice2TTSProvider
|
57 |
logger.info("Registered CosyVoice2 TTS provider")
|
58 |
except ImportError as e:
|
59 |
-
logger.
|
60 |
|
61 |
def get_available_providers(self) -> List[str]:
|
62 |
"""Get list of available TTS providers."""
|
|
|
63 |
available = []
|
|
|
64 |
for name, provider_class in self._providers.items():
|
|
|
65 |
try:
|
66 |
# Create instance if not cached
|
67 |
if name not in self._provider_instances:
|
|
|
68 |
if name == 'kokoro':
|
69 |
self._provider_instances[name] = provider_class()
|
70 |
elif name == 'dia':
|
|
|
71 |
self._provider_instances[name] = provider_class()
|
72 |
elif name == 'cosyvoice2':
|
73 |
self._provider_instances[name] = provider_class()
|
@@ -75,12 +80,18 @@ class TTSProviderFactory:
|
|
75 |
self._provider_instances[name] = provider_class()
|
76 |
|
77 |
# Check if provider is available
|
78 |
-
|
|
|
|
|
|
|
|
|
79 |
available.append(name)
|
80 |
|
81 |
except Exception as e:
|
82 |
-
logger.warning(f"Failed to check availability of {name} provider: {e}")
|
|
|
83 |
|
|
|
84 |
return available
|
85 |
|
86 |
def create_provider(self, provider_name: str, **kwargs) -> TTSProviderBase:
|
@@ -147,16 +158,23 @@ class TTSProviderFactory:
|
|
147 |
if preferred_providers is None:
|
148 |
preferred_providers = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
|
149 |
|
|
|
150 |
available_providers = self.get_available_providers()
|
151 |
|
152 |
# Try preferred providers in order
|
153 |
for provider_name in preferred_providers:
|
|
|
154 |
if provider_name in available_providers:
|
|
|
155 |
try:
|
156 |
-
|
|
|
|
|
157 |
except Exception as e:
|
158 |
-
logger.warning(f"Failed to create preferred provider {provider_name}: {e}")
|
159 |
continue
|
|
|
|
|
160 |
|
161 |
# If no preferred providers work, try any available provider
|
162 |
for provider_name in available_providers:
|
|
|
31 |
self._providers['kokoro'] = KokoroTTSProvider
|
32 |
logger.info("Registered Kokoro TTS provider")
|
33 |
except ImportError as e:
|
34 |
+
logger.info(f"Kokoro TTS provider not available: {e}")
|
35 |
|
36 |
# Try to register Dia provider
|
37 |
try:
|
|
|
56 |
self._providers['cosyvoice2'] = CosyVoice2TTSProvider
|
57 |
logger.info("Registered CosyVoice2 TTS provider")
|
58 |
except ImportError as e:
|
59 |
+
logger.info(f"CosyVoice2 TTS provider not available: {e}")
|
60 |
|
61 |
def get_available_providers(self) -> List[str]:
|
62 |
"""Get list of available TTS providers."""
|
63 |
+
logger.info("🔍 Checking availability of TTS providers...")
|
64 |
available = []
|
65 |
+
|
66 |
for name, provider_class in self._providers.items():
|
67 |
+
logger.info(f"Checking provider: {name}")
|
68 |
try:
|
69 |
# Create instance if not cached
|
70 |
if name not in self._provider_instances:
|
71 |
+
logger.info(f"Creating instance for {name} provider")
|
72 |
if name == 'kokoro':
|
73 |
self._provider_instances[name] = provider_class()
|
74 |
elif name == 'dia':
|
75 |
+
logger.info(f"🔧 Creating Dia TTS provider instance...")
|
76 |
self._provider_instances[name] = provider_class()
|
77 |
elif name == 'cosyvoice2':
|
78 |
self._provider_instances[name] = provider_class()
|
|
|
80 |
self._provider_instances[name] = provider_class()
|
81 |
|
82 |
# Check if provider is available
|
83 |
+
logger.info(f"Checking availability for {name}")
|
84 |
+
is_available = self._provider_instances[name].is_available()
|
85 |
+
logger.info(f"Provider {name} availability: {'✅ Available' if is_available else '❌ Not Available'}")
|
86 |
+
|
87 |
+
if is_available:
|
88 |
available.append(name)
|
89 |
|
90 |
except Exception as e:
|
91 |
+
logger.warning(f"❌ Failed to check availability of {name} provider: {e}")
|
92 |
+
logger.info(f"Provider check error details: {type(e).__name__}: {e}")
|
93 |
|
94 |
+
logger.info(f"📋 Available TTS providers: {available}")
|
95 |
return available
|
96 |
|
97 |
def create_provider(self, provider_name: str, **kwargs) -> TTSProviderBase:
|
|
|
158 |
if preferred_providers is None:
|
159 |
preferred_providers = ['kokoro', 'dia', 'cosyvoice2', 'dummy']
|
160 |
|
161 |
+
logger.info(f"🔄 Getting TTS provider with fallback, preferred order: {preferred_providers}")
|
162 |
available_providers = self.get_available_providers()
|
163 |
|
164 |
# Try preferred providers in order
|
165 |
for provider_name in preferred_providers:
|
166 |
+
logger.info(f"🔍 Trying preferred provider: {provider_name}")
|
167 |
if provider_name in available_providers:
|
168 |
+
logger.info(f"✅ Provider {provider_name} is available, attempting to create...")
|
169 |
try:
|
170 |
+
provider = self.create_provider(provider_name, **kwargs)
|
171 |
+
logger.info(f"🎉 Successfully created provider: {provider_name}")
|
172 |
+
return provider
|
173 |
except Exception as e:
|
174 |
+
logger.warning(f"❌ Failed to create preferred provider {provider_name}: {e}")
|
175 |
continue
|
176 |
+
else:
|
177 |
+
logger.info(f"❌ Provider {provider_name} is not in available providers list")
|
178 |
|
179 |
# If no preferred providers work, try any available provider
|
180 |
for provider_name in available_providers:
|
src/infrastructure/utils/dependency_installer.py
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Automatic dependency installer for TTS providers."""
|
2 |
+
|
3 |
+
import logging
|
4 |
+
import subprocess
|
5 |
+
import sys
|
6 |
+
import importlib
|
7 |
+
from typing import List, Dict, Optional, Tuple
|
8 |
+
import os
|
9 |
+
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
|
13 |
+
class DependencyInstaller:
    """Utility class for automatically installing missing dependencies."""

    def __init__(self):
        """Initialize the dependency installer."""
        # Names of packages successfully installed during this interpreter
        # session; used to short-circuit repeat install requests.
        self.installed_packages = set()

    def check_module_available(self, module_name: str) -> bool:
        """
        Check if a module is available for import.

        Args:
            module_name: Name of the module to check

        Returns:
            bool: True if module is available, False otherwise
        """
        try:
            importlib.import_module(module_name)
            return True
        except ImportError:
            return False

    def install_package(self, package_name: str, upgrade: bool = False) -> bool:
        """
        Install a package using pip.

        Args:
            package_name: Name of the package to install
            upgrade: Whether to upgrade if already installed

        Returns:
            bool: True if installation succeeded, False otherwise
        """
        if package_name in self.installed_packages:
            logger.info(f"Package {package_name} already installed in this session")
            return True

        try:
            # Run pip through the current interpreter so the package lands in
            # the same environment this process is using.
            cmd = [sys.executable, "-m", "pip", "install"]
            if upgrade:
                cmd.append("--upgrade")
            cmd.append(package_name)

            logger.info(f"Installing package: {package_name}")
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300  # 5 minute timeout
            )

            if result.returncode == 0:
                logger.info(f"Successfully installed {package_name}")
                self.installed_packages.add(package_name)
                return True
            else:
                logger.error(f"Failed to install {package_name}: {result.stderr}")
                return False

        except subprocess.TimeoutExpired:
            logger.error(f"Installation of {package_name} timed out")
            return False
        except Exception as e:
            logger.error(f"Error installing {package_name}: {e}")
            return False

    def install_from_git(self, git_url: str, package_name: Optional[str] = None) -> bool:
        """
        Install a package from a git repository.

        Args:
            git_url: Git repository URL
            package_name: Optional package name for tracking

        Returns:
            bool: True if installation succeeded, False otherwise
        """
        # Derive a tracking name from the repo URL when none is supplied.
        package_name = package_name or git_url.split('/')[-1].replace('.git', '')

        if package_name in self.installed_packages:
            logger.info(f"Package {package_name} already installed in this session")
            return True

        try:
            cmd = [sys.executable, "-m", "pip", "install", f"git+{git_url}"]

            logger.info(f"Installing package from git: {git_url}")
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=600  # 10 minute timeout for git installs
            )

            if result.returncode == 0:
                logger.info(f"Successfully installed {package_name} from git")
                self.installed_packages.add(package_name)
                return True
            else:
                logger.error(f"Failed to install {package_name} from git: {result.stderr}")
                return False

        except subprocess.TimeoutExpired:
            logger.error(f"Git installation of {package_name} timed out")
            return False
        except Exception as e:
            logger.error(f"Error installing {package_name} from git: {e}")
            return False

    def install_dia_dependencies(self) -> Tuple[bool, List[str]]:
        """
        Install all dependencies required for Dia TTS.

        Returns:
            Tuple[bool, List[str]]: (success, list of error messages)
        """
        errors = []

        # Check if Dia is already available
        if self.check_module_available("dia"):
            logger.info("Dia TTS is already available")
            return True, []

        # Install Dia TTS from git - this will automatically install all dependencies
        # including descript-audio-codec as specified in pyproject.toml
        logger.info("Installing Dia TTS and all dependencies from GitHub")
        if self.install_from_git("https://github.com/nari-labs/dia.git", "dia"):
            logger.info("Successfully installed Dia TTS and dependencies")
            return True, []
        else:
            errors.append("Failed to install Dia TTS from git")

        # Fallback: try installing individual dependencies if git install fails
        logger.info("Git install failed, trying individual dependencies...")
        dependencies = [
            ("torch", "torch"),
            ("transformers", "transformers"),
            ("accelerate", "accelerate"),
            ("soundfile", "soundfile"),
            ("dac", "descript-audio-codec"),
        ]

        success = True
        for module_name, package_name in dependencies:
            if not self.check_module_available(module_name):
                logger.info(f"Installing missing dependency: {package_name}")
                if not self.install_package(package_name):
                    errors.append(f"Failed to install {package_name}")
                    success = False

        # Try installing Dia again after dependencies
        if success and not self.check_module_available("dia"):
            if self.install_from_git("https://github.com/nari-labs/dia.git", "dia"):
                return True, []
            else:
                errors.append("Failed to install Dia TTS after installing dependencies")

        # NOTE(review): reports success only when the fallback deps all
        # installed and the sole recorded error is the initial git failure
        # (i.e. "dia" became importable without a reinstall) -- confirm this
        # is the intended contract.
        return success and len(errors) == 1, errors  # Only the initial git error if dependencies succeeded

    def install_dependencies_for_provider(self, provider_name: str) -> Tuple[bool, List[str]]:
        """
        Install dependencies for a specific TTS provider.

        Args:
            provider_name: Name of the TTS provider

        Returns:
            Tuple[bool, List[str]]: (success, list of error messages)
        """
        if provider_name.lower() == "dia":
            return self.install_dia_dependencies()
        else:
            return False, [f"Unknown provider: {provider_name}"]

    def verify_installation(self, module_name: str) -> bool:
        """
        Verify that a module was installed correctly.

        Args:
            module_name: Name of the module to verify

        Returns:
            bool: True if module can be imported, False otherwise
        """
        try:
            # Clear import cache to ensure fresh import
            # NOTE(review): this drops only the top-level sys.modules entry;
            # previously imported submodules remain cached.
            if module_name in sys.modules:
                del sys.modules[module_name]

            importlib.import_module(module_name)
            logger.info(f"Successfully verified installation of {module_name}")
            return True
        except ImportError as e:
            logger.error(f"Failed to verify installation of {module_name}: {e}")
            return False

    def get_installation_status(self) -> Dict[str, bool]:
        """
        Get the installation status of key dependencies.

        Returns:
            Dict[str, bool]: Dictionary mapping module names to availability status
        """
        modules_to_check = [
            "torch",
            "transformers",
            "accelerate",
            "soundfile",
            "numpy",
            "dac",
            "dia"
        ]

        status = {}
        for module in modules_to_check:
            status[module] = self.check_module_available(module)

        return status

    def install_with_retry(self, package_name: str, max_retries: int = 3) -> bool:
        """
        Install a package with retry logic.

        Args:
            package_name: Name of the package to install
            max_retries: Maximum number of retry attempts

        Returns:
            bool: True if installation succeeded, False otherwise
        """
        for attempt in range(max_retries):
            if self.install_package(package_name):
                return True

            if attempt < max_retries - 1:
                logger.warning(f"Installation attempt {attempt + 1} failed for {package_name}, retrying...")
            else:
                logger.error(f"All {max_retries} installation attempts failed for {package_name}")

        return False


# Global instance for reuse
_dependency_installer = None


def get_dependency_installer() -> DependencyInstaller:
    """
    Get a global dependency installer instance.

    Returns:
        DependencyInstaller: Global dependency installer instance
    """
    global _dependency_installer
    # Lazily create the process-wide singleton on first use.
    if _dependency_installer is None:
        _dependency_installer = DependencyInstaller()
    return _dependency_installer


def install_dia_dependencies() -> Tuple[bool, List[str]]:
    """
    Convenience function to install Dia TTS dependencies.

    Returns:
        Tuple[bool, List[str]]: (success, list of error messages)
    """
    installer = get_dependency_installer()
    return installer.install_dia_dependencies()


def check_and_install_module(module_name: str, package_name: Optional[str] = None) -> bool:
    """
    Check if a module is available and install it if not.

    Args:
        module_name: Name of the module to check
        package_name: Name of the package to install (defaults to module_name)

    Returns:
        bool: True if module is available after check/install, False otherwise
    """
    installer = get_dependency_installer()

    if installer.check_module_available(module_name):
        return True

    package_name = package_name or module_name
    if installer.install_package(package_name):
        # Only report success if the module actually imports after install.
        return installer.verify_installation(module_name)

    return False
|
tests/unit/application/error_handling/test_structured_logger.py
CHANGED
@@ -60,7 +60,7 @@ class TestStructuredLogger:
|
|
60 |
context = LogContext(correlation_id="test-123", operation="test_op")
|
61 |
|
62 |
with patch.object(self.logger.logger, 'debug') as mock_debug:
|
63 |
-
self.logger.
|
64 |
|
65 |
mock_debug.assert_called_once()
|
66 |
args, kwargs = mock_debug.call_args
|
|
|
60 |
context = LogContext(correlation_id="test-123", operation="test_op")
|
61 |
|
62 |
with patch.object(self.logger.logger, 'debug') as mock_debug:
|
63 |
+
self.logger.info("Test debug message", context=context)
|
64 |
|
65 |
mock_debug.assert_called_once()
|
66 |
args, kwargs = mock_debug.call_args
|
utils/stt.py
CHANGED
@@ -16,17 +16,17 @@ from pydub import AudioSegment
|
|
16 |
|
17 |
class ASRModel(ABC):
|
18 |
"""Base class for ASR models"""
|
19 |
-
|
20 |
@abstractmethod
|
21 |
def load_model(self):
|
22 |
"""Load the ASR model"""
|
23 |
pass
|
24 |
-
|
25 |
@abstractmethod
|
26 |
def transcribe(self, audio_path):
|
27 |
"""Transcribe audio to text"""
|
28 |
pass
|
29 |
-
|
30 |
def preprocess_audio(self, audio_path):
|
31 |
"""Convert audio to required format"""
|
32 |
logger.info("Converting audio format")
|
@@ -42,7 +42,7 @@ class ASRModel(ABC):
|
|
42 |
|
43 |
class WhisperModel(ASRModel):
|
44 |
"""Faster Whisper ASR model implementation"""
|
45 |
-
|
46 |
def __init__(self):
|
47 |
self.model = None
|
48 |
# Check for CUDA availability without torch dependency
|
@@ -53,13 +53,13 @@ class WhisperModel(ASRModel):
|
|
53 |
# Fallback to CPU if torch is not available
|
54 |
self.device = "cpu"
|
55 |
self.compute_type = "float16" if self.device == "cuda" else "int8"
|
56 |
-
|
57 |
def load_model(self):
|
58 |
"""Load Faster Whisper model"""
|
59 |
logger.info("Loading Faster Whisper model")
|
60 |
logger.info(f"Using device: {self.device}")
|
61 |
logger.info(f"Using compute type: {self.compute_type}")
|
62 |
-
|
63 |
# Use large-v3 model with appropriate compute type based on device
|
64 |
self.model = FasterWhisperModel(
|
65 |
"large-v3",
|
@@ -67,14 +67,14 @@ class WhisperModel(ASRModel):
|
|
67 |
compute_type=self.compute_type
|
68 |
)
|
69 |
logger.info("Faster Whisper model loaded successfully")
|
70 |
-
|
71 |
def transcribe(self, audio_path):
|
72 |
"""Transcribe audio using Faster Whisper"""
|
73 |
if self.model is None:
|
74 |
self.load_model()
|
75 |
-
|
76 |
wav_path = self.preprocess_audio(audio_path)
|
77 |
-
|
78 |
# Transcription with Faster Whisper
|
79 |
logger.info("Generating transcription with Faster Whisper")
|
80 |
segments, info = self.model.transcribe(
|
@@ -83,15 +83,15 @@ class WhisperModel(ASRModel):
|
|
83 |
language="en",
|
84 |
task="transcribe"
|
85 |
)
|
86 |
-
|
87 |
logger.info(f"Detected language '{info.language}' with probability {info.language_probability}")
|
88 |
-
|
89 |
# Collect all segments into a single text
|
90 |
result_text = ""
|
91 |
for segment in segments:
|
92 |
result_text += segment.text + " "
|
93 |
-
logger.
|
94 |
-
|
95 |
result = result_text.strip()
|
96 |
logger.info(f"Transcription completed successfully")
|
97 |
return result
|
@@ -99,10 +99,10 @@ class WhisperModel(ASRModel):
|
|
99 |
|
100 |
class ParakeetModel(ASRModel):
|
101 |
"""Parakeet ASR model implementation"""
|
102 |
-
|
103 |
def __init__(self):
|
104 |
self.model = None
|
105 |
-
|
106 |
def load_model(self):
|
107 |
"""Load Parakeet model"""
|
108 |
try:
|
@@ -113,14 +113,14 @@ class ParakeetModel(ASRModel):
|
|
113 |
except ImportError:
|
114 |
logger.error("Failed to import nemo_toolkit. Please install with: pip install -U 'nemo_toolkit[asr]'")
|
115 |
raise
|
116 |
-
|
117 |
def transcribe(self, audio_path):
|
118 |
"""Transcribe audio using Parakeet"""
|
119 |
if self.model is None:
|
120 |
self.load_model()
|
121 |
-
|
122 |
wav_path = self.preprocess_audio(audio_path)
|
123 |
-
|
124 |
# Transcription
|
125 |
logger.info("Generating transcription with Parakeet")
|
126 |
output = self.model.transcribe([wav_path])
|
@@ -131,7 +131,7 @@ class ParakeetModel(ASRModel):
|
|
131 |
|
132 |
class ASRFactory:
|
133 |
"""Factory for creating ASR model instances"""
|
134 |
-
|
135 |
@staticmethod
|
136 |
def get_model(model_name="parakeet"):
|
137 |
"""
|
@@ -160,11 +160,11 @@ def transcribe_audio(audio_path, model_name="parakeet"):
|
|
160 |
Transcribed English text
|
161 |
"""
|
162 |
logger.info(f"Starting transcription for: {audio_path} using {model_name} model")
|
163 |
-
|
164 |
try:
|
165 |
# Get the appropriate model
|
166 |
asr_model = ASRFactory.get_model(model_name)
|
167 |
-
|
168 |
# Transcribe audio
|
169 |
result = asr_model.transcribe(audio_path)
|
170 |
logger.info(f"transcription: %s" % result)
|
|
|
16 |
|
17 |
class ASRModel(ABC):
|
18 |
"""Base class for ASR models"""
|
19 |
+
|
20 |
@abstractmethod
|
21 |
def load_model(self):
|
22 |
"""Load the ASR model"""
|
23 |
pass
|
24 |
+
|
25 |
@abstractmethod
|
26 |
def transcribe(self, audio_path):
|
27 |
"""Transcribe audio to text"""
|
28 |
pass
|
29 |
+
|
30 |
def preprocess_audio(self, audio_path):
|
31 |
"""Convert audio to required format"""
|
32 |
logger.info("Converting audio format")
|
|
|
42 |
|
43 |
class WhisperModel(ASRModel):
|
44 |
"""Faster Whisper ASR model implementation"""
|
45 |
+
|
46 |
def __init__(self):
|
47 |
self.model = None
|
48 |
# Check for CUDA availability without torch dependency
|
|
|
53 |
# Fallback to CPU if torch is not available
|
54 |
self.device = "cpu"
|
55 |
self.compute_type = "float16" if self.device == "cuda" else "int8"
|
56 |
+
|
57 |
def load_model(self):
|
58 |
"""Load Faster Whisper model"""
|
59 |
logger.info("Loading Faster Whisper model")
|
60 |
logger.info(f"Using device: {self.device}")
|
61 |
logger.info(f"Using compute type: {self.compute_type}")
|
62 |
+
|
63 |
# Use large-v3 model with appropriate compute type based on device
|
64 |
self.model = FasterWhisperModel(
|
65 |
"large-v3",
|
|
|
67 |
compute_type=self.compute_type
|
68 |
)
|
69 |
logger.info("Faster Whisper model loaded successfully")
|
70 |
+
|
71 |
def transcribe(self, audio_path):
|
72 |
"""Transcribe audio using Faster Whisper"""
|
73 |
if self.model is None:
|
74 |
self.load_model()
|
75 |
+
|
76 |
wav_path = self.preprocess_audio(audio_path)
|
77 |
+
|
78 |
# Transcription with Faster Whisper
|
79 |
logger.info("Generating transcription with Faster Whisper")
|
80 |
segments, info = self.model.transcribe(
|
|
|
83 |
language="en",
|
84 |
task="transcribe"
|
85 |
)
|
86 |
+
|
87 |
logger.info(f"Detected language '{info.language}' with probability {info.language_probability}")
|
88 |
+
|
89 |
# Collect all segments into a single text
|
90 |
result_text = ""
|
91 |
for segment in segments:
|
92 |
result_text += segment.text + " "
|
93 |
+
logger.info(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
|
94 |
+
|
95 |
result = result_text.strip()
|
96 |
logger.info(f"Transcription completed successfully")
|
97 |
return result
|
|
|
99 |
|
100 |
class ParakeetModel(ASRModel):
|
101 |
"""Parakeet ASR model implementation"""
|
102 |
+
|
103 |
def __init__(self):
|
104 |
self.model = None
|
105 |
+
|
106 |
def load_model(self):
|
107 |
"""Load Parakeet model"""
|
108 |
try:
|
|
|
113 |
except ImportError:
|
114 |
logger.error("Failed to import nemo_toolkit. Please install with: pip install -U 'nemo_toolkit[asr]'")
|
115 |
raise
|
116 |
+
|
117 |
def transcribe(self, audio_path):
|
118 |
"""Transcribe audio using Parakeet"""
|
119 |
if self.model is None:
|
120 |
self.load_model()
|
121 |
+
|
122 |
wav_path = self.preprocess_audio(audio_path)
|
123 |
+
|
124 |
# Transcription
|
125 |
logger.info("Generating transcription with Parakeet")
|
126 |
output = self.model.transcribe([wav_path])
|
|
|
131 |
|
132 |
class ASRFactory:
|
133 |
"""Factory for creating ASR model instances"""
|
134 |
+
|
135 |
@staticmethod
|
136 |
def get_model(model_name="parakeet"):
|
137 |
"""
|
|
|
160 |
Transcribed English text
|
161 |
"""
|
162 |
logger.info(f"Starting transcription for: {audio_path} using {model_name} model")
|
163 |
+
|
164 |
try:
|
165 |
# Get the appropriate model
|
166 |
asr_model = ASRFactory.get_model(model_name)
|
167 |
+
|
168 |
# Transcribe audio
|
169 |
result = asr_model.transcribe(audio_path)
|
170 |
logger.info(f"transcription: %s" % result)
|
utils/translation.py
CHANGED
@@ -17,7 +17,7 @@ def translate_text(text):
|
|
17 |
Translated Chinese text
|
18 |
"""
|
19 |
logger.info(f"Starting translation for text length: {len(text)}")
|
20 |
-
|
21 |
try:
|
22 |
# Model initialization with explicit language codes
|
23 |
logger.info("Loading NLLB model")
|
@@ -36,7 +36,7 @@ def translate_text(text):
|
|
36 |
translated_chunks = []
|
37 |
for i, chunk in enumerate(text_chunks):
|
38 |
logger.info(f"Processing chunk {i+1}/{len(text_chunks)}")
|
39 |
-
|
40 |
# Tokenize with source language specification
|
41 |
inputs = tokenizer(
|
42 |
chunk,
|
@@ -44,14 +44,14 @@ def translate_text(text):
|
|
44 |
max_length=1024,
|
45 |
truncation=True
|
46 |
)
|
47 |
-
|
48 |
# Generate translation with target language specification
|
49 |
outputs = model.generate(
|
50 |
**inputs,
|
51 |
forced_bos_token_id=tokenizer.convert_tokens_to_ids("zho_Hans"),
|
52 |
max_new_tokens=1024
|
53 |
)
|
54 |
-
|
55 |
translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
56 |
translated_chunks.append(translated)
|
57 |
logger.info(f"Chunk {i+1} translated successfully")
|
|
|
17 |
Translated Chinese text
|
18 |
"""
|
19 |
logger.info(f"Starting translation for text length: {len(text)}")
|
20 |
+
|
21 |
try:
|
22 |
# Model initialization with explicit language codes
|
23 |
logger.info("Loading NLLB model")
|
|
|
36 |
translated_chunks = []
|
37 |
for i, chunk in enumerate(text_chunks):
|
38 |
logger.info(f"Processing chunk {i+1}/{len(text_chunks)}")
|
39 |
+
|
40 |
# Tokenize with source language specification
|
41 |
inputs = tokenizer(
|
42 |
chunk,
|
|
|
44 |
max_length=1024,
|
45 |
truncation=True
|
46 |
)
|
47 |
+
|
48 |
# Generate translation with target language specification
|
49 |
outputs = model.generate(
|
50 |
**inputs,
|
51 |
forced_bos_token_id=tokenizer.convert_tokens_to_ids("zho_Hans"),
|
52 |
max_new_tokens=1024
|
53 |
)
|
54 |
+
|
55 |
translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
56 |
translated_chunks.append(translated)
|
57 |
logger.info(f"Chunk {i+1} translated successfully")
|
utils/tts.py
CHANGED
@@ -17,42 +17,42 @@ logger = logging.getLogger(__name__)
|
|
17 |
|
18 |
def get_available_engines() -> List[str]:
|
19 |
"""Get a list of available TTS engines
|
20 |
-
|
21 |
Returns:
|
22 |
List[str]: List of available engine names
|
23 |
"""
|
24 |
available = []
|
25 |
-
|
26 |
if KOKORO_AVAILABLE:
|
27 |
available.append('kokoro')
|
28 |
-
|
29 |
if DIA_AVAILABLE:
|
30 |
available.append('dia')
|
31 |
-
|
32 |
if COSYVOICE2_AVAILABLE:
|
33 |
available.append('cosyvoice2')
|
34 |
-
|
35 |
# Dummy is always available
|
36 |
available.append('dummy')
|
37 |
-
|
38 |
return available
|
39 |
|
40 |
|
41 |
def get_tts_engine(engine_type: Optional[str] = None, lang_code: str = 'z') -> TTSBase:
|
42 |
"""Get a TTS engine instance
|
43 |
-
|
44 |
Args:
|
45 |
engine_type (str, optional): Type of engine to create ('kokoro', 'dia', 'cosyvoice2', 'dummy')
|
46 |
If None, the best available engine will be used
|
47 |
lang_code (str): Language code for the engine
|
48 |
-
|
49 |
Returns:
|
50 |
TTSBase: An instance of a TTS engine
|
51 |
"""
|
52 |
# Get available engines
|
53 |
available_engines = get_available_engines()
|
54 |
logger.info(f"Available TTS engines: {available_engines}")
|
55 |
-
|
56 |
# If engine_type is specified, try to create that specific engine
|
57 |
if engine_type is not None:
|
58 |
if engine_type == 'kokoro' and KOKORO_AVAILABLE:
|
@@ -69,7 +69,7 @@ def get_tts_engine(engine_type: Optional[str] = None, lang_code: str = 'z') -> T
|
|
69 |
return DummyTTS(lang_code)
|
70 |
else:
|
71 |
logger.warning(f"Requested engine '{engine_type}' is not available")
|
72 |
-
|
73 |
# If no specific engine is requested or the requested engine is not available,
|
74 |
# use the best available engine based on priority
|
75 |
priority_order = ['cosyvoice2', 'kokoro', 'dia', 'dummy']
|
@@ -84,23 +84,23 @@ def get_tts_engine(engine_type: Optional[str] = None, lang_code: str = 'z') -> T
|
|
84 |
return CosyVoice2TTS(lang_code)
|
85 |
elif engine == 'dummy':
|
86 |
return DummyTTS(lang_code)
|
87 |
-
|
88 |
# Fallback to dummy engine if no engines are available
|
89 |
logger.warning("No TTS engines available, falling back to dummy engine")
|
90 |
return DummyTTS(lang_code)
|
91 |
|
92 |
|
93 |
-
def generate_speech(text: str, engine_type: Optional[str] = None, lang_code: str = 'z',
|
94 |
voice: str = 'default', speed: float = 1.0) -> Optional[str]:
|
95 |
"""Generate speech using the specified or best available TTS engine
|
96 |
-
|
97 |
Args:
|
98 |
text (str): Input text to synthesize
|
99 |
engine_type (str, optional): Type of engine to use
|
100 |
lang_code (str): Language code
|
101 |
voice (str): Voice ID to use
|
102 |
speed (float): Speech speed multiplier
|
103 |
-
|
104 |
Returns:
|
105 |
Optional[str]: Path to the generated audio file or None if generation fails
|
106 |
"""
|
@@ -111,14 +111,14 @@ def generate_speech(text: str, engine_type: Optional[str] = None, lang_code: str
|
|
111 |
def generate_speech_stream(text: str, engine_type: Optional[str] = None, lang_code: str = 'z',
|
112 |
voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
|
113 |
"""Generate speech stream using the specified or best available TTS engine
|
114 |
-
|
115 |
Args:
|
116 |
text (str): Input text to synthesize
|
117 |
engine_type (str, optional): Type of engine to use
|
118 |
lang_code (str): Language code
|
119 |
voice (str): Voice ID to use
|
120 |
speed (float): Speech speed multiplier
|
121 |
-
|
122 |
Yields:
|
123 |
tuple: (sample_rate, audio_data) pairs for each segment
|
124 |
"""
|
|
|
17 |
|
18 |
def get_available_engines() -> List[str]:
|
19 |
"""Get a list of available TTS engines
|
20 |
+
|
21 |
Returns:
|
22 |
List[str]: List of available engine names
|
23 |
"""
|
24 |
available = []
|
25 |
+
|
26 |
if KOKORO_AVAILABLE:
|
27 |
available.append('kokoro')
|
28 |
+
|
29 |
if DIA_AVAILABLE:
|
30 |
available.append('dia')
|
31 |
+
|
32 |
if COSYVOICE2_AVAILABLE:
|
33 |
available.append('cosyvoice2')
|
34 |
+
|
35 |
# Dummy is always available
|
36 |
available.append('dummy')
|
37 |
+
|
38 |
return available
|
39 |
|
40 |
|
41 |
def get_tts_engine(engine_type: Optional[str] = None, lang_code: str = 'z') -> TTSBase:
|
42 |
"""Get a TTS engine instance
|
43 |
+
|
44 |
Args:
|
45 |
engine_type (str, optional): Type of engine to create ('kokoro', 'dia', 'cosyvoice2', 'dummy')
|
46 |
If None, the best available engine will be used
|
47 |
lang_code (str): Language code for the engine
|
48 |
+
|
49 |
Returns:
|
50 |
TTSBase: An instance of a TTS engine
|
51 |
"""
|
52 |
# Get available engines
|
53 |
available_engines = get_available_engines()
|
54 |
logger.info(f"Available TTS engines: {available_engines}")
|
55 |
+
|
56 |
# If engine_type is specified, try to create that specific engine
|
57 |
if engine_type is not None:
|
58 |
if engine_type == 'kokoro' and KOKORO_AVAILABLE:
|
|
|
69 |
return DummyTTS(lang_code)
|
70 |
else:
|
71 |
logger.warning(f"Requested engine '{engine_type}' is not available")
|
72 |
+
|
73 |
# If no specific engine is requested or the requested engine is not available,
|
74 |
# use the best available engine based on priority
|
75 |
priority_order = ['cosyvoice2', 'kokoro', 'dia', 'dummy']
|
|
|
84 |
return CosyVoice2TTS(lang_code)
|
85 |
elif engine == 'dummy':
|
86 |
return DummyTTS(lang_code)
|
87 |
+
|
88 |
# Fallback to dummy engine if no engines are available
|
89 |
logger.warning("No TTS engines available, falling back to dummy engine")
|
90 |
return DummyTTS(lang_code)
|
91 |
|
92 |
|
93 |
+
def generate_speech(text: str, engine_type: Optional[str] = None, lang_code: str = 'z',
|
94 |
voice: str = 'default', speed: float = 1.0) -> Optional[str]:
|
95 |
"""Generate speech using the specified or best available TTS engine
|
96 |
+
|
97 |
Args:
|
98 |
text (str): Input text to synthesize
|
99 |
engine_type (str, optional): Type of engine to use
|
100 |
lang_code (str): Language code
|
101 |
voice (str): Voice ID to use
|
102 |
speed (float): Speech speed multiplier
|
103 |
+
|
104 |
Returns:
|
105 |
Optional[str]: Path to the generated audio file or None if generation fails
|
106 |
"""
|
|
|
111 |
def generate_speech_stream(text: str, engine_type: Optional[str] = None, lang_code: str = 'z',
|
112 |
voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
|
113 |
"""Generate speech stream using the specified or best available TTS engine
|
114 |
+
|
115 |
Args:
|
116 |
text (str): Input text to synthesize
|
117 |
engine_type (str, optional): Type of engine to use
|
118 |
lang_code (str): Language code
|
119 |
voice (str): Voice ID to use
|
120 |
speed (float): Speech speed multiplier
|
121 |
+
|
122 |
Yields:
|
123 |
tuple: (sample_rate, audio_data) pairs for each segment
|
124 |
"""
|
utils/tts_dia.py
CHANGED
@@ -30,18 +30,18 @@ except ModuleNotFoundError as e:
|
|
30 |
|
31 |
def _get_model():
|
32 |
"""Lazy-load the Dia model
|
33 |
-
|
34 |
Returns:
|
35 |
Dia or None: The Dia model or None if not available
|
36 |
"""
|
37 |
if not DIA_AVAILABLE:
|
38 |
logger.warning("Dia TTS engine is not available")
|
39 |
return None
|
40 |
-
|
41 |
try:
|
42 |
import torch
|
43 |
from dia.model import Dia
|
44 |
-
|
45 |
# Initialize the model
|
46 |
model = Dia.from_pretrained()
|
47 |
logger.info("Dia model successfully loaded")
|
@@ -59,59 +59,59 @@ def _get_model():
|
|
59 |
|
60 |
class DiaTTS(TTSBase):
|
61 |
"""Dia TTS engine implementation
|
62 |
-
|
63 |
This engine uses the Dia model for TTS generation.
|
64 |
"""
|
65 |
-
|
66 |
def __init__(self, lang_code: str = 'z'):
|
67 |
"""Initialize the Dia TTS engine
|
68 |
-
|
69 |
Args:
|
70 |
lang_code (str): Language code for the engine
|
71 |
"""
|
72 |
super().__init__(lang_code)
|
73 |
self.model = None
|
74 |
-
|
75 |
def _ensure_model(self):
|
76 |
"""Ensure the model is loaded
|
77 |
-
|
78 |
Returns:
|
79 |
bool: True if model is available, False otherwise
|
80 |
"""
|
81 |
if self.model is None:
|
82 |
self.model = _get_model()
|
83 |
-
|
84 |
return self.model is not None
|
85 |
-
|
86 |
def generate_speech(self, text: str, voice: str = 'default', speed: float = 1.0) -> Optional[str]:
|
87 |
"""Generate speech using Dia TTS engine
|
88 |
-
|
89 |
Args:
|
90 |
text (str): Input text to synthesize
|
91 |
voice (str): Voice ID (not used in Dia)
|
92 |
speed (float): Speech speed multiplier (not used in Dia)
|
93 |
-
|
94 |
Returns:
|
95 |
Optional[str]: Path to the generated audio file or None if generation fails
|
96 |
"""
|
97 |
logger.info(f"Generating speech with Dia for text length: {len(text)}")
|
98 |
-
|
99 |
# Check if Dia is available
|
100 |
if not DIA_AVAILABLE:
|
101 |
logger.error("Dia TTS engine is not available")
|
102 |
return None
|
103 |
-
|
104 |
# Ensure model is loaded
|
105 |
if not self._ensure_model():
|
106 |
logger.error("Failed to load Dia model")
|
107 |
return None
|
108 |
-
|
109 |
try:
|
110 |
import torch
|
111 |
-
|
112 |
# Generate unique output path
|
113 |
output_path = self._generate_output_path(prefix="dia")
|
114 |
-
|
115 |
# Generate audio
|
116 |
with torch.inference_mode():
|
117 |
output_audio_np = self.model.generate(
|
@@ -124,7 +124,7 @@ class DiaTTS(TTSBase):
|
|
124 |
use_torch_compile=False,
|
125 |
verbose=False
|
126 |
)
|
127 |
-
|
128 |
if output_audio_np is not None:
|
129 |
logger.info(f"Successfully generated audio with Dia (length: {len(output_audio_np)})")
|
130 |
sf.write(output_path, output_audio_np, DEFAULT_SAMPLE_RATE)
|
@@ -133,7 +133,7 @@ class DiaTTS(TTSBase):
|
|
133 |
else:
|
134 |
logger.error("Dia model returned None for audio output")
|
135 |
return None
|
136 |
-
|
137 |
except ModuleNotFoundError as e:
|
138 |
if "dac" in str(e):
|
139 |
logger.error("Dia TTS engine failed due to missing 'dac' module")
|
@@ -143,33 +143,33 @@ class DiaTTS(TTSBase):
|
|
143 |
except Exception as e:
|
144 |
logger.error(f"Error generating speech with Dia: {str(e)}", exc_info=True)
|
145 |
return None
|
146 |
-
|
147 |
def generate_speech_stream(self, text: str, voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
|
148 |
"""Generate speech stream using Dia TTS engine
|
149 |
-
|
150 |
Args:
|
151 |
text (str): Input text to synthesize
|
152 |
voice (str): Voice ID (not used in Dia)
|
153 |
speed (float): Speech speed multiplier (not used in Dia)
|
154 |
-
|
155 |
Yields:
|
156 |
tuple: (sample_rate, audio_data) pairs for each segment
|
157 |
"""
|
158 |
logger.info(f"Generating speech stream with Dia for text length: {len(text)}")
|
159 |
-
|
160 |
# Check if Dia is available
|
161 |
if not DIA_AVAILABLE:
|
162 |
logger.error("Dia TTS engine is not available")
|
163 |
return
|
164 |
-
|
165 |
# Ensure model is loaded
|
166 |
if not self._ensure_model():
|
167 |
logger.error("Failed to load Dia model")
|
168 |
return
|
169 |
-
|
170 |
try:
|
171 |
import torch
|
172 |
-
|
173 |
# Generate audio
|
174 |
with torch.inference_mode():
|
175 |
output_audio_np = self.model.generate(
|
@@ -182,14 +182,14 @@ class DiaTTS(TTSBase):
|
|
182 |
use_torch_compile=False,
|
183 |
verbose=False
|
184 |
)
|
185 |
-
|
186 |
if output_audio_np is not None:
|
187 |
logger.info(f"Successfully generated audio with Dia (length: {len(output_audio_np)})")
|
188 |
yield DEFAULT_SAMPLE_RATE, output_audio_np
|
189 |
else:
|
190 |
logger.error("Dia model returned None for audio output")
|
191 |
return
|
192 |
-
|
193 |
except ModuleNotFoundError as e:
|
194 |
if "dac" in str(e):
|
195 |
logger.error("Dia TTS engine failed due to missing 'dac' module")
|
|
|
30 |
|
31 |
def _get_model():
|
32 |
"""Lazy-load the Dia model
|
33 |
+
|
34 |
Returns:
|
35 |
Dia or None: The Dia model or None if not available
|
36 |
"""
|
37 |
if not DIA_AVAILABLE:
|
38 |
logger.warning("Dia TTS engine is not available")
|
39 |
return None
|
40 |
+
|
41 |
try:
|
42 |
import torch
|
43 |
from dia.model import Dia
|
44 |
+
|
45 |
# Initialize the model
|
46 |
model = Dia.from_pretrained()
|
47 |
logger.info("Dia model successfully loaded")
|
|
|
59 |
|
60 |
class DiaTTS(TTSBase):
|
61 |
"""Dia TTS engine implementation
|
62 |
+
|
63 |
This engine uses the Dia model for TTS generation.
|
64 |
"""
|
65 |
+
|
66 |
def __init__(self, lang_code: str = 'z'):
|
67 |
"""Initialize the Dia TTS engine
|
68 |
+
|
69 |
Args:
|
70 |
lang_code (str): Language code for the engine
|
71 |
"""
|
72 |
super().__init__(lang_code)
|
73 |
self.model = None
|
74 |
+
|
75 |
def _ensure_model(self):
|
76 |
"""Ensure the model is loaded
|
77 |
+
|
78 |
Returns:
|
79 |
bool: True if model is available, False otherwise
|
80 |
"""
|
81 |
if self.model is None:
|
82 |
self.model = _get_model()
|
83 |
+
|
84 |
return self.model is not None
|
85 |
+
|
86 |
def generate_speech(self, text: str, voice: str = 'default', speed: float = 1.0) -> Optional[str]:
|
87 |
"""Generate speech using Dia TTS engine
|
88 |
+
|
89 |
Args:
|
90 |
text (str): Input text to synthesize
|
91 |
voice (str): Voice ID (not used in Dia)
|
92 |
speed (float): Speech speed multiplier (not used in Dia)
|
93 |
+
|
94 |
Returns:
|
95 |
Optional[str]: Path to the generated audio file or None if generation fails
|
96 |
"""
|
97 |
logger.info(f"Generating speech with Dia for text length: {len(text)}")
|
98 |
+
|
99 |
# Check if Dia is available
|
100 |
if not DIA_AVAILABLE:
|
101 |
logger.error("Dia TTS engine is not available")
|
102 |
return None
|
103 |
+
|
104 |
# Ensure model is loaded
|
105 |
if not self._ensure_model():
|
106 |
logger.error("Failed to load Dia model")
|
107 |
return None
|
108 |
+
|
109 |
try:
|
110 |
import torch
|
111 |
+
|
112 |
# Generate unique output path
|
113 |
output_path = self._generate_output_path(prefix="dia")
|
114 |
+
|
115 |
# Generate audio
|
116 |
with torch.inference_mode():
|
117 |
output_audio_np = self.model.generate(
|
|
|
124 |
use_torch_compile=False,
|
125 |
verbose=False
|
126 |
)
|
127 |
+
|
128 |
if output_audio_np is not None:
|
129 |
logger.info(f"Successfully generated audio with Dia (length: {len(output_audio_np)})")
|
130 |
sf.write(output_path, output_audio_np, DEFAULT_SAMPLE_RATE)
|
|
|
133 |
else:
|
134 |
logger.error("Dia model returned None for audio output")
|
135 |
return None
|
136 |
+
|
137 |
except ModuleNotFoundError as e:
|
138 |
if "dac" in str(e):
|
139 |
logger.error("Dia TTS engine failed due to missing 'dac' module")
|
|
|
143 |
except Exception as e:
|
144 |
logger.error(f"Error generating speech with Dia: {str(e)}", exc_info=True)
|
145 |
return None
|
146 |
+
|
147 |
def generate_speech_stream(self, text: str, voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
|
148 |
"""Generate speech stream using Dia TTS engine
|
149 |
+
|
150 |
Args:
|
151 |
text (str): Input text to synthesize
|
152 |
voice (str): Voice ID (not used in Dia)
|
153 |
speed (float): Speech speed multiplier (not used in Dia)
|
154 |
+
|
155 |
Yields:
|
156 |
tuple: (sample_rate, audio_data) pairs for each segment
|
157 |
"""
|
158 |
logger.info(f"Generating speech stream with Dia for text length: {len(text)}")
|
159 |
+
|
160 |
# Check if Dia is available
|
161 |
if not DIA_AVAILABLE:
|
162 |
logger.error("Dia TTS engine is not available")
|
163 |
return
|
164 |
+
|
165 |
# Ensure model is loaded
|
166 |
if not self._ensure_model():
|
167 |
logger.error("Failed to load Dia model")
|
168 |
return
|
169 |
+
|
170 |
try:
|
171 |
import torch
|
172 |
+
|
173 |
# Generate audio
|
174 |
with torch.inference_mode():
|
175 |
output_audio_np = self.model.generate(
|
|
|
182 |
use_torch_compile=False,
|
183 |
verbose=False
|
184 |
)
|
185 |
+
|
186 |
if output_audio_np is not None:
|
187 |
logger.info(f"Successfully generated audio with Dia (length: {len(output_audio_np)})")
|
188 |
yield DEFAULT_SAMPLE_RATE, output_audio_np
|
189 |
else:
|
190 |
logger.error("Dia model returned None for audio output")
|
191 |
return
|
192 |
+
|
193 |
except ModuleNotFoundError as e:
|
194 |
if "dac" in str(e):
|
195 |
logger.error("Dia TTS engine failed due to missing 'dac' module")
|
utils/tts_dummy.py
CHANGED
@@ -12,54 +12,54 @@ logger = logging.getLogger(__name__)
|
|
12 |
|
13 |
class DummyTTS(TTSBase):
|
14 |
"""Dummy TTS engine that generates sine wave audio
|
15 |
-
|
16 |
This class is used as a fallback when no other TTS engine is available.
|
17 |
"""
|
18 |
-
|
19 |
def generate_speech(self, text: str, voice: str = 'default', speed: float = 1.0) -> str:
|
20 |
"""Generate a dummy sine wave audio file
|
21 |
-
|
22 |
Args:
|
23 |
text (str): Input text (not used)
|
24 |
voice (str): Voice ID (not used)
|
25 |
speed (float): Speech speed multiplier (not used)
|
26 |
-
|
27 |
Returns:
|
28 |
str: Path to the generated audio file
|
29 |
"""
|
30 |
logger.info(f"Generating dummy speech for text length: {len(text)}")
|
31 |
-
|
32 |
# Generate a simple sine wave
|
33 |
sample_rate = 24000
|
34 |
duration = min(len(text) / 20, 10) # Rough approximation of speech duration
|
35 |
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
|
36 |
audio = 0.5 * np.sin(2 * np.pi * 440 * t) # 440 Hz sine wave
|
37 |
-
|
38 |
# Save to file
|
39 |
output_path = self._generate_output_path(prefix="dummy")
|
40 |
sf.write(output_path, audio, sample_rate)
|
41 |
-
|
42 |
logger.info(f"Generated dummy audio: {output_path}")
|
43 |
return output_path
|
44 |
-
|
45 |
def generate_speech_stream(self, text: str, voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
|
46 |
"""Generate a dummy sine wave audio stream
|
47 |
-
|
48 |
Args:
|
49 |
text (str): Input text (not used)
|
50 |
voice (str): Voice ID (not used)
|
51 |
speed (float): Speech speed multiplier (not used)
|
52 |
-
|
53 |
Yields:
|
54 |
tuple: (sample_rate, audio_data) pairs
|
55 |
"""
|
56 |
logger.info(f"Generating dummy speech stream for text length: {len(text)}")
|
57 |
-
|
58 |
# Generate a simple sine wave
|
59 |
sample_rate = 24000
|
60 |
duration = min(len(text) / 20, 10) # Rough approximation of speech duration
|
61 |
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
|
62 |
audio = 0.5 * np.sin(2 * np.pi * 440 * t) # 440 Hz sine wave
|
63 |
-
|
64 |
# Yield the audio data
|
65 |
yield sample_rate, audio
|
|
|
12 |
|
13 |
class DummyTTS(TTSBase):
|
14 |
"""Dummy TTS engine that generates sine wave audio
|
15 |
+
|
16 |
This class is used as a fallback when no other TTS engine is available.
|
17 |
"""
|
18 |
+
|
19 |
def generate_speech(self, text: str, voice: str = 'default', speed: float = 1.0) -> str:
|
20 |
"""Generate a dummy sine wave audio file
|
21 |
+
|
22 |
Args:
|
23 |
text (str): Input text (not used)
|
24 |
voice (str): Voice ID (not used)
|
25 |
speed (float): Speech speed multiplier (not used)
|
26 |
+
|
27 |
Returns:
|
28 |
str: Path to the generated audio file
|
29 |
"""
|
30 |
logger.info(f"Generating dummy speech for text length: {len(text)}")
|
31 |
+
|
32 |
# Generate a simple sine wave
|
33 |
sample_rate = 24000
|
34 |
duration = min(len(text) / 20, 10) # Rough approximation of speech duration
|
35 |
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
|
36 |
audio = 0.5 * np.sin(2 * np.pi * 440 * t) # 440 Hz sine wave
|
37 |
+
|
38 |
# Save to file
|
39 |
output_path = self._generate_output_path(prefix="dummy")
|
40 |
sf.write(output_path, audio, sample_rate)
|
41 |
+
|
42 |
logger.info(f"Generated dummy audio: {output_path}")
|
43 |
return output_path
|
44 |
+
|
45 |
def generate_speech_stream(self, text: str, voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
|
46 |
"""Generate a dummy sine wave audio stream
|
47 |
+
|
48 |
Args:
|
49 |
text (str): Input text (not used)
|
50 |
voice (str): Voice ID (not used)
|
51 |
speed (float): Speech speed multiplier (not used)
|
52 |
+
|
53 |
Yields:
|
54 |
tuple: (sample_rate, audio_data) pairs
|
55 |
"""
|
56 |
logger.info(f"Generating dummy speech stream for text length: {len(text)}")
|
57 |
+
|
58 |
# Generate a simple sine wave
|
59 |
sample_rate = 24000
|
60 |
duration = min(len(text) / 20, 10) # Rough approximation of speech duration
|
61 |
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
|
62 |
audio = 0.5 * np.sin(2 * np.pi * 440 * t) # 440 Hz sine wave
|
63 |
+
|
64 |
# Yield the audio data
|
65 |
yield sample_rate, audio
|
utils/tts_kokoro.py
CHANGED
@@ -25,17 +25,17 @@ except Exception as e:
|
|
25 |
|
26 |
def _get_pipeline(lang_code: str = 'z'):
|
27 |
"""Lazy-load the Kokoro pipeline
|
28 |
-
|
29 |
Args:
|
30 |
lang_code (str): Language code for the pipeline
|
31 |
-
|
32 |
Returns:
|
33 |
KPipeline or None: The Kokoro pipeline or None if not available
|
34 |
"""
|
35 |
if not KOKORO_AVAILABLE:
|
36 |
logger.warning("Kokoro TTS engine is not available")
|
37 |
return None
|
38 |
-
|
39 |
try:
|
40 |
pipeline = KPipeline(lang_code=lang_code)
|
41 |
logger.info("Kokoro pipeline successfully loaded")
|
@@ -47,93 +47,93 @@ def _get_pipeline(lang_code: str = 'z'):
|
|
47 |
|
48 |
class KokoroTTS(TTSBase):
|
49 |
"""Kokoro TTS engine implementation
|
50 |
-
|
51 |
This engine uses the Kokoro library for TTS generation.
|
52 |
"""
|
53 |
-
|
54 |
def __init__(self, lang_code: str = 'z'):
|
55 |
"""Initialize the Kokoro TTS engine
|
56 |
-
|
57 |
Args:
|
58 |
lang_code (str): Language code for the engine
|
59 |
"""
|
60 |
super().__init__(lang_code)
|
61 |
self.pipeline = None
|
62 |
-
|
63 |
def _ensure_pipeline(self):
|
64 |
"""Ensure the pipeline is loaded
|
65 |
-
|
66 |
Returns:
|
67 |
bool: True if pipeline is available, False otherwise
|
68 |
"""
|
69 |
if self.pipeline is None:
|
70 |
self.pipeline = _get_pipeline(self.lang_code)
|
71 |
-
|
72 |
return self.pipeline is not None
|
73 |
-
|
74 |
def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Optional[str]:
|
75 |
"""Generate speech using Kokoro TTS engine
|
76 |
-
|
77 |
Args:
|
78 |
text (str): Input text to synthesize
|
79 |
voice (str): Voice ID to use (e.g., 'af_heart', 'af_bella', etc.)
|
80 |
speed (float): Speech speed multiplier (0.5 to 2.0)
|
81 |
-
|
82 |
Returns:
|
83 |
Optional[str]: Path to the generated audio file or None if generation fails
|
84 |
"""
|
85 |
logger.info(f"Generating speech with Kokoro for text length: {len(text)}")
|
86 |
-
|
87 |
# Check if Kokoro is available
|
88 |
if not KOKORO_AVAILABLE:
|
89 |
logger.error("Kokoro TTS engine is not available")
|
90 |
return None
|
91 |
-
|
92 |
# Ensure pipeline is loaded
|
93 |
if not self._ensure_pipeline():
|
94 |
logger.error("Failed to load Kokoro pipeline")
|
95 |
return None
|
96 |
-
|
97 |
try:
|
98 |
# Generate unique output path
|
99 |
output_path = self._generate_output_path(prefix="kokoro")
|
100 |
-
|
101 |
# Generate speech
|
102 |
generator = self.pipeline(text, voice=voice, speed=speed)
|
103 |
for _, _, audio in generator:
|
104 |
logger.info(f"Saving Kokoro audio to {output_path}")
|
105 |
sf.write(output_path, audio, 24000)
|
106 |
break
|
107 |
-
|
108 |
logger.info(f"Kokoro audio generation complete: {output_path}")
|
109 |
return output_path
|
110 |
except Exception as e:
|
111 |
logger.error(f"Error generating speech with Kokoro: {str(e)}", exc_info=True)
|
112 |
return None
|
113 |
-
|
114 |
def generate_speech_stream(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
|
115 |
"""Generate speech stream using Kokoro TTS engine
|
116 |
-
|
117 |
Args:
|
118 |
text (str): Input text to synthesize
|
119 |
voice (str): Voice ID to use
|
120 |
speed (float): Speech speed multiplier
|
121 |
-
|
122 |
Yields:
|
123 |
tuple: (sample_rate, audio_data) pairs for each segment
|
124 |
"""
|
125 |
logger.info(f"Generating speech stream with Kokoro for text length: {len(text)}")
|
126 |
-
|
127 |
# Check if Kokoro is available
|
128 |
if not KOKORO_AVAILABLE:
|
129 |
logger.error("Kokoro TTS engine is not available")
|
130 |
return
|
131 |
-
|
132 |
# Ensure pipeline is loaded
|
133 |
if not self._ensure_pipeline():
|
134 |
logger.error("Failed to load Kokoro pipeline")
|
135 |
return
|
136 |
-
|
137 |
try:
|
138 |
# Generate speech stream
|
139 |
generator = self.pipeline(text, voice=voice, speed=speed)
|
|
|
25 |
|
26 |
def _get_pipeline(lang_code: str = 'z'):
|
27 |
"""Lazy-load the Kokoro pipeline
|
28 |
+
|
29 |
Args:
|
30 |
lang_code (str): Language code for the pipeline
|
31 |
+
|
32 |
Returns:
|
33 |
KPipeline or None: The Kokoro pipeline or None if not available
|
34 |
"""
|
35 |
if not KOKORO_AVAILABLE:
|
36 |
logger.warning("Kokoro TTS engine is not available")
|
37 |
return None
|
38 |
+
|
39 |
try:
|
40 |
pipeline = KPipeline(lang_code=lang_code)
|
41 |
logger.info("Kokoro pipeline successfully loaded")
|
|
|
47 |
|
48 |
class KokoroTTS(TTSBase):
|
49 |
"""Kokoro TTS engine implementation
|
50 |
+
|
51 |
This engine uses the Kokoro library for TTS generation.
|
52 |
"""
|
53 |
+
|
54 |
def __init__(self, lang_code: str = 'z'):
|
55 |
"""Initialize the Kokoro TTS engine
|
56 |
+
|
57 |
Args:
|
58 |
lang_code (str): Language code for the engine
|
59 |
"""
|
60 |
super().__init__(lang_code)
|
61 |
self.pipeline = None
|
62 |
+
|
63 |
def _ensure_pipeline(self):
|
64 |
"""Ensure the pipeline is loaded
|
65 |
+
|
66 |
Returns:
|
67 |
bool: True if pipeline is available, False otherwise
|
68 |
"""
|
69 |
if self.pipeline is None:
|
70 |
self.pipeline = _get_pipeline(self.lang_code)
|
71 |
+
|
72 |
return self.pipeline is not None
|
73 |
+
|
74 |
def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Optional[str]:
|
75 |
"""Generate speech using Kokoro TTS engine
|
76 |
+
|
77 |
Args:
|
78 |
text (str): Input text to synthesize
|
79 |
voice (str): Voice ID to use (e.g., 'af_heart', 'af_bella', etc.)
|
80 |
speed (float): Speech speed multiplier (0.5 to 2.0)
|
81 |
+
|
82 |
Returns:
|
83 |
Optional[str]: Path to the generated audio file or None if generation fails
|
84 |
"""
|
85 |
logger.info(f"Generating speech with Kokoro for text length: {len(text)}")
|
86 |
+
|
87 |
# Check if Kokoro is available
|
88 |
if not KOKORO_AVAILABLE:
|
89 |
logger.error("Kokoro TTS engine is not available")
|
90 |
return None
|
91 |
+
|
92 |
# Ensure pipeline is loaded
|
93 |
if not self._ensure_pipeline():
|
94 |
logger.error("Failed to load Kokoro pipeline")
|
95 |
return None
|
96 |
+
|
97 |
try:
|
98 |
# Generate unique output path
|
99 |
output_path = self._generate_output_path(prefix="kokoro")
|
100 |
+
|
101 |
# Generate speech
|
102 |
generator = self.pipeline(text, voice=voice, speed=speed)
|
103 |
for _, _, audio in generator:
|
104 |
logger.info(f"Saving Kokoro audio to {output_path}")
|
105 |
sf.write(output_path, audio, 24000)
|
106 |
break
|
107 |
+
|
108 |
logger.info(f"Kokoro audio generation complete: {output_path}")
|
109 |
return output_path
|
110 |
except Exception as e:
|
111 |
logger.error(f"Error generating speech with Kokoro: {str(e)}", exc_info=True)
|
112 |
return None
|
113 |
+
|
114 |
def generate_speech_stream(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
|
115 |
"""Generate speech stream using Kokoro TTS engine
|
116 |
+
|
117 |
Args:
|
118 |
text (str): Input text to synthesize
|
119 |
voice (str): Voice ID to use
|
120 |
speed (float): Speech speed multiplier
|
121 |
+
|
122 |
Yields:
|
123 |
tuple: (sample_rate, audio_data) pairs for each segment
|
124 |
"""
|
125 |
logger.info(f"Generating speech stream with Kokoro for text length: {len(text)}")
|
126 |
+
|
127 |
# Check if Kokoro is available
|
128 |
if not KOKORO_AVAILABLE:
|
129 |
logger.error("Kokoro TTS engine is not available")
|
130 |
return
|
131 |
+
|
132 |
# Ensure pipeline is loaded
|
133 |
if not self._ensure_pipeline():
|
134 |
logger.error("Failed to load Kokoro pipeline")
|
135 |
return
|
136 |
+
|
137 |
try:
|
138 |
# Generate speech stream
|
139 |
generator = self.pipeline(text, voice=voice, speed=speed)
|