"""Cascading TTS engine that tries several backends in order."""

import logging
from typing import List, Tuple, Generator, Optional

import numpy as np

from utils.tts_base import TTSEngineBase, DummyTTSEngine
from utils.tts_engines import create_engine

# Configure logging
logger = logging.getLogger(__name__)


class CascadingTTSEngine(TTSEngineBase):
    """Cascading TTS engine implementation

    This engine tries multiple TTS engines in order until one succeeds.
    It provides a fallback mechanism to maximize the chances of getting
    quality speech output. When every configured engine fails, the request
    is delegated to ``DummyTTSEngine``.
    """

    def __init__(self, engine_types: List[str], lang_code: str = 'z'):
        """Initialize the cascading TTS engine

        Args:
            engine_types (List[str]): List of engine types to try in order
            lang_code (str): Language code for the engines
        """
        # NOTE(review): the default lang_code 'z' is forwarded unchanged to
        # every engine; confirm it is the intended pairing for the default
        # voice 'af_heart' used by the generate_* methods.
        super().__init__(lang_code)
        self.engine_types = engine_types
        self.lang_code = lang_code
        logger.info(f"Initialized cascading TTS engine with engines: {engine_types}")

    def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
        """Generate speech by trying multiple engines in order

        Each engine type is instantiated through ``create_engine`` and asked
        to synthesize the text. The first non-``None`` result wins. An engine
        that raises or returns ``None`` is skipped.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier

        Returns:
            str: Path to the generated audio file
        """
        logger.info(f"Generating speech with cascading engine for text length: {len(text)}")

        # Try each engine in order
        for engine_type in self.engine_types:
            try:
                logger.info(f"Trying TTS engine: {engine_type}")
                engine = create_engine(engine_type, self.lang_code)
                result = engine.generate_speech(text, voice, speed)
            except Exception as e:
                # Best-effort cascade: any engine failure just moves us on,
                # but keep the traceback so the root cause stays diagnosable.
                logger.error(f"Error with TTS engine {engine_type}: {e}", exc_info=True)
                logger.error(f"Error type: {type(e).__name__}")
                logger.warning("Trying next TTS engine")
                continue

            # If the engine returned a valid result, return it
            if result is not None:
                logger.info(f"Successfully generated speech with {engine_type}")
                return result

            logger.warning(f"TTS engine {engine_type} failed to generate speech, trying next engine")

        # If all engines failed, fall back to dummy engine
        logger.warning("All TTS engines failed, falling back to dummy engine")
        return DummyTTSEngine(self.lang_code).generate_speech(text, voice, speed)

    def generate_speech_stream(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
        """Generate speech stream by trying multiple engines in order

        An engine is considered working once it produces its first chunk.
        Fallback to the next engine only happens *before* that point. If the
        chosen engine fails after audio has already been yielded, the error
        is logged and the stream ends, because restarting another engine on
        the full text would replay audio the consumer has already received.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier

        Yields:
            tuple: (sample_rate, audio_data) pairs for each segment
        """
        logger.info(f"Generating speech stream with cascading engine for text length: {len(text)}")

        # Try each engine in order
        for engine_type in self.engine_types:
            try:
                logger.info(f"Trying TTS engine for streaming: {engine_type}")
                engine = create_engine(engine_type, self.lang_code)
                stream = engine.generate_speech_stream(text, voice, speed)

                # Pull the first chunk eagerly to verify the engine works
                first_chunk = next(stream, None)
            except Exception as e:
                logger.error(f"Error with TTS engine {engine_type} streaming: {e}", exc_info=True)
                logger.error(f"Error type: {type(e).__name__}")
                logger.warning("Trying next TTS engine for streaming")
                continue

            if first_chunk is None:
                logger.warning(f"TTS engine {engine_type} failed to generate speech stream, trying next engine")
                continue

            # Engine produced a valid first chunk: commit to this engine.
            logger.info(f"Successfully started speech stream with {engine_type}")
            try:
                yield first_chunk
                # ``yield from`` also forwards close() to the engine's
                # generator when the consumer stops early.
                yield from stream
            except Exception as e:
                # Audio was already emitted, so do not cascade to another
                # engine (it would start over from the beginning of the text).
                logger.error(f"Error with TTS engine {engine_type} streaming: {e}", exc_info=True)
                logger.error(f"Error type: {type(e).__name__}")
                logger.warning(
                    f"TTS engine {engine_type} failed mid-stream; ending stream without fallback"
                )
            return

        # If all engines failed, fall back to dummy engine
        logger.warning("All TTS engines failed for streaming, falling back to dummy engine")
        yield from DummyTTSEngine(self.lang_code).generate_speech_stream(text, voice, speed)