import logging
from typing import Optional, Generator, Tuple

import numpy as np
import soundfile as sf

from utils.tts_base import TTSBase

# Configure logging
logger = logging.getLogger(__name__)

# Sample rate (Hz) used both when writing Kokoro audio to disk and when
# yielding streamed segments.
KOKORO_SAMPLE_RATE = 24000

# Flag to track Kokoro availability
KOKORO_AVAILABLE = False

# Try to import Kokoro. The import is optional: when it fails the module still
# loads and every generation call reports the engine as unavailable.
try:
    from kokoro import KPipeline
    KOKORO_AVAILABLE = True
    logger.info("Kokoro TTS engine is available")
except ImportError:
    logger.warning("Kokoro TTS engine is not available")
except Exception as e:
    # Any other failure while importing is treated the same as "not installed"
    # so that importing this module never raises.
    logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
    KOKORO_AVAILABLE = False


def _get_pipeline(lang_code: str = 'z'):
    """Create a Kokoro pipeline for the given language.

    Args:
        lang_code (str): Language code passed to ``KPipeline``.

    Returns:
        KPipeline or None: A new pipeline, or None when Kokoro is not
        available or the pipeline constructor raises.
    """
    if not KOKORO_AVAILABLE:
        logger.warning("Kokoro TTS engine is not available")
        return None

    try:
        pipeline = KPipeline(lang_code=lang_code)
        logger.info("Kokoro pipeline successfully loaded")
        return pipeline
    except Exception as e:
        logger.error(f"Failed to initialize Kokoro pipeline: {str(e)}")
        return None


class KokoroTTS(TTSBase):
    """Kokoro TTS engine implementation.

    This engine uses the Kokoro library for TTS generation. The pipeline is
    created lazily on the first generation request and then reused.
    """

    def __init__(self, lang_code: str = 'z'):
        """Initialize the Kokoro TTS engine.

        Args:
            lang_code (str): Language code for the engine; forwarded to the
                base class and later used to build the pipeline.
        """
        super().__init__(lang_code)
        # Created on demand by _ensure_pipeline().
        self.pipeline = None

    def _ensure_pipeline(self):
        """Load the pipeline if it has not been loaded yet.

        Returns:
            bool: True if a pipeline is available, False otherwise.
        """
        if self.pipeline is None:
            self.pipeline = _get_pipeline(self.lang_code)
        return self.pipeline is not None

    def generate_speech(self, text: str, voice: str = 'af_heart',
                        speed: float = 1.0) -> Optional[str]:
        """Generate speech with Kokoro and save it to a single audio file.

        Every segment yielded by the pipeline is collected and the segments
        are concatenated in order, so the file contains the whole input text
        rather than only its first segment.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use (e.g., 'af_heart', 'af_bella', etc.)
            speed (float): Speech speed multiplier (0.5 to 2.0)

        Returns:
            Optional[str]: Path to the generated audio file, or None if the
            engine is unavailable, no audio was produced, or generation fails.
        """
        logger.info(f"Generating speech with Kokoro for text length: {len(text)}")

        # Check if Kokoro is available
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return None

        # Ensure pipeline is loaded
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return None

        try:
            # Generate unique output path (helper provided by the base class)
            output_path = self._generate_output_path(prefix="kokoro")

            # Collect all segments; stopping after the first one would
            # truncate any text the pipeline splits into several pieces.
            segments = [
                np.asarray(audio)
                for _, _, audio in self.pipeline(text, voice=voice, speed=speed)
                if audio is not None
            ]

            if not segments:
                # Nothing to write: do not hand back a path to a file that
                # was never created.
                logger.error("Kokoro produced no audio for the given text")
                return None

            logger.info(f"Saving Kokoro audio to {output_path}")
            sf.write(output_path, np.concatenate(segments), KOKORO_SAMPLE_RATE)

            logger.info(f"Kokoro audio generation complete: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"Error generating speech with Kokoro: {str(e)}", exc_info=True)
            return None

    def generate_speech_stream(self, text: str, voice: str = 'af_heart',
                               speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
        """Generate speech with Kokoro, yielding each segment as it is produced.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier

        Yields:
            tuple: (sample_rate, audio_data) pairs for each segment. Nothing
            is yielded when the engine is unavailable; if an error occurs
            mid-stream it is logged and the stream simply ends.
        """
        logger.info(f"Generating speech stream with Kokoro for text length: {len(text)}")

        # Check if Kokoro is available
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return

        # Ensure pipeline is loaded
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return

        try:
            # Generate speech stream
            for _, _, audio in self.pipeline(text, voice=voice, speed=speed):
                yield KOKORO_SAMPLE_RATE, audio
        except Exception as e:
            logger.error(f"Error generating speech stream with Kokoro: {str(e)}", exc_info=True)
            return