# File: teachingAssistant/utils/tts_kokoro.py
# Hosting-page metadata: Michael Hu, commit aaa0814 "fix circular dependency", 4.82 kB
import logging
import numpy as np
import soundfile as sf
from typing import Optional, Generator, Tuple
from utils.tts_base import TTSBase
# Module-level logger named after this module
logger = logging.getLogger(__name__)

# Kokoro is an optional dependency. This flag records whether its import
# succeeded; the rest of the module consults it before touching KPipeline.
KOKORO_AVAILABLE = False

try:
    from kokoro import KPipeline
except ImportError:
    # Package (or one of its imports) is missing: the flag stays False
    logger.warning("Kokoro TTS engine is not available")
except Exception as e:
    # The import raised something other than ImportError
    logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
    KOKORO_AVAILABLE = False
else:
    # Import went through: KPipeline is now bound at module level
    KOKORO_AVAILABLE = True
    logger.info("Kokoro TTS engine is available")
def _get_pipeline(lang_code: str = 'z'):
    """Build a Kokoro pipeline for the given language.

    A new ``KPipeline`` is constructed on every call; nothing is cached here.
    Any failure is logged and reported as ``None`` instead of being raised.

    Args:
        lang_code (str): Language code handed to ``KPipeline``

    Returns:
        KPipeline or None: The constructed pipeline, or None when Kokoro is
        unavailable or construction raised
    """
    if KOKORO_AVAILABLE:
        try:
            kokoro_pipeline = KPipeline(lang_code=lang_code)
            logger.info("Kokoro pipeline successfully loaded")
        except Exception as e:
            # Construction failed: log it and fall back to "no pipeline"
            logger.error(f"Failed to initialize Kokoro pipeline: {str(e)}")
            kokoro_pipeline = None
        return kokoro_pipeline

    # The import at module load did not succeed
    logger.warning("Kokoro TTS engine is not available")
    return None
class KokoroTTS(TTSBase):
    """Kokoro TTS engine implementation

    This engine uses the Kokoro library for TTS generation. The pipeline is
    created on first use and then kept on the instance. Failures are logged
    and reported as ``None`` (file API) or as an early end of the stream
    (streaming API); they are not raised to the caller.

    NOTE(review): the default ``lang_code`` is 'z' while the default voice is
    'af_heart' - presumably these belong to different languages in Kokoro;
    confirm the pairing is intended.
    """

    # Sample rate (Hz) used when writing files and reported to stream consumers
    SAMPLE_RATE = 24000

    def __init__(self, lang_code: str = 'z'):
        """Initialize the Kokoro TTS engine

        Args:
            lang_code (str): Language code for the engine
        """
        # Presumably TTSBase stores the code as self.lang_code (read below
        # in _ensure_pipeline) - verify against the base class
        super().__init__(lang_code)
        self.pipeline = None

    def _ensure_pipeline(self):
        """Ensure the pipeline is loaded

        Creates the pipeline on first use; a failed attempt leaves
        ``self.pipeline`` as None, so the next call tries again.

        Returns:
            bool: True if pipeline is available, False otherwise
        """
        if self.pipeline is None:
            self.pipeline = _get_pipeline(self.lang_code)
        return self.pipeline is not None

    def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Optional[str]:
        """Generate speech using Kokoro TTS engine

        Every audio segment produced by the pipeline is collected and written
        to a single file, so text that is synthesized in several segments is
        not truncated after the first one.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use (e.g., 'af_heart', 'af_bella', etc.)
            speed (float): Speech speed multiplier (passed through unchanged,
                not validated here)

        Returns:
            Optional[str]: Path to the generated audio file, or None if Kokoro
            is unavailable, no audio was produced, or generation fails
        """
        logger.info(f"Generating speech with Kokoro for text length: {len(text)}")

        # Check if Kokoro is available
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return None

        # Ensure pipeline is loaded
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return None

        try:
            # Generate unique output path
            output_path = self._generate_output_path(prefix="kokoro")

            # Drain the generator completely. np.asarray performs the same
            # array conversion sf.write would apply to each segment.
            segments = [
                np.asarray(audio)
                for _, _, audio in self.pipeline(text, voice=voice, speed=speed)
                if audio is not None
            ]

            # Nothing to write: do not hand back a path to a missing file
            if not segments:
                logger.error("Kokoro produced no audio for the given text")
                return None

            logger.info(f"Saving Kokoro audio to {output_path}")
            sf.write(output_path, np.concatenate(segments), self.SAMPLE_RATE)

            logger.info(f"Kokoro audio generation complete: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"Error generating speech with Kokoro: {e}", exc_info=True)
            return None

    def generate_speech_stream(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
        """Generate speech stream using Kokoro TTS engine

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier

        Yields:
            tuple: (sample_rate, audio_data) pairs for each segment; the
            stream simply ends early if Kokoro is unavailable or an error
            occurs during generation
        """
        logger.info(f"Generating speech stream with Kokoro for text length: {len(text)}")

        # Check if Kokoro is available
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return

        # Ensure pipeline is loaded
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return

        try:
            # Forward each segment as soon as the pipeline produces it
            for _, _, audio in self.pipeline(text, voice=voice, speed=speed):
                # Skip segments that carry no audio
                if audio is None:
                    continue
                yield self.SAMPLE_RATE, audio
        except Exception as e:
            logger.error(f"Error generating speech stream with Kokoro: {e}", exc_info=True)
            return