Spaces:
Build error
Build error
import logging | |
import numpy as np | |
import soundfile as sf | |
from typing import Optional, Generator, Tuple | |
from utils.tts_base import TTSBase | |
# Module-level logger for this engine
logger = logging.getLogger(__name__)

# Kokoro is an optional dependency: attempt the import once at module load and
# record the outcome in KOKORO_AVAILABLE so later code can check the flag
# before touching KPipeline.
try:
    from kokoro import KPipeline
except ImportError:
    # The package is simply not installed
    KOKORO_AVAILABLE = False
    logger.warning("Kokoro TTS engine is not available")
except Exception as e:
    # The package is present but failed while importing
    KOKORO_AVAILABLE = False
    logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
else:
    KOKORO_AVAILABLE = True
    logger.info("Kokoro TTS engine is available")
def _get_pipeline(lang_code: str = 'z'):
    """Construct a Kokoro pipeline for the given language.

    Args:
        lang_code (str): Language code passed through to ``KPipeline``

    Returns:
        KPipeline or None: A newly constructed pipeline, or None when Kokoro
        was not importable or the constructor raised
    """
    if KOKORO_AVAILABLE:
        try:
            kokoro_pipeline = KPipeline(lang_code=lang_code)
            logger.info("Kokoro pipeline successfully loaded")
            return kokoro_pipeline
        except Exception as e:
            # Construction errors are logged and reported to the caller as None
            logger.error(f"Failed to initialize Kokoro pipeline: {str(e)}")
            return None

    # Import guard failed at module load, so there is nothing to build
    logger.warning("Kokoro TTS engine is not available")
    return None
class KokoroTTS(TTSBase):
    """Kokoro TTS engine implementation

    This engine uses the Kokoro library for TTS generation. The underlying
    pipeline is created lazily on first use; synthesis is offered both as a
    single audio file (``generate_speech``) and as a stream of audio segments
    (``generate_speech_stream``).
    """

    # Sample rate (Hz) used when writing or yielding Kokoro audio
    SAMPLE_RATE = 24000

    def __init__(self, lang_code: str = 'z'):
        """Initialize the Kokoro TTS engine

        Args:
            lang_code (str): Language code for the engine
        """
        # NOTE(review): the default lang_code 'z' and the default voice
        # 'af_heart' used by the generate methods appear to belong to
        # different languages under Kokoro's naming scheme - confirm the
        # intended defaults.
        super().__init__(lang_code)
        # Built on demand by _ensure_pipeline()
        self.pipeline = None

    def _ensure_pipeline(self):
        """Ensure the pipeline is loaded

        Returns:
            bool: True if pipeline is available, False otherwise
        """
        if self.pipeline is None:
            # self.lang_code is presumably set by TTSBase.__init__ - verify
            self.pipeline = _get_pipeline(self.lang_code)
        return self.pipeline is not None

    def _is_ready(self) -> bool:
        """Check that Kokoro is importable and the pipeline is loaded

        Logs the reason when the engine cannot be used.

        Returns:
            bool: True when ``self.pipeline`` can be called, False otherwise
        """
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return False
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return False
        return True

    def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Optional[str]:
        """Generate speech using Kokoro TTS engine

        All audio segments produced by the pipeline are concatenated and
        written to a single file, so text that Kokoro splits into several
        segments is saved in full.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use (e.g., 'af_heart', 'af_bella', etc.)
            speed (float): Speech speed multiplier (documented range 0.5 to
                2.0; not enforced here)

        Returns:
            Optional[str]: Path to the generated audio file or None if generation fails
        """
        # Reject None/empty/whitespace-only input before doing any work
        if not text or not text.strip():
            logger.error("Cannot generate speech with Kokoro: input text is empty")
            return None

        logger.info(f"Generating speech with Kokoro for text length: {len(text)}")

        if not self._is_ready():
            return None

        try:
            # Collect every segment; segments without audio are skipped.
            # np.asarray assumes each segment is array-like (as sf.write
            # already required) - TODO confirm for non-NumPy outputs.
            segments = [
                np.asarray(audio)
                for _, _, audio in self.pipeline(text, voice=voice, speed=speed)
                if audio is not None
            ]
            if not segments:
                # Never hand back a path to a file that was not written
                logger.error("Kokoro produced no audio for the given text")
                return None

            # Generate unique output path only once there is audio to save
            output_path = self._generate_output_path(prefix="kokoro")
            logger.info(f"Saving Kokoro audio to {output_path}")
            sf.write(output_path, np.concatenate(segments), self.SAMPLE_RATE)
            logger.info(f"Kokoro audio generation complete: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"Error generating speech with Kokoro: {str(e)}", exc_info=True)
            return None

    def generate_speech_stream(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
        """Generate speech stream using Kokoro TTS engine

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier

        Yields:
            tuple: (sample_rate, audio_data) pairs for each segment
        """
        # Reject None/empty/whitespace-only input before doing any work
        if not text or not text.strip():
            logger.error("Cannot generate speech stream with Kokoro: input text is empty")
            return

        logger.info(f"Generating speech stream with Kokoro for text length: {len(text)}")

        if not self._is_ready():
            return

        try:
            # Forward each segment as soon as the pipeline produces it
            for _, _, audio in self.pipeline(text, voice=voice, speed=speed):
                yield self.SAMPLE_RATE, audio
        except Exception as e:
            # Errors end the stream early; segments already yielded stay valid
            logger.error(f"Error generating speech stream with Kokoro: {str(e)}", exc_info=True)
            return