Spaces:
Build error
Build error
File size: 4,815 Bytes
60bd17d 7495571 aaa0814 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 60bd17d 7495571 e22e786 7495571 e22e786 7495571 e22e786 7495571 e22e786 7495571 e22e786 7495571 e22e786 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import logging
import numpy as np
import soundfile as sf
from typing import Optional, Generator, Tuple
from utils.tts_base import TTSBase
# Module-level logger shared by everything in this file
logger = logging.getLogger(__name__)

# Availability flag for the optional Kokoro dependency; stays False unless
# the import below succeeds.
KOKORO_AVAILABLE = False

# Probe for Kokoro once at import time. KPipeline is only bound on success,
# so callers must check KOKORO_AVAILABLE before touching it.
try:
    from kokoro import KPipeline
except ImportError:
    # Package is simply not installed
    logger.warning("Kokoro TTS engine is not available")
except Exception as import_error:
    # Package is present but failed while importing
    logger.error(f"Kokoro import failed with unexpected error: {str(import_error)}")
else:
    KOKORO_AVAILABLE = True
    logger.info("Kokoro TTS engine is available")
def _get_pipeline(lang_code: str = 'z'):
    """Build a Kokoro pipeline for the requested language.

    A new KPipeline is constructed on every call; any caching is left to
    the caller.

    Args:
        lang_code (str): Language code passed to KPipeline

    Returns:
        KPipeline or None: The constructed pipeline, or None when Kokoro is
        not importable or the constructor raises
    """
    if KOKORO_AVAILABLE:
        try:
            loaded = KPipeline(lang_code=lang_code)
        except Exception as init_error:
            # Construction failures are logged and reported as None
            logger.error(f"Failed to initialize Kokoro pipeline: {str(init_error)}")
            return None
        else:
            logger.info("Kokoro pipeline successfully loaded")
            return loaded

    # Kokoro could not be imported when this module was loaded
    logger.warning("Kokoro TTS engine is not available")
    return None
class KokoroTTS(TTSBase):
    """Kokoro TTS engine implementation

    This engine uses the Kokoro library for TTS generation. The pipeline is
    created lazily on first use and then reused for later calls.

    NOTE(review): the default lang_code ('z') and the default voice
    ('af_heart') look like they may target different languages in Kokoro --
    confirm against the Kokoro voice/language tables.
    """

    # Sample rate (Hz) used when writing files and yielding stream segments
    SAMPLE_RATE = 24000

    def __init__(self, lang_code: str = 'z'):
        """Initialize the Kokoro TTS engine

        Args:
            lang_code (str): Language code for the engine
        """
        # self.lang_code is read in _ensure_pipeline; presumably it is set by
        # TTSBase.__init__ -- confirm against the base class.
        super().__init__(lang_code)
        self.pipeline = None

    def _ensure_pipeline(self):
        """Ensure the pipeline is loaded

        Returns:
            bool: True if pipeline is available, False otherwise
        """
        if self.pipeline is None:
            self.pipeline = _get_pipeline(self.lang_code)
        return self.pipeline is not None

    def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Optional[str]:
        """Generate speech using Kokoro TTS engine

        All audio segments produced by the pipeline are joined and written to
        a single file, so the saved audio covers the whole input text rather
        than only its first segment.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use (e.g., 'af_heart', 'af_bella', etc.)
            speed (float): Speech speed multiplier (0.5 to 2.0)

        Returns:
            Optional[str]: Path to the generated audio file or None if generation fails
        """
        logger.info(f"Generating speech with Kokoro for text length: {len(text)}")

        # Check if Kokoro is available
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return None

        # Ensure pipeline is loaded
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return None

        try:
            # Generate unique output path (helper is expected on TTSBase)
            output_path = self._generate_output_path(prefix="kokoro")

            # Collect every segment the pipeline yields; segments without
            # audio are skipped so they cannot break the concatenation.
            generator = self.pipeline(text, voice=voice, speed=speed)
            segments = [
                np.asarray(audio)
                for _, _, audio in generator
                if audio is not None
            ]

            # Nothing was synthesized: do not return a path to a file that
            # was never written.
            if not segments:
                logger.error("Kokoro produced no audio for the given text")
                return None

            full_audio = segments[0] if len(segments) == 1 else np.concatenate(segments)

            logger.info(f"Saving Kokoro audio to {output_path}")
            sf.write(output_path, full_audio, self.SAMPLE_RATE)
            logger.info(f"Kokoro audio generation complete: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"Error generating speech with Kokoro: {str(e)}", exc_info=True)
            return None

    def generate_speech_stream(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
        """Generate speech stream using Kokoro TTS engine

        Errors are logged and end the stream early instead of propagating to
        the consumer.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier

        Yields:
            tuple: (sample_rate, audio_data) pairs for each segment
        """
        logger.info(f"Generating speech stream with Kokoro for text length: {len(text)}")

        # Check if Kokoro is available
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return

        # Ensure pipeline is loaded
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return

        try:
            # Generate speech stream, one segment at a time
            generator = self.pipeline(text, voice=voice, speed=speed)
            for _, _, audio in generator:
                yield self.SAMPLE_RATE, audio
        except Exception as e:
            logger.error(f"Error generating speech stream with Kokoro: {str(e)}", exc_info=True)