Spaces:

DroolingPanda
/

teachingAssistant

Build error

teachingAssistant / utils /tts_kokoro.py

Michael Hu

fix path

b2b15db about 1 month ago

5.52 kB

	import logging
	import numpy as np
	import soundfile as sf
	from typing import Optional, Generator, Tuple

	from utils.tts import TTSBase, DummyTTS

	# Module-level logger for this TTS backend
	logger = logging.getLogger(__name__)

	# Stays False unless the optional Kokoro dependency imports cleanly
	KOKORO_AVAILABLE = False

	# Kokoro is optional: probe for it once at import time and record the outcome
	try:
	    from kokoro import KPipeline
	except ImportError:
	    logger.warning("Kokoro TTS engine is not available")
	except Exception as exc:
	    # Any other failure while importing also leaves the flag at False
	    logger.error(f"Kokoro import failed with unexpected error: {str(exc)}")
	else:
	    KOKORO_AVAILABLE = True
	    logger.info("Kokoro TTS engine is available")


	def _get_pipeline(lang_code: str = 'z'):
	    """Build a Kokoro pipeline for the requested language.

	    A new ``KPipeline`` is constructed on every call; callers that want
	    to reuse it must hold on to the returned object themselves.

	    Args:
	        lang_code (str): Language code passed to ``KPipeline``

	    Returns:
	        KPipeline or None: The pipeline, or None when Kokoro is not
	        available or constructing the pipeline raised
	    """
	    if KOKORO_AVAILABLE:
	        try:
	            loaded = KPipeline(lang_code=lang_code)
	        except Exception as exc:
	            logger.error(f"Failed to initialize Kokoro pipeline: {str(exc)}")
	            return None
	        logger.info("Kokoro pipeline successfully loaded")
	        return loaded

	    logger.warning("Kokoro TTS engine is not available")
	    return None


	class KokoroTTS(TTSBase):
	    """Kokoro TTS engine implementation

	    Wraps a Kokoro ``KPipeline`` behind the ``TTSBase`` interface. The
	    pipeline is created on first use. Every failure path visible here
	    (Kokoro not importable, pipeline construction failure, synthesis
	    error) falls back to ``DummyTTS`` instead of raising.
	    """

	    # Sample rate (Hz) passed to sf.write and yielded with each stream chunk
	    _SAMPLE_RATE = 24000

	    def __init__(self, lang_code: str = 'z'):
	        """Initialize the Kokoro TTS engine

	        Args:
	            lang_code (str): Language code for the engine
	        """
	        super().__init__(lang_code)
	        # Created lazily by _ensure_pipeline() on first synthesis request
	        self.pipeline = None

	    def _ensure_pipeline(self):
	        """Ensure the pipeline is loaded

	        A failed load leaves ``self.pipeline`` as None, so the next call
	        tries again.

	        Returns:
	            bool: True if pipeline is available, False otherwise
	        """
	        if self.pipeline is None:
	            self.pipeline = _get_pipeline(self.lang_code)

	        return self.pipeline is not None

	    def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Optional[str]:
	        """Generate speech using Kokoro TTS engine

	        All audio segments produced by the pipeline are concatenated and
	        written to a single file.

	        Args:
	            text (str): Input text to synthesize
	            voice (str): Voice ID to use (e.g., 'af_heart', 'af_bella', etc.)
	            speed (float): Speech speed multiplier (documented range 0.5 to
	                2.0; not validated here)

	        Returns:
	            Optional[str]: Path to the generated audio file, or whatever the
	            DummyTTS fallback returns when Kokoro cannot be used
	        """
	        logger.info(f"Generating speech with Kokoro for text length: {len(text)}")

	        # Check if Kokoro is available
	        if not KOKORO_AVAILABLE:
	            logger.warning("Kokoro TTS engine is not available, falling back to dummy TTS")
	            return DummyTTS(self.lang_code).generate_speech(text, voice, speed)

	        # Ensure pipeline is loaded
	        if not self._ensure_pipeline():
	            logger.warning("Failed to load Kokoro pipeline, falling back to dummy TTS")
	            return DummyTTS(self.lang_code).generate_speech(text, voice, speed)

	        try:
	            # Generate unique output path
	            output_path = self._generate_output_path(prefix="kokoro")

	            # The pipeline yields one result per segment. Collect every
	            # segment so the file holds the whole utterance rather than
	            # only the first one.
	            segments = [
	                np.asarray(audio)
	                for _, _, audio in self.pipeline(text, voice=voice, speed=speed)
	                if audio is not None
	            ]
	            if not segments:
	                # Never return a path to a file that was not written; the
	                # handler below logs this and falls back to DummyTTS.
	                raise RuntimeError("Kokoro produced no audio for the given text")

	            logger.info(f"Saving Kokoro audio to {output_path}")
	            sf.write(output_path, np.concatenate(segments), self._SAMPLE_RATE)

	            logger.info(f"Kokoro audio generation complete: {output_path}")
	            return output_path
	        except Exception as e:
	            logger.error(f"Error generating speech with Kokoro: {str(e)}", exc_info=True)
	            logger.warning("Kokoro TTS engine failed, falling back to dummy TTS")
	            return DummyTTS(self.lang_code).generate_speech(text, voice, speed)

	    def generate_speech_stream(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
	        """Generate speech stream using Kokoro TTS engine

	        Args:
	            text (str): Input text to synthesize
	            voice (str): Voice ID to use
	            speed (float): Speech speed multiplier

	        Yields:
	            tuple: (sample_rate, audio_data) pairs for each segment
	        """
	        logger.info(f"Generating speech stream with Kokoro for text length: {len(text)}")

	        # Check if Kokoro is available
	        if not KOKORO_AVAILABLE:
	            logger.warning("Kokoro TTS engine is not available, falling back to dummy TTS")
	            yield from DummyTTS(self.lang_code).generate_speech_stream(text, voice, speed)
	            return

	        # Ensure pipeline is loaded
	        if not self._ensure_pipeline():
	            logger.warning("Failed to load Kokoro pipeline, falling back to dummy TTS")
	            yield from DummyTTS(self.lang_code).generate_speech_stream(text, voice, speed)
	            return

	        try:
	            # Generate speech stream, one chunk per pipeline segment
	            for _, _, audio in self.pipeline(text, voice=voice, speed=speed):
	                yield self._SAMPLE_RATE, audio
	        except Exception as e:
	            # NOTE(review): if the failure happens after some chunks were
	            # already yielded, the fallback below still restarts from the
	            # full text, so the consumer receives both.
	            logger.error(f"Error generating speech stream with Kokoro: {str(e)}", exc_info=True)
	            logger.warning("Kokoro TTS engine failed, falling back to dummy TTS")
	            yield from DummyTTS(self.lang_code).generate_speech_stream(text, voice, speed)