Spaces:
Running
Running
import logging
import os
import time
import uuid
from typing import Optional, Tuple, Generator

import numpy as np
import soundfile as sf
# Output sample rate (Hz) assumed for generated audio
DEFAULT_SAMPLE_RATE = 24000

# Shared Kokoro Space client; stays None until _get_client() builds it
_client = None

# Logging setup: INFO level on the root logger, plus a module-scoped logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _get_client():
    """Return the shared Kokoro Space client, building it on first use.

    The client is cached in the module-level ``_client`` variable, so the
    ``gradio_client`` import and the ``Client`` construction only happen on
    the first call; later calls return the cached instance.

    Returns:
        The cached ``gradio_client.Client`` created for "Remsky/Kokoro-TTS-Zero".

    Raises:
        ImportError: Re-raised after logging if an import fails while loading.
        Exception: Any other error raised while loading is logged and re-raised.
    """
    global _client

    # Fast path: the client was already created by an earlier call.
    if _client is not None:
        return _client

    logger.info("Loading Kokoro Space client...")
    try:
        # Deferred import so gradio_client is only required once TTS is used.
        from gradio_client import Client

        logger.info("Initializing Kokoro Space client")
        _client = Client("Remsky/Kokoro-TTS-Zero")

        logger.info("Kokoro Space client loaded successfully")
        client_kind = type(_client).__name__
        logger.info(f"Client type: {client_kind}")
    except ImportError as missing_dep:
        logger.error(f"Import error loading Kokoro Space client: {missing_dep}")
        logger.error("This may indicate missing dependencies")
        raise
    except Exception as load_err:
        logger.error(f"Error loading Kokoro Space client: {load_err}", exc_info=True)
        failure_kind = type(load_err).__name__
        logger.error(f"Error type: {failure_kind}")
        raise

    return _client
def generate_speech(text: str, language: str = "z", voice: str = "af_nova", speed: float = 1.0) -> str:
    """Synthesize speech through the Kokoro Space engine (legacy entry point).

    Kept for backward compatibility; new code should use the factory pattern
    implementation directly. If the Kokoro Space engine raises, the error is
    logged and the request is handed to ``DummyTTSEngine`` instead.

    Args:
        text (str): Input text to synthesize
        language (str): Language code, forwarded to the ``KokoroSpaceTTSEngine`` constructor
        voice (str): Voice ID to use (e.g., 'af_nova', 'af_bella', etc.)
        speed (float): Speech speed multiplier (documented range 0.5 to 2.0; not validated here)

    Returns:
        str: Path to the generated audio file
    """
    logger.info(f"Legacy Kokoro Space generate_speech called with text length: {len(text)}")

    # Imported here rather than at module level; a failure of this import
    # is not covered by the fallback below and propagates to the caller.
    from utils.tts_engines import KokoroSpaceTTSEngine

    try:
        # Delegate to the engine class that backs the factory pattern
        engine = KokoroSpaceTTSEngine(language)
        audio_path = engine.generate_speech(text, voice, speed)
    except Exception as err:
        logger.error(f"Error in legacy Kokoro Space generate_speech: {str(err)}", exc_info=True)
        # Fall back to the dummy engine; it only receives the text
        from utils.tts_base import DummyTTSEngine

        audio_path = DummyTTSEngine().generate_speech(text)

    return audio_path
def _create_output_dir() -> str:
    """Ensure the audio output directory exists and return its path.

    The directory is the relative path "temp/outputs"; it is created
    (including parents) if missing and left untouched if already present.

    Returns:
        str: Path to the output directory
    """
    target_dir = "temp/outputs"
    # exist_ok=True makes repeated calls safe
    os.makedirs(target_dir, exist_ok=True)
    return target_dir
def _generate_output_path(prefix: str = "output") -> str:
    """Generate a unique output path for audio files

    The filename is ``{prefix}_{timestamp}_{suffix}.wav`` inside the output
    directory, where ``timestamp`` is the current Unix time in whole seconds
    and ``suffix`` is 8 random hex characters. The random suffix is what
    keeps two paths requested within the same second (with the same prefix)
    from colliding and overwriting each other's audio.

    Args:
        prefix (str): Prefix for the output filename

    Returns:
        str: Path to the output file (the file itself is not created)
    """
    output_dir = _create_output_dir()
    timestamp = int(time.time())
    # Whole-second timestamps repeat for rapid successive calls, so add a
    # short random component to make the name unique in practice.
    unique_suffix = uuid.uuid4().hex[:8]
    return f"{output_dir}/{prefix}_{timestamp}_{unique_suffix}.wav"