# teachingAssistant/utils/tts_base.py
# Last commit: "remove fallback to Dummy TTS" (e22e786) by Michael Hu
import logging
import os
import time
import uuid
from abc import ABC, abstractmethod
from typing import Optional, Generator, Tuple, List

import numpy as np
import soundfile as sf
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class TTSBase(ABC):
    """Abstract base class for all TTS engines.

    Concrete engines must implement ``generate_speech`` (synthesize to a
    file) and ``generate_speech_stream`` (synthesize as a stream of audio
    segments). The base class also provides ``_generate_output_path`` for
    building a fresh output file path.
    """

    def __init__(self, lang_code: str = 'z'):
        """Initialize the TTS engine.

        Args:
            lang_code (str): Language code for the engine; stored as
                ``self.lang_code``.
        """
        self.lang_code = lang_code

    @abstractmethod
    def generate_speech(self, text: str, voice: str = 'default', speed: float = 1.0) -> Optional[str]:
        """Generate speech from text.

        Args:
            text (str): Input text to synthesize.
            voice (str): Voice ID to use.
            speed (float): Speech speed multiplier.

        Returns:
            Optional[str]: Path to the generated audio file, or None if
            generation fails.
        """
        pass

    @abstractmethod
    def generate_speech_stream(self, text: str, voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
        """Generate a speech stream from text.

        Args:
            text (str): Input text to synthesize.
            voice (str): Voice ID to use.
            speed (float): Speech speed multiplier.

        Yields:
            tuple: ``(sample_rate, audio_data)`` pairs, one per segment.
        """
        pass

    def _generate_output_path(self, prefix: str = "tts", extension: str = "wav") -> str:
        """Generate a unique output path for an audio file.

        The file is placed in an ``output`` directory under the current
        working directory; the directory is created if it does not exist.
        The file itself is not created.

        Args:
            prefix (str): Prefix for the filename.
            extension (str): File extension, without the leading dot.

        Returns:
            str: Path of the form
            ``<cwd>/output/<prefix>_<millis>_<random hex>.<extension>``.
        """
        timestamp = int(time.time() * 1000)
        # A millisecond timestamp alone is not unique: two calls within the
        # same millisecond would return the same path and one output would
        # overwrite the other. A short random token removes that collision.
        unique_token = uuid.uuid4().hex[:8]
        filename = f"{prefix}_{timestamp}_{unique_token}.{extension}"

        output_dir = os.path.join(os.getcwd(), "output")
        os.makedirs(output_dir, exist_ok=True)
        return os.path.join(output_dir, filename)