Spaces:
Build error
Build error
| """Dummy TTS provider implementation for testing and fallback.""" | |
| import logging | |
| import numpy as np | |
| import soundfile as sf | |
| import io | |
| from typing import Iterator, TYPE_CHECKING | |
| if TYPE_CHECKING: | |
| from ...domain.models.speech_synthesis_request import SpeechSynthesisRequest | |
| from ..base.tts_provider_base import TTSProviderBase | |
| from ...domain.exceptions import SpeechSynthesisException | |
| logger = logging.getLogger(__name__) | |
class DummyTTSProvider(TTSProviderBase):
    """Dummy TTS provider that generates sine wave audio for testing.

    No speech is synthesised. The request text only determines how long the
    generated tone is, and the requested voice selects the tone:

    * ``'male'``   -> 220 Hz sine
    * ``'female'`` -> 660 Hz sine
    * ``'robot'``  -> 440 Hz square wave
    * any other voice id -> 440 Hz sine
    """

    # Sample rate (Hz) of all generated audio.
    _SAMPLE_RATE = 24000
    # Rough speaking rate used to turn a character count into seconds.
    _CHARS_PER_SECOND = 20
    # Upper bound (seconds) on the audio generated for a single request.
    _MAX_DURATION_SECONDS = 10
    # Length (seconds) of each chunk yielded by the streaming API.
    _STREAM_CHUNK_SECONDS = 1.0
    # Peak amplitude of the generated waveform.
    _AMPLITUDE = 0.5
    # Tone used for any voice id that has no entry in the table below.
    _DEFAULT_FREQUENCY_HZ = 440
    # Tone frequency per voice id.
    _VOICE_FREQUENCIES_HZ = {'male': 220, 'female': 660, 'robot': 440}
    # Voices rendered as a square wave (sign of the sine) instead of a sine.
    _SQUARE_WAVE_VOICES = frozenset({'robot'})

    def __init__(self):
        """Initialize the Dummy TTS provider."""
        super().__init__(
            provider_name="Dummy",
            supported_languages=['en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh'],
        )

    def is_available(self) -> bool:
        """Dummy TTS is always available."""
        return True

    def get_available_voices(self) -> list[str]:
        """Get available voices for Dummy TTS."""
        return ['default', 'male', 'female', 'robot']

    def _generate_audio(self, request: 'SpeechSynthesisRequest') -> tuple[bytes, int]:
        """Generate the dummy tone for a whole request.

        Args:
            request: Synthesis request; ``text_content.text``,
                ``voice_settings.speed`` and ``voice_settings.voice_id``
                are read from it.

        Returns:
            ``(wav_bytes, sample_rate)`` where ``wav_bytes`` is a complete
            WAV file.

        Any exception is forwarded to ``self._handle_provider_error``, which
        is defined outside this class.
        NOTE(review): if that handler returns instead of raising, this method
        falls through and returns ``None`` - confirm the handler always raises.
        """
        try:
            text = request.text_content.text
            speed = request.voice_settings.speed
            voice = request.voice_settings.voice_id

            duration = self._estimate_duration(text, speed)
            audio = self._synthesize_waveform(voice, duration)
            audio_bytes = self._numpy_to_bytes(audio, self._SAMPLE_RATE)

            logger.info("Generated dummy audio: duration=%.2fs, voice=%s", duration, voice)
            return audio_bytes, self._SAMPLE_RATE
        except Exception as e:
            self._handle_provider_error(e, "dummy audio generation")

    def _generate_audio_stream(self, request: 'SpeechSynthesisRequest') -> Iterator[tuple[bytes, int, bool]]:
        """Generate the dummy tone as a sequence of chunks.

        Args:
            request: Synthesis request; the same fields are read as in
                ``_generate_audio``.

        Yields:
            ``(wav_bytes, sample_rate, is_final)`` tuples. Every chunk is
            encoded as its own complete WAV file and ``is_final`` is ``True``
            only for the last chunk.

        Because this is a generator, nothing runs (and no error is reported)
        until the first chunk is requested. Any exception is forwarded to
        ``self._handle_provider_error``.
        NOTE(review): empty text gives a zero duration, so no chunk at all is
        yielded (not even a final one) - confirm consumers tolerate that.
        """
        try:
            text = request.text_content.text
            speed = request.voice_settings.speed
            voice = request.voice_settings.voice_id

            chunk_duration = self._STREAM_CHUNK_SECONDS
            total_duration = self._estimate_duration(text, speed)
            chunks_count = int(np.ceil(total_duration / chunk_duration))

            for chunk_idx in range(chunks_count):
                start_time = chunk_idx * chunk_duration
                end_time = min((chunk_idx + 1) * chunk_duration, total_duration)
                actual_duration = end_time - start_time
                if actual_duration <= 0:
                    break

                # Synthesise at the chunk's real position on the timeline so
                # the waveform stays phase-continuous across chunk boundaries
                # for any chunk length or tone frequency.
                audio = self._synthesize_waveform(voice, actual_duration, start_time)
                audio_bytes = self._numpy_to_bytes(audio, self._SAMPLE_RATE)

                is_final = (chunk_idx == chunks_count - 1)
                yield audio_bytes, self._SAMPLE_RATE, is_final
        except Exception as e:
            self._handle_provider_error(e, "dummy streaming audio generation")

    def _estimate_duration(self, text: str, speed: float) -> float:
        """Return the audio duration in seconds for ``text`` at ``speed``.

        The duration is ``len(text) / (_CHARS_PER_SECOND * speed)`` capped at
        ``_MAX_DURATION_SECONDS``.

        Raises:
            ValueError: If ``speed`` is not a positive number.
        """
        # ``not speed > 0`` (rather than ``speed <= 0``) also rejects NaN.
        # Without this check a zero speed divides by zero and a negative
        # speed makes the streaming path silently yield nothing.
        if not speed > 0:
            raise ValueError(f"speed must be a positive number, got {speed!r}")
        return min(len(text) / (self._CHARS_PER_SECOND * speed), self._MAX_DURATION_SECONDS)

    def _synthesize_waveform(self, voice: str, duration: float, start_time: float = 0.0) -> np.ndarray:
        """Build the tone for ``voice`` covering ``duration`` seconds.

        Args:
            voice: Voice id selecting the frequency and wave shape.
            duration: Length of the segment in seconds.
            start_time: Offset of the segment on the overall timeline, in
                seconds; determines the starting phase of the tone.

        Returns:
            Array of ``int(_SAMPLE_RATE * duration)`` samples with peak
            amplitude ``_AMPLITUDE``.
        """
        sample_count = int(self._SAMPLE_RATE * duration)
        t = np.linspace(start_time, start_time + duration, sample_count, endpoint=False)

        frequency = self._VOICE_FREQUENCIES_HZ.get(voice, self._DEFAULT_FREQUENCY_HZ)
        wave = np.sin(2 * np.pi * frequency * t)
        if voice in self._SQUARE_WAVE_VOICES:
            # The sign of a sine is a square wave of the same frequency.
            wave = np.sign(wave)
        return self._AMPLITUDE * wave

    def _numpy_to_bytes(self, audio_array: np.ndarray, sample_rate: int) -> bytes:
        """Encode a numpy audio array as an in-memory WAV file.

        Args:
            audio_array: Samples to encode.
            sample_rate: Sample rate in Hz written to the WAV header.

        Returns:
            The bytes of the complete WAV file.

        Raises:
            SpeechSynthesisException: If encoding fails; the original error
                is chained as the cause.
        """
        try:
            with io.BytesIO() as buffer:
                sf.write(buffer, audio_array, sample_rate, format='WAV')
                # getvalue() returns the whole buffer regardless of the
                # current stream position, so no seek(0) is needed.
                return buffer.getvalue()
        except Exception as e:
            raise SpeechSynthesisException(f"Failed to convert audio to bytes: {str(e)}") from e