# teachingAssistant/src/domain/models/speech_synthesis_request.py
# Michael Hu
# Migrate existing TTS providers to infrastructure layer
# 1f9c751
"""SpeechSynthesisRequest value object for TTS synthesis requests."""
from dataclasses import dataclass
from typing import Optional
from .text_content import TextContent
from .voice_settings import VoiceSettings
@dataclass(frozen=True)
class SpeechSynthesisRequest:
    """Immutable value object describing a single speech synthesis request.

    The request is validated on construction (see ``_validate``); an
    instance that exists is therefore always internally consistent.

    Attributes:
        text_content: The text to synthesize.
        voice_settings: The voice configuration to synthesize with.
        output_format: Target audio container; one of ``wav``, ``mp3``,
            ``flac`` or ``ogg``. Defaults to ``wav``.
        sample_rate: Requested sample rate in Hz (8000-192000), or ``None``
            to fall back to the default used by ``effective_sample_rate``.
    """

    text_content: TextContent
    voice_settings: VoiceSettings
    output_format: str = 'wav'
    sample_rate: Optional[int] = None

    def __post_init__(self) -> None:
        """Validate the request right after the dataclass ``__init__`` runs."""
        self._validate()

    def _validate(self) -> None:
        """Check every field and the cross-field language constraint.

        Raises:
            TypeError: If a field is not of the expected type.
            ValueError: If the output format is unsupported, the sample rate
                is out of range, or the text and voice languages differ.
        """
        if not isinstance(self.text_content, TextContent):
            raise TypeError("Text must be a TextContent instance")

        if not isinstance(self.voice_settings, VoiceSettings):
            raise TypeError("Voice settings must be a VoiceSettings instance")

        if not isinstance(self.output_format, str):
            raise TypeError("Output format must be a string")

        if self.output_format not in ['wav', 'mp3', 'flac', 'ogg']:
            raise ValueError(f"Unsupported output format: {self.output_format}. Supported formats: wav, mp3, flac, ogg")

        if self.sample_rate is not None:
            if not isinstance(self.sample_rate, int):
                raise TypeError("Sample rate must be an integer")

            # Checked separately from the range so non-positive values get
            # the more specific message.
            if self.sample_rate <= 0:
                raise ValueError("Sample rate must be positive")

            if self.sample_rate < 8000 or self.sample_rate > 192000:
                raise ValueError("Sample rate must be between 8000 and 192000 Hz")

        # Validate that text and voice settings have compatible languages
        if self.text_content.language != self.voice_settings.language:
            raise ValueError(f"Text language ({self.text_content.language}) must match voice language ({self.voice_settings.language})")

    @property
    def estimated_duration_seconds(self) -> float:
        """Estimate the duration of the synthesized speech in seconds.

        Uses a baseline of 175 words per minute (the middle of the usual
        150-200 wpm range), scaled by the voice speed.
        """
        # A faster voice speaks MORE words per minute, so the baseline is
        # multiplied by the speed (the previous division inverted the effect:
        # doubling the speed doubled the estimate instead of halving it).
        # NOTE(review): assumes ``speed`` is a positive rate multiplier where
        # 1.0 is normal speed - confirm against VoiceSettings.
        words_per_minute = 175 * self.voice_settings.speed
        return (self.text_content.word_count / words_per_minute) * 60

    @property
    def is_long_text(self) -> bool:
        """Return True when the text exceeds 5000 characters."""
        return self.text_content.character_count > 5000

    @property
    def effective_sample_rate(self) -> int:
        """Return the requested sample rate, or 22050 Hz if none was given."""
        return self.sample_rate if self.sample_rate is not None else 22050

    def with_output_format(self, output_format: str) -> 'SpeechSynthesisRequest':
        """Return a copy of this request with a different output format.

        The copy goes through the constructor, so it is validated again.
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=self.voice_settings,
            output_format=output_format,
            sample_rate=self.sample_rate
        )

    def with_sample_rate(self, sample_rate: Optional[int]) -> 'SpeechSynthesisRequest':
        """Return a copy of this request with a different sample rate.

        Passing ``None`` clears the explicit rate. The copy goes through the
        constructor, so it is validated again.
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=self.voice_settings,
            output_format=self.output_format,
            sample_rate=sample_rate
        )

    def with_voice_settings(self, voice_settings: VoiceSettings) -> 'SpeechSynthesisRequest':
        """Return a copy of this request with different voice settings.

        The copy goes through the constructor, so it is validated again
        (including the text/voice language match).
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=voice_settings,
            output_format=self.output_format,
            sample_rate=self.sample_rate
        )