Spaces:
Build error
Build error
"""SpeechSynthesisRequest value object for TTS synthesis requests.""" | |
from dataclasses import dataclass | |
from typing import Optional | |
from .text_content import TextContent | |
from .voice_settings import VoiceSettings | |
@dataclass(frozen=True)
class SpeechSynthesisRequest:
    """Value object representing a speech synthesis (TTS) request.

    Instances are immutable; use the ``with_*`` methods to derive a modified
    copy. All fields are validated on construction.

    Attributes:
        text_content: The text to synthesize.
        voice_settings: The voice configuration to synthesize with.
        output_format: Audio container/codec name; one of ``wav``, ``mp3``,
            ``flac`` or ``ogg``. Defaults to ``'wav'``.
        sample_rate: Requested sample rate in Hz (8000-192000), or ``None``
            to use the default reported by :meth:`effective_sample_rate`.

    Raises:
        TypeError: If a field has the wrong type.
        ValueError: If the output format is unsupported, the sample rate is
            out of range, or the text and voice languages differ.
    """

    # Un-annotated on purpose so the dataclass machinery does not treat
    # these constants as instance fields.
    _SUPPORTED_FORMATS = ('wav', 'mp3', 'flac', 'ogg')
    _DEFAULT_SAMPLE_RATE = 22050

    text_content: TextContent
    voice_settings: VoiceSettings
    output_format: str = 'wav'
    sample_rate: Optional[int] = None

    def __post_init__(self):
        """Validate speech synthesis request after initialization."""
        self._validate()

    def _validate(self):
        """Validate speech synthesis request properties.

        Raises:
            TypeError: If ``text_content``, ``voice_settings``,
                ``output_format`` or ``sample_rate`` has the wrong type.
            ValueError: If ``output_format`` is not supported, if
                ``sample_rate`` is non-positive or outside 8000-192000 Hz,
                or if the text and voice languages do not match.
        """
        if not isinstance(self.text_content, TextContent):
            raise TypeError("Text must be a TextContent instance")

        if not isinstance(self.voice_settings, VoiceSettings):
            raise TypeError("Voice settings must be a VoiceSettings instance")

        if not isinstance(self.output_format, str):
            raise TypeError("Output format must be a string")

        if self.output_format not in self._SUPPORTED_FORMATS:
            supported = ', '.join(self._SUPPORTED_FORMATS)
            raise ValueError(
                f"Unsupported output format: {self.output_format}. "
                f"Supported formats: {supported}"
            )

        if self.sample_rate is not None:
            if not isinstance(self.sample_rate, int):
                raise TypeError("Sample rate must be an integer")
            # Checked separately from the range test so that zero/negative
            # values get the more specific message.
            if self.sample_rate <= 0:
                raise ValueError("Sample rate must be positive")
            if self.sample_rate < 8000 or self.sample_rate > 192000:
                raise ValueError("Sample rate must be between 8000 and 192000 Hz")

        # Validate that text and voice settings have compatible languages
        if self.text_content.language != self.voice_settings.language:
            raise ValueError(
                f"Text language ({self.text_content.language}) must match "
                f"voice language ({self.voice_settings.language})"
            )

    def estimated_duration_seconds(self) -> float:
        """Estimate the duration of the synthesized speech in seconds.

        Uses a baseline of 175 words per minute (typical speech is roughly
        150-200 wpm), scaled by the voice speed setting.

        Returns:
            The estimated duration in seconds.
        """
        # A faster voice speaks MORE words per minute, so the baseline is
        # multiplied (not divided) by the speed; higher speed therefore
        # yields a shorter estimate.
        # NOTE(review): assumes voice_settings.speed is a rate multiplier
        # where larger means faster - confirm against VoiceSettings.
        words_per_minute = 175 * self.voice_settings.speed
        return (self.text_content.word_count / words_per_minute) * 60

    def is_long_text(self) -> bool:
        """Return True if the text exceeds 5000 characters."""
        return self.text_content.character_count > 5000

    def effective_sample_rate(self) -> int:
        """Return the requested sample rate, or 22050 Hz if none was given."""
        if self.sample_rate is not None:
            return self.sample_rate
        return self._DEFAULT_SAMPLE_RATE

    def with_output_format(self, output_format: str) -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with a different output format.

        The new request is validated on construction, so an unsupported
        format raises ``ValueError`` / ``TypeError``.
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=self.voice_settings,
            output_format=output_format,
            sample_rate=self.sample_rate,
        )

    def with_sample_rate(self, sample_rate: Optional[int]) -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with a different sample rate.

        Pass ``None`` to fall back to the default sample rate. The new
        request is validated on construction.
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=self.voice_settings,
            output_format=self.output_format,
            sample_rate=sample_rate,
        )

    def with_voice_settings(self, voice_settings: VoiceSettings) -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with different voice settings.

        The new request is validated on construction, so the voice language
        must still match the text language.
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=voice_settings,
            output_format=self.output_format,
            sample_rate=self.sample_rate,
        )