Spaces:

DroolingPanda
/

teachingAssistant

Build error

File size: 4,003 Bytes

"""SpeechSynthesisRequest value object for TTS synthesis requests."""

from dataclasses import dataclass
from typing import Optional
from .text_content import TextContent
from .voice_settings import VoiceSettings


@dataclass(frozen=True)
class SpeechSynthesisRequest:
    """Immutable value object describing a single TTS synthesis request.

    Attributes:
        text_content: The text to synthesize (a ``TextContent`` instance).
        voice_settings: The voice configuration (a ``VoiceSettings`` instance).
        output_format: Audio container/codec name; one of ``wav``, ``mp3``,
            ``flac`` or ``ogg``. Defaults to ``'wav'``.
        sample_rate: Requested sample rate in Hz (8000-192000), or ``None``
            to fall back to the default exposed by ``effective_sample_rate``.

    Raises:
        TypeError: If any field has the wrong type.
        ValueError: If the output format is unsupported, the sample rate is
            out of range, or the text and voice languages differ.
    """

    # Un-annotated class attributes are constants, not dataclass fields.
    _SUPPORTED_FORMATS = ('wav', 'mp3', 'flac', 'ogg')
    _MIN_SAMPLE_RATE = 8000
    _MAX_SAMPLE_RATE = 192000
    _DEFAULT_SAMPLE_RATE = 22050
    # Middle of the 150-200 words-per-minute range used as a rough baseline.
    _BASE_WORDS_PER_MINUTE = 175
    _LONG_TEXT_CHARACTER_THRESHOLD = 5000

    text_content: TextContent
    voice_settings: VoiceSettings
    output_format: str = 'wav'
    sample_rate: Optional[int] = None

    def __post_init__(self):
        """Validate speech synthesis request after initialization."""
        self._validate()

    def _validate(self):
        """Validate speech synthesis request properties.

        Raises:
            TypeError: If ``text_content``, ``voice_settings``,
                ``output_format`` or ``sample_rate`` has the wrong type.
            ValueError: If ``output_format`` is not supported, if
                ``sample_rate`` is non-positive or outside 8000-192000 Hz,
                or if the text and voice languages do not match.
        """
        if not isinstance(self.text_content, TextContent):
            raise TypeError("Text must be a TextContent instance")

        if not isinstance(self.voice_settings, VoiceSettings):
            raise TypeError("Voice settings must be a VoiceSettings instance")

        if not isinstance(self.output_format, str):
            raise TypeError("Output format must be a string")

        if self.output_format not in self._SUPPORTED_FORMATS:
            supported = ', '.join(self._SUPPORTED_FORMATS)
            raise ValueError(f"Unsupported output format: {self.output_format}. Supported formats: {supported}")

        if self.sample_rate is not None:
            if not isinstance(self.sample_rate, int):
                raise TypeError("Sample rate must be an integer")

            # Checked separately from the range so that zero/negative values
            # get the more specific message.
            if self.sample_rate <= 0:
                raise ValueError("Sample rate must be positive")

            if not (self._MIN_SAMPLE_RATE <= self.sample_rate <= self._MAX_SAMPLE_RATE):
                raise ValueError("Sample rate must be between 8000 and 192000 Hz")

        # Validate that text and voice settings have compatible languages
        if self.text_content.language != self.voice_settings.language:
            raise ValueError(f"Text language ({self.text_content.language}) must match voice language ({self.voice_settings.language})")

    @property
    def estimated_duration_seconds(self) -> float:
        """Estimate the duration of synthesized speech in seconds.

        Uses a rough baseline of 175 words per minute scaled by the voice
        speed, so a faster voice yields a shorter estimate.

        NOTE(review): treats ``voice_settings.speed`` as a rate multiplier
        (larger = faster) and assumes it is strictly positive -- confirm
        against ``VoiceSettings``.
        """
        # Speaking faster means MORE words per minute, hence multiply.
        words_per_minute = self._BASE_WORDS_PER_MINUTE * self.voice_settings.speed
        return (self.text_content.word_count / words_per_minute) * 60

    @property
    def is_long_text(self) -> bool:
        """Return True when the text exceeds 5000 characters."""
        return self.text_content.character_count > self._LONG_TEXT_CHARACTER_THRESHOLD

    @property
    def effective_sample_rate(self) -> int:
        """Get the effective sample rate (default 22050 if not specified)."""
        if self.sample_rate is not None:
            return self.sample_rate
        return self._DEFAULT_SAMPLE_RATE

    def with_output_format(self, output_format: str) -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with different output format.

        The new instance is re-validated, so an unsupported format raises.
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=self.voice_settings,
            output_format=output_format,
            sample_rate=self.sample_rate
        )

    def with_sample_rate(self, sample_rate: Optional[int]) -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with different sample rate.

        Passing ``None`` clears the explicit rate. The new instance is
        re-validated, so an out-of-range rate raises.
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=self.voice_settings,
            output_format=self.output_format,
            sample_rate=sample_rate
        )

    def with_voice_settings(self, voice_settings: VoiceSettings) -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with different voice settings.

        The new instance is re-validated, so a voice whose language differs
        from the text language raises.
        """
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=voice_settings,
            output_format=self.output_format,
            sample_rate=self.sample_rate
        )